Delaying new DRED data when just out of silence

We don't need redundancy for the first active frame since we already have the main Opus payload.
Add dred_end return value to opus_dred_parse()
2024-02-06 22:13:48 -05:00 · 2024-02-06 20:53:51 -05:00 · 2024-02-06 20:53:50 -05:00 · 2024-02-06 20:53:50 -05:00 · 2024-02-06 20:53:50 -05:00 · 2024-02-06 20:52:21 -05:00
439 changed files with 47437 additions and 2866 deletions
--- a/.appveyor.yml
+++ b/.appveyor.yml
@ -1,37 +0,0 @@
-image: Visual Studio 2015
-configuration:
- Debug
- DebugDLL
- DebugDLL_fixed
- Release
- ReleaseDLL
- ReleaseDLL_fixed
-
-platform:
- Win32
- x64
-
-environment:
-  api_key:
-    secure: kR3Ac0NjGwFnTmXdFrR8d6VXjdk5F7L4F/BilC4nvaM=
-
-build:
-  project: win32\VS2015\opus.sln
-  parallel: true
-  verbosity: minimal
-
-after_build:
- cd %APPVEYOR_BUILD_FOLDER%
- 7z a opus.zip win32\VS2015\%PLATFORM%\%CONFIGURATION%\opus.??? include\*.h
-
-test_script:
- cd %APPVEYOR_BUILD_FOLDER%\win32\VS2015\%PLATFORM%\%CONFIGURATION%
- test_opus_api.exe
- test_opus_decode.exe
- test_opus_encode.exe
-
-artifacts:
- path: opus.zip
-
-on_success:
- ps: if ($env:api_key -and "$env:configuration/$env:platform" -eq "ReleaseDLL_fixed/x64") { Start-AppveyorBuild -ApiKey $env:api_key -ProjectSlug 'opus-tools' }
--- a/.github/workflows/autotools.yml
+++ b/.github/workflows/autotools.yml
@ -0,0 +1,51 @@
+name: Autotools
+
+on: [push, pull_request]
+
+jobs:
+
+  AutoMakeBuild:  # job key indented by two spaces, like the job keys in the other workflows
+    name: AutoMake/${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+        - {
+            name: "Linux/GCC",
+            os: ubuntu-latest,
+            compiler: gcc,
+            automakeconfig:
+          }
+        - {
+            name: "Linux/GCC/EnableAssertions",
+            os: ubuntu-latest,
+            compiler: gcc,
+            buildconfig: --enable-assertions
+          }
+        - {
+            name: "Linux/GCC/EnableCustomModes",
+            os: ubuntu-latest,
+            compiler: gcc,
+            buildconfig: --enable-assertions --enable-custom-modes
+          }
+        - {
+            name: "Linux/GCC/EnableDNN",
+            os: ubuntu-latest,
+            compiler: gcc,
+            buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce
+          }
+    steps:
+      - uses: actions/checkout@v3
+        # No AutoMake on Mac so let's install it
+      - name: Install AutoConf, AutoMake and LibTool on MacOSX
+        if: matrix.config.os == 'macos-latest'
+        run: brew install autoconf automake libtool
+      - name: Autogen
+        run: CC=${{ matrix.config.compiler }} ./autogen.sh
+      - name: Configure
+        run: CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx ${{ matrix.config.buildconfig }}
+      - name: Build
+        run: make -j 2
+      - name: Test
+        run: make check -j 2
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@ -0,0 +1,252 @@
+name: CMake
+
+on: [push, pull_request]
+
+jobs:
+  CMakeVersionTest:
+    name: Test build with CMake 3.1.0
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Download models
+        run: ./autogen.sh
+      - name: Install CMake 3.1
+        run: |
+          curl -sL https://github.com/Kitware/CMake/releases/download/v3.1.0/cmake-3.1.0-Linux-x86_64.sh -o cmakeinstall.sh
+          chmod +x cmakeinstall.sh
+          sudo ./cmakeinstall.sh --prefix=/usr/local --exclude-subdir
+          rm cmakeinstall.sh
+          sudo apt-get install libidn11
+      - name: Create Work Dir
+        run: mkdir build
+      - name: Configure
+        working-directory: ./build
+        run: cmake .. -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON
+      - name: Build
+        working-directory: ./build
+        run: make -j 2 -s
+      - name: Test
+        working-directory: ./build
+        run: ctest -j 2
+
+  CMakeMINGW:
+    name: CMake MINGW
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Download models
+        run: ./autogen.sh
+      - name: Install MINGW
+        run: sudo apt-get install -y mingw-w64
+      - name: Create Work Dir
+        run: mkdir build
+      - name: Configure
+        working-directory: ./build
+        run: cmake .. -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DCMAKE_SYSTEM_NAME=Windows -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc
+      - name: Build
+        working-directory: ./build
+        run: cmake --build . -j 2 --config Release --target package
+
+  CMakeBuild:
+    name: CMake/${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+        - {
+            name: "Android/So/ARMv8/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DBUILD_SHARED_LIBS=ON"
+          }
+        # TODO: Android ARMv7
+        # - {
+        #     name: "Android/So/ARMv7/Release",
+        #     os: ubuntu-latest,
+        #     config: Release,
+        #     args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=armeabi-v7a -DBUILD_SHARED_LIBS=ON"
+        #   }
+        - {
+            name: "Android/So/X86/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86 -DBUILD_SHARED_LIBS=ON"
+          }
+        - {
+            name: "Android/So/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DBUILD_SHARED_LIBS=ON"
+          }
+        - {
+            name: "Windows/Dll/X86/Release",
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A Win32 -DBUILD_SHARED_LIBS=ON
+          }
+        - {  # 64-bit DLL build: the Visual Studio platform must be x64, not Win32
+            name: "Windows/Dll/X64/Release",
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A x64 -DBUILD_SHARED_LIBS=ON
+          }
+        - {
+            name: "Windows/Dll/ARMv8/Release",
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A ARM64 -DBUILD_SHARED_LIBS=ON
+          }
+        - {
+            name: "Linux/So/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: -DBUILD_SHARED_LIBS=ON
+          }
+        - {
+            name: "MacOSX/So/X64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -DBUILD_SHARED_LIBS=ON
+          }
+        - {
+            name: "MacOSX/Framework/X64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install
+          }
+          # Use Unix Makefiles for iOS so that Xcode does not complain about signing.
+        - {
+            name: "iOS/Dll/arm64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -G "Unix Makefiles" -DBUILD_SHARED_LIBS=ON -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
+          }
+          # Use Unix Makefiles for iOS so that Xcode does not complain about signing.
+        - {
+            name: "iOS/Framework/arm64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -G "Unix Makefiles" -DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
+          }
+        - {
+            name: "Windows/Lib/X86/Release",
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A Win32
+          }
+        - {
+            name: "Windows/Lib/X64/Release",  # 64-bit static build: platform must be x64
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A x64
+          }
+        - {
+            name: "Windows/Lib/armv8/Release",
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A ARM64
+          }
+        - {
+            name: "Linux/Lib/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: ""
+          }
+        - {
+            name: "MacOSX/Lib/X64/Release",
+            os: macos-latest,
+            config: Release,
+            args: ""
+          }
+          # Use Unix Makefiles for iOS so that Xcode does not complain about signing.
+        - {
+            name: "iOS/Lib/arm64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
+          }
+        - {
+            name: "Android/Lib/ARMv8/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a"
+          }
+        # TODO: Android ARMv7
+        # - {
+        #     name: "Android/Lib/ARMv7/Release",
+        #     os: ubuntu-latest,
+        #     config: Release,
+        #     args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=armeabi-v7a"
+        #   }
+        - {
+            name: "Android/Lib/X86/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86"
+          }
+        - {
+            name: "Android/Lib/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64"
+          }
+        - {
+            name: "CustomModes/Linux/Lib/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DOPUS_CUSTOM_MODES=ON"
+          }
+        - {
+            name: "AssertionsFuzz/Windows/Lib/X64/Release",  # 64-bit build: platform must be x64
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -A x64 -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON
+          }
+        - {
+            name: "AssertionsFuzz/Linux/Lib/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON
+          }
+        - {
+            name: "AssertionsFuzz/MacOSX/Lib/X64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON
+          }
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Install AutoConf, AutoMake and LibTool # Needed for autogen.sh
+        if: matrix.config.os == 'macos-latest'
+        run: brew install autoconf automake libtool
+      - name: Download models Windows
+        if: contains(matrix.config.name, 'Windows')
+        run: .\autogen.bat
+      - name: Download models
+        if: contains(matrix.config.name, 'MacOSX') ||
+            contains(matrix.config.name, 'Linux') ||
+            contains(matrix.config.name, 'Android') ||
+            contains(matrix.config.name, 'iOS')
+        run: ./autogen.sh
+      - name: Create Work Dir
+        run: mkdir build
+      - name: Configure
+        working-directory: ./build
+        run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON
+      - name: Build
+        working-directory: ./build
+        run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package
+      - name: Test
+        if: contains(matrix.config.name, 'Windows') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
+            contains(matrix.config.name, 'MacOSX') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
+            contains(matrix.config.name, 'Linux') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll')
+        working-directory: ./build
+        run: ctest -j 2 -C ${{ matrix.config.config }} --output-on-failure
--- a/.github/workflows/dred.yml
+++ b/.github/workflows/dred.yml
@ -0,0 +1,121 @@
+# Configs that enable Deep Redundancy (DRED)
+name: DRED
+
+on: [push, pull_request]
+
+jobs:
+  CMakeBuild:
+    name: CMake/${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+        - {
+            name: "Windows/Lib/X64/Release",
+            os: windows-latest,
+            config: Release,
+            args: -G "Visual Studio 17 2022" -DOPUS_X86_PRESUME_AVX2=ON
+          }
+        - {
+           name: "Windows/Lib/armv8/Release",
+           os: windows-latest,
+           config: Release,
+           args: -G "Visual Studio 17 2022" -A ARM64
+          }
+        - {
+            name: "Linux/Lib/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: -DOPUS_X86_PRESUME_AVX2=ON
+          }
+        - {
+            name: "Android/Lib/X64/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64"
+          }
+        - {
+            name: "Android/Lib/ARMv8/Release",
+            os: ubuntu-latest,
+            config: Release,
+            args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a"
+          }
+        - {
+            name: "MacOSX/Lib/X64/Release",
+            os: macos-latest,
+            config: Release,
+            # Some of the Macs in GitHub's lab are really old, so they don't support AVX.
+            args: -DOPUS_X86_PRESUME_AVX2=OFF
+          }
+        - {
+            name: "iOS/Lib/arm64/Release",
+            os: macos-latest,
+            config: Release,
+            args: -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
+          }
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Install AutoConf, AutoMake and LibTool # Needed for autogen.sh
+        if: matrix.config.os == 'macos-latest'
+        run: brew install autoconf automake libtool
+      - name: Download models Windows
+        if: contains(matrix.config.name, 'Windows')
+        run: .\autogen.bat
+      - name: Download models
+        if: contains(matrix.config.name, 'MacOSX') ||
+            contains(matrix.config.name, 'Linux') ||
+            contains(matrix.config.name, 'Android') ||
+            contains(matrix.config.name, 'iOS')
+        run: ./autogen.sh
+      - name: Create Work Dir
+        run: mkdir build
+      - name: Configure
+        working-directory: ./build
+        run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON
+      - name: Build
+        working-directory: ./build
+        run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package
+      - name: Test
+        if: contains(matrix.config.name, 'Windows') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
+            contains(matrix.config.name, 'MacOSX') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
+            contains(matrix.config.name, 'Linux') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll')
+        working-directory: ./build
+        run: ctest -j 2 -C ${{ matrix.config.config }} --output-on-failure
+
+  AutoToolsBuild:
+    name: AutoTools/${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+        - {
+            name: "Linux/GCC",
+            os: ubuntu-latest,
+            compiler: gcc,
+            automakeconfig:
+          }
+        - {
+            name: "Linux/Clang",
+            os: ubuntu-latest,
+            compiler: clang,
+            automakeconfig:
+          }
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Install AutoConf, AutoMake and LibTool on MacOSX
+        if: matrix.config.os == 'macos-latest'
+        run: brew install autoconf automake libtool
+      - name: Autogen
+        run: CC=${{ matrix.config.compiler }} ./autogen.sh
+      - name: Configure  # this workflow exists to cover DRED, so enable DRED and OSCE explicitly
+        run: CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
+      - name: Build
+        run: make -j 2
+      - name: Test
+        run: make check -j 2
--- a/.github/workflows/repository.yml
+++ b/.github/workflows/repository.yml
@ -0,0 +1,15 @@
+name: Repository
+
+on: [push, pull_request]
+
+jobs:
+  CheckTrailingWhiteSpaces:
+    name: Check trailing white spaces
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Check Whitespaces
+        run: |
+          git diff-tree --check origin/opus-ng HEAD
--- a/.gitignore
+++ b/.gitignore
@ -49,6 +49,7 @@ tests/*test
 tests/test_opus_api
 tests/test_opus_decode
 tests/test_opus_encode
+tests/test_opus_extensions
 tests/test_opus_padding
 tests/test_opus_projection
 celt/arm/armopts.s
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -56,15 +56,17 @@ ci-fairy:

 autoconf:
  stage: build
+  tags:
+    - avx2
  before_script:
    - apt-get update &&
-      apt-get install -y zip doxygen git automake libtool make
+      apt-get install -y zip doxygen git automake libtool make wget
    - !reference [.snippets, git_prep]
  script:
    - ./autogen.sh
-    - ./configure
-    - make -j4
-    - make distcheck
+    - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
+    - make -j16
+    - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
  cache:
    paths:
      - "src/*.o"
@ -76,32 +78,38 @@ autoconf:

 cmake:
  stage: build
+  tags:
+    - avx2
  before_script:
    - apt-get update &&
-      apt-get install -y cmake ninja-build git
+      apt-get install -y cmake ninja-build git automake libtool wget
    - !reference [.snippets, git_prep]
  script:
+    - ./autogen.sh
    - mkdir build
-    - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_TESTING=ON -DOPUS_BUILD_PROGRAMS=ON
+    - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON
    - cmake --build build
-    - cd build && ctest --output-on-failure
+    - cd build && ctest --output-on-failure -j 16

 .meson:
  image: 'debian:bookworm-slim'
  stage: build
  before_script:
    - apt-get update &&
-      apt-get install -y ninja-build doxygen meson git
+      apt-get install -y ninja-build doxygen meson git automake libtool wget
    - !reference [.snippets, git_prep]
  script:
+    - ./autogen.sh
    - mkdir builddir
-    - meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir ${MESON_EXTRA_ARGS}
+    - meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir ${MESON_EXTRA_ARGS}
    - meson compile -C builddir
    - meson test -C builddir
    #- meson dist --no-tests -C builddir

 meson x86_64:
  extends: '.meson'
+  tags:
+    - avx2
  variables:
    MESON_EXTRA_ARGS: '--werror'

--- a/.gitmodules
+++ b/.gitmodules
--- a/.travis.yml
+++ b/.travis.yml
@ -1,21 +0,0 @@
-language: c
-
-compiler:
-  - gcc
-  - clang
-
-os:
-  - linux
-  - osx
-
-env:
-  - CONFIG=""
-  - CONFIG="--enable-assertions"
-  - CONFIG="--enable-fixed-point"
-  - CONFIG="--enable-fixed-point --disable-float-api"
-  - CONFIG="--enable-fixed-point --enable-assertions"
-
-script:
-  - ./autogen.sh
-  - ./configure $CONFIG
-  - make distcheck
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -71,6 +71,10 @@ set(OPUS_CHECK_ASM_HELP_STR "enable bit-exactness checks between optimized and c
 option(OPUS_CHECK_ASM ${OPUS_CHECK_ASM_HELP_STR} OFF)
 add_feature_info(OPUS_CHECK_ASM OPUS_CHECK_ASM ${OPUS_CHECK_ASM_HELP_STR})

+set(OPUS_DNN_FLOAT_DEBUG_HELP_STR "Run DNN computations as float for debugging purposes.")
+option(OPUS_DNN_FLOAT_DEBUG ${OPUS_DNN_FLOAT_DEBUG_HELP_STR} OFF)
+add_feature_info(OPUS_DNN_FLOAT_DEBUG OPUS_DNN_FLOAT_DEBUG ${OPUS_DNN_FLOAT_DEBUG_HELP_STR})
+
 set(OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR "install pkg-config module.")
 option(OPUS_INSTALL_PKG_CONFIG_MODULE ${OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR} ON)
 add_feature_info(OPUS_INSTALL_PKG_CONFIG_MODULE OPUS_INSTALL_PKG_CONFIG_MODULE ${OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR})
@ -79,6 +83,14 @@ set(OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR "install CMake package config modu
 option(OPUS_INSTALL_CMAKE_CONFIG_MODULE ${OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR} ON)
 add_feature_info(OPUS_INSTALL_CMAKE_CONFIG_MODULE OPUS_INSTALL_CMAKE_CONFIG_MODULE ${OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR})

+set(OPUS_DRED_HELP_STR "enable DRED.")
+option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)
+add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})
+
+set(OPUS_OSCE_HELP_STR "enable OSCE.")
+option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)
+add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})
+
 if(APPLE)
  set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")
  option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)
@ -173,13 +185,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
                         OFF)
  add_feature_info(OPUS_X86_MAY_HAVE_SSE4_1 OPUS_X86_MAY_HAVE_SSE4_1 ${OPUS_X86_MAY_HAVE_SSE4_1_HELP_STR})

-  set(OPUS_X86_MAY_HAVE_AVX_HELP_STR "does runtime check for AVX support.")
-  cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX
-                         ${OPUS_X86_MAY_HAVE_AVX_HELP_STR}
+  set(OPUS_X86_MAY_HAVE_AVX2_HELP_STR "does runtime check for AVX FMA AVX2 support.")
+  cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX2
+                         ${OPUS_X86_MAY_HAVE_AVX2_HELP_STR}
                         ON
-                         "AVX_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
+                         "AVX2_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
                         OFF)
-  add_feature_info(OPUS_X86_MAY_HAVE_AVX OPUS_X86_MAY_HAVE_AVX ${OPUS_X86_MAY_HAVE_AVX_HELP_STR})
+  add_feature_info(OPUS_X86_MAY_HAVE_AVX2 OPUS_X86_MAY_HAVE_AVX2 ${OPUS_X86_MAY_HAVE_AVX2_HELP_STR})

  # PRESUME depends on MAY HAVE, but PRESUME will override runtime detection
  set(OPUS_X86_PRESUME_SSE_HELP_STR "assume target CPU has SSE1 support (override runtime check).")
@ -220,13 +232,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
                         OFF)
  add_feature_info(OPUS_X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1 ${OPUS_X86_PRESUME_SSE4_1_HELP_STR})

-  set(OPUS_X86_PRESUME_AVX_HELP_STR "assume target CPU has AVX support (override runtime check).")
-  cmake_dependent_option(OPUS_X86_PRESUME_AVX
-                         ${OPUS_X86_PRESUME_AVX_HELP_STR}
+  set(OPUS_X86_PRESUME_AVX2_HELP_STR "assume target CPU has AVX FMA AVX2 support (override runtime check).")
+  cmake_dependent_option(OPUS_X86_PRESUME_AVX2
+                         ${OPUS_X86_PRESUME_AVX2_HELP_STR}
                         OFF
-                         "OPUS_X86_MAY_HAVE_AVX; NOT OPUS_DISABLE_INTRINSICS"
+                         "OPUS_X86_MAY_HAVE_AVX2; NOT OPUS_DISABLE_INTRINSICS"
                         OFF)
-  add_feature_info(OPUS_X86_PRESUME_AVX OPUS_X86_PRESUME_AVX ${OPUS_X86_PRESUME_AVX_HELP_STR})
+  add_feature_info(OPUS_X86_PRESUME_AVX2 OPUS_X86_PRESUME_AVX2 ${OPUS_X86_PRESUME_AVX2_HELP_STR})
 endif()

 feature_summary(WHAT ALL)
@ -274,6 +286,7 @@ target_include_directories(
         $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/opus>
  PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
          ${CMAKE_CURRENT_SOURCE_DIR}
+          ${CMAKE_CURRENT_SOURCE_DIR}/dnn
          celt
          silk)

@ -309,6 +322,10 @@ if(OPUS_CHECK_ASM)
  target_compile_definitions(opus PRIVATE OPUS_CHECK_ASM)
 endif()

+if(NOT OPUS_DNN_FLOAT_DEBUG)
+  target_compile_definitions(opus PRIVATE DISABLE_DEBUG_FLOAT)
+endif()
+
 if(OPUS_VAR_ARRAYS)
  target_compile_definitions(opus PRIVATE VAR_ARRAYS)
 elseif(OPUS_USE_ALLOCA)
@ -365,11 +382,33 @@ if(NOT OPUS_ENABLE_FLOAT_API)
  target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
 endif()

+if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)
+  # Only record that a DNN feature is on; the deep_plc sources are added once, under if (OPUS_DNN).
+  set(OPUS_DNN TRUE)
+else()
+  set(OPUS_DNN FALSE)
+endif()
+
+if (OPUS_DNN)
+  add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
+  target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
+endif()
+
+if (OPUS_DRED)
+  add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
+  target_compile_definitions(opus PRIVATE ENABLE_DRED)
+endif()
+
+if (OPUS_OSCE)
+  add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})
+  target_compile_definitions(opus PRIVATE ENABLE_OSCE)
+endif()
+
 if(NOT OPUS_DISABLE_INTRINSICS)
  if(((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR
     (OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR
     (OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR
-     (OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) AND
+     (OPUS_X86_MAY_HAVE_AVX2 AND NOT OPUS_X86_PRESUME_AVX2)) AND
      RUNTIME_CPU_CAPABILITY_DETECTION)
    target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD)
    if(NOT MSVC)
@ -383,6 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
    endif()
    add_sources_group(opus celt ${celt_sources_x86_rtcd})
    add_sources_group(opus silk ${silk_sources_x86_rtcd})
+    if (OPUS_DNN)
+      add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
+    endif()
  endif()

  if(SSE1_SUPPORTED)
@ -404,9 +446,12 @@ if(NOT OPUS_DISABLE_INTRINSICS)
  if(SSE2_SUPPORTED)
    if(OPUS_X86_MAY_HAVE_SSE2)
      add_sources_group(opus celt ${celt_sources_sse2})
+      if (OPUS_DNN)
+        add_sources_group(opus lpcnet ${dnn_sources_sse2})
+      endif()
      target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
      if(NOT MSVC)
-        set_source_files_properties(${celt_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
+        set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
      endif()
    endif()
    if(OPUS_X86_PRESUME_SSE2)
@ -421,9 +466,12 @@ if(NOT OPUS_DISABLE_INTRINSICS)
    if(OPUS_X86_MAY_HAVE_SSE4_1)
      add_sources_group(opus celt ${celt_sources_sse4_1})
      add_sources_group(opus silk ${silk_sources_sse4_1})
+      if (OPUS_DNN)
+        add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
+      endif()
      target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
      if(NOT MSVC)
-        set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
+        set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
      endif()

      if(OPUS_FIXED_POINT)
@ -441,22 +489,37 @@ if(NOT OPUS_DISABLE_INTRINSICS)
    endif()
  endif()

-  if(AVX_SUPPORTED)
-    # mostly placeholder in case of avx intrinsics is added
-    if(OPUS_X86_MAY_HAVE_AVX)
-      target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX)
+  if(AVX2_SUPPORTED)
+    if(OPUS_X86_MAY_HAVE_AVX2)
+      add_sources_group(opus celt ${celt_sources_avx2})
+      add_sources_group(opus silk ${silk_sources_avx2})
+      add_sources_group(opus silk ${silk_sources_float_avx2})
+      if (OPUS_DNN)
+        add_sources_group(opus lpcnet ${dnn_sources_avx2})
+      endif()
+      target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
+      if(MSVC)
+        set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
+      else()
+        set(AVX2_FLAGS "${AVX2_FLAGS} -mavx2 -mfma -mavx")
+      endif()
+      set_source_files_properties(${celt_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
+      set_source_files_properties(${silk_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
+      set_source_files_properties(${silk_sources_float_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
+      set_source_files_properties(${dnn_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
    endif()
-    if(OPUS_X86_PRESUME_AVX)
-      target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX)
+    if(OPUS_X86_PRESUME_AVX2)
+      target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX2)
+      target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
      if(NOT MSVC)
-        target_compile_options(opus PRIVATE -mavx)
+        target_compile_options(opus PRIVATE -mavx2 -mfma -mavx)
      endif()
    endif()
  endif()

  if(MSVC)
-    if(AVX_SUPPORTED AND OPUS_X86_PRESUME_AVX) # on 64 bit and 32 bits
-      add_definitions(/arch:AVX)
+    if(AVX2_SUPPORTED AND OPUS_X86_PRESUME_AVX2) # on 64 bit and 32 bits
+      add_definitions(/arch:AVX2)
    elseif(OPUS_CPU_X86) # if AVX not supported then set SSE flag
      if((SSE4_1_SUPPORTED AND OPUS_X86_PRESUME_SSE4_1)
         OR (SSE2_SUPPORTED AND OPUS_X86_PRESUME_SSE2))
@ -486,6 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)

    add_sources_group(opus celt ${celt_sources_arm_neon_intr})
    add_sources_group(opus silk ${silk_sources_arm_neon_intr})
+    if (OPUS_DNN)
+      add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
+    endif()

    # silk arm neon depends on main_Fix.h
    target_include_directories(opus PRIVATE silk/fixed)
@ -582,6 +648,7 @@ if(OPUS_BUILD_PROGRAMS)
  target_include_directories(opus_demo PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
  target_include_directories(opus_demo PRIVATE silk) # debug.h
  target_include_directories(opus_demo PRIVATE celt) # arch.h
+  target_include_directories(opus_demo PRIVATE dnn)
  target_link_libraries(opus_demo PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
  target_compile_definitions(opus_demo PRIVATE OPUS_BUILD)

@ -589,10 +656,6 @@ if(OPUS_BUILD_PROGRAMS)
  add_executable(opus_compare ${opus_compare_sources})
  target_include_directories(opus_compare PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
  target_link_libraries(opus_compare PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
-  if(MSVC)
-    # move cosmetic warning to level 4 for opus_compare
-    target_compile_options(opus_compare PRIVATE /w44244)
-  endif()
 endif()

 if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)
@ -636,11 +699,32 @@ if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)

  add_executable(test_opus_encode ${test_opus_encode_sources})
  target_include_directories(test_opus_encode
-                            PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt)
+                            PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
  target_link_libraries(test_opus_encode PRIVATE opus)
  target_compile_definitions(test_opus_encode PRIVATE OPUS_BUILD)
  add_test(NAME test_opus_encode COMMAND ${CMAKE_COMMAND}
        -DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_encode>
        -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
        -P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
+
+  add_executable(test_opus_extensions ${test_opus_extensions_sources})
+  target_include_directories(test_opus_extensions
+                            PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
+  target_link_libraries(test_opus_extensions PRIVATE opus)
+  target_compile_definitions(test_opus_extensions PRIVATE OPUS_BUILD)
+  add_test(NAME test_opus_extensions COMMAND ${CMAKE_COMMAND}
+        -DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_extensions>
+        -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
+        -P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
+  if(OPUS_DRED)
+    add_executable(test_opus_dred ${test_opus_dred_sources})
+    target_include_directories(test_opus_dred
+                              PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+    target_link_libraries(test_opus_dred PRIVATE opus)
+    target_compile_definitions(test_opus_dred PRIVATE OPUS_BUILD)
+    add_test(NAME test_opus_dred COMMAND ${CMAKE_COMMAND}
+          -DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_dred>
+          -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
+          -P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
+  endif()
 endif()
--- a/4
+++ b/4
@ -1,7 +1,7 @@
-Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic,
+Copyright 2001-2023 Xiph.Org, Skype Limited, Octasic,
                    Jean-Marc Valin, Timothy B. Terriberry,
                    CSIRO, Gregory Maxwell, Mark Borgerding,
-                    Erik de Castro Lopo
+                    Erik de Castro Lopo, Mozilla, Amazon

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
--- a/Makefile.am
+++ b/Makefile.am
@ -10,12 +10,25 @@ lib_LTLIBRARIES = libopus.la
 DIST_SUBDIRS = doc

 AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
-              -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
+              -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS) \
+              -I$(top_srcdir)/dnn

 include celt_sources.mk
+include lpcnet_sources.mk
 include silk_sources.mk
 include opus_sources.mk

+LPCNET_SOURCES =
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DEEP_PLC_SOURCES)
+endif
+if ENABLE_DRED
+LPCNET_SOURCES += $(DRED_SOURCES)
+endif
+if ENABLE_OSCE
+LPCNET_SOURCES += $(OSCE_SOURCES)
+endif
+
 if FIXED_POINT
 SILK_SOURCES += $(SILK_SOURCES_FIXED)
 if HAVE_SSE4_1
@ -29,6 +42,9 @@ SILK_SOURCES += $(SILK_SOURCES_FLOAT)
 if HAVE_SSE4_1
 SILK_SOURCES += $(SILK_SOURCES_SSE4_1)
 endif
+if HAVE_AVX2
+SILK_SOURCES += $(SILK_SOURCES_FLOAT_AVX2)
+endif
 endif

 if DISABLE_FLOAT_API
@ -40,15 +56,31 @@ if CPU_X86
 if HAVE_RTCD
 CELT_SOURCES += $(CELT_SOURCES_X86_RTCD)
 SILK_SOURCES += $(SILK_SOURCES_X86_RTCD)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_X86_RTCD)
+endif
 endif
 if HAVE_SSE
 CELT_SOURCES += $(CELT_SOURCES_SSE)
 endif
 if HAVE_SSE2
 CELT_SOURCES += $(CELT_SOURCES_SSE2)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_SSE2)
+endif
 endif
 if HAVE_SSE4_1
 CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_SSE4_1)
+endif
+endif
+if HAVE_AVX2
+SILK_SOURCES += $(SILK_SOURCES_AVX2)
+CELT_SOURCES += $(CELT_SOURCES_AVX2)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_AVX2)
+endif
 endif
 endif

@ -56,6 +88,18 @@ if CPU_ARM
 if HAVE_RTCD
 CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD)
 SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_ARM_RTCD)
+endif
+endif
+
+if ENABLE_DEEP_PLC
+if HAVE_ARM_DOTPROD
+LPCNET_SOURCES += $(DNN_SOURCES_DOTPROD)
+endif
+if HAVE_ARM_NEON_INTR
+LPCNET_SOURCES += $(DNN_SOURCES_NEON)
+endif
 endif

 if HAVE_ARM_NEON_INTR
@ -80,10 +124,25 @@ CLEANFILES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) \
 $(CELT_AM_SOURCES_ARM_ASM:.s.in=-gnu.S)

 include celt_headers.mk
+include lpcnet_headers.mk
 include silk_headers.mk
 include opus_headers.mk

-libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
+LPCNET_HEAD =
+if ENABLE_DEEP_PLC
+LPCNET_HEAD += $(DEEP_PLC_HEAD)
+endif
+if ENABLE_DRED
+LPCNET_HEAD += $(DRED_HEAD)
+endif
+if ENABLE_OSCE
+LPCNET_HEAD += $(OSCE_HEAD)
+endif
+if ENABLE_LOSSGEN
+LPCNET_HEAD += $(LOSSGEN_HEAD)
+endif
+
+libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)
 libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
 libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
@ -92,7 +151,7 @@ endif

 pkginclude_HEADERS = include/opus.h include/opus_multistream.h include/opus_types.h include/opus_defines.h include/opus_projection.h

-noinst_HEADERS = $(OPUS_HEAD) $(SILK_HEAD) $(CELT_HEAD)
+noinst_HEADERS = $(OPUS_HEAD) $(SILK_HEAD) $(CELT_HEAD) $(LPCNET_HEAD)

 if EXTRA_PROGRAMS
 noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
@ -109,7 +168,9 @@ noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
                  silk/tests/test_unit_LPC_inv_pred_gain \
                  tests/test_opus_api \
                  tests/test_opus_decode \
+                  tests/test_opus_dred \
                  tests/test_opus_encode \
+                  tests/test_opus_extensions \
                  tests/test_opus_padding \
                  tests/test_opus_projection \
                  trivial_example
@ -126,10 +187,14 @@ TESTS = celt/tests/test_unit_cwrs32 \
        tests/test_opus_api \
        tests/test_opus_decode \
        tests/test_opus_encode \
+        tests/test_opus_extensions \
        tests/test_opus_padding \
        tests/test_opus_projection

 opus_demo_SOURCES = src/opus_demo.c
+if ENABLE_LOSSGEN
+opus_demo_SOURCES += $(LOSSGEN_SOURCES)
+endif

 opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)

@ -155,18 +220,28 @@ tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
 tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
 tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)

+tests_test_opus_dred_SOURCES = tests/test_opus_dred.c tests/test_opus_common.h
+tests_test_opus_dred_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
+
 CELT_OBJ = $(CELT_SOURCES:.c=.lo)
 SILK_OBJ = $(SILK_SOURCES:.c=.lo)
+LPCNET_OBJ = $(LPCNET_SOURCES:.c=.lo)
 OPUS_OBJ = $(OPUS_SOURCES:.c=.lo)

+tests_test_opus_extensions_SOURCES = tests/test_opus_extensions.c tests/test_opus_common.h
+tests_test_opus_extensions_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+if OPUS_ARM_EXTERNAL_ASM
+tests_test_opus_extensions_LDADD += libarmasm.la
+endif
+
 tests_test_opus_projection_SOURCES = tests/test_opus_projection.c tests/test_opus_common.h
-tests_test_opus_projection_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+tests_test_opus_projection_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
 tests_test_opus_projection_LDADD += libarmasm.la
 endif

 silk_tests_test_unit_LPC_inv_pred_gain_SOURCES = silk/tests/test_unit_LPC_inv_pred_gain.c
-silk_tests_test_unit_LPC_inv_pred_gain_LDADD = $(SILK_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+silk_tests_test_unit_LPC_inv_pred_gain_LDADD = $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
 silk_tests_test_unit_LPC_inv_pred_gain_LDADD += libarmasm.la
 endif
@ -175,7 +250,7 @@ celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
 celt_tests_test_unit_cwrs32_LDADD = $(LIBM)

 celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
-celt_tests_test_unit_dft_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+celt_tests_test_unit_dft_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
 celt_tests_test_unit_dft_LDADD += libarmasm.la
 endif
@ -187,19 +262,19 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
 celt_tests_test_unit_laplace_LDADD = $(LIBM)

 celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
-celt_tests_test_unit_mathops_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+celt_tests_test_unit_mathops_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
 celt_tests_test_unit_mathops_LDADD += libarmasm.la
 endif

 celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
-celt_tests_test_unit_mdct_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+celt_tests_test_unit_mdct_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
 celt_tests_test_unit_mdct_LDADD += libarmasm.la
 endif

 celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
-celt_tests_test_unit_rotation_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
+celt_tests_test_unit_rotation_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
 if OPUS_ARM_EXTERNAL_ASM
 celt_tests_test_unit_rotation_LDADD += libarmasm.la
 endif
@ -217,6 +292,24 @@ opus_custom_demo_LDADD = libopus.la $(LIBM)
 endif
 endif

+# DNN helper programs and the DRED test. Everything in this block is only
+# active when EXTRA_PROGRAMS is set.
+if EXTRA_PROGRAMS
+if ENABLE_DEEP_PLC
+# Not installed. lpcnet_demo and dump_data link the LPCNET and CELT objects
+# directly.
+noinst_PROGRAMS += lpcnet_demo dump_data dump_weights_blob
+lpcnet_demo_SOURCES = dnn/lpcnet_demo.c
+lpcnet_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
+
+dump_data_SOURCES = dnn/dump_data.c
+dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
+
+# Built from dnn/write_lpcnet_weights.c with DUMP_BINARY_WEIGHTS defined.
+dump_weights_blob_SOURCES = dnn/write_lpcnet_weights.c
+dump_weights_blob_LDADD = $(LIBM)
+dump_weights_blob_CFLAGS = $(AM_CFLAGS) -DDUMP_BINARY_WEIGHTS
+endif
+# test_opus_dred is only added to TESTS when ENABLE_DRED is set.
+if ENABLE_DRED
+TESTS += tests/test_opus_dred
+endif
+endif
+
 EXTRA_DIST = opus.pc.in \
             opus-uninstalled.pc.in \
             opus.m4 \
@ -249,21 +342,7 @@ EXTRA_DIST = opus.pc.in \
             doc/meson.build \
             tests/run_vectors.sh \
             celt/arm/arm2gnu.pl \
-             celt/arm/celt_pitch_xcorr_arm.s \
-             win32/VS2015/opus.vcxproj \
-             win32/VS2015/test_opus_encode.vcxproj.filters \
-             win32/VS2015/test_opus_encode.vcxproj \
-             win32/VS2015/opus_demo.vcxproj \
-             win32/VS2015/test_opus_api.vcxproj.filters \
-             win32/VS2015/test_opus_api.vcxproj \
-             win32/VS2015/test_opus_decode.vcxproj.filters \
-             win32/VS2015/opus_demo.vcxproj.filters \
-             win32/VS2015/opus.vcxproj.filters \
-             win32/VS2015/test_opus_decode.vcxproj \
-             win32/VS2015/opus.sln \
-             win32/VS2015/common.props \
-             win32/genversion.bat \
-             win32/config.h
+             celt/arm/celt_pitch_xcorr_arm.s

 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = opus.pc
@ -362,21 +441,37 @@ $(SSE_OBJ): CFLAGS += $(OPUS_X86_SSE_CFLAGS)
 endif

 if HAVE_SSE2
-SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo)
+SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo) \
+           $(DNN_SOURCES_SSE2:.c=.lo)
 $(SSE2_OBJ): CFLAGS += $(OPUS_X86_SSE2_CFLAGS)
 endif

 if HAVE_SSE4_1
 SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
+             $(DNN_SOURCES_SSE4_1:.c=.lo) \
             $(SILK_SOURCES_SSE4_1:.c=.lo) \
             $(SILK_SOURCES_FIXED_SSE4_1:.c=.lo)
 $(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
 endif

+if HAVE_AVX2
+AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
+           $(SILK_SOURCES_AVX2:.c=.lo) \
+           $(SILK_SOURCES_FLOAT_AVX2:.c=.lo) \
+           $(DNN_SOURCES_AVX2:.c=.lo)
+$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
+endif
+
 if HAVE_ARM_NEON_INTR
 ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
                    $(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
+                    $(DNN_SOURCES_NEON:.c=.lo) \
                    $(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
 $(ARM_NEON_INTR_OBJ): CFLAGS += \
 $(OPUS_ARM_NEON_INTR_CFLAGS)  $(NE10_CFLAGS)
 endif
+
+if HAVE_ARM_DOTPROD
+ARM_DOTPROD_OBJ = $(DNN_SOURCES_DOTPROD:.c=.lo)
+$(ARM_DOTPROD_OBJ): CFLAGS += $(ARM_DOTPROD_INTR_CFLAGS)
+endif
--- a/Makefile.mips
+++ b/Makefile.mips
@ -102,13 +102,16 @@ TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
 TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
 TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))

+TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
+TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
+
 TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
 TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))

 OPUSCOMPARE_SRCS_C = src/opus_compare.c
 OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))

-TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding
+TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding

 # Rules
 all: lib opus_demo opus_compare $(TESTS)
@ -133,6 +136,9 @@ test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
 test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
 	$(LINK.o.cmdline)

+test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
+	$(LINK.o.cmdline)
+
 test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
 	$(LINK.o.cmdline)

@ -154,8 +160,10 @@ force:
 clean:
 	rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
                test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
-                test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
+                test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
+                test_opus_padding$(EXESUFFIX)
 		$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
-                $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
+                $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
+                $(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)

 .PHONY: all lib clean force check
--- a/Makefile.unix
+++ b/Makefile.unix
@ -100,13 +100,16 @@ TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
 TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
 TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))

+TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
+TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
+
 TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
 TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))

 OPUSCOMPARE_SRCS_C = src/opus_compare.c
 OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))

-TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding
+TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding

 # Rules
 all: lib opus_demo opus_compare $(TESTS)
@ -131,6 +134,9 @@ test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
 test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
 	$(LINK.o.cmdline)

+test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
+	$(LINK.o.cmdline)
+
 test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
 	$(LINK.o.cmdline)

@ -152,8 +158,10 @@ force:
 clean:
 	rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
                test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
-                test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
+                test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
+                test_opus_padding$(EXESUFFIX)
 		$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
-                $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
+                $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
+                $(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)

 .PHONY: all lib clean force check
--- a/6
+++ b/6
@ -77,6 +77,8 @@ On Apple macOS, install Xcode and brew.sh, then in the Terminal enter:
    % ./configure
    % make

+On x86, it's a good idea to use a -march= option that allows the use of AVX2.
+
 3) Install the codec libraries (optional)

    % sudo make install
@ -133,6 +135,10 @@ To run compare the code to these test vectors:
    % tar -zxf opus_testvectors-rfc8251.tar.gz
    % ./tests/run_vectors.sh ./ opus_newvectors 48000

+== Compiling libopus for Windows and alternative build systems ==
+
+See cmake/README.md or meson/README.md.
+
 == Portability notes ==

 This implementation uses floating-point by default but can be compiled to
--- a/autogen.bat
+++ b/autogen.bat
@ -0,0 +1,13 @@
+@echo off
+REM Run this to set up the build system: configure, makefiles, etc.
+
+setlocal enabledelayedexpansion
+
+REM Parse the real autogen.sh script for the model version (the argument passed to dnn/download_model.sh)
+for /F "tokens=2 delims= " %%A in ('findstr "dnn/download_model.sh" autogen.sh') do (
+    set "model=%%A"
+)
+
+call dnn\download_model.bat %model%
+
+echo Updating build configuration files, please wait....
--- a/autogen.sh
+++ b/autogen.sh
@ -9,6 +9,8 @@ set -e
 srcdir=`dirname $0`
 test -n "$srcdir" && cd "$srcdir"

+dnn/download_model.sh ec04a94
+
 echo "Updating build configuration files, please wait...."

 autoreconf -isf
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c
  celt_inner_prod_c,   /* ARMv4 */
  celt_inner_prod_c,   /* EDSP */
  celt_inner_prod_c,   /* Media */
-  celt_inner_prod_neon /* NEON */
+  celt_inner_prod_neon,/* NEON */
+  celt_inner_prod_neon /* DOTPROD */
 };

 void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o
  dual_inner_prod_c,   /* ARMv4 */
  dual_inner_prod_c,   /* EDSP */
  dual_inner_prod_c,   /* Media */
-  dual_inner_prod_neon /* NEON */
+  dual_inner_prod_neon,/* NEON */
+  dual_inner_prod_neon /* DOTPROD */
 };
 # endif

@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
  celt_pitch_xcorr_c,               /* ARMv4 */
  MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
  MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
-  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
+  MAY_HAVE_NEON(celt_pitch_xcorr),  /* NEON */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* DOTPROD */
 };

 #  endif
@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
  celt_pitch_xcorr_c,              /* ARMv4 */
  celt_pitch_xcorr_c,              /* EDSP */
  celt_pitch_xcorr_c,              /* Media */
-  celt_pitch_xcorr_float_neon      /* Neon */
+  celt_pitch_xcorr_float_neon,     /* Neon */
+  celt_pitch_xcorr_float_neon      /* DOTPROD */
 };
 #  endif
 # endif /* FIXED_POINT */
@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
  xcorr_kernel_c,                /* EDSP */
  xcorr_kernel_c,                /* Media */
  xcorr_kernel_neon_fixed,       /* Neon */
+  xcorr_kernel_neon_fixed        /* DOTPROD */
 };

 #endif
@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
   opus_fft_alloc_arch_c,        /* ARMv4 */
   opus_fft_alloc_arch_c,        /* EDSP */
   opus_fft_alloc_arch_c,        /* Media */
-   opus_fft_alloc_arm_neon       /* Neon with NE10 library support */
+   opus_fft_alloc_arm_neon,      /* Neon with NE10 library support */
+   opus_fft_alloc_arm_neon       /* DOTPROD with NE10 library support */
 };

 void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
   opus_fft_free_arch_c,         /* ARMv4 */
   opus_fft_free_arch_c,         /* EDSP */
   opus_fft_free_arch_c,         /* Media */
-   opus_fft_free_arm_neon        /* Neon with NE10 */
+   opus_fft_free_arm_neon,       /* Neon with NE10 */
+   opus_fft_free_arm_neon        /* DOTPROD with NE10 */
 };
 #   endif /* CUSTOM_MODES */

@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
   opus_fft_c,                   /* ARMv4 */
   opus_fft_c,                   /* EDSP */
   opus_fft_c,                   /* Media */
-   opus_fft_neon                 /* Neon with NE10 */
+   opus_fft_neon,                /* Neon with NE10 */
+   opus_fft_neon                 /* DOTPROD with NE10 */
 };

 void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
   opus_ifft_c,                   /* ARMv4 */
   opus_ifft_c,                   /* EDSP */
   opus_ifft_c,                   /* Media */
-   opus_ifft_neon                 /* Neon with NE10 */
+   opus_ifft_neon,                /* Neon with NE10 */
+   opus_ifft_neon                 /* DOTPROD with NE10 */
 };

 void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
   clt_mdct_forward_c,           /* ARMv4 */
   clt_mdct_forward_c,           /* EDSP */
   clt_mdct_forward_c,           /* Media */
-   clt_mdct_forward_neon         /* Neon with NE10 */
+   clt_mdct_forward_neon,        /* Neon with NE10 */
+   clt_mdct_forward_neon         /* DOTPROD with NE10 */
 };

 void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
   clt_mdct_backward_c,           /* ARMv4 */
   clt_mdct_backward_c,           /* EDSP */
   clt_mdct_backward_c,           /* Media */
-   clt_mdct_backward_neon         /* Neon with NE10 */
+   clt_mdct_backward_neon,        /* Neon with NE10 */
+   clt_mdct_backward_neon         /* DOTPROD with NE10 */
 };

 #  endif /* HAVE_ARM_NE10 */
--- a/celt/arm/armcpu.c
+++ b/celt/arm/armcpu.c
@ -43,6 +43,7 @@
 #define OPUS_CPU_ARM_EDSP_FLAG  (1<<OPUS_ARCH_ARM_EDSP)
 #define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
 #define OPUS_CPU_ARM_NEON_FLAG  (1<<OPUS_ARCH_ARM_NEON)
+#define OPUS_CPU_ARM_DOTPROD_FLAG  (1<<OPUS_ARCH_ARM_DOTPROD)

 #if defined(_MSC_VER)
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@ -126,6 +127,14 @@ opus_uint32 opus_cpu_capabilities(void)
        p = strstr(buf, " neon");
        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
          flags |= OPUS_CPU_ARM_NEON_FLAG;
+        p = strstr(buf, " asimd");
+        if(p != NULL && (p[6] == ' ' || p[6] == '\n'))
+          flags |= OPUS_CPU_ARM_NEON_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_EDSP_FLAG;
+#  endif
+#  if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+        p = strstr(buf, " asimddp");
+        if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
+          flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
 #  endif
      }
 # endif
@ -144,6 +153,13 @@ opus_uint32 opus_cpu_capabilities(void)
 # endif
    }

+#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
+    flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+    flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
+#endif
+
    fclose(cpuinfo);
  }
  return flags;
@ -180,7 +196,13 @@ static int opus_select_arch_impl(void)
  }
  arch++;

-  celt_assert(arch == OPUS_ARCH_ARM_NEON);
+  if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_NEON);
+    return arch;
+  }
+  arch++;
+
+  celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
  return arch;
 }

--- a/celt/arm/armcpu.h
+++ b/celt/arm/armcpu.h
@ -46,6 +46,12 @@
 #  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
 # endif

+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+#  define MAY_HAVE_DOTPROD(name) name ## _dotprod
+# else
+#  define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
+# endif
+
 # if defined(OPUS_ARM_PRESUME_EDSP)
 #  define PRESUME_EDSP(name) name ## _edsp
 # else
@ -64,6 +70,12 @@
 #  define PRESUME_NEON(name) PRESUME_MEDIA(name)
 # endif

+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+#  define PRESUME_DOTPROD(name) name ## _dotprod
+# else
+#  define PRESUME_DOTPROD(name) PRESUME_NEON(name)
+# endif
+
 # if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);

@ -71,6 +83,7 @@ int opus_select_arch(void);
 #define OPUS_ARCH_ARM_EDSP  (1)
 #define OPUS_ARCH_ARM_MEDIA (2)
 #define OPUS_ARCH_ARM_NEON  (3)
+#define OPUS_ARCH_ARM_DOTPROD  (4)

 # endif

--- a/celt/arm/celt_neon_intr.c
+++ b/celt/arm/celt_neon_intr.c
@ -97,6 +97,14 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
 }

 #else
+
+#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)
+/* If we can, force the compiler to use an FMA instruction rather than break
+ *    vmlaq_lane_f32() into fmul/fadd. */
+#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)
+#endif
+
+
 /*
 * Function: xcorr_kernel_neon_float
 * ---------------------------------
--- a/celt/arm/pitch_neon_intr.c
+++ b/celt/arm/pitch_neon_intr.c
@ -130,6 +130,13 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus

 /* ========================================================================== */

+#ifdef __ARM_FEATURE_FMA
+/* If we can, force the compiler to use an FMA instruction rather than break
+   vmlaq_f32() into fmul/fadd. */
+#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
+#endif
+
+
 #ifdef OPUS_CHECK_ASM

 /* This part of code simulates floating-point NEON operations. */
--- a/celt/bands.c
+++ b/celt/bands.c
@ -1450,7 +1450,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
   if (encode && resynth)
      lowband_scratch = _lowband_scratch;
   else
-      lowband_scratch = X_+M*eBands[m->nbEBands-1];
+      lowband_scratch = X_+M*eBands[m->effEBands-1];
   ALLOC(X_save, resynth_alloc, celt_norm);
   ALLOC(Y_save, resynth_alloc, celt_norm);
   ALLOC(X_save2, resynth_alloc, celt_norm);
--- a/celt/celt.h
+++ b/celt/celt.h
@ -41,6 +41,7 @@
 #include "entenc.h"
 #include "entdec.h"
 #include "arch.h"
+#include "lpcnet.h"

 #ifdef __cplusplus
 extern "C" {
@ -149,6 +150,13 @@ int celt_decoder_get_size(int channels);

 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);

+int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+#ifdef ENABLE_DEEP_PLC
+      ,LPCNetPLCState *lpcnet
+#endif
+      );
+
 int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);

@ -225,23 +233,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
      const opus_val16 *window, int overlap, int arch);

-#ifdef NON_STATIC_COMB_FILTER_CONST_C
-void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
-                         opus_val16 g10, opus_val16 g11, opus_val16 g12);
-#endif
-
-#ifndef OVERRIDE_COMB_FILTER_CONST
-# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
-    ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
-#endif
-
 void init_caps(const CELTMode *m,int *cap,int LM,int C);

 #ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
 void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
      opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
-      int LM, int downsample, int silence);
+      int LM, int downsample, int silence, int arch);
 #endif

 #ifdef __cplusplus
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@ -51,6 +51,11 @@
 #include "celt_lpc.h"
 #include "vq.h"

+#ifdef ENABLE_DEEP_PLC
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#endif
+
 /* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
   current value corresponds to a pitch of 66.67 Hz. */
@ -69,6 +74,9 @@
 /**********************************************************************/
 #define DECODE_BUFFER_SIZE 2048

+#define PLC_UPDATE_FRAMES 4
+#define PLC_UPDATE_SAMPLES (PLC_UPDATE_FRAMES*FRAME_SIZE)
+
 /** Decoder state
 @brief Decoder state
 */
@ -82,6 +90,7 @@ struct OpusCustomDecoder {
   int start, end;
   int signalling;
   int disable_inv;
+   int complexity;
   int arch;

   /* Everything beyond this point gets cleared on a reset */
@ -98,11 +107,18 @@ struct OpusCustomDecoder {
   opus_val16 postfilter_gain_old;
   int postfilter_tapset;
   int postfilter_tapset_old;
+   int prefilter_and_fold;

   celt_sig preemph_memD[2];

+#ifdef ENABLE_DEEP_PLC
+   opus_int16 plc_pcm[PLC_UPDATE_SAMPLES];
+   int plc_fill;
+   float plc_preemphasis_mem;
+#endif
+
   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
-   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
+   /* opus_val16 lpc[],  Size = channels*CELT_LPC_ORDER */
   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
@ -157,7 +173,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int
 {
   int size = sizeof(struct CELTDecoder)
            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
-            + channels*LPC_ORDER*sizeof(opus_val16)
+            + channels*CELT_LPC_ORDER*sizeof(opus_val16)
            + 4*2*mode->nbEBands*sizeof(opus_val16);
   return size;
 }
@ -499,7 +515,100 @@ static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
   return pitch_index;
 }

-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
+/* Prepares the overlap region of every channel (the samples starting at
+   DECODE_BUFFER_SIZE-N) so that concealed audio can be followed by a regular
+   MDCT frame: the region is pre-filtered with the negated post-filter gains
+   and then folded with the mode window. */
+static void prefilter_and_fold(CELTDecoder * OPUS_RESTRICT st, int N)
+{
+   int ch;
+   int k;
+   int nb_channels;
+   int ov;
+   const OpusCustomMode *mode;
+   VARDECL(opus_val32, filtered);
+   nb_channels = st->channels;
+   ov = st->overlap;
+   mode = st->mode;
+   ALLOC(filtered, ov, opus_val32);
+
+   ch=0; do {
+      /* First sample of this channel's overlap region. */
+      celt_sig *tail = st->_decode_mem + ch*(DECODE_BUFFER_SIZE+ov) + DECODE_BUFFER_SIZE-N;
+
+      /* The decoder re-applies the post-filter after the MDCT overlap of the
+         next frame, so run the pre-filter over the overlap first. The result
+         goes to scratch memory because the fold below reads it from both
+         ends. */
+      comb_filter(filtered, tail,
+         st->postfilter_period_old, st->postfilter_period, ov,
+         -st->postfilter_gain_old, -st->postfilter_gain,
+         st->postfilter_tapset_old, st->postfilter_tapset, NULL, 0, st->arch);
+
+      /* Simulate TDAC: only the first half of the overlap is rewritten, each
+         sample mixing one filtered sample with its mirror image. */
+      for (k=0;k<ov/2;k++)
+      {
+         tail[k] =
+            MULT16_32_Q15(mode->window[k], filtered[ov-1-k])
+            + MULT16_32_Q15(mode->window[ov-k-1], filtered[k]);
+      }
+   } while (++ch<nb_channels);
+}
+
+#ifdef ENABLE_DEEP_PLC
+
+#define SINC_ORDER 48
+/* Symmetric low-pass filter with SINC_ORDER+1 taps, normalized to sum to one.
+   update_plc_state() applies all taps with a stride of 3 input samples to
+   decimate by 3, and celt_decode_lost() uses every third tap (offsets 0, 1
+   and 2) to interpolate by 3 when resampling the PLC output to 48 kHz.
+   Generated with:
+   h=cos(pi/2*abs(sin([-24:24]/48*pi*23./24)).^2);
+   b=sinc([-24:24]/3*1.02).*h;
+   b=b/sum(b); */
+static const float sinc_filter[SINC_ORDER+1] = {
+    4.2931e-05f, -0.000190293f, -0.000816132f, -0.000637162f, 0.00141662f, 0.00354764f, 0.00184368f, -0.00428274f,
+    -0.00856105f, -0.0034003f, 0.00930201f, 0.0159616f, 0.00489785f, -0.0169649f, -0.0259484f, -0.00596856f,
+    0.0286551f, 0.0405872f, 0.00649994f, -0.0509284f, -0.0716655f, -0.00665212f,  0.134336f,  0.278927f,
+    0.339995f,  0.278927f,  0.134336f, -0.00665212f, -0.0716655f, -0.0509284f, 0.00649994f, 0.0405872f,
+    0.0286551f, -0.00596856f, -0.0259484f, -0.0169649f, 0.00489785f, 0.0159616f, 0.00930201f, -0.0034003f,
+    -0.00856105f, -0.00428274f, 0.00184368f, 0.00354764f, 0.00141662f, -0.000637162f, -0.000816132f, -0.000190293f,
+    4.2931e-05f
+};
+
+/* Feeds the most recently decoded audio to the LPCNet PLC state.
+   lpcnet: PLC state to update. Its fec_read_pos and fec_skip fields are
+           saved before the update and restored afterwards.
+   decode_mem: per-channel decode buffers; DECODE_BUFFER_SIZE samples are read
+               from channel 0, and from channel 1 as well when CC != 1.
+   plc_preemphasis_mem: receives the last sample of the filtered full-rate
+                        signal.
+   CC: number of channels. Anything other than 1 is treated as stereo and
+       averaged down to mono. */
+void update_plc_state(LPCNetPLCState *lpcnet, celt_sig *decode_mem[2], float *plc_preemphasis_mem, int CC)
+{
+   int i;
+   int tmp_read_post, tmp_fec_skip;
+   int offset;
+   celt_sig buf48k[DECODE_BUFFER_SIZE];
+   opus_int16 buf16k[PLC_UPDATE_SAMPLES];
+   if (CC == 1) OPUS_COPY(buf48k, decode_mem[0], DECODE_BUFFER_SIZE);
+   else {
+      for (i=0;i<DECODE_BUFFER_SIZE;i++) {
+         buf48k[i] = .5*(decode_mem[0][i] + decode_mem[1][i]);
+      }
+   }
+   /* In-place one-pole recursive filter with coefficient PREEMPHASIS; the
+      last output is kept so the caller can continue the filter later. */
+   for (i=1;i<DECODE_BUFFER_SIZE;i++) buf48k[i] += PREEMPHASIS*buf48k[i-1];
+   *plc_preemphasis_mem = buf48k[DECODE_BUFFER_SIZE-1];
+   /* Down-sample the last 40 ms. The offset is chosen so that the final
+      filter tap of the final output sample lands on the last buffer entry. */
+   offset = DECODE_BUFFER_SIZE-SINC_ORDER-1 - 3*(PLC_UPDATE_SAMPLES-1);
+   celt_assert(3*(PLC_UPDATE_SAMPLES-1) + SINC_ORDER + offset == DECODE_BUFFER_SIZE-1);
+   for (i=0;i<PLC_UPDATE_SAMPLES;i++) {
+      int j;
+      float sum = 0;
+      for (j=0;j<SINC_ORDER+1;j++) {
+         sum += buf48k[3*i + j + offset]*sinc_filter[j];
+      }
+      /* Saturate before narrowing: converting a float that does not fit in
+         the destination integer type is undefined behaviour in C. In-range
+         values are still truncated exactly as before. */
+      if (sum > 32767.f) sum = 32767.f;
+      else if (sum < -32768.f) sum = -32768.f;
+      buf16k[i] = (opus_int16)sum;
+   }
+   /* Updating the PLC must not disturb the FEC read position or skip count. */
+   tmp_read_post = lpcnet->fec_read_pos;
+   tmp_fec_skip = lpcnet->fec_skip;
+   for (i=0;i<PLC_UPDATE_FRAMES;i++) {
+      lpcnet_plc_update(lpcnet, &buf16k[FRAME_SIZE*i]);
+   }
+   lpcnet->fec_read_pos = tmp_read_post;
+   lpcnet->fec_skip = tmp_fec_skip;
+}
+#endif
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
+#ifdef ENABLE_DEEP_PLC
+      ,LPCNetPLCState *lpcnet
+#endif
+      )
 {
   int c;
   int i;
@ -527,14 +636,18 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
   } while (++c<C);
   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
-   oldBandE = lpc+C*LPC_ORDER;
+   oldBandE = lpc+C*CELT_LPC_ORDER;
   oldLogE = oldBandE + 2*nbEBands;
   oldLogE2 = oldLogE + 2*nbEBands;
   backgroundLogE = oldLogE2  + 2*nbEBands;

   loss_duration = st->loss_duration;
   start = st->start;
+#ifdef ENABLE_DEEP_PLC
+   noise_based = start != 0 || (lpcnet->fec_fill_pos == 0 && (st->skip_plc || loss_duration >= 80));
+#else
   noise_based = loss_duration >= 40 || start != 0 || st->skip_plc;
+#endif
   if (noise_based)
   {
      /* Noise-based PLC/CNG */
@ -559,9 +672,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
 #endif
      c=0; do {
         OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
-               DECODE_BUFFER_SIZE-N+(overlap>>1));
+               DECODE_BUFFER_SIZE-N+overlap);
      } while (++c<C);

+      if (st->prefilter_and_fold) {
+         prefilter_and_fold(st, N);
+      }
+
      /* Energy decay */
      decay = loss_duration==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
      c=0; do
@ -590,6 +707,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
      st->rng = seed;

      celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
+      st->prefilter_and_fold = 0;
+      /* Skip regular PLC until we get two consecutive packets. */
+      st->skip_plc = 1;
   } else {
      int exc_length;
      /* Pitch-based PLC */
@ -597,12 +717,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
      opus_val16 *exc;
      opus_val16 fade = Q15ONE;
      int pitch_index;
-      VARDECL(opus_val32, etmp);
      VARDECL(opus_val16, _exc);
      VARDECL(opus_val16, fir_tmp);

      if (loss_duration == 0)
      {
+#ifdef ENABLE_DEEP_PLC
+        if (lpcnet->loaded) update_plc_state(lpcnet, decode_mem, &st->plc_preemphasis_mem, C);
+#endif
         st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
      } else {
         pitch_index = st->last_pitch_index;
@ -613,10 +735,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
         decaying signal, but we can't get more than MAX_PERIOD. */
      exc_length = IMIN(2*pitch_index, MAX_PERIOD);

-      ALLOC(etmp, overlap, opus_val32);
-      ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
+      ALLOC(_exc, MAX_PERIOD+CELT_LPC_ORDER, opus_val16);
      ALLOC(fir_tmp, exc_length, opus_val16);
-      exc = _exc+LPC_ORDER;
+      exc = _exc+CELT_LPC_ORDER;
      window = mode->window;
      c=0; do {
         opus_val16 decay;
@ -628,16 +749,16 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
         int j;

         buf = decode_mem[c];
-         for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
-            exc[i-LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
+         for (i=0;i<MAX_PERIOD+CELT_LPC_ORDER;i++)
+            exc[i-CELT_LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-CELT_LPC_ORDER+i], SIG_SHIFT);

         if (loss_duration == 0)
         {
-            opus_val32 ac[LPC_ORDER+1];
+            opus_val32 ac[CELT_LPC_ORDER+1];
            /* Compute LPC coefficients for the last MAX_PERIOD samples before
               the first loss so we can work in the excitation-filter domain. */
            _celt_autocorr(exc, ac, window, overlap,
-                   LPC_ORDER, MAX_PERIOD, st->arch);
+                   CELT_LPC_ORDER, MAX_PERIOD, st->arch);
            /* Add a noise floor of -40 dB. */
 #ifdef FIXED_POINT
            ac[0] += SHR32(ac[0],13);
@ -645,7 +766,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
            ac[0] *= 1.0001f;
 #endif
            /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
-            for (i=1;i<=LPC_ORDER;i++)
+            for (i=1;i<=CELT_LPC_ORDER;i++)
            {
               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
 #ifdef FIXED_POINT
@ -654,7 +775,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
               ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
 #endif
            }
-            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+            _celt_lpc(lpc+c*CELT_LPC_ORDER, ac, CELT_LPC_ORDER);
 #ifdef FIXED_POINT
         /* For fixed-point, apply bandwidth expansion until we can guarantee that
            no overflow can happen in the IIR filter. This means:
@ -662,13 +783,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
         while (1) {
            opus_val16 tmp=Q15ONE;
            opus_val32 sum=QCONST16(1., SIG_SHIFT);
-            for (i=0;i<LPC_ORDER;i++)
-               sum += ABS16(lpc[c*LPC_ORDER+i]);
+            for (i=0;i<CELT_LPC_ORDER;i++)
+               sum += ABS16(lpc[c*CELT_LPC_ORDER+i]);
            if (sum < 65535) break;
-            for (i=0;i<LPC_ORDER;i++)
+            for (i=0;i<CELT_LPC_ORDER;i++)
            {
               tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
-               lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
+               lpc[c*CELT_LPC_ORDER+i] = MULT16_16_Q15(lpc[c*CELT_LPC_ORDER+i], tmp);
            }
         }
 #endif
@ -678,8 +799,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
         {
            /* Compute the excitation for exc_length samples before the loss. We need the copy
               because celt_fir() cannot filter in-place. */
-            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
-                  fir_tmp, exc_length, LPC_ORDER, st->arch);
+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*CELT_LPC_ORDER,
+                  fir_tmp, exc_length, CELT_LPC_ORDER, st->arch);
            OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
         }

@ -737,15 +858,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
            S1 += SHR32(MULT16_16(tmp, tmp), 10);
         }
         {
-            opus_val16 lpc_mem[LPC_ORDER];
+            opus_val16 lpc_mem[CELT_LPC_ORDER];
            /* Copy the last decoded samples (prior to the overlap region) to
               synthesis filter memory so we can have a continuous signal. */
-            for (i=0;i<LPC_ORDER;i++)
+            for (i=0;i<CELT_LPC_ORDER;i++)
               lpc_mem[i] = SROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
            /* Apply the synthesis filter to convert the excitation back into
               the signal domain. */
-            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
-                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*CELT_LPC_ORDER,
+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, CELT_LPC_ORDER,
                  lpc_mem, st->arch);
 #ifdef FIXED_POINT
            for (i=0; i < extrapolation_len; i++)
@ -792,23 +913,65 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
            }
         }

-         /* Apply the pre-filter to the MDCT overlap for the next frame because
-            the post-filter will be re-applied in the decoder after the MDCT
-            overlap. */
-         comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
-              st->postfilter_period, st->postfilter_period, overlap,
-              -st->postfilter_gain, -st->postfilter_gain,
-              st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
-
-         /* Simulate TDAC on the concealed audio so that it blends with the
-            MDCT of the next frame. */
-         for (i=0;i<overlap/2;i++)
-         {
-            buf[DECODE_BUFFER_SIZE+i] =
-               MULT16_32_Q15(window[i], etmp[overlap-1-i])
-               + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
-         }
      } while (++c<C);
+
+#ifdef ENABLE_DEEP_PLC
+      if (lpcnet->loaded && (st->complexity >= 5 || lpcnet->fec_fill_pos > 0)) {
+         float overlap_mem;
+         int samples_needed16k;
+         celt_sig *buf;
+         VARDECL(float, buf_copy);
+         buf = decode_mem[0];
+         ALLOC(buf_copy, C*overlap, float);
+         c=0; do {
+            OPUS_COPY(buf_copy+c*overlap, &decode_mem[c][DECODE_BUFFER_SIZE-N], overlap);
+         } while (++c<C);
+
+         /* Need enough samples from the PLC to cover the frame size, resampling delay,
+            and the overlap at the end. */
+         samples_needed16k = (N+SINC_ORDER+overlap)/3;
+         if (loss_duration == 0) {
+            st->plc_fill = 0;
+         }
+         while (st->plc_fill < samples_needed16k) {
+            lpcnet_plc_conceal(lpcnet, &st->plc_pcm[st->plc_fill]);
+            st->plc_fill += FRAME_SIZE;
+         }
+         /* Resample to 48 kHz. */
+         for (i=0;i<(N+overlap)/3;i++) {
+            int j;
+            float sum;
+            for (sum=0, j=0;j<17;j++) sum += 3*st->plc_pcm[i+j]*sinc_filter[3*j];
+            buf[DECODE_BUFFER_SIZE-N+3*i] = sum;
+            for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+2];
+            buf[DECODE_BUFFER_SIZE-N+3*i+1] = sum;
+            for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+1];
+            buf[DECODE_BUFFER_SIZE-N+3*i+2] = sum;
+         }
+         OPUS_MOVE(st->plc_pcm, &st->plc_pcm[N/3], st->plc_fill-N/3);
+         st->plc_fill -= N/3;
+         for (i=0;i<N;i++) {
+            float tmp = buf[DECODE_BUFFER_SIZE-N+i];
+            buf[DECODE_BUFFER_SIZE-N+i] -= PREEMPHASIS*st->plc_preemphasis_mem;
+            st->plc_preemphasis_mem = tmp;
+         }
+         overlap_mem = st->plc_preemphasis_mem;
+         for (i=0;i<overlap;i++) {
+            float tmp = buf[DECODE_BUFFER_SIZE+i];
+            buf[DECODE_BUFFER_SIZE+i] -= PREEMPHASIS*overlap_mem;
+            overlap_mem = tmp;
+         }
+         /* For now, we just do mono PLC. */
+         if (C==2) OPUS_COPY(decode_mem[1], decode_mem[0], DECODE_BUFFER_SIZE+overlap);
+         c=0; do {
+            /* Cross-fade with 48-kHz non-neural PLC for the first 2.5 ms to avoid a discontinuity. */
+            if (loss_duration == 0) {
+               for (i=0;i<overlap;i++) decode_mem[c][DECODE_BUFFER_SIZE-N+i] = (1-window[i])*buf_copy[c*overlap+i] + (window[i])*decode_mem[c][DECODE_BUFFER_SIZE-N+i];
+            }
+         } while (++c<C);
+      }
+#endif
+      st->prefilter_and_fold = 1;
   }

   /* Saturate to something large to avoid wrap-around. */
@ -817,8 +980,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
   RESTORE_STACK;
 }

-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+#ifdef ENABLE_DEEP_PLC
+      ,LPCNetPLCState *lpcnet
+#endif
+      )
 {
   int c, i, N;
   int spread_decision;
@ -881,7 +1048,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
   frame_size *= st->downsample;

   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
-   oldBandE = lpc+CC*LPC_ORDER;
+   oldBandE = lpc+CC*CELT_LPC_ORDER;
   oldLogE = oldBandE + 2*nbEBands;
   oldLogE2 = oldLogE + 2*nbEBands;
   backgroundLogE = oldLogE2  + 2*nbEBands;
@ -935,15 +1102,25 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat

   if (data == NULL || len<=1)
   {
-      celt_decode_lost(st, N, LM);
+      celt_decode_lost(st, N, LM
+#ifdef ENABLE_DEEP_PLC
+      , lpcnet
+#endif
+                      );
      deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
      RESTORE_STACK;
      return frame_size/st->downsample;
   }
+#ifdef ENABLE_DEEP_PLC
+   else {
+      /* FIXME: This is a bit of a hack just to make sure opus_decode_native() knows we're no longer in PLC. */
+      if (lpcnet) lpcnet->blend = 0;
+   }
+#endif

   /* Check if there are at least two packets received consecutively before
    * turning on the pitch-based PLC */
-   st->skip_plc = st->loss_duration != 0;
+   if (st->loss_duration == 0) st->skip_plc = 0;

   if (dec == NULL)
   {
@ -1006,6 +1183,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat

   /* Decode the global flags (first symbols in the stream) */
   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* If recovering from packet loss, make sure we make the energy prediction safe to reduce the
+      risk of getting loud artifacts. */
+   if (!intra_ener && st->loss_duration != 0) {
+      c=0; do
+      {
+         opus_val16 safety = 0;
+         int missing = IMIN(10, st->loss_duration>>LM);
+         if (LM==0) safety = QCONST16(1.5f,DB_SHIFT);
+         else if (LM==1) safety = QCONST16(.5f,DB_SHIFT);
+         for (i=start;i<end;i++)
+         {
+            if (oldBandE[c*nbEBands+i] < MAX16(oldLogE[c*nbEBands+i], oldLogE2[c*nbEBands+i])) {
+               /* If energy is going down already, continue the trend. */
+               opus_val32 slope;
+               opus_val32 E0, E1, E2;
+               E0 = oldBandE[c*nbEBands+i];
+               E1 = oldLogE[c*nbEBands+i];
+               E2 = oldLogE2[c*nbEBands+i];
+               slope = MAX32(E1 - E0, HALF32(E2 - E0));
+               E0 -= MAX32(0, (1+missing)*slope);
+               oldBandE[c*nbEBands+i] = MAX32(-QCONST16(20.f,DB_SHIFT), E0);
+            } else {
+               /* Otherwise take the min of the last frames. */
+               oldBandE[c*nbEBands+i] = MIN16(MIN16(oldBandE[c*nbEBands+i], oldLogE[c*nbEBands+i]), oldLogE2[c*nbEBands+i]);
+            }
+            /* Shorter frames have more natural fluctuations -- play it safe. */
+            oldBandE[c*nbEBands+i] -= safety;
+         }
+      } while (++c<2);
+   }
   /* Get band energies */
   unquant_coarse_energy(mode, start, end, oldBandE,
         intra_ener, dec, C, LM);
@ -1073,7 +1280,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
   unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);

   c=0; do {
-      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap);
   } while (++c<CC);

   /* Decode fixed codebook */
@ -1109,7 +1316,9 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
      for (i=0;i<C*nbEBands;i++)
         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
   }
-
+   if (st->prefilter_and_fold) {
+      prefilter_and_fold(st, N);
+   }
   celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
                  C, CC, isTransient, LM, st->downsample, silence, st->arch);

@ -1173,6 +1382,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat

   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
   st->loss_duration = 0;
+   st->prefilter_and_fold = 0;
   RESTORE_STACK;
   if (ec_tell(dec) > 8*len)
      return OPUS_INTERNAL_ERROR;
@ -1181,6 +1391,15 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
   return frame_size/st->downsample;
 }

+/* Entry point that keeps the pre-DRED signature: forwards every argument to
+   celt_decode_with_ec_dred() and, when deep PLC is compiled in, passes no
+   LPCNet state.
+   NOTE(review): with ENABLE_DEEP_PLC, celt_decode_lost() reads
+   lpcnet->fec_fill_pos without a NULL check, so a lost packet decoded through
+   this wrapper looks like it would dereference NULL -- confirm. */
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+{
+#ifdef ENABLE_DEEP_PLC
+   return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum, NULL);
+#else
+   return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum);
+#endif
+}

 #ifdef CUSTOM_MODES

@ -1254,6 +1473,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
   va_start(ap, request);
   switch (request)
   {
+      case OPUS_SET_COMPLEXITY_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if(value<0 || value>10)
+          {
+             goto bad_arg;
+          }
+          st->complexity = value;
+      }
+      break;
+      case OPUS_GET_COMPLEXITY_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          *value = st->complexity;
+      }
+      break;
      case CELT_SET_START_BAND_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
@ -1300,7 +1539,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
         int i;
         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
-         oldBandE = lpc+st->channels*LPC_ORDER;
+         oldBandE = lpc+st->channels*CELT_LPC_ORDER;
         oldLogE = oldBandE + 2*st->mode->nbEBands;
         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@ -281,6 +281,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
      /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
      for (i=0;i<len;i++)
      {
+#ifndef FIXED_POINT
+         float mem00;
+#endif
         opus_val32 x,y;
         x = SHR32(in[i+c*len],SIG_SHIFT);
         y = ADD32(mem0, x);
@ -288,8 +291,13 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
         mem0 = mem1 + y - SHL32(x,1);
         mem1 = x - SHR32(y,1);
 #else
+         /* Original code:
         mem0 = mem1 + y - 2*x;
         mem1 = x - .5f*y;
+         Modified code to shorten dependency chains: */
+         mem00=mem0;
+         mem0 = mem0 - x + .5f*mem1;
+         mem1 =  x - mem00;
 #endif
         tmp[i] = SROUND16(y, 2);
         /*printf("%f ", tmp[i]);*/
@ -322,10 +330,11 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
 #ifdef FIXED_POINT
         /* FIXME: Use PSHR16() instead */
         tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
-#else
-         tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
-#endif
         mem0 = tmp[i];
+#else
+         mem0 = x2 + (1.f-forward_decay)*mem0;
+         tmp[i] = forward_decay*mem0;
+#endif
      }

      mem0=0;
@ -337,11 +346,13 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
 #ifdef FIXED_POINT
         /* FIXME: Use PSHR16() instead */
         tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
-#else
-         tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
-#endif
         mem0 = tmp[i];
         maxE = MAX16(maxE, mem0);
+#else
+         mem0 = tmp[i] + 0.875f*mem0;
+         tmp[i] = 0.125f*mem0;
+         maxE = MAX16(maxE, 0.125f*mem0);
+#endif
      }
      /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/

@ -1565,10 +1576,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      vbr_rate = 0;
      tmp = st->bitrate*frame_size;
      if (tell>1)
-         tmp += tell;
+         tmp += tell*mode->Fs;
      if (st->bitrate!=OPUS_BITRATE_MAX)
+      {
         nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
               (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+         ec_enc_shrink(enc, nbCompressedBytes);
+      }
      effectiveBytes = nbCompressedBytes - nbFilledBytes;
   }
   equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50);
@ -2246,7 +2260,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      if (anti_collapse_on)
      {
         anti_collapse(mode, X, collapse_masks, LM, C, N,
-               start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+               start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
      }

      c=0; do {
@ -2265,15 +2279,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
         comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
               st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
-               mode->window, overlap);
+               mode->window, overlap, st->arch);
         if (LM!=0)
            comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
-                  mode->window, overlap);
+                  mode->window, overlap, st->arch);
      } while (++c<CC);

      /* We reuse freq[] as scratch space for the de-emphasis */
-      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
+      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, 0);
      st->prefilter_period_old = st->prefilter_period;
      st->prefilter_gain_old = st->prefilter_gain;
      st->prefilter_tapset_old = st->prefilter_tapset;
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@ -44,7 +44,7 @@ int          p
   opus_val32 r;
   opus_val32 error = ac[0];
 #ifdef FIXED_POINT
-   opus_val32 lpc[LPC_ORDER];
+   opus_val32 lpc[CELT_LPC_ORDER];
 #else
   float *lpc = _lpc;
 #endif
--- a/celt/celt_lpc.h
+++ b/celt/celt_lpc.h
@ -35,7 +35,7 @@
 #include "x86/celt_lpc_sse.h"
 #endif

-#define LPC_ORDER 24
+#define CELT_LPC_ORDER 24

 void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);

--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@ -35,19 +35,20 @@
  (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
 #include "arm/armcpu.h"

-/* We currently support 4 ARM variants:
+/* We currently support 5 ARM variants:
 * arch[0] -> ARMv4
 * arch[1] -> ARMv5E
 * arch[2] -> ARMv6
 * arch[3] -> NEON
+ * arch[4] -> NEON+DOTPROD
 */
-#define OPUS_ARCHMASK 3
+#define OPUS_ARCHMASK 7

 #elif defined(OPUS_HAVE_RTCD) && \
  ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
-  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+  (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))

 #include "x86/x86cpu.h"
 /* We currently support 5 x86 variants:
--- a/celt/entdec.c
+++ b/celt/entdec.c
@ -195,6 +195,27 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
  return ret;
 }

+/*Decodes one symbol from an "inverse" CDF table with 16-bit entries.
+  _icdf: Table of opus_uint16 values; it is scanned from index 0 until the
+          scaled entry is no longer above the decoder value.
+  _ftb:  Number of bits the range is shifted down by to get the scale.
+  Return: The index of the table entry at which the scan stopped.*/
+int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  opus_uint32 t;
+  int         ret;
+  s=_this->rng;
+  d=_this->val;
+  /*Scale factor applied to every table entry.*/
+  r=s>>_ftb;
+  ret=-1;
+  /*Walk the table: t keeps the previous scaled bound (the full range on the
+     first pass) and s becomes the current one.*/
+  do{
+    t=s;
+    s=IMUL32(r,_icdf[++ret]);
+  }
+  while(d<s);
+  /*Shrink the decoder state to the interval of the selected symbol.*/
+  _this->val=d-s;
+  _this->rng=t-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
 opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
  unsigned ft;
  unsigned s;
--- a/celt/entdec.h
+++ b/celt/entdec.h
@ -81,6 +81,16 @@ int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
  Return: The decoded symbol s.*/
 int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);

+/*Decodes a symbol given an "inverse" CDF table.
+  No call to ec_dec_update() is necessary after this call.
+  _icdf: The "inverse" CDF, such that symbol s falls in the range
+          [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.
+  Return: The decoded symbol s.*/
+int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb);
+
 /*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
  The bits must have been encoded with ec_enc_uint().
  No call to ec_dec_update() is necessary after this call.
--- a/celt/entenc.c
+++ b/celt/entenc.c
@ -172,6 +172,17 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
  ec_enc_normalize(_this);
 }

+/*Encodes symbol _s using an "inverse" CDF table with 16-bit entries.
+  _s:    Index of the symbol; _icdf[_s] is read, and _icdf[_s-1] as well
+          when _s>0.
+  _icdf: Table of opus_uint16 values.
+  _ftb:  Number of bits the range is shifted down by to get the scale.*/
+void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  /*Scale factor applied to every table entry.*/
+  r=_this->rng>>_ftb;
+  if(_s>0){
+    /*Advance the low end past the preceding symbols, then keep only the
+       slice between the two neighbouring table entries.*/
+    _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]);
+    _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]);
+  }
+  /*First symbol: the low end stays put and only the range shrinks.*/
+  else _this->rng-=IMUL32(r,_icdf[_s]);
+  ec_enc_normalize(_this);
+}
+
 void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
  unsigned  ft;
  unsigned  fl;
--- a/celt/entenc.h
+++ b/celt/entenc.h
@ -64,6 +64,15 @@ void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
  _ftb: The number of bits of precision in the cumulative distribution.*/
 void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);

+/*Encodes a symbol given an "inverse" CDF table.
+  _s:    The index of the symbol to encode.
+  _icdf: The "inverse" CDF, such that symbol _s falls in the range
+          [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.*/
+void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb);
+
 /*Encodes a raw unsigned integer in the stream.
  _fl: The integer to encode.
  _ft: The number of integers that can be encoded (one more than the max).
--- a/celt/laplace.c
+++ b/celt/laplace.c
@ -132,3 +132,104 @@ int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
   ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
   return val;
 }
+
+/* Encodes a signed integer in two stages: a three-way symbol (zero, positive,
+   negative) whose table is derived from p0, followed, for non-zero values, by
+   abs(value)-1 coded in chunks of up to 7 with a table derived from decay.
+   A chunk equal to 7 means another chunk follows.
+   enc:   range encoder to write to.
+   value: integer to encode.
+   p0:    parameter of the zero/positive/negative table (first entry is
+          32768-p0).
+   decay: parameter of the magnitude table (each entry is the previous one
+          scaled by decay/32768, with a floor that keeps entries decreasing). */
+void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay)
+{
+   int k;
+   int symbol;
+   int remaining;
+   opus_uint16 sign_icdf[3];
+   opus_uint16 mag_icdf[8];
+
+   sign_icdf[0] = 32768-p0;
+   sign_icdf[1] = sign_icdf[0]/2;
+   sign_icdf[2] = 0;
+   if (value > 0) symbol = 1;
+   else if (value < 0) symbol = 2;
+   else symbol = 0;
+   ec_enc_icdf16(enc, symbol, sign_icdf, 15);
+
+   /* Zero is fully described by the first symbol. */
+   if (value == 0) return;
+
+   mag_icdf[0] = IMAX(7, decay);
+   for (k=0;k<6;k++)
+   {
+      mag_icdf[k+1] = IMAX(6-k, (mag_icdf[k] * (opus_int32)decay) >> 15);
+   }
+   mag_icdf[7] = 0;
+
+   /* abs(value)-1 is never negative here, so at least one chunk is written. */
+   for (remaining = abs(value)-1; remaining >= 0; remaining -= 7)
+   {
+      ec_enc_icdf16(enc, IMIN(remaining, 7), mag_icdf, 15);
+   }
+}
+
+/* Decodes an integer written by ec_laplace_encode_p0(): first a three-way
+   symbol (zero, positive, negative), then, for non-zero values, the magnitude
+   as a run of chunks that continues for as long as a chunk equals 7.
+   dec:   range decoder to read from.
+   p0:    must match the value given to the encoder.
+   decay: must match the value given to the encoder.
+   Returns the decoded signed value. */
+int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay)
+{
+   int k;
+   int sign;
+   int magnitude;
+   int chunk;
+   opus_uint16 sign_icdf[3];
+   opus_uint16 mag_icdf[8];
+
+   sign_icdf[0] = 32768-p0;
+   sign_icdf[1] = sign_icdf[0]/2;
+   sign_icdf[2] = 0;
+   sign = ec_dec_icdf16(dec, sign_icdf, 15);
+   if (sign == 0) return 0;
+   /* Symbol 2 stands for a negative value. */
+   if (sign == 2) sign = -1;
+
+   mag_icdf[0] = IMAX(7, decay);
+   for (k=0;k<6;k++)
+   {
+      mag_icdf[k+1] = IMAX(6-k, (mag_icdf[k] * (opus_int32)decay) >> 15);
+   }
+   mag_icdf[7] = 0;
+
+   magnitude = 1;
+   for (;;)
+   {
+      chunk = ec_dec_icdf16(dec, mag_icdf, 15);
+      magnitude += chunk;
+      if (chunk != 7) break;
+   }
+   return sign*magnitude;
+}
+
+#if 0
+
+/* Disabled round-trip check for ec_laplace_encode_p0()/ec_laplace_decode_p0():
+   prints a fixed list of values, encodes them, decodes them back from the
+   same buffer and prints the result so the two lines can be compared. */
+#include <stdio.h>
+#include <stdlib.h>
+#define NB_VALS 10
+#define DATA_SIZE 10000
+int main() {
+   ec_enc enc;
+   ec_dec dec;
+   unsigned char *ptr;
+   int i;
+   int decay, p0;
+   int val[NB_VALS] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+   /*for (i=0;i<NB_VALS;i++) {
+      val[i] = -log(rand()/(float)RAND_MAX);
+      if (rand()%2) val[i] = -val[i];
+   }*/
+   p0 = 16000;
+   decay = 16000;
+   ptr = (unsigned char *)malloc(DATA_SIZE);
+   if (ptr == NULL) return 1;
+   ec_enc_init(&enc,ptr,DATA_SIZE);
+   for (i=0;i<NB_VALS;i++) {
+      printf("%d ", val[i]);
+   }
+   printf("\n");
+   for (i=0;i<NB_VALS;i++) {
+      ec_laplace_encode_p0(&enc, val[i], p0, decay);
+   }
+
+   ec_enc_done(&enc);
+
+   ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
+
+   for (i=0;i<NB_VALS;i++) {
+      val[i] = ec_laplace_decode_p0(&dec, p0, decay);
+   }
+   for (i=0;i<NB_VALS;i++) {
+      printf("%d ", val[i]);
+   }
+   printf("\n");
+   /* The buffer is only released once decoding, which reads from it, is done. */
+   free(ptr);
+   return 0;
+}
+
+#endif
--- a/celt/laplace.h
+++ b/celt/laplace.h
@ -26,6 +26,9 @@
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

+#ifndef LAPLACE_H
+#define LAPLACE_H
+
 #include "entenc.h"
 #include "entdec.h"

@ -46,3 +49,9 @@ void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
 @return Value decoded
 */
 int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
+
+
+/** Decode a signed value written by ec_laplace_encode_p0()
+ @param dec Entropy decoder state
+ @param p0 Sets the split between zero and non-zero values, on a 32768 scale
+           (presumably the probability of zero in Q15 -- confirm against ec_dec_icdf16())
+ @param decay Q15 factor applied from one magnitude table entry to the next
+ @return Value decoded
+ */
+int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay);
+/** Encode a signed value so that ec_laplace_decode_p0() can read it back.
+    p0 and decay must match the values given to the decoder.
+ @param enc Entropy encoder state
+ @param value Value to encode
+ @param p0 Same meaning as for ec_laplace_decode_p0()
+ @param decay Same meaning as for ec_laplace_decode_p0()
+ */
+void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay);
+
+#endif
--- a/celt/mathops.h
+++ b/celt/mathops.h
@ -230,6 +230,12 @@ static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
   frac = SHL16(x, 4);
   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
 }
+
+#undef D0
+#undef D1
+#undef D2
+#undef D3
+
 /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
 static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
 {
--- a/celt/meson.build
+++ b/celt/meson.build
@ -6,6 +6,8 @@ celt_sse2_sources = sources['CELT_SOURCES_SSE2']

 celt_sse4_1_sources = sources['CELT_SOURCES_SSE4_1']

+celt_avx2_sources = sources['CELT_SOURCES_AVX2']
+
 celt_neon_intr_sources = sources['CELT_SOURCES_ARM_NEON_INTR']

 celt_static_libs = []
@ -14,7 +16,7 @@ if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
  celt_sources +=  sources['CELT_SOURCES_X86_RTCD']
 endif

-foreach intr_name : ['sse', 'sse2', 'sse4_1', 'neon_intr']
+foreach intr_name : ['sse', 'sse2', 'sse4_1', 'avx2', 'neon_intr']
  have_intr = get_variable('have_' + intr_name)
  if not have_intr
    continue
--- a/celt/mips/celt_mipsr1.h
+++ b/celt/mips/celt_mipsr1.h
@ -27,8 +27,8 @@
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#ifndef __CELT_MIPSR1_H__
-#define __CELT_MIPSR1_H__
+#ifndef CELT_MIPSR1_H__
+#define CELT_MIPSR1_H__

 #ifdef HAVE_CONFIG_H
 #include "config.h"
@ -149,4 +149,4 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
   }
 }

-#endif /* __CELT_MIPSR1_H__ */
+#endif /* CELT_MIPSR1_H__ */
--- a/celt/mips/mdct_mipsr1.h
+++ b/celt/mips/mdct_mipsr1.h
@ -38,8 +38,8 @@
   MDCT implementation in FFMPEG, but has differences in signs, ordering
   and scaling in many places.
 */
-#ifndef __MDCT_MIPSR1_H__
-#define __MDCT_MIPSR1_H__
+#ifndef MDCT_MIPSR1_H__
+#define MDCT_MIPSR1_H__

 #ifndef SKIP_CONFIG_H
 #ifdef HAVE_CONFIG_H
@ -285,4 +285,4 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
      }
   }
 }
-#endif /* __MDCT_MIPSR1_H__ */
+#endif /* MDCT_MIPSR1_H__ */
--- a/celt/mips/vq_mipsr1.h
+++ b/celt/mips/vq_mipsr1.h
@ -26,8 +26,8 @@
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#ifndef __VQ_MIPSR1_H__
-#define __VQ_MIPSR1_H__
+#ifndef VQ_MIPSR1_H__
+#define VQ_MIPSR1_H__

 #ifdef HAVE_CONFIG_H
 #include "config.h"
@ -113,4 +113,4 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
   /*return celt_sqrt(E);*/
 }

-#endif /* __VQ_MIPSR1_H__ */
+#endif /* VQ_MIPSR1_H__ */
--- a/celt/os_support.h
+++ b/celt/os_support.h
@ -41,7 +41,7 @@
 #include <string.h>
 #include <stdlib.h>

-/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+/** Opus wrapper for malloc(). To do your own dynamic allocation replace this function, opus_realloc, and opus_free */
 #ifndef OVERRIDE_OPUS_ALLOC
 static OPUS_INLINE void *opus_alloc (size_t size)
 {
@ -49,7 +49,15 @@ static OPUS_INLINE void *opus_alloc (size_t size)
 }
 #endif

-/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
+/** Opus wrapper for realloc(). To do your own dynamic allocation replace this function, opus_alloc, and opus_free.
+    Define OVERRIDE_OPUS_REALLOC to leave this default out. */
+#ifndef OVERRIDE_OPUS_REALLOC
+static OPUS_INLINE void *opus_realloc (void *ptr, size_t size)
+{
+   return realloc(ptr, size);
+}
+#endif
+
+/** Used only for non-threadsafe pseudostack.
+    If desired, this can always return the same area of memory rather than allocating a new one every time. */
 #ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
 static OPUS_INLINE void *opus_alloc_scratch (size_t size)
 {
@ -58,7 +66,7 @@ static OPUS_INLINE void *opus_alloc_scratch (size_t size)
 }
 #endif

-/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
+/** Opus wrapper for free(). To do your own dynamic allocation replace this function, opus_alloc, and opus_realloc */
 #ifndef OVERRIDE_OPUS_FREE
 static OPUS_INLINE void opus_free (void *ptr)
 {
--- a/celt/pitch.h
+++ b/celt/pitch.h
@ -189,4 +189,15 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
 # define celt_pitch_xcorr celt_pitch_xcorr_c
 #endif

+#ifdef NON_STATIC_COMB_FILTER_CONST_C
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+                         opus_val16 g10, opus_val16 g11, opus_val16 g12);
+#endif
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
+    ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
+#endif
+
+
 #endif
--- a/celt/x86/pitch_avx.c
+++ b/celt/x86/pitch_avx.c
@ -0,0 +1,101 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <immintrin.h>
+#include "x86cpu.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(FIXED_POINT)
+
+/* Like the "regular" xcorr_kernel(), but computes 8 results at a time:
+   sum[k] = x[0]*y[k] + x[1]*y[k+1] + ... + x[len-1]*y[len-1+k], for k = 0..7.
+   sum[] is overwritten rather than accumulated into. The highest element of y
+   that is read is y[len+6]. */
+static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len)
+{
+    /* All declarations come before the first statement (C89). */
+    int i;
+    __m256 x0;
+    __m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7;
+    xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps();
+    /* Compute 8 inner products using partial sums. */
+    for (i=0;i<len-7;i+=8)
+    {
+        x0 = _mm256_loadu_ps(x+i);
+        xsum0 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i  ), xsum0);
+        xsum1 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+1), xsum1);
+        xsum2 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+2), xsum2);
+        xsum3 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+3), xsum3);
+        xsum4 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+4), xsum4);
+        xsum5 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+5), xsum5);
+        xsum6 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+6), xsum6);
+        xsum7 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+7), xsum7);
+    }
+    /* Tail of 1 to 7 samples when len is not a multiple of 8. */
+    if (i != len) {
+        /* 7 all-ones entries followed by 8 zeros: starting the 8-entry load at
+           index 7-(len-i) enables exactly the first len-i lanes. */
+        static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
+        __m256i m;
+        m = _mm256_loadu_si256((const __m256i*)(const void*)(mask + 7+i-len));
+        x0 = _mm256_maskload_ps(x+i, m);
+        xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i  , m), xsum0);
+        xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
+        xsum2 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+2, m), xsum2);
+        xsum3 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+3, m), xsum3);
+        xsum4 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+4, m), xsum4);
+        xsum5 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+5, m), xsum5);
+        xsum6 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+6, m), xsum6);
+        xsum7 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+7, m), xsum7);
+    }
+    /* 8 horizontal adds. */
+    /* Compute [0 4] [1 5] [2 6] [3 7] */
+    xsum0 = _mm256_add_ps(_mm256_permute2f128_ps(xsum0, xsum4, 2<<4), _mm256_permute2f128_ps(xsum0, xsum4, 1 | (3<<4)));
+    xsum1 = _mm256_add_ps(_mm256_permute2f128_ps(xsum1, xsum5, 2<<4), _mm256_permute2f128_ps(xsum1, xsum5, 1 | (3<<4)));
+    xsum2 = _mm256_add_ps(_mm256_permute2f128_ps(xsum2, xsum6, 2<<4), _mm256_permute2f128_ps(xsum2, xsum6, 1 | (3<<4)));
+    xsum3 = _mm256_add_ps(_mm256_permute2f128_ps(xsum3, xsum7, 2<<4), _mm256_permute2f128_ps(xsum3, xsum7, 1 | (3<<4)));
+    /* Compute [0 1 4 5] [2 3 6 7] */
+    xsum0 = _mm256_hadd_ps(xsum0, xsum1);
+    xsum1 = _mm256_hadd_ps(xsum2, xsum3);
+    /* Compute [0 1 2 3 4 5 6 7] */
+    xsum0 = _mm256_hadd_ps(xsum0, xsum1);
+    _mm256_storeu_ps(sum, xsum0);
+}
+
+/* Fills xcorr[0..max_pitch-1], where xcorr[lag] is the inner product of
+   _x[0..len-1] with _y[lag..lag+len-1]. Lags are handled eight at a time by
+   xcorr_kernel_avx(); the leftover lags go through celt_inner_prod(). */
+void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch)
+{
+   int lag;
+   celt_assert(max_pitch>0);
+   (void)arch;
+   lag = 0;
+   /* Full groups of eight lags. */
+   while (lag < max_pitch-7)
+   {
+      xcorr_kernel_avx(_x, _y+lag, xcorr+lag, len);
+      lag += 8;
+   }
+   /* Remaining lags, one at a time. */
+   while (lag < max_pitch)
+   {
+      xcorr[lag] = celt_inner_prod(_x, _y+lag, len, arch);
+      lag++;
+   }
+}
+
+#endif
--- a/celt/x86/pitch_sse.h
+++ b/celt/x86/pitch_sse.h
@ -131,12 +131,6 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(

 #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)

-#define OVERRIDE_DUAL_INNER_PROD
-#define OVERRIDE_COMB_FILTER_CONST
-
-#undef dual_inner_prod
-#undef comb_filter_const
-
 void dual_inner_prod_sse(const opus_val16 *x,
    const opus_val16 *y01,
    const opus_val16 *y02,
@ -154,13 +148,17 @@ void comb_filter_const_sse(opus_val32 *y,


 #if defined(OPUS_X86_PRESUME_SSE)
+#define OVERRIDE_DUAL_INNER_PROD
+#define OVERRIDE_COMB_FILTER_CONST
 # define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
    ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))

 # define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
    ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
-#else
+#elif defined(OPUS_HAVE_RTCD)

+#define OVERRIDE_DUAL_INNER_PROD
+#define OVERRIDE_COMB_FILTER_CONST
 extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
              const opus_val16 *x,
              const opus_val16 *y01,
@ -187,6 +185,32 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
 #define NON_STATIC_COMB_FILTER_CONST_C

 #endif
-#endif
+
+/* AVX2 version of celt_pitch_xcorr(), implemented in celt/x86/pitch_avx.c. */
+void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch);
+
+#if defined(OPUS_X86_PRESUME_AVX2)
+
+/* AVX2 is presumed: celt_pitch_xcorr maps straight to the AVX2 function. */
+#define OVERRIDE_PITCH_XCORR
+# define celt_pitch_xcorr celt_pitch_xcorr_avx2
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
+
+/* AVX2 may be available: celt_pitch_xcorr goes through a table of function
+   pointers indexed by (arch & OPUS_ARCHMASK). */
+#define OVERRIDE_PITCH_XCORR
+extern void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
+              const float *_x,
+              const float *_y,
+              float *xcorr,
+              int len,
+              int max_pitch,
+              int arch
+              );
+
+#define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+    ((*PITCH_XCORR_IMPL[(arch) & OPUS_ARCHMASK])(_x, _y, xcorr, len, max_pitch, arch))
+
+
+#endif /* OPUS_X86_PRESUME_AVX2, else OPUS_HAVE_RTCD && OPUS_X86_MAY_HAVE_AVX2 */
+
+#endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */

 #endif
--- a/celt/x86/vq_sse.h
+++ b/celt/x86/vq_sse.h
@ -28,16 +28,18 @@
 #define VQ_SSE_H

 #if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
-#define OVERRIDE_OP_PVQ_SEARCH

 opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);

 #if defined(OPUS_X86_PRESUME_SSE2)
+
+#define OVERRIDE_OP_PVQ_SEARCH
 #define op_pvq_search(x, iy, K, N, arch) \
    (op_pvq_search_sse2(x, iy, K, N, arch))

-#else
+#elif defined(OPUS_HAVE_RTCD)

+#define OVERRIDE_OP_PVQ_SEARCH
 extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
      celt_norm *_X, int *iy, int K, int N, int arch);

--- a/celt/x86/vq_sse2.c
+++ b/celt/x86/vq_sse2.c
@ -75,7 +75,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
      sums = _mm_add_ps(sums, x4);
      /* Clear y and iy in case we don't do the projection. */
      _mm_storeu_ps(&y[j], _mm_setzero_ps());
-      _mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
+      _mm_storeu_si128((__m128i*)(void*)&iy[j], _mm_setzero_si128());
      _mm_storeu_ps(&X[j], x4);
      _mm_storeu_ps(&signy[j], s4);
   }
@ -116,7 +116,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
         rx4 = _mm_mul_ps(x4, rcp4);
         iy4 = _mm_cvttps_epi32(rx4);
         pulses_sum = _mm_add_epi32(pulses_sum, iy4);
-         _mm_storeu_si128((__m128i*)&iy[j], iy4);
+         _mm_storeu_si128((__m128i*)(void*)&iy[j], iy4);
         y4 = _mm_cvtepi32_ps(iy4);
         xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
         yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
@ -205,10 +205,10 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
   {
      __m128i y4;
      __m128i s4;
-      y4 = _mm_loadu_si128((__m128i*)&iy[j]);
+      y4 = _mm_loadu_si128((__m128i*)(void*)&iy[j]);
      s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
      y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
-      _mm_storeu_si128((__m128i*)&iy[j], y4);
+      _mm_storeu_si128((__m128i*)(void*)&iy[j], y4);
   }
   RESTORE_STACK;
   return yy;
--- a/celt/x86/x86_arch_macros.h
+++ b/celt/x86/x86_arch_macros.h
@ -0,0 +1,47 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* MSVC only: for each of SSE, SSE2 and SSE4.1, define the matching __SSE__,
+   __SSE2__ or __SSE4_1__ macro when the corresponding OPUS_X86_MAY_HAVE_*
+   option is set and the macro is not already defined.
+   NOTE(review): presumably needed because MSVC does not predefine these
+   macros itself -- confirm against the code that tests them. */
+#ifdef _MSC_VER
+
+# ifdef OPUS_X86_MAY_HAVE_SSE
+#  ifndef __SSE__
+#   define __SSE__
+#  endif
+# endif
+
+# ifdef OPUS_X86_MAY_HAVE_SSE2
+#  ifndef __SSE2__
+#   define __SSE2__
+#  endif
+# endif
+
+# ifdef OPUS_X86_MAY_HAVE_SSE4_1
+#  ifndef __SSE4_1__
+#   define __SSE4_1__
+#  endif
+# endif
+
+#endif
--- a/celt/x86/x86_celt_map.c
+++ b/celt/x86/x86_celt_map.c
@ -90,6 +90,26 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(

 # else

+/* Only built when AVX2 may be present but is not presumed, i.e. when the
+   choice has to be made at run time. */
+#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)
+
+/* Function table behind the celt_pitch_xcorr() dispatch macro, indexed by
+   (arch & OPUS_ARCHMASK). Only the last entry can be the AVX2 version; every
+   lower entry is the generic C implementation.
+   NOTE(review): entries 1..3 presumably correspond to SSE, SSE2 and SSE4.1 --
+   confirm against opus_select_arch(). */
+void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
+         const float *_x,
+         const float *_y,
+         float *xcorr,
+         int len,
+         int max_pitch,
+         int arch
+) = {
+  celt_pitch_xcorr_c,                /* non-sse */
+  celt_pitch_xcorr_c,
+  celt_pitch_xcorr_c,
+  celt_pitch_xcorr_c,
+  MAY_HAVE_AVX2(celt_pitch_xcorr)    /* _avx2 if compiled in, else _c */
+};
+
+#endif
+
+
 #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)

 void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
--- a/celt/x86/x86cpu.c
+++ b/celt/x86/x86cpu.c
@ -39,7 +39,7 @@
  ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
-  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+  (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))

 #if defined(_MSC_VER)

@ -105,7 +105,7 @@ typedef struct CPU_Feature{
    int HW_SSE2;
    int HW_SSE41;
    /*  SIMD: 256-bit */
-    int HW_AVX;
+    int HW_AVX2;
 } CPU_Feature;

 static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
        cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
        cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
        cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
-        cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
+        cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
+        if (cpu_feature->HW_AVX2 && nIds >= 7) {
+            cpuid(info, 7);
+            cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
+        } else {
+            cpu_feature->HW_AVX2 = 0;
+        }
    }
    else {
        cpu_feature->HW_SSE = 0;
        cpu_feature->HW_SSE2 = 0;
        cpu_feature->HW_SSE41 = 0;
-        cpu_feature->HW_AVX = 0;
+        cpu_feature->HW_AVX2 = 0;
    }
 }

@ -157,7 +163,7 @@ static int opus_select_arch_impl(void)
    }
    arch++;

-    if (!cpu_feature.HW_AVX)
+    if (!cpu_feature.HW_AVX2)
    {
        return arch;
    }
--- a/celt/x86/x86cpu.h
+++ b/celt/x86/x86cpu.h
@ -46,13 +46,17 @@
 #  define MAY_HAVE_SSE4_1(name) name ## _c
 # endif

-# if defined(OPUS_X86_MAY_HAVE_AVX)
-#  define MAY_HAVE_AVX(name) name ## _avx
+# if defined(OPUS_X86_MAY_HAVE_AVX2)
+#  define MAY_HAVE_AVX2(name) name ## _avx2
 # else
-#  define MAY_HAVE_AVX(name) name ## _c
+#  define MAY_HAVE_AVX2(name) name ## _c
 # endif

-# if defined(OPUS_HAVE_RTCD)
+# if defined(OPUS_HAVE_RTCD) && \
+  ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
+  (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
 int opus_select_arch(void);
 # endif

@ -68,6 +72,6 @@ int opus_select_arch(void);
 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(OP_LOADU_EPI32(x))))

 #define OP_CVTEPI16_EPI32_M64(x) \
- (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
+ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x))))

 #endif
--- a/celt_headers.mk
+++ b/celt_headers.mk
@ -50,4 +50,5 @@ celt/mips/pitch_mipsr1.h \
 celt/mips/vq_mipsr1.h \
 celt/x86/pitch_sse.h \
 celt/x86/vq_sse.h \
+celt/x86/x86_arch_macros.h \
 celt/x86/x86cpu.h
--- a/celt_sources.mk
+++ b/celt_sources.mk
@ -33,6 +33,9 @@ CELT_SOURCES_SSE4_1 = \
 celt/x86/celt_lpc_sse4_1.c \
 celt/x86/pitch_sse4_1.c

+CELT_SOURCES_AVX2 = \
+celt/x86/pitch_avx.c
+
 CELT_SOURCES_ARM_RTCD = \
 celt/arm/armcpu.c \
 celt/arm/arm_celt_map.c
--- a/cmake/OpusConfig.cmake
+++ b/cmake/OpusConfig.cmake
@ -102,7 +102,10 @@ if(MINGW)
  endif()
 endif()

-if(NOT MSVC)
+if(MSVC)
+  # move cosmetic warnings to level 4
+  add_compile_options(/w44244 /w44305 /w44267)
+else()
  set(WARNING_LIST -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow)
  include(CheckCCompilerFlag)
  foreach(WARNING_FLAG ${WARNING_LIST})
--- a/cmake/OpusFunctions.cmake
+++ b/cmake/OpusFunctions.cmake
@ -47,10 +47,12 @@ function(check_flag NAME FLAG)
 endfunction()

 include(CheckIncludeFile)
-# function to check if compiler supports SSE, SSE2, SSE4.1 and AVX if target
-# systems may not have SSE support then use OPUS_MAY_HAVE_SSE option if target
-# system is guaranteed to have SSE support then OPUS_PRESUME_SSE can be used to
-# skip SSE runtime check
+
+# This function determines if the compiler has support for SSE, SSE2, SSE4.1, AVX,
+# AVX2 and FMA. Should the target systems potentially lack SSE support, the
+# OPUS_MAY_HAVE_SSE option is recommended for use. If, however, the target system is
+# assured to support SSE, the OPUS_PRESUME_SSE option can be employed, thus
+# eliminating the necessity for an SSE runtime check.
 function(opus_detect_sse COMPILER_SUPPORT_SIMD)
  message(STATUS "Check SIMD support by compiler")
  check_include_file(xmmintrin.h HAVE_XMMINTRIN_H) # SSE1
@ -111,20 +113,20 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
        PARENT_SCOPE)
  endif()

-  check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX
+  check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX2
  if(HAVE_IMMINTRIN_H)
    if(MSVC)
-      check_flag(AVX /arch:AVX)
+      check_flag(AVX2 /arch:AVX2)
    else()
-      check_flag(AVX -mavx)
+      check_flag(AVX2 -mavx2 -mfma -mavx)
    endif()
  else()
-    set(AVX_SUPPORTED
+    set(AVX2_SUPPORTED
        0
        PARENT_SCOPE)
  endif()

-  if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX_SUPPORTED)
+  if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX2_SUPPORTED)
    set(COMPILER_SUPPORT_SIMD 1 PARENT_SCOPE)
  else()
    message(STATUS "No SIMD support in compiler")
@ -215,7 +217,7 @@ function(get_opus_sources SOURCE_GROUP MAKE_FILE SOURCES)
  if(${list_length} LESS 1)
    message(
      FATAL_ERROR
-        "No files parsed succesfully from ${SOURCE_GROUP} in ${MAKE_FILE}")
+        "No files parsed successfully from ${SOURCE_GROUP} in ${MAKE_FILE}")
  endif()

  # remove trailing whitespaces
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@ -13,6 +13,8 @@ get_opus_sources(SILK_SOURCES_X86_RTCD silk_sources.mk silk_sources_x86_rtcd)
 get_opus_sources(SILK_SOURCES_SSE4_1 silk_sources.mk silk_sources_sse4_1)
 get_opus_sources(SILK_SOURCES_FIXED_SSE4_1 silk_sources.mk
                 silk_sources_fixed_sse4_1)
+get_opus_sources(SILK_SOURCES_AVX2 silk_sources.mk silk_sources_avx2)
+get_opus_sources(SILK_SOURCES_FLOAT_AVX2 silk_sources.mk silk_sources_float_avx2)
 get_opus_sources(SILK_SOURCES_ARM_RTCD silk_sources.mk silk_sources_arm_rtcd)
 get_opus_sources(SILK_SOURCES_ARM_NEON_INTR silk_sources.mk
                 silk_sources_arm_neon_intr)
@ -29,6 +31,7 @@ get_opus_sources(CELT_SOURCES_X86_RTCD celt_sources.mk celt_sources_x86_rtcd)
 get_opus_sources(CELT_SOURCES_SSE celt_sources.mk celt_sources_sse)
 get_opus_sources(CELT_SOURCES_SSE2 celt_sources.mk celt_sources_sse2)
 get_opus_sources(CELT_SOURCES_SSE4_1 celt_sources.mk celt_sources_sse4_1)
+get_opus_sources(CELT_SOURCES_AVX2 celt_sources.mk celt_sources_avx2)
 get_opus_sources(CELT_SOURCES_ARM_RTCD celt_sources.mk celt_sources_arm_rtcd)
 get_opus_sources(CELT_SOURCES_ARM_ASM celt_sources.mk celt_sources_arm_asm)
 get_opus_sources(CELT_AM_SOURCES_ARM_ASM celt_sources.mk
@ -37,13 +40,30 @@ get_opus_sources(CELT_SOURCES_ARM_NEON_INTR celt_sources.mk
                 celt_sources_arm_neon_intr)
 get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10)

+get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)
+get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
+get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
+get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)
+get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
+get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
+get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
+get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
+get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
+get_opus_sources(DNN_SOURCES_AVX2 lpcnet_sources.mk dnn_sources_avx2)
+get_opus_sources(DNN_SOURCES_NEON lpcnet_sources.mk dnn_sources_arm_neon)
+get_opus_sources(DNN_SOURCES_DOTPROD lpcnet_sources.mk dnn_sources_arm_dotprod)
+
 get_opus_sources(opus_demo_SOURCES Makefile.am opus_demo_sources)
 get_opus_sources(opus_custom_demo_SOURCES Makefile.am opus_custom_demo_sources)
 get_opus_sources(opus_compare_SOURCES Makefile.am opus_compare_sources)
 get_opus_sources(tests_test_opus_api_SOURCES Makefile.am test_opus_api_sources)
 get_opus_sources(tests_test_opus_encode_SOURCES Makefile.am
                 test_opus_encode_sources)
+get_opus_sources(tests_test_opus_extensions_SOURCES Makefile.am
+                 test_opus_extensions_sources)
 get_opus_sources(tests_test_opus_decode_SOURCES Makefile.am
                 test_opus_decode_sources)
 get_opus_sources(tests_test_opus_padding_SOURCES Makefile.am
                 test_opus_padding_sources)
+get_opus_sources(tests_test_opus_dred_SOURCES Makefile.am
+                 test_opus_dred_sources)
--- a/configure.ac
+++ b/configure.ac
@ -162,12 +162,39 @@ AS_IF([test "$enable_custom_modes" = "yes"],[

 AM_CONDITIONAL([CUSTOM_MODES], [test "$enable_custom_modes" = "yes"])

+AC_ARG_ENABLE([dred],
+    [AS_HELP_STRING([--enable-dred], [Use Deep REDundancy (DRED)])],,
+    [enable_dred=no])
+
+AS_IF([test "$enable_dred" = "yes"],[
+  AC_DEFINE([ENABLE_DRED], [1], [DRED])
+])
+AM_CONDITIONAL([ENABLE_DRED], [test "$enable_dred" = "yes"])
+
+AC_ARG_ENABLE([deep-plc],
+    [AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
+    [enable_deep_plc=no])
+
+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
+  AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
+])
+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
+
+AC_ARG_ENABLE([lossgen],
+    [AS_HELP_STRING([--enable-lossgen], [Build opus_demo with packet loss simulator])],,
+    [enable_lossgen=no])
+
+AS_IF([test "$enable_lossgen" = "yes"],[
+  AC_DEFINE([ENABLE_LOSSGEN], [1], [LOSSGEN])
+])
+AM_CONDITIONAL([ENABLE_LOSSGEN], [test "$enable_lossgen" = "yes"])
+
 has_float_approx=no
-#case "$host_cpu" in
-#i[[3456]]86 | x86_64 | powerpc64 | powerpc32 | ia64)
-#  has_float_approx=yes
-#  ;;
-#esac
+case "$host_cpu" in
+i[[3456]]86 | x86_64 | arm* | aarch64* | powerpc64 | powerpc32 | ia64)
+  has_float_approx=yes
+  ;;
+esac

 AC_ARG_ENABLE([float-approx],
    [AS_HELP_STRING([--enable-float-approx], [enable fast approximations for floating point])],
@ -202,7 +229,7 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
    case $host_cpu in
      arm*)
        dnl Currently we only have asm for fixed-point
-        AS_IF([test "$enable_float" != "yes"],[
+        #AS_IF([test "$enable_float" != "yes"],[
            cpu_arm=yes
            AC_DEFINE([OPUS_ARM_ASM], [],  [Make use of ARM asm optimization])
            AS_GCC_INLINE_ASSEMBLY(
@ -316,6 +343,18 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
                    )
                ])
                AC_SUBST(OPUS_ARM_MAY_HAVE_NEON)
+                AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],[
+                    AC_DEFINE(OPUS_ARM_MAY_HAVE_DOTPROD, 1,
+                      [Define if compiler supports DOTPROD instructions])
+                    AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], [
+                        AC_DEFINE(OPUS_ARM_PRESUME_DOTPROD, 1,
+                          [Define if binary requires DOTPROD instruction support])
+                        asm_optimization="$asm_optimization (DOTPROD)"
+                    ],
+                        [rtcd_support="$rtcd_support (DOTPROD)"]
+                    )
+                ])
+                AC_SUBST(OPUS_ARM_MAY_HAVE_DOTPROD)
                dnl Make sure turning on RTCD gets us at least one
                dnl instruction set.
                AS_IF([test x"$rtcd_support" != x""],
@ -336,7 +375,7 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
                  [*** ARM assembly requires perl -- disabling optimizations])
                asm_optimization="(missing perl dependency for ARM)"
            ])
-        ])
+        #])
        ;;
    esac
 ],[
@ -352,13 +391,14 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
 AM_CONDITIONAL([HAVE_SSE], [false])
 AM_CONDITIONAL([HAVE_SSE2], [false])
 AM_CONDITIONAL([HAVE_SSE4_1], [false])
-AM_CONDITIONAL([HAVE_AVX], [false])
+AM_CONDITIONAL([HAVE_AVX2], [false])

 m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
 m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
 m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
-m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
+m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
 m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
+m4_define([DEFAULT_ARM_DOTPROD_INTR_CFLAGS], ["-march=armv8.2-a+dotprod"])
 # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
 # -mfloat-abi=softfp for -mfpu=neon to work.  However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
 # this option will break things.
@ -374,14 +414,16 @@ AS_CASE([$host],
 AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
 AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
 AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
-AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
+AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
 AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
+AC_ARG_VAR([ARM_DOTPROD_INTR_CFLAGS], [C compiler flags to compile ARM DOTPROD intrinsics @<:@default=]DEFAULT_ARM_DOTPROD_INTR_CFLAGS[@:>@])

 AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
 AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
 AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
-AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
+AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
 AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
+AS_VAR_SET_IF([ARM_DOTPROD_INTR_CFLAGS], [], [AS_VAR_SET([ARM_DOTPROD_INTR_CFLAGS], ["DEFAULT_ARM_DOTPROD_INTR_CFLAGS"])])

 AC_DEFUN([OPUS_PATH_NE10],
   [
@ -525,6 +567,46 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
            intrinsics_support="$intrinsics_support (NEON [Aarch64])"
         ])

+         OPUS_CHECK_INTRINSICS(
+            [Aarch64 dotprod],
+	    [$ARM_DOTPROD_INTR_CFLAGS],
+            [OPUS_ARM_MAY_HAVE_DOTPROD],
+            [OPUS_ARM_PRESUME_DOTPROD],
+            [[#include <arm_neon.h>
+            ]],
+            [[
+               static int32x4_t acc;
+               static int8x16_t a, b;
+               acc = vdotq_s32(acc, a, b);
+            ]]
+         )
+         AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
+             [
+                dnl Reuse the flags that the DOTPROD intrinsics check was run with.
+                OPUS_ARM_DOTPROD_INTR_CFLAGS="$ARM_DOTPROD_INTR_CFLAGS"
+                AC_SUBST([OPUS_ARM_DOTPROD_INTR_CFLAGS])
+             ]
+         )
+
+         AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],
+             [
+                AC_DEFINE([OPUS_ARM_MAY_HAVE_DOTPROD], 1, [Compiler supports Aarch64 DOTPROD Intrinsics])
+                intrinsics_support="$intrinsics_support (DOTPROD)"
+
+                AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"],
+                [
+                   AC_DEFINE([OPUS_ARM_PRESUME_DOTPROD], 1, [Define if binary requires Aarch64 dotprod Intrinsics])
+                   intrinsics_support="$intrinsics_support (DOTPROD [Aarch64])"
+                ])
+
+                AS_IF([test x"$enable_rtcd" != x"no" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
+                   [AS_IF([test x"$rtcd_support" = x"no"],
+                      [rtcd_support="ARM (DOTPROD Intrinsics)"],
+                      [rtcd_support="$rtcd_support (DOTPROD Intrinsics)"])])
+
+             ]
+         )
+
+
         AS_IF([test x"$intrinsics_support" = x""],
            [intrinsics_support=no],
            [intrinsics_support="ARM$intrinsics_support"])
@ -601,24 +683,24 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
          ]
      )
      OPUS_CHECK_INTRINSICS(
-         [AVX],
-         [$X86_AVX_CFLAGS],
-         [OPUS_X86_MAY_HAVE_AVX],
-         [OPUS_X86_PRESUME_AVX],
+         [AVX2],
+         [$X86_AVX2_CFLAGS],
+         [OPUS_X86_MAY_HAVE_AVX2],
+         [OPUS_X86_PRESUME_AVX2],
         [[#include <immintrin.h>
           #include <time.h>
         ]],
         [[
             __m256 mtest;
             mtest = _mm256_set1_ps((float)time(NULL));
-             mtest = _mm256_addsub_ps(mtest, mtest);
-             return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
+             mtest = _mm256_fmadd_ps(mtest, mtest, mtest);
+             return _mm256_extract_epi16(_mm256_cvttps_epi32(mtest), 0);
         ]]
      )
-      AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
+      AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
          [
-             OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
-             AC_SUBST([OPUS_X86_AVX_CFLAGS])
+             OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
+             AC_SUBST([OPUS_X86_AVX2_CFLAGS])
          ]
      )
         AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
@ -660,17 +742,17 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
         [
            AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
         ])
-         AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
+         AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
         [
-            AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
-            intrinsics_support="$intrinsics_support AVX"
+            AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
+            intrinsics_support="$intrinsics_support AVX2"

-            AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
-               [AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
-               [rtcd_support="$rtcd_support AVX"])
+            AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
+               [AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
+               [rtcd_support="$rtcd_support AVX2"])
         ],
         [
-            AC_MSG_WARN([Compiler does not support AVX intrinsics])
+            AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
         ])

         AS_IF([test x"$intrinsics_support" = x""],
@ -742,6 +824,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
 ])

 AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
+AM_CONDITIONAL([HAVE_ARM_DOTPROD],
+    [test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"])
 AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
    [test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
 AM_CONDITIONAL([HAVE_ARM_NE10],
@ -753,8 +837,8 @@ AM_CONDITIONAL([HAVE_SSE2],
    [test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
 AM_CONDITIONAL([HAVE_SSE4_1],
    [test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
-AM_CONDITIONAL([HAVE_AVX],
-    [test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
+AM_CONDITIONAL([HAVE_AVX2],
+    [test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])

 AM_CONDITIONAL([HAVE_RTCD],
 [test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])
@ -813,6 +897,47 @@ AS_IF([test "$enable_doc" = "yes"], [
  HAVE_DOXYGEN=no
 ])

+AC_ARG_ENABLE([dot-product],
+	      AS_HELP_STRING([--disable-dot-product], [Disable dot product implementation]),,
+  enable_dot_product=yes)
+
+AS_IF([test "$enable_dot_product" = "no"], [
+       AC_DEFINE([DISABLE_DOT_PROD], [1], [Disable dot product instructions])
+])
+
+AC_ARG_ENABLE([dnn-debug-float],
+	      AS_HELP_STRING([--enable-dnn-debug-float], [Use floating-point DNN computation everywhere]),,
+  enable_dnn_debug_float=no)
+
+AS_IF([test "$enable_dnn_debug_float" = "no"], [
+       AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
+])
+
+# Dumping of OSCE training data is opt-in. The default must initialise the
+# same shell variable that the tests below read.
+AC_ARG_ENABLE([osce-training-data],
+  AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
+  [enable_osce_training_data=no]
+)
+
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+       AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
+])
+
+AC_MSG_CHECKING([argument osce training data])
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+       AC_MSG_RESULT([yes])
+], [AC_MSG_RESULT([no])])
+
+# OSCE is opt-in. It is also switched on when the training data option is on.
+AC_ARG_ENABLE([osce],
+  AS_HELP_STRING([--enable-osce], [enables Opus Speech Coding Enhancement (OSCE)]),,
+  [enable_osce=no]
+)
+
+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
+       AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
+])
+
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
+
 AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])

 AC_ARG_ENABLE([extra-programs],
--- a/dnn/LPCNet.yml
+++ b/dnn/LPCNet.yml
@ -0,0 +1,24 @@
+#
+# install
+# conda env create -f=LPCNet.yml
+#
+# update
+# conda env update -f=LPCNet.yml
+#
+# activate
+# conda activate LPCNet
+#
+# remove
+# conda remove --name LPCNet --all
+#
+name: LPCNet
+channels:
+  - anaconda
+  - conda-forge
+dependencies:
+  - keras==2.2.4
+  - python>=3.6
+  - tensorflow-gpu==1.12.0
+  - cudatoolkit
+  - h5py
+  - numpy
--- a/dnn/README
+++ b/dnn/README
@ -0,0 +1 @@
+See README.md
--- a/dnn/README.md
+++ b/dnn/README.md
@ -0,0 +1,126 @@
+# LPCNet
+
+Low complexity implementation of the WaveRNN-based LPCNet algorithm, as described in:
+
+- J.-M. Valin, J. Skoglund, [LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://jmvalin.ca/papers/lpcnet_icassp2019.pdf), *Proc. International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, arXiv:1810.11846, 2019.
+- J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [Neural Speech Synthesis on a Shoestring: Improving the Efficiency of LPCNet](https://jmvalin.ca/papers/improved_lpcnet.pdf), *Proc. ICASSP*, arxiv:2106.04129, 2022.
+- K. Subramani, J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [End-to-end LPCNet: A Neural Vocoder With Fully-Differentiable LPC Estimation](https://jmvalin.ca/papers/lpcnet_end2end.pdf), *Proc. INTERSPEECH*, arxiv:2106.04129, 2022.
+
+For coding/PLC applications of LPCNet, see:
+
+- J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
+- J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
+- J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
+- J.-M. Valin, J. Büthe, A. Mustafa, [Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder](https://jmvalin.ca/papers/valin_dred.pdf), *Proc. ICASSP*, arXiv:2212.04453, 2023. ([blog post](https://www.amazon.science/blog/neural-encoding-enables-more-efficient-recovery-of-lost-audio-packets))
+
+# Introduction
+
+Work in progress software for researching low CPU complexity algorithms for speech synthesis and compression by applying Linear Prediction techniques to WaveRNN. High quality speech can be synthesised on regular CPUs (around 3 GFLOP) with SIMD support (SSE2, SSSE3, AVX, AVX2/FMA, NEON currently supported). The code also supports very low bitrate compression at 1.6 kb/s.
+
+The BSD licensed software is written in C and Python/Keras. For training, a GTX 1080 Ti or better is recommended.
+
+This software is an open source starting point for LPCNet/WaveRNN-based speech synthesis and coding.
+
+# Using the existing software
+
+You can build the code using:
+
+```
+./autogen.sh
+./configure
+make
+```
+Note that the autogen.sh script is used when building from Git and will automatically download the latest model
+(models are too large to put in Git). By default, LPCNet will attempt to use 8-bit dot product instructions on AVX\*/Neon to
+speed up inference. To disable that (e.g. to avoid quantization effects when retraining), add --disable-dot-product to the
+configure script. LPCNet does not yet have a complete implementation for some of the integer operations on the ARMv7
+architecture so for now you will also need --disable-dot-product to successfully compile on 32-bit ARM.
+
+It is highly recommended to set the CFLAGS environment variable to enable AVX or NEON *prior* to running configure, otherwise
+no vectorization will take place and the code will be very slow. On a recent x86 CPU, something like
+```
+export CFLAGS='-Ofast -g -march=native'
+```
+should work. On ARM, you can enable Neon with:
+```
+export CFLAGS='-Ofast -g -mfpu=neon'
+```
+While not strictly required, the -Ofast flag will help with auto-vectorization, especially for dot products that
+cannot be optimized without -ffast-math (which -Ofast enables). Additionally, -falign-loops=32 has been shown to
+help on x86.
+
+You can test the capabilities of LPCNet using the lpcnet\_demo application. To encode a file:
+```
+./lpcnet_demo -encode input.pcm compressed.bin
+```
+where input.pcm is a 16-bit (machine endian) PCM file sampled at 16 kHz. The raw compressed data (no header)
+is written to compressed.bin and consists of 8 bytes per 40-ms packet.
+
+To decode:
+```
+./lpcnet_demo -decode compressed.bin output.pcm
+```
+where output.pcm is also 16-bit, 16 kHz PCM.
+
+Alternatively, you can run the uncompressed analysis/synthesis using -features
+instead of -encode and -synthesis instead of -decode.
+The same functionality is available in the form of a library. See include/lpcnet.h for the API.
+
+To try packet loss concealment (PLC), you first need a PLC model, which you can get with:
+```
+./download_model.sh plc-3b1eab4
+```
+or (for the PLC challenge submission):
+```
+./download_model.sh plc_challenge
+```
+PLC can be tested with:
+```
+./lpcnet_demo -plc_file noncausal_dc error_pattern.txt input.pcm output.pcm
+```
+where error_pattern.txt is a text file with one entry per 20-ms packet, with 1 meaning "packet lost" and 0 meaning "packet not lost".
+noncausal_dc is the non-causal (5-ms look-ahead) mode with special handling for DC offsets. It's also possible to use "noncausal", "causal",
+or "causal_dc".
+
+# Training a new model
+
+This codebase is also meant for research and it is possible to train new models. These are the steps to do that:
+
+1. Set up a Keras system with GPU.
+
+1. Generate training data:
+   ```
+   ./dump_data -train input.s16 features.f32 data.s16
+   ```
+   where the first file contains 16 kHz 16-bit raw PCM audio (no header) and the other files are output files. This program makes several passes over the data with different filters to generate a large amount of training data.
+
+1. Now that you have your files, train with:
+   ```
+   python3 training_tf2/train_lpcnet.py features.f32 data.s16 model_name
+   ```
+   and it will generate an h5 file for each iteration, with model\_name as prefix. If it stops with a
+   "Failed to allocate RNN reserve space" message try specifying a smaller --batch-size for  train\_lpcnet.py.
+
+1. You can synthesise speech with Python and your GPU card (very slow):
+   ```
+   ./dump_data -test test_input.s16 test_features.f32
+   ./training_tf2/test_lpcnet.py lpcnet_model_name.h5 test_features.f32 test.s16
+   ```
+
+1. Or with C on a CPU (C inference is much faster):
+   First extract the model files nnet\_data.h and nnet\_data.c
+   ```
+   ./training_tf2/dump_lpcnet.py lpcnet_model_name.h5
+   ```
+   and move the generated nnet\_data.\* files to the src/ directory.
+   Then you just need to rebuild the software and use lpcnet\_demo as explained above.
+
+# Speech Material for Training
+
+Suitable training material can be obtained from [Open Speech and Language Resources](https://www.openslr.org/).  See the datasets.txt file for details on suitable training data.
+
+# Reading Further
+
+1. [LPCNet: DSP-Boosted Neural Speech Synthesis](https://people.xiph.org/~jm/demo/lpcnet/)
+1. [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://people.xiph.org/~jm/demo/lpcnet_codec/)
+1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/
--- a/dnn/adaconvtest.c
+++ b/dnn/adaconvtest.c
@ -0,0 +1,449 @@
+#include "lace_data.h"
+#include "nolace_data.h"
+#include "osce.h"
+#include "nndsp.h"
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+/* Compare adaconv_process_frame() against stored reference output.
+ *
+ * Reads num_frames frames of raw floats from <prefix>_features.f32,
+ * <prefix>_x_in.f32 and <prefix>_x_out.f32, runs every frame through
+ * adaconv_process_frame() and prints the per-frame RMSE between the computed
+ * and the reference output. All filter parameters are forwarded unchanged.
+ *
+ * Exits with status 1 when a file cannot be opened or is too short, or when
+ * the prefix or the requested dimensions do not fit the local buffers. */
+void adaconv_compare(
+    const char * prefix,
+    int num_frames,
+    AdaConvState* hAdaConv,
+    LinearLayer *kernel_layer,
+    LinearLayer *gain_layer,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int in_channels,
+    int out_channels,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float shape_gain
+)
+{
+    char feature_file[256];
+    char x_in_file[256];
+    char x_out_file[256];
+    char message[512];
+    int i_frame, i_sample;
+    float mse;
+    float features[512];
+    float x_in[512];
+    float x_out_ref[512];
+    float x_out[512];
+    float window[40];
+    FILE *f_features, *f_x_in, *f_x_out;
+
+    /* "_features.f32" is the longest suffix appended to the prefix below. */
+    if (strlen(prefix) > sizeof(feature_file) - sizeof("_features.f32"))
+    {
+        fprintf(stderr, "prefix too long: %s\n", prefix);
+        exit(1);
+    }
+
+    /* The frame buffers hold 512 floats each; refuse anything larger instead
+       of letting fread() write past their end. */
+    if (feature_dim < 0 || feature_dim > 512
+        || frame_size < 0 || in_channels < 0 || out_channels < 0
+        || frame_size * in_channels > 512
+        || frame_size * out_channels > 512)
+    {
+        fprintf(stderr, "unsupported dimensions for %s\n", prefix);
+        exit(1);
+    }
+
+    init_adaconv_state(hAdaConv);
+    /* NOTE(review): the window is always built for 40 samples regardless of
+       overlap_size - confirm every caller uses an overlap of 40. */
+    compute_overlap_window(window, 40);
+
+    /* The input files contain raw binary data, hence the "rb" mode. */
+    strcpy(feature_file, prefix);
+    strcat(feature_file, "_features.f32");
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        sprintf(message, "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(x_in_file, prefix);
+    strcat(x_in_file, "_x_in.f32");
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        sprintf(message, "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(x_out_file, prefix);
+    strcat(x_out_file, "_x_out.f32");
+    f_x_out = fopen(x_out_file, "rb");
+    if (f_x_out == NULL)
+    {
+        sprintf(message, "could not open file %s", x_out_file);
+        perror(message);
+        exit(1);
+    }
+
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        if (fread(features, sizeof(float), feature_dim, f_features) != (size_t)feature_dim)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+            exit(1);
+        }
+
+        if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != (size_t)(frame_size * in_channels))
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+            exit(1);
+        }
+
+        if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != (size_t)(frame_size * out_channels))
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+            exit(1);
+        }
+
+        adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
+            frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
+            filter_gain_a, filter_gain_b, shape_gain, window, 0);
+
+        /* Accumulate the squared error, then report its root mean. */
+        mse = 0;
+        for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)
+        {
+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+        }
+        mse = sqrt(mse / (frame_size * out_channels));
+        printf("rmse[%d] %f\n", i_frame, mse);
+
+    }
+
+    fclose(f_features);
+    fclose(f_x_in);
+    fclose(f_x_out);
+}
+
+
+/* Compare adacomb_process_frame() against stored reference output.
+ *
+ * Reads num_frames frames from <prefix>_features.f32, <prefix>_x_in.f32,
+ * <prefix>_p_in.s32 (one pitch lag per frame) and <prefix>_x_out.f32, runs
+ * every frame through adacomb_process_frame() and prints the per-frame RMSE
+ * between the computed and the reference output. All filter parameters are
+ * forwarded unchanged.
+ *
+ * Exits with status 1 when a file cannot be opened or is too short, or when
+ * the prefix or the requested dimensions do not fit the local buffers. */
+void adacomb_compare(
+    const char * prefix,
+    int num_frames,
+    AdaCombState* hAdaComb,
+    LinearLayer *kernel_layer,
+    LinearLayer *gain_layer,
+    LinearLayer *global_gain_layer,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float log_gain_limit
+)
+{
+    char feature_file[256];
+    char x_in_file[256];
+    char p_in_file[256];
+    char x_out_file[256];
+    char message[512];
+    int i_frame, i_sample;
+    float mse;
+    float features[512];
+    float x_in[512];
+    float x_out_ref[512];
+    float x_out[512];
+    int pitch_lag;
+    float window[40];
+    FILE *f_features, *f_x_in, *f_p_in, *f_x_out;
+
+    /* "_features.f32" is the longest suffix appended to the prefix below. */
+    if (strlen(prefix) > sizeof(feature_file) - sizeof("_features.f32"))
+    {
+        fprintf(stderr, "prefix too long: %s\n", prefix);
+        exit(1);
+    }
+
+    /* The frame buffers hold 512 floats each; refuse anything larger instead
+       of letting fread() write past their end. */
+    if (feature_dim < 0 || feature_dim > 512 || frame_size < 0 || frame_size > 512)
+    {
+        fprintf(stderr, "unsupported dimensions for %s\n", prefix);
+        exit(1);
+    }
+
+    init_adacomb_state(hAdaComb);
+    /* NOTE(review): the window is always built for 40 samples regardless of
+       overlap_size - confirm every caller uses an overlap of 40. */
+    compute_overlap_window(window, 40);
+
+    /* The input files contain raw binary data, hence the "rb" mode. */
+    strcpy(feature_file, prefix);
+    strcat(feature_file, "_features.f32");
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        sprintf(message, "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(x_in_file, prefix);
+    strcat(x_in_file, "_x_in.f32");
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        sprintf(message, "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(p_in_file, prefix);
+    strcat(p_in_file, "_p_in.s32");
+    f_p_in = fopen(p_in_file, "rb");
+    if (f_p_in == NULL)
+    {
+        sprintf(message, "could not open file %s", p_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(x_out_file, prefix);
+    strcat(x_out_file, "_x_out.f32");
+    f_x_out = fopen(x_out_file, "rb");
+    if (f_x_out == NULL)
+    {
+        sprintf(message, "could not open file %s", x_out_file);
+        perror(message);
+        exit(1);
+    }
+
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        if (fread(features, sizeof(float), feature_dim, f_features) != (size_t)feature_dim)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+            exit(1);
+        }
+
+        if (fread(x_in, sizeof(float), frame_size, f_x_in) != (size_t)frame_size)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+            exit(1);
+        }
+
+        /* NOTE(review): reads sizeof(int) bytes from a ".s32" file, which
+           presumably assumes a 32-bit int - confirm. */
+        if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file);
+            exit(1);
+        }
+
+        if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != (size_t)frame_size)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+            exit(1);
+        }
+
+        adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
+            pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);
+
+
+        /* Accumulate the squared error, then report its root mean. */
+        mse = 0;
+        for (i_sample = 0; i_sample < frame_size; i_sample ++)
+        {
+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+        }
+        mse = sqrt(mse / (frame_size));
+        printf("rmse[%d] %f\n", i_frame, mse);
+
+    }
+
+    fclose(f_features);
+    fclose(f_x_in);
+    fclose(f_p_in);
+    fclose(f_x_out);
+}
+
+/* Compare adashape_process_frame() against stored reference output.
+ *
+ * Reads num_frames frames of raw floats from <prefix>_features.f32,
+ * <prefix>_x_in.f32 and <prefix>_x_out.f32, runs every frame through
+ * adashape_process_frame() and prints the per-frame RMSE between the computed
+ * and the reference output.
+ *
+ * Exits with status 1 when a file cannot be opened or is too short, or when
+ * the prefix or the requested dimensions do not fit the local buffers. */
+void adashape_compare(
+    const char * prefix,
+    int num_frames,
+    AdaShapeState* hAdaShape,
+    LinearLayer *alpha1,
+    LinearLayer *alpha2,
+    int feature_dim,
+    int frame_size,
+    int avg_pool_k
+)
+{
+    char feature_file[256];
+    char x_in_file[256];
+    char x_out_file[256];
+    char message[512];
+    int i_frame, i_sample;
+    float mse;
+    float features[512];
+    float x_in[512];
+    float x_out_ref[512];
+    float x_out[512];
+    FILE *f_features, *f_x_in, *f_x_out;
+
+    /* "_features.f32" is the longest suffix appended to the prefix below. */
+    if (strlen(prefix) > sizeof(feature_file) - sizeof("_features.f32"))
+    {
+        fprintf(stderr, "prefix too long: %s\n", prefix);
+        exit(1);
+    }
+
+    /* The frame buffers hold 512 floats each; refuse anything larger instead
+       of letting fread() write past their end. */
+    if (feature_dim < 0 || feature_dim > 512 || frame_size < 0 || frame_size > 512)
+    {
+        fprintf(stderr, "unsupported dimensions for %s\n", prefix);
+        exit(1);
+    }
+
+    init_adashape_state(hAdaShape);
+
+    /* The input files contain raw binary data, hence the "rb" mode. */
+    strcpy(feature_file, prefix);
+    strcat(feature_file, "_features.f32");
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        sprintf(message, "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(x_in_file, prefix);
+    strcat(x_in_file, "_x_in.f32");
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        sprintf(message, "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    strcpy(x_out_file, prefix);
+    strcat(x_out_file, "_x_out.f32");
+    f_x_out = fopen(x_out_file, "rb");
+    if (f_x_out == NULL)
+    {
+        sprintf(message, "could not open file %s", x_out_file);
+        perror(message);
+        exit(1);
+    }
+
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        if (fread(features, sizeof(float), feature_dim, f_features) != (size_t)feature_dim)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+            exit(1);
+        }
+
+        if (fread(x_in, sizeof(float), frame_size, f_x_in) != (size_t)frame_size)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+            exit(1);
+        }
+
+        if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != (size_t)frame_size)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+            exit(1);
+        }
+
+        adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
+            frame_size, avg_pool_k, 0);
+
+        /* Accumulate the squared error, then report its root mean. */
+        mse = 0;
+        for (i_sample = 0; i_sample < frame_size; i_sample ++)
+        {
+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+        }
+        mse = sqrt(mse / (frame_size));
+        printf("rmse[%d] %f\n", i_frame, mse);
+
+    }
+
+    fclose(f_features);
+    fclose(f_x_in);
+    fclose(f_x_out);
+}
+
+
+/* Test driver: initialises the LACE and NoLACE layer sets from their weight
+ * arrays and runs the adaptive convolution, adaptive comb and adaptive
+ * shaping comparisons on five frames each, using the reference vectors found
+ * under testvectors/. Each comparison prints its per-frame RMSE. */
+int main(void)
+{
+    LACELayers hLACE;
+    NOLACELayers hNoLACE;
+
+    AdaConvState hAdaConv;
+    AdaCombState hAdaComb;
+    AdaShapeState hAdaShape;
+
+    init_adaconv_state(&hAdaConv);
+
+    init_lacelayers(&hLACE, lacelayers_arrays);
+    init_nolacelayers(&hNoLACE, nolacelayers_arrays);
+
+    printf("\ntesting lace.af1 (1 in, 1 out)...\n");
+    adaconv_compare(
+        "testvectors/lace_af1",
+        5,
+        &hAdaConv,
+        &hLACE.lace_af1_kernel,
+        &hLACE.lace_af1_gain,
+        LACE_AF1_FEATURE_DIM,
+        LACE_AF1_FRAME_SIZE,
+        LACE_AF1_OVERLAP_SIZE,
+        LACE_AF1_IN_CHANNELS,
+        LACE_AF1_OUT_CHANNELS,
+        LACE_AF1_KERNEL_SIZE,
+        LACE_AF1_LEFT_PADDING,
+        LACE_AF1_FILTER_GAIN_A,
+        LACE_AF1_FILTER_GAIN_B,
+        LACE_AF1_SHAPE_GAIN
+    );
+
+
+    printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
+    adaconv_compare(
+        "testvectors/nolace_af1",
+        5,
+        &hAdaConv,
+        &hNoLACE.nolace_af1_kernel,
+        &hNoLACE.nolace_af1_gain,
+        NOLACE_AF1_FEATURE_DIM,
+        NOLACE_AF1_FRAME_SIZE,
+        NOLACE_AF1_OVERLAP_SIZE,
+        NOLACE_AF1_IN_CHANNELS,
+        NOLACE_AF1_OUT_CHANNELS,
+        NOLACE_AF1_KERNEL_SIZE,
+        NOLACE_AF1_LEFT_PADDING,
+        NOLACE_AF1_FILTER_GAIN_A,
+        NOLACE_AF1_FILTER_GAIN_B,
+        NOLACE_AF1_SHAPE_GAIN
+    );
+
+
+    /* Leading newline added so this banner matches the other test banners. */
+    printf("\ntesting nolace.af4 (2 in, 1 out)...\n");
+    adaconv_compare(
+        "testvectors/nolace_af4",
+        5,
+        &hAdaConv,
+        &hNoLACE.nolace_af4_kernel,
+        &hNoLACE.nolace_af4_gain,
+        NOLACE_AF4_FEATURE_DIM,
+        NOLACE_AF4_FRAME_SIZE,
+        NOLACE_AF4_OVERLAP_SIZE,
+        NOLACE_AF4_IN_CHANNELS,
+        NOLACE_AF4_OUT_CHANNELS,
+        NOLACE_AF4_KERNEL_SIZE,
+        NOLACE_AF4_LEFT_PADDING,
+        NOLACE_AF4_FILTER_GAIN_A,
+        NOLACE_AF4_FILTER_GAIN_B,
+        NOLACE_AF4_SHAPE_GAIN
+    );
+
+    printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
+    adaconv_compare(
+        "testvectors/nolace_af2",
+        5,
+        &hAdaConv,
+        &hNoLACE.nolace_af2_kernel,
+        &hNoLACE.nolace_af2_gain,
+        NOLACE_AF2_FEATURE_DIM,
+        NOLACE_AF2_FRAME_SIZE,
+        NOLACE_AF2_OVERLAP_SIZE,
+        NOLACE_AF2_IN_CHANNELS,
+        NOLACE_AF2_OUT_CHANNELS,
+        NOLACE_AF2_KERNEL_SIZE,
+        NOLACE_AF2_LEFT_PADDING,
+        NOLACE_AF2_FILTER_GAIN_A,
+        NOLACE_AF2_FILTER_GAIN_B,
+        NOLACE_AF2_SHAPE_GAIN
+    );
+
+    printf("\ntesting lace.cf1...\n");
+    adacomb_compare(
+        "testvectors/lace_cf1",
+        5,
+        &hAdaComb,
+        &hLACE.lace_cf1_kernel,
+        &hLACE.lace_cf1_gain,
+        &hLACE.lace_cf1_global_gain,
+        LACE_CF1_FEATURE_DIM,
+        LACE_CF1_FRAME_SIZE,
+        LACE_CF1_OVERLAP_SIZE,
+        LACE_CF1_KERNEL_SIZE,
+        LACE_CF1_LEFT_PADDING,
+        LACE_CF1_FILTER_GAIN_A,
+        LACE_CF1_FILTER_GAIN_B,
+        LACE_CF1_LOG_GAIN_LIMIT
+    );
+
+    printf("\ntesting nolace.tdshape1...\n");
+    adashape_compare(
+        "testvectors/nolace_tdshape1",
+        5,
+        &hAdaShape,
+        &hNoLACE.nolace_tdshape1_alpha1,
+        &hNoLACE.nolace_tdshape1_alpha2,
+        NOLACE_TDSHAPE1_FEATURE_DIM,
+        NOLACE_TDSHAPE1_FRAME_SIZE,
+        NOLACE_TDSHAPE1_AVG_POOL_K
+    );
+
+    return 0;
+}
+
+/* gcc -DVAR_ARRAYS -DENABLE_OSCE  -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */
--- a/dnn/arm/arm_dnn_map.c
+++ b/dnn/arm/arm_dnn_map.c
@ -0,0 +1,88 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arm/armcpu.h"
+#include "nnet.h"
+
+#if defined(OPUS_HAVE_RTCD)
+/* Run-time CPU dispatch tables for the DNN kernels. dnn_arm.h indexes each
+   table with (arch & OPUS_ARCHMASK). The first three slots use the generic C
+   implementation; the last two are chosen by the MAY_HAVE_NEON and
+   MAY_HAVE_DOTPROD macros, which are defined outside this file. */
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
+/* Dispatch table behind the compute_linear() macro.
+   NOTE(review): dnn_arm.h references this table whenever RTCD is on and
+   either OPUS_ARM_MAY_HAVE_DOTPROD or OPUS_ARM_MAY_HAVE_NEON is defined
+   (unless one of its PRESUME branches applies), but the table is only
+   defined here when OPUS_ARM_MAY_HAVE_DOTPROD is defined - confirm that
+   NEON-only RTCD builds still link. */
+void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+         const LinearLayer *linear,
+         float *out,
+         const float *in
+) = {
+  compute_linear_c,                /* default */
+  compute_linear_c,
+  compute_linear_c,
+  MAY_HAVE_NEON(compute_linear),   /* neon  */
+  MAY_HAVE_DOTPROD(compute_linear) /* dotprod  */
+};
+
+#endif
+/* Dispatch tables behind the compute_activation() and compute_conv2d()
+   macros. They are omitted when OPUS_ARM_PRESUME_NEON is defined, in which
+   case dnn_arm.h calls the _neon functions directly. */
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
+
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+         float *output,
+         const float *input,
+         int N,
+         int activation
+) = {
+    compute_activation_c,                /* default */
+    compute_activation_c,
+    compute_activation_c,
+    MAY_HAVE_NEON(compute_activation),   /* neon  */
+    MAY_HAVE_DOTPROD(compute_activation) /* dotprod  */
+};
+
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+         const Conv2dLayer *conv,
+         float *out,
+         float *mem,
+         const float *in,
+         int height,
+         int hstride,
+         int activation
+) = {
+    compute_conv2d_c,                /* default */
+    compute_conv2d_c,
+    compute_conv2d_c,
+    MAY_HAVE_NEON(compute_conv2d),   /* neon  */
+    MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod  */
+};
+
+
+#endif
+
+
+#endif
--- a/dnn/arm/dnn_arm.h
+++ b/dnn/arm/dnn_arm.h
@ -0,0 +1,104 @@
+/* Copyright (c) 2011-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DNN_ARM_H
+#define DNN_ARM_H
+/* ARM overrides for the DNN kernels: declares the NEON and dotprod variants
+   and maps compute_linear(), compute_activation() and compute_conv2d() to a
+   fixed variant or to a run-time dispatch table. */
+#include "cpu_support.h"
+#include "opus_types.h"
+/* NOTE(review): LinearLayer and Conv2dLayer are not declared by anything
+   visibly included here; presumably this header is only included after the
+   layer types have been declared - confirm. */
+void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
+void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
+
+void compute_activation_neon(float *output, const float *input, int N, int activation);
+void compute_activation_dotprod(float *output, const float *input, int N, int activation);
+
+void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+/* compute_linear(): call the dotprod variant directly when dotprod is
+   presumed, the NEON variant directly when NEON intrinsics are presumed and
+   dotprod is not available, and otherwise dispatch through
+   DNN_COMPUTE_LINEAR_IMPL when RTCD is enabled. The direct calls evaluate
+   and discard arch. */
+#if defined(OPUS_ARM_PRESUME_DOTPROD)
+
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
+
+#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+                    const LinearLayer *linear,
+                    float *out,
+                    const float *in
+                    );
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) \
+    ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
+
+
+#endif
+/* compute_activation() and compute_conv2d(): call the NEON variants directly
+   when NEON is presumed, otherwise dispatch through the tables when RTCD is
+   enabled.
+   NOTE(review): this branch tests OPUS_ARM_PRESUME_NEON while the
+   compute_linear() selection tests OPUS_ARM_PRESUME_NEON_INTR - confirm the
+   difference is intended. */
+#if defined(OPUS_ARM_PRESUME_NEON)
+
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+                    float *output,
+                    const float *input,
+                    int N,
+                    int activation
+                    );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+    ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
+
+
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+                    const Conv2dLayer *conv,
+                    float *out,
+                    float *mem,
+                    const float *in,
+                    int height,
+                    int hstride,
+                    int activation
+                    );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+    ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
+#endif
+
+
+#endif /* DNN_ARM_H */
--- a/dnn/arm/nnet_dotprod.c
+++ b/dnn/arm/nnet_dotprod.c
@ -0,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Refuse to build unless the compiler has the dot-product extension enabled
+   for this translation unit. */
+#ifndef __ARM_FEATURE_DOTPROD
+#error nnet_dotprod.c is being compiled without DOTPROD enabled
+#endif
+
+/* Tag consumed by nnet_arch.h; presumably used to name the functions it
+   instantiates for this architecture -- confirm against nnet_arch.h. */
+#define RTCD_ARCH dotprod
+
+#include "nnet_arch.h"
--- a/dnn/arm/nnet_neon.c
+++ b/dnn/arm/nnet_neon.c
@ -0,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Refuse to build unless the compiler has Neon enabled for this translation
+   unit (either spelling of the predefined macro is accepted). */
+#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
+#error nnet_neon.c is being compiled without Neon enabled
+#endif
+
+/* Tag consumed by nnet_arch.h; presumably used to name the functions it
+   instantiates for this architecture -- confirm against nnet_arch.h. */
+#define RTCD_ARCH neon
+
+#include "nnet_arch.h"
--- a/dnn/burg.c
+++ b/dnn/burg.c
@ -0,0 +1,245 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+
+#include "burg.h"
+
+#define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
+#define SILK_MAX_ORDER_LPC          16
+#define FIND_LPC_COND_FAC           1e-5f
+
+/* Energy of a float vector: sum of data[i]^2 over dataSize samples,
+   accumulated and returned in double precision */
+static double silk_energy_FLP(
+    const float    *data,
+    int            dataSize
+)
+{
+    int    idx = 0;
+    double energy = 0.0;
+
+    /* Main part: consume four samples per iteration */
+    while( idx < dataSize - 3 ) {
+        const float *p = data + idx;
+        energy += p[ 0 ] * (double)p[ 0 ] +
+                  p[ 1 ] * (double)p[ 1 ] +
+                  p[ 2 ] * (double)p[ 2 ] +
+                  p[ 3 ] * (double)p[ 3 ];
+        idx += 4;
+    }
+
+    /* Tail: the zero to three samples left over */
+    while( idx < dataSize ) {
+        energy += data[ idx ] * (double)data[ idx ];
+        idx++;
+    }
+
+    assert( energy >= 0.0 );
+    return energy;
+}
+
+/* Dot product of two float vectors of length dataSize, accumulated and
+   returned in double precision */
+static double silk_inner_product_FLP(
+    const float    *data1,
+    const float    *data2,
+    int            dataSize
+)
+{
+    int    idx = 0;
+    double acc = 0.0;
+
+    /* Main part: consume four sample pairs per iteration */
+    while( idx < dataSize - 3 ) {
+        const float *a = data1 + idx;
+        const float *b = data2 + idx;
+        acc += a[ 0 ] * (double)b[ 0 ] +
+               a[ 1 ] * (double)b[ 1 ] +
+               a[ 2 ] * (double)b[ 2 ] +
+               a[ 3 ] * (double)b[ 3 ];
+        idx += 4;
+    }
+
+    /* Tail: the zero to three pairs left over */
+    while( idx < dataSize ) {
+        acc += data1[ idx ] * (double)data2[ idx ];
+        idx++;
+    }
+
+    return acc;
+}
+
+
+/* Compute prediction coefficients of order D from the input signal by
+   iterating over reflection coefficients, one order per outer-loop pass.
+   The signal consists of nb_subfr subframes of subfr_length samples each,
+   stacked back to back in x. The inverse prediction gain is not allowed to
+   fall below minInvGain: when it would, the current reflection coefficient is
+   adjusted to hit the limit exactly and all higher-order coefficients are set
+   to zero.
+   All work arrays are sized by SILK_MAX_ORDER_LPC and indexed up to D, so D
+   must not exceed SILK_MAX_ORDER_LPC; this is not checked here.
+   Returns the residual energy as a float. */
+float silk_burg_analysis(              /* O    returns residual energy                                     */
+    float          A[],                /* O    prediction coefficients (length order)                      */
+    const float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const int      D                   /* I    order                                                       */
+)
+{
+    int         k, n, s, reached_max_gain;
+    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+    const float *x_ptr;
+    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
+    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    /* Af is deliberately left uninitialized: iteration n only reads Af[ k ] for
+       k < n, and each earlier iteration has stored Af[ n ] = rc */
+    double           Af[ SILK_MAX_ORDER_LPC ];
+
+    assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    /* C0 is the energy of the whole input; C_first_row[ n - 1 ] is the lag-n
+       correlation computed within each subframe and summed over subframes */
+    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
+    memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+    for( s = 0; s < nb_subfr; s++ ) {
+        x_ptr = x + s * subfr_length;
+        for( n = 1; n < D + 1; n++ ) {
+            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+        }
+    }
+    memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+
+    /* Initialize */
+    /* The zero-lag term is inflated by FIND_LPC_COND_FAC * C0 plus a tiny
+       constant, which also keeps it strictly positive when C0 is zero */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
+    invGain = 1.0f;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            /* tmp1 / tmp2 accumulate the sample n from the start / end of the
+               subframe plus the Af-weighted neighbouring samples */
+            tmp1 = x_ptr[ n ];
+            tmp2 = x_ptr[ subfr_length - n - 1 ];
+            for( k = 0; k < n; k++ ) {
+                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
+                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
+                Atmp = Af[ k ];
+                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
+                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+            }
+            for( k = 0; k <= n; k++ ) {
+                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
+                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+            }
+        }
+        /* Extend CAf / CAb by one element for the new order */
+        tmp1 = C_first_row[ n ];
+        tmp2 = C_last_row[ n ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            tmp1 += C_last_row[  n - k - 1 ] * Atmp;
+            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
+        }
+        CAf[ n + 1 ] = tmp1;
+        CAb[ n + 1 ] = tmp2;
+
+        /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
+        num = CAb[ n + 1 ];
+        nrg_b = CAb[ 0 ];
+        nrg_f = CAf[ 0 ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            num   += CAb[ n - k ] * Atmp;
+            nrg_b += CAb[ k + 1 ] * Atmp;
+            nrg_f += CAf[ k + 1 ] * Atmp;
+        }
+        assert( nrg_f > 0.0 );
+        assert( nrg_b > 0.0 );
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        rc = -2.0 * num / ( nrg_f + nrg_b );
+        assert( rc > -1.0 && rc < 1.0 );
+
+        /* Update inverse prediction gain */
+        tmp1 = invGain * ( 1.0 - rc * rc );
+        if( tmp1 <= minInvGain ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            rc = sqrt( 1.0 - minInvGain / invGain );
+            if( num > 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                /* (rc = -2 * num / (positive), so a positive num means a negative rc) */
+                rc = -rc;
+            }
+            invGain = minInvGain;
+            reached_max_gain = 1;
+        } else {
+            invGain = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        /* Entries k and n - k - 1 are updated as a pair; when n is odd the
+           middle entry pairs with itself */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af[ k ];
+            tmp2 = Af[ n - k - 1 ];
+            Af[ k ]         = tmp1 + rc * tmp2;
+            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+        }
+        Af[ n ] = rc;
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af[ k ] = 0.0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        /* tmp1 keeps the old CAf[ k ] because the two arrays are updated from
+           each other in the same pass */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];
+            CAf[ k ]          += rc * CAb[ n - k + 1 ];
+            CAb[ n - k + 1  ] += rc * tmp1;
+        }
+    }
+
+    if( reached_max_gain ) {
+        /* Convert to float */
+        /* (the sign is flipped when storing into A) */
+        for( k = 0; k < D; k++ ) {
+            A[ k ] = (float)( -Af[ k ] );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        for( s = 0; s < nb_subfr; s++ ) {
+            C0 -= silk_energy_FLP( x + s * subfr_length, D );
+        }
+        /* Approximate residual energy */
+        nrg_f = C0 * invGain;
+    } else {
+        /* Compute residual energy and store coefficients as float */
+        nrg_f = CAf[ 0 ];
+        tmp1 = 1.0;
+        for( k = 0; k < D; k++ ) {
+            Atmp = Af[ k ];
+            nrg_f += CAf[ k + 1 ] * Atmp;
+            tmp1  += Atmp * Atmp;
+            A[ k ] = (float)(-Atmp);
+        }
+        /* Remove the contribution of the conditioning term added to the
+           zero-lag value at initialization; tmp1 is 1 + sum of Af[ k ]^2 */
+        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
+    }
+
+    /* Return residual energy */
+    return (float)nrg_f;
+}
--- a/win32/config.h
+++ b/win32/config.h
@ -1,5 +1,5 @@
 /***********************************************************************
-Copyright (c) 2011, Skype Limited. All rights reserved.
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
@ -25,40 +25,17 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/

-#ifndef CONFIG_H
-#define CONFIG_H
+#ifndef BURG_H
+#define BURG_H

-#define USE_ALLOCA            1

-/* Comment out the next line for floating-point code */
-/*#define FIXED_POINT           1 */
-
-#define OPUS_BUILD            1
-
-#if defined(_M_IX86) || defined(_M_X64)
-/* Can always compile SSE intrinsics (no special compiler flags necessary) */
-#define OPUS_X86_MAY_HAVE_SSE
-#define OPUS_X86_MAY_HAVE_SSE2
-#define OPUS_X86_MAY_HAVE_SSE4_1
-
-/* Presume SSE functions, if compiled to use SSE/SSE2/AVX (note that AMD64 implies SSE2, and AVX
-   implies SSE4.1) */
-#if defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(__AVX__)
-#define OPUS_X86_PRESUME_SSE 1
-#endif
-#if defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__AVX__)
-#define OPUS_X86_PRESUME_SSE2 1
-#endif
-#if defined(__AVX__)
-#define OPUS_X86_PRESUME_SSE4_1 1
-#endif
-
-#if !defined(OPUS_X86_PRESUME_SSE4_1) || !defined(OPUS_X86_PRESUME_SSE2) || !defined(OPUS_X86_PRESUME_SSE)
-#define OPUS_HAVE_RTCD 1
-#endif
+float silk_burg_analysis(              /* O    returns residual energy                                     */
+    float          A[],                /* O    prediction coefficients (length order)                      */
+    const float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const int      D                   /* I    order                                                       */
+);

 #endif
-
-#include "version.h"
-
-#endif /* CONFIG_H */
--- a/dnn/common.h
+++ b/dnn/common.h
@ -0,0 +1,56 @@
+
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "opus_defines.h"
+
+#define LOG256 5.5451774445f
+/* Fast approximation of log2(x).
+   The float is reinterpreted as an integer to split off the exponent field;
+   the remaining value is then run through a cubic polynomial in
+   (mantissa - 1.5). The sign bit is not masked off, so the result is only
+   meaningful for positive inputs. Assumes int and float have the same size
+   (they share the union). */
+static OPUS_INLINE float log2_approx(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      int i;
+   } in;
+   in.f = x;
+   /* Unbiased exponent (bias 127, exponent field starts at bit 23) */
+   integer = (in.i>>23)-127;
+   /* Reset the exponent so that in.f lies in [1, 2). The subtraction is done
+      on unsigned values: integer is negative for x < 1 and left-shifting a
+      negative int is undefined behaviour in C. */
+   in.i = (int)((unsigned)in.i - ((unsigned)integer<<23));
+   frac = in.f - 1.5f;
+   frac = -0.41445418f + frac*(0.95909232f
+          + frac*(-0.33951290f + frac*0.16541097f));
+   return 1+integer+frac;
+}
+
+#define log_approx(x) (0.69315f*log2_approx(x))
+
+/* Map a mu-law code value u to a linear sample: the code is centred on 128,
+   and the result is sign * (32768/255) * (exp(|u - 128|/128 * LOG256) - 1). */
+static OPUS_INLINE float ulaw2lin(float u)
+{
+    const float gain = 32768.f/255.f;
+    float centred;
+    float sign;
+    float magnitude;
+    centred = u - 128.f;
+    if (centred >= 0.f) {
+        sign = 1.f;
+    } else {
+        sign = -1.f;
+    }
+    magnitude = fabs(centred);
+    return sign*gain*(exp(magnitude/128.*LOG256)-1);
+}
+
+/* Map a linear sample x to a mu-law code: the magnitude is compressed with
+   log_approx(), the sign is restored, the value is offset by 128, clamped to
+   [0, 255] and finally rounded to the nearest integer. */
+static OPUS_INLINE int lin2ulaw(float x)
+{
+    const float gain = 255.f/32768.f;
+    float code;
+    int sign;
+    if (x >= 0) {
+        sign = 1;
+    } else {
+        sign = -1;
+    }
+    x = fabs(x);
+    code = (sign*(128*log_approx(1+gain*x)/LOG256));
+    code = 128 + code;
+    /* Clamp before rounding */
+    code = (code < 0) ? 0 : code;
+    code = (code > 255) ? 255 : code;
+    return (int)floor(.5 + code);
+}
+
+
+
+#endif
--- a/dnn/datasets.txt
+++ b/dnn/datasets.txt
@ -0,0 +1,173 @@
+The following datasets can be used to train a language-independent LPCNet model.
+A good choice is to include all the data from these datasets, except for
+hi_fi_tts for which only a small subset is recommended (since it's very large
+but has few speakers). Note that this data typically needs to be resampled
+before it can be used.
+
+https://www.openslr.org/resources/30/si_lk.tar.gz
+https://www.openslr.org/resources/32/af_za.tar.gz
+https://www.openslr.org/resources/32/st_za.tar.gz
+https://www.openslr.org/resources/32/tn_za.tar.gz
+https://www.openslr.org/resources/32/xh_za.tar.gz
+https://www.openslr.org/resources/37/bn_bd.zip
+https://www.openslr.org/resources/37/bn_in.zip
+https://www.openslr.org/resources/41/jv_id_female.zip
+https://www.openslr.org/resources/41/jv_id_male.zip
+https://www.openslr.org/resources/42/km_kh_male.zip
+https://www.openslr.org/resources/43/ne_np_female.zip
+https://www.openslr.org/resources/44/su_id_female.zip
+https://www.openslr.org/resources/44/su_id_male.zip
+https://www.openslr.org/resources/61/es_ar_female.zip
+https://www.openslr.org/resources/61/es_ar_male.zip
+https://www.openslr.org/resources/63/ml_in_female.zip
+https://www.openslr.org/resources/63/ml_in_male.zip
+https://www.openslr.org/resources/64/mr_in_female.zip
+https://www.openslr.org/resources/65/ta_in_female.zip
+https://www.openslr.org/resources/65/ta_in_male.zip
+https://www.openslr.org/resources/66/te_in_female.zip
+https://www.openslr.org/resources/66/te_in_male.zip
+https://www.openslr.org/resources/69/ca_es_female.zip
+https://www.openslr.org/resources/69/ca_es_male.zip
+https://www.openslr.org/resources/70/en_ng_female.zip
+https://www.openslr.org/resources/70/en_ng_male.zip
+https://www.openslr.org/resources/71/es_cl_female.zip
+https://www.openslr.org/resources/71/es_cl_male.zip
+https://www.openslr.org/resources/72/es_co_female.zip
+https://www.openslr.org/resources/72/es_co_male.zip
+https://www.openslr.org/resources/73/es_pe_female.zip
+https://www.openslr.org/resources/73/es_pe_male.zip
+https://www.openslr.org/resources/74/es_pr_female.zip
+https://www.openslr.org/resources/75/es_ve_female.zip
+https://www.openslr.org/resources/75/es_ve_male.zip
+https://www.openslr.org/resources/76/eu_es_female.zip
+https://www.openslr.org/resources/76/eu_es_male.zip
+https://www.openslr.org/resources/77/gl_es_female.zip
+https://www.openslr.org/resources/77/gl_es_male.zip
+https://www.openslr.org/resources/78/gu_in_female.zip
+https://www.openslr.org/resources/78/gu_in_male.zip
+https://www.openslr.org/resources/79/kn_in_female.zip
+https://www.openslr.org/resources/79/kn_in_male.zip
+https://www.openslr.org/resources/80/my_mm_female.zip
+https://www.openslr.org/resources/83/irish_english_male.zip
+https://www.openslr.org/resources/83/midlands_english_female.zip
+https://www.openslr.org/resources/83/midlands_english_male.zip
+https://www.openslr.org/resources/83/northern_english_female.zip
+https://www.openslr.org/resources/83/northern_english_male.zip
+https://www.openslr.org/resources/83/scottish_english_female.zip
+https://www.openslr.org/resources/83/scottish_english_male.zip
+https://www.openslr.org/resources/83/southern_english_female.zip
+https://www.openslr.org/resources/83/southern_english_male.zip
+https://www.openslr.org/resources/83/welsh_english_female.zip
+https://www.openslr.org/resources/83/welsh_english_male.zip
+https://www.openslr.org/resources/86/yo_ng_female.zip
+https://www.openslr.org/resources/86/yo_ng_male.zip
+https://www.openslr.org/resources/109/hi_fi_tts_v0.tar.gz
+
+The corresponding citations for all these datasets are:
+
+  @inproceedings{demirsahin-etal-2020-open,
+    title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},
+    author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},
+    booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+    month = may,
+    year = {2020},
+    pages = {6532--6541},
+    address = {Marseille, France},
+    publisher = {European Language Resources Association (ELRA)},
+    url = {https://www.aclweb.org/anthology/2020.lrec-1.804},
+    ISBN = {979-10-95546-34-4},
+  }
+  @inproceedings{kjartansson-etal-2020-open,
+    title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},
+    author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},
+    booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},
+    year = {2020},
+    pages = {21--27},
+    month = may,
+    address = {Marseille, France},
+    publisher = {European Language Resources Association (ELRA)},
+    url = {https://www.aclweb.org/anthology/2020.sltu-1.3},
+    ISBN = {979-10-95546-35-1},
+  }
+
+
+  @inproceedings{guevara-rukoz-etal-2020-crowdsourcing,
+    title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},
+    author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},
+    booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+    year = {2020},
+    month = may,
+    address = {Marseille, France},
+    publisher = {European Language Resources Association (ELRA)},
+    url = {https://www.aclweb.org/anthology/2020.lrec-1.801},
+    pages = {6504--6513},
+    ISBN = {979-10-95546-34-4},
+  }
+  @inproceedings{he-etal-2020-open,
+    title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and Telugu Speech Synthesis Systems}},
+    author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin, Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},
+    booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+    month = may,
+    year = {2020},
+    address = {Marseille, France},
+    publisher = {European Language Resources Association (ELRA)},
+    pages = {6494--6503},
+    url = {https://www.aclweb.org/anthology/2020.lrec-1.800},
+    ISBN = "{979-10-95546-34-4}",
+  }
+
+
+  @inproceedings{kjartansson-etal-tts-sltu2018,
+    title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese, Khmer, Nepali, Sinhala, and Sundanese}},
+    author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu De Silva and Supheakmungkol Sarin},
+    booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},
+    year  = {2018},
+    address = {Gurugram, India},
+    month = aug,
+    pages = {66--70},
+    URL   = {http://dx.doi.org/10.21437/SLTU.2018-14}
+  }
+
+
+  @inproceedings{oo-etal-2020-burmese,
+    title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application to Text-to-Speech}},
+    author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},
+    booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+    month = may,
+    year = {2020},
+    pages = "6328--6339",
+    address = {Marseille, France},
+    publisher = {European Language Resources Association (ELRA)},
+    url = {https://www.aclweb.org/anthology/2020.lrec-1.777},
+    ISBN = {979-10-95546-34-4},
+  }
+  @inproceedings{van-niekerk-etal-2017,
+    title = {{Rapid development of TTS corpora for four South African languages}},
+    author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson and Martin Jansche and Linne Ha},
+    booktitle = {Proc. Interspeech 2017},
+    pages = {2178--2182},
+    address = {Stockholm, Sweden},
+    month = aug,
+    year  = {2017},
+    URL   = {http://dx.doi.org/10.21437/Interspeech.2017-1139}
+  }
+
+  @inproceedings{gutkin-et-al-yoruba2020,
+    title = {{Developing an Open-Source Corpus of Yoruba Speech}},
+    author = {Alexander Gutkin and I{\c{s}}{\i}n Demir{\c{s}}ahin and Oddur Kjartansson and Clara Rivera and K\d{\'o}lá Túb\d{\`o}sún},
+    booktitle = {Proceedings of Interspeech 2020},
+    pages = {404--408},
+    month = {October},
+    year = {2020},
+    address = {Shanghai, China},
+    publisher = {International Speech and Communication Association (ISCA)},
+    doi = {10.21437/Interspeech.2020-1096},
+    url = {http://dx.doi.org/10.21437/Interspeech.2020-1096},
+  }
+
+@article{bakhturina2021hi,
+  title={{Hi-Fi Multi-Speaker English TTS Dataset}},
+  author={Bakhturina, Evelina and Lavrukhin, Vitaly and Ginsburg, Boris and Zhang, Yang},
+  journal={arXiv preprint arXiv:2104.01497},
+  year={2021}
+}
--- a/dnn/download_model.bat
+++ b/dnn/download_model.bat
@ -0,0 +1,9 @@
+@echo off
+rem Fetch (if not already present) and unpack the model archive
+rem opus_data-ARG.tar.gz, where ARG is the first command-line argument.
+set model=opus_data-%1.tar.gz
+
+rem Skip the download when the archive already exists in the current directory.
+if not exist %model% (
+    echo Downloading latest model
+    powershell -Command "(New-Object System.Net.WebClient).DownloadFile('https://media.xiph.org/opus/models/%model%', '%model%')"
+)
+
+rem Unpack the archive into the current directory.
+tar -xvzf %model%
--- a/dnn/download_model.sh
+++ b/dnn/download_model.sh
@ -0,0 +1,10 @@
+#!/bin/sh
+set -e
+
+model=opus_data-$1.tar.gz
+
+if [ ! -f $model ]; then
+        echo "Downloading latest model"
+        wget https://media.xiph.org/opus/models/$model
+fi
+tar xvomf $model
--- a/dnn/dred_rdovae.h
+++ b/dnn/dred_rdovae.h
@ -0,0 +1,42 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_RDOVAE_H
+#define DRED_RDOVAE_H
+
+#include <stdlib.h>
+
+#include "opus_types.h"
+
+/* Forward declarations: code including this header can pass these types by
+   pointer without seeing the full structure definitions.
+   RDOVAEDecState names struct RDOVAEDecStruct, which is defined in
+   dred_rdovae_dec.h; the other structures are defined elsewhere (presumably
+   in the generated model data headers and the encoder header -- confirm). */
+typedef struct RDOVAEDec RDOVAEDec;
+typedef struct RDOVAEEnc RDOVAEEnc;
+typedef struct RDOVAEDecStruct RDOVAEDecState;
+typedef struct RDOVAEEncStruct RDOVAEEncState;
+
+
+
+#endif
--- a/dnn/dred_rdovae_dec.c
+++ b/dnn/dred_rdovae_dec.c
@ -0,0 +1,139 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_constants.h"
+#include "os_support.h"
+
+/* One-shot clearing of a conv state buffer: when *init is zero, zero
+   `dilation` consecutive chunks of `len` floats starting at mem. *init is set
+   to 1 on every call, whether or not anything was cleared. */
+static void conv1_cond_init(float *mem, int len, int dilation, int *init)
+{
+    if (*init == 0) {
+        int chunk = 0;
+        while (chunk < dilation) {
+            OPUS_CLEAR(&mem[chunk*len], len);
+            chunk++;
+        }
+    }
+    *init = 1;
+}
+
+/* Decode nb_latents latent vectors into features using a temporary decoder
+   state that is zeroed and then initialized from `state`. Latent number j is
+   read from latents[j*DRED_LATENT_DIM] and its output is written at
+   features[4*j*DRED_NUM_FEATURES]. */
+void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
+{
+    RDOVAEDecState dec;
+    int frame;
+    memset(&dec, 0, sizeof(dec));
+    dred_rdovae_dec_init_states(&dec, model, state, arch);
+    for (frame = 0; frame < nb_latents; frame++)
+    {
+        float *out = &features[4*frame*DRED_NUM_FEATURES];
+        const float *latent = &latents[frame*DRED_LATENT_DIM];
+        dred_rdovae_decode_qframe(&dec, model, out, latent, arch);
+    }
+}
+
+/* (Re)initialize a decoder state from an initial state vector.
+   initial_state is passed through two dense layers (dec_hidden_init, then
+   dec_gru_init, both with tanh activation); the result is split, in order,
+   into the five GRU state vectors. The conv state buffers are cleared and the
+   `initialized` flag is reset to 0. */
+void dred_rdovae_dec_init_states(
+    RDOVAEDecState *h,            /* io: state buffer handle */
+    const RDOVAEDec *model,
+    const float *initial_state,  /* i: initial state */
+    int arch
+    )
+{
+    float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
+    float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
+    int counter=0;
+    compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
+    compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);
+    /* counter is the read offset into state_init for the next GRU state */
+    OPUS_COPY(h->gru1_state, state_init, DEC_GRU1_STATE_SIZE);
+    counter += DEC_GRU1_STATE_SIZE;
+    OPUS_COPY(h->gru2_state, &state_init[counter], DEC_GRU2_STATE_SIZE);
+    counter += DEC_GRU2_STATE_SIZE;
+    OPUS_COPY(h->gru3_state, &state_init[counter], DEC_GRU3_STATE_SIZE);
+    counter += DEC_GRU3_STATE_SIZE;
+    OPUS_COPY(h->gru4_state, &state_init[counter], DEC_GRU4_STATE_SIZE);
+    counter += DEC_GRU4_STATE_SIZE;
+    OPUS_COPY(h->gru5_state, &state_init[counter], DEC_GRU5_STATE_SIZE);
+    /* Clear every conv history here. The lazy clearing in
+       dred_rdovae_decode_qframe() only ever reaches conv1_state, because the
+       first conv1_cond_init() call sets `initialized` back to 1; without this
+       a reused state would keep stale conv2..conv5 history. */
+    OPUS_CLEAR(h->conv1_state, DEC_CONV1_STATE_SIZE);
+    OPUS_CLEAR(h->conv2_state, DEC_CONV2_STATE_SIZE);
+    OPUS_CLEAR(h->conv3_state, DEC_CONV3_STATE_SIZE);
+    OPUS_CLEAR(h->conv4_state, DEC_CONV4_STATE_SIZE);
+    OPUS_CLEAR(h->conv5_state, DEC_CONV5_STATE_SIZE);
+    h->initialized = 0;
+}
+
+
+/* Decode one latent vector into one output frame (qframe).
+   The latent goes through a dense layer followed by five GRU+GLU / conv1d
+   stages. Each stage appends its output to `buffer`, and every stage is handed
+   the start of `buffer` as input, so later stages can see all earlier
+   outputs. A final linear dense layer maps the buffer to qframe. */
+void dred_rdovae_decode_qframe(
+    RDOVAEDecState *dec_state,       /* io: state buffer handle */
+    const RDOVAEDec *model,
+    float *qframe,              /* o: quadruple feature frame (four concatenated frames in reverse order) */
+    const float *input,          /* i: latent vector */
+    int arch
+    )
+{
+    float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
+                 + DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
+    /* Write offset into buffer; also passed to the conv layers as the current
+       input length */
+    int output_index = 0;
+
+    /* run decoder stack and concatenate output in buffer*/
+    compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
+    output_index += DEC_DENSE1_OUT_SIZE;
+
+    /* Stage 1: GRU (presumably updates gru1_state in place -- confirm), GLU of
+       the GRU state appended to buffer, then conv1d over everything so far.
+       NOTE(review): conv1_cond_init() sets `initialized` to 1 on its first
+       call, so only conv1_state is ever cleared through this path; the
+       conv2..conv5 calls below find the flag already set. Those states must
+       therefore already be zero when decoding starts
+       (DRED_rdovae_decode_all() memsets the whole state). */
+    compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
+    compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
+    output_index += DEC_GRU1_OUT_SIZE;
+    conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
+    compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
+    output_index += DEC_CONV1_OUT_SIZE;
+
+    /* Stage 2: same pattern with the second GRU/GLU/conv set */
+    compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
+    compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
+    output_index += DEC_GRU2_OUT_SIZE;
+    conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
+    compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
+    output_index += DEC_CONV2_OUT_SIZE;
+
+    /* Stage 3 */
+    compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
+    compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
+    output_index += DEC_GRU3_OUT_SIZE;
+    conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
+    compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
+    output_index += DEC_CONV3_OUT_SIZE;
+
+    /* Stage 4 */
+    compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
+    compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
+    output_index += DEC_GRU4_OUT_SIZE;
+    conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
+    compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
+    output_index += DEC_CONV4_OUT_SIZE;
+
+    /* Stage 5 */
+    compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
+    compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
+    output_index += DEC_GRU5_OUT_SIZE;
+    conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
+    compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
+    output_index += DEC_CONV5_OUT_SIZE;
+
+    /* Output layer: linear map from the full concatenated buffer to qframe */
+    compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
+}
--- a/dnn/dred_rdovae_dec.h
+++ b/dnn/dred_rdovae_dec.h
@ -0,0 +1,53 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_RDOVAE_DEC_H
+#define DRED_RDOVAE_DEC_H
+
+#include "dred_rdovae.h"
+#include "dred_rdovae_dec_data.h"
+#include "dred_rdovae_stats_data.h"
+
+/* State of the RDOVAE decoder network: one state vector per GRU layer and
+   one state buffer per conv layer, plus a flag used when setting up the
+   conv states. */
+struct RDOVAEDecStruct {
+  /* Passed by address to conv1_cond_init() together with each conv state. */
+  int initialized;
+  /* GRU layer states, updated in place by compute_generic_gru(). */
+  float gru1_state[DEC_GRU1_STATE_SIZE];
+  float gru2_state[DEC_GRU2_STATE_SIZE];
+  float gru3_state[DEC_GRU3_STATE_SIZE];
+  float gru4_state[DEC_GRU4_STATE_SIZE];
+  float gru5_state[DEC_GRU5_STATE_SIZE];
+  /* Conv layer states, handed to compute_generic_conv1d() as its memory. */
+  float conv1_state[DEC_CONV1_STATE_SIZE];
+  float conv2_state[DEC_CONV2_STATE_SIZE];
+  float conv3_state[DEC_CONV3_STATE_SIZE];
+  float conv4_state[DEC_CONV4_STATE_SIZE];
+  float conv5_state[DEC_CONV5_STATE_SIZE];
+};
+
+/* Decoder entry points.
+   NOTE(review): the descriptions below are inferred from the names and
+   signatures only; confirm against the definitions in dred_rdovae_dec.c. */
+/* Presumably sets up the decoder state h from initial_state. */
+void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
+/* Presumably decodes one latent vector z into the output frame qframe. */
+void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
+/* Presumably decodes nb_latents latent vectors into features, starting from state. */
+void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
+
+#endif
--- a/dnn/dred_rdovae_enc.c
+++ b/dnn/dred_rdovae_enc.c
@ -0,0 +1,110 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae_enc.h"
+#include "os_support.h"
+#include "dred_rdovae_constants.h"
+
+/* Conditionally zero the state of a conv layer.
+   When *init is zero on entry, clears `dilation` consecutive chunks of `len`
+   floats starting at mem. *init is 1 on return in every case. */
+static void conv1_cond_init(float *mem, int len, int dilation, int *init)
+{
+    const int already_initialized = *init;
+    int chunk;
+    *init = 1;
+    if (already_initialized) return;
+    for (chunk=0;chunk<dilation;chunk++) {
+        OPUS_CLEAR(&mem[chunk*len], len);
+    }
+}
+
+/* Encode one double feature frame with the RDOVAE encoder.
+
+   The outputs of dense1 and of the five GRU/conv pairs are written one after
+   the other into a local buffer, and every layer is handed the start of that
+   buffer as its input. Two dense heads then map the buffer to the latent
+   vector and (through a hidden layer) to the initial state.
+
+   enc_state     : io, GRU and conv states, updated in place
+   model         : i,  encoder weights
+   latents       : o,  DRED_LATENT_DIM values
+   initial_state : o,  DRED_STATE_DIM values
+   input         : i,  double feature frame (concatenated)
+   arch          : i,  forwarded to the compute_*() kernels */
+void dred_rdovae_encode_dframe(
+    RDOVAEEncState *enc_state,           /* io: encoder state */
+    const RDOVAEEnc *model,
+    float *latents,                 /* o: latent vector */
+    float *initial_state,           /* o: initial state */
+    const float *input,              /* i: double feature frame (concatenated) */
+    int arch
+    )
+{
+    float padded_latents[DRED_PADDED_LATENT_DIM];
+    float padded_state[DRED_PADDED_STATE_DIM];
+    float buffer[ENC_DENSE1_OUT_SIZE + ENC_GRU1_OUT_SIZE + ENC_GRU2_OUT_SIZE + ENC_GRU3_OUT_SIZE + ENC_GRU4_OUT_SIZE + ENC_GRU5_OUT_SIZE
+               + ENC_CONV1_OUT_SIZE + ENC_CONV2_OUT_SIZE + ENC_CONV3_OUT_SIZE + ENC_CONV4_OUT_SIZE + ENC_CONV5_OUT_SIZE];
+    float state_hidden[GDENSE1_OUT_SIZE];
+    int output_index = 0;
+    /* conv1_cond_init() sets the flag it is given to 1. Handing it
+       enc_state->initialized directly for every layer would let the conv1
+       call suppress the clearing of conv2..conv5 on the first frame, so each
+       call gets its own copy of the flag as it was on entry. */
+    const int was_initialized = enc_state->initialized;
+    int conv_init;
+
+    /* run encoder stack and concatenate output in buffer*/
+    compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
+    output_index += ENC_DENSE1_OUT_SIZE;
+
+    /* GRU 1 followed by conv 1 (no dilation). */
+    compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
+    OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
+    output_index += ENC_GRU1_OUT_SIZE;
+    conv_init = was_initialized;
+    conv1_cond_init(enc_state->conv1_state, output_index, 1, &conv_init);
+    compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
+    output_index += ENC_CONV1_OUT_SIZE;
+
+    /* GRU 2..5, each followed by a conv with dilation 2. */
+    compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
+    OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
+    output_index += ENC_GRU2_OUT_SIZE;
+    conv_init = was_initialized;
+    conv1_cond_init(enc_state->conv2_state, output_index, 2, &conv_init);
+    compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+    output_index += ENC_CONV2_OUT_SIZE;
+
+    compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
+    OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
+    output_index += ENC_GRU3_OUT_SIZE;
+    conv_init = was_initialized;
+    conv1_cond_init(enc_state->conv3_state, output_index, 2, &conv_init);
+    compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+    output_index += ENC_CONV3_OUT_SIZE;
+
+    compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
+    OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
+    output_index += ENC_GRU4_OUT_SIZE;
+    conv_init = was_initialized;
+    conv1_cond_init(enc_state->conv4_state, output_index, 2, &conv_init);
+    compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+    output_index += ENC_CONV4_OUT_SIZE;
+
+    compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
+    OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
+    output_index += ENC_GRU5_OUT_SIZE;
+    conv_init = was_initialized;
+    conv1_cond_init(enc_state->conv5_state, output_index, 2, &conv_init);
+    compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+    output_index += ENC_CONV5_OUT_SIZE;
+
+    /* All conv states have now been through their conditional init. */
+    enc_state->initialized = 1;
+
+    /* The dense layer writes the padded size; only DRED_LATENT_DIM values are returned. */
+    compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
+    OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);
+
+    /* next, calculate initial state */
+    compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
+    compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
+    OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
+}
--- a/dnn/dred_rdovae_enc.h
+++ b/dnn/dred_rdovae_enc.h
@ -0,0 +1,52 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_RDOVAE_ENC_H
+#define DRED_RDOVAE_ENC_H
+
+#include "dred_rdovae.h"
+
+#include "dred_rdovae_enc_data.h"
+
+/* State of the RDOVAE encoder network: one state vector per GRU layer and
+   one state buffer per conv layer, plus a flag used when setting up the
+   conv states. */
+struct RDOVAEEncStruct {
+    /* Passed by address to conv1_cond_init(), which sets it to 1. */
+    int initialized;
+    /* GRU layer states, updated in place by compute_generic_gru(). */
+    float gru1_state[ENC_GRU1_STATE_SIZE];
+    float gru2_state[ENC_GRU2_STATE_SIZE];
+    float gru3_state[ENC_GRU3_STATE_SIZE];
+    float gru4_state[ENC_GRU4_STATE_SIZE];
+    float gru5_state[ENC_GRU5_STATE_SIZE];
+    /* Conv layer states. conv2..conv5 are twice the nominal size: those
+       layers are run with dilation 2 in dred_rdovae_encode_dframe(). */
+    float conv1_state[ENC_CONV1_STATE_SIZE];
+    float conv2_state[2*ENC_CONV2_STATE_SIZE];
+    float conv3_state[2*ENC_CONV3_STATE_SIZE];
+    float conv4_state[2*ENC_CONV4_STATE_SIZE];
+    float conv5_state[2*ENC_CONV5_STATE_SIZE];
+};
+
+/* Runs the encoder on one double feature frame (input), updating enc_state
+   in place. Writes DRED_LATENT_DIM values to latents and DRED_STATE_DIM
+   values to initial_state. */
+void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
+
+
+#endif
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@ -0,0 +1,280 @@
+/* Copyright (c) 2017-2018 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include <assert.h>
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#include "os_support.h"
+#include "cpu_support.h"
+
+
+/* Second-order IIR filter over N samples.
+   Each output is the input plus mem[0]; the two memory taps are then updated
+   from b[0..1] (applied to the input) and a[0..1] (applied to the output),
+   with the products formed in double precision. y may be the same buffer as
+   x: each input sample is read before the matching output is written. */
+static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
+  int n = 0;
+  while (n < N) {
+    const float in = x[n];
+    const float out = in + mem[0];
+    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
+    mem[1] = (b[1]*(double)in - a[1]*(double)out);
+    y[n] = out;
+    n++;
+  }
+}
+
+/* Returns a rand()-based value in [-0.5, 0.5]. */
+static float uni_rand(void) {
+  const double unit = rand()/(double)RAND_MAX;
+  return unit-.5;
+}
+
+/* Fills a[0..1] and then b[0..1] with .75*uni_rand(), i.e. values in
+   [-0.375, 0.375]. */
+static void rand_resp(float *a, float *b) {
+  int k;
+  for (k=0;k<2;k++) a[k] = .75*uni_rand();
+  for (k=0;k<2;k++) b[k] = .75*uni_rand();
+}
+
+/* Fills noise[0..FRAME_SIZE-1] with rounded random integers: the difference
+   of two log_approx() values of rand()-based numbers, scaled by
+   noise_std*.707. */
+void compute_noise(int *noise, float noise_std) {
+  int *dst = noise;
+  int *const end = noise + FRAME_SIZE;
+  while (dst != end) {
+    /* Both rand() calls stay in one expression, as in the original form. */
+    *dst = (int)floor(.5 + noise_std*.707*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
+    dst++;
+  }
+}
+
+/* Rounds x to the nearest integer (floor(x+.5)) and saturates the result to
+   [-32767, 32767]. */
+static opus_int16 float2short(float x)
+{
+  const int rounded = (int)floor(.5+x);
+  if (rounded > 32767) return 32767;
+  if (rounded < -32767) return -32767;
+  return rounded;
+}
+
+
+/* Writes one frame of interleaved training audio to file.
+   For each of the FRAME_SIZE samples the record holds two 16-bit values: the
+   most recent entry of st->sig_mem ("signal in") and the clean sample pcm[i]
+   ("signal out"). st->sig_mem is then advanced with a sample rebuilt from the
+   prediction plus the mu-law excitation, after noise[i] has been added to
+   that excitation. */
+void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
+  opus_int16 data[2*FRAME_SIZE];
+  int n;
+  for (n=0;n<FRAME_SIZE;n++) {
+    int k;
+    float exc;
+    float pred=0;
+    /* Prediction from the LPC_ORDER most recent signal-memory samples;
+       features[NB_BANDS+2...] presumably hold the LPC coefficients. */
+    for (k=0;k<LPC_ORDER;k++) pred -= st->features[NB_BANDS+2+k]*st->sig_mem[k];
+    /* Signal in. */
+    data[2*n] = float2short(st->sig_mem[0]);
+    /* Signal out. */
+    data[2*n+1] = pcm[n];
+    exc = lin2ulaw(pcm[n] - pred);
+    /* Simulate error on excitation. */
+    exc += noise[n];
+    exc = IMIN(255, IMAX(0, exc));
+
+    /* Shift the signal memory by one and insert the noisy reconstruction. */
+    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
+    st->sig_mem[0] = pred + ulaw2lin(exc);
+  }
+  fwrite(data, 4*FRAME_SIZE, 1, file);
+}
+
+/* Feature dumping tool used to produce training and test data.
+
+   Modes, selected by argv[1]:
+     -train  <speech> <features out> <pcm out>
+     -test   <speech> <features out>
+     -btrain <speech> <features out> <pcm out>  (Burg cepstrum written before each feature vector)
+     -btest  <speech> <features out>            (same, without PCM output)
+     -ptrain <noise> <speech> <features out>    (pitch features, noise file mixed into the speech)
+     -ptest  <speech> <features out>            (pitch features)
+
+   In test modes the speech file is processed once. In training modes it is
+   rewound at end of file and processing continues, with random gain, filter
+   and noise changes, until at least one full pass is done and enough frames
+   have been produced.
+
+   Returns 0 on success and 1 on a usage error; calls exit(1) when a file
+   cannot be opened or re-read. */
+int main(int argc, char **argv) {
+  int i;
+  char *argv0;
+  int count=0;
+  static const float a_hp[2] = {-1.99599, 0.99600};
+  static const float b_hp[2] = {-2, 1};
+  float a_sig[2] = {0};
+  float b_sig[2] = {0};
+  float mem_hp_x[2]={0};
+  float mem_resp_x[2]={0};
+  float mem_preemph=0;
+  float x[FRAME_SIZE];
+  int gain_change_count=0;
+  FILE *f1;
+  FILE *ffeat;
+  FILE *fpcm=NULL;
+  opus_int16 pcm[FRAME_SIZE]={0};
+  int noisebuf[FRAME_SIZE]={0};
+  opus_int16 tmp[FRAME_SIZE] = {0};
+  float speech_gain=1;
+  float old_speech_gain = 1;
+  int one_pass_completed = 0;
+  LPCNetEncState *st;
+  float noise_std=0;
+  int training = -1;
+  int burg = 0;
+  int pitch = 0;
+  FILE *fnoise = NULL;
+  float noise_gain = 0;
+  long noise_size=0;
+  int arch;
+  srand(getpid());
+  arch = opus_select_arch();
+  st = lpcnet_encoder_create();
+  argv0=argv[0];
+  if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
+      burg = 1;
+      training = 1;
+  }
+  else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
+      burg = 1;
+      training = 0;
+  }
+  else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
+      pitch = 1;
+      training = 1;
+      fnoise = fopen(argv[2], "rb");
+      /* Fail early instead of passing a NULL stream to fseek()/ftell(). */
+      if (fnoise == NULL) {
+        fprintf(stderr,"Error opening noise file: %s\n", argv[2]);
+        exit(1);
+      }
+      fseek(fnoise, 0, SEEK_END);
+      noise_size = ftell(fnoise);
+      fseek(fnoise, 0, SEEK_SET);
+      /* Drop the noise argument so the remaining ones line up with the other modes. */
+      argv++;
+  }
+  else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
+      pitch = 1;
+      training = 0;
+  }
+  else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
+  else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
+  if (training == -1) {
+    fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
+    fprintf(stderr, "  or   %s -test <speech> <features out>\n", argv0);
+    return 1;
+  }
+  f1 = fopen(argv[2], "r");
+  if (f1 == NULL) {
+    fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
+    exit(1);
+  }
+  ffeat = fopen(argv[3], "wb");
+  if (ffeat == NULL) {
+    fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
+    exit(1);
+  }
+  if (training && !pitch) {
+    fpcm = fopen(argv[4], "wb");
+    if (fpcm == NULL) {
+      fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
+      exit(1);
+    }
+  }
+  while (1) {
+    size_t ret;
+    ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
+    if (feof(f1) || ret != FRAME_SIZE) {
+      /* End of input: stop in test mode, loop over the file again in training mode. */
+      if (!training) break;
+      rewind(f1);
+      ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
+      if (ret != FRAME_SIZE) {
+        fprintf(stderr, "error reading\n");
+        exit(1);
+      }
+      one_pass_completed = 1;
+    }
+    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
+    if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
+    /* Periodically draw a new speech gain, random filter response, noise level and noise position. */
+    if (training && ++gain_change_count > 2821) {
+      float tmp1, tmp2;
+      speech_gain = pow(10., (-30+(rand()%40))/20.);
+      if (rand()&1) speech_gain = -speech_gain;
+      if (rand()%20==0) speech_gain *= .01;
+      if (!pitch && rand()%100==0) speech_gain = 0;
+      gain_change_count = 0;
+      rand_resp(a_sig, b_sig);
+      tmp1 = rand()/(float)RAND_MAX;
+      tmp2 = rand()/(float)RAND_MAX;
+      noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
+      if (fnoise != NULL) {
+        long pos;
+        /* Randomize the fraction because rand() only gives us 31 bits. */
+        float frac_pos = rand()/(float)RAND_MAX;
+        pos = (frac_pos*noise_size);
+        /* 32-bit alignment. */
+        pos = pos/4 * 4;
+        if (pos > noise_size-500000) pos = noise_size-500000;
+        /* Noise files shorter than 500000 bytes would give a negative offset. */
+        if (pos < 0) pos = 0;
+        noise_gain = pow(10., (-15+(rand()%40))/20.);
+        if (rand()%10==0) noise_gain = 0;
+        fseek(fnoise, pos, SEEK_SET);
+      }
+    }
+    if (fnoise != NULL) {
+      opus_int16 noise[FRAME_SIZE];
+      ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
+      /* A short read (end of the noise file) leaves the tail unwritten: treat it as silence. */
+      for (i=(int)ret;i<FRAME_SIZE;i++) noise[i] = 0;
+      for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
+    }
+    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
+    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
+    /* Cross-fade from the previous gain to the current one over the frame. */
+    for (i=0;i<FRAME_SIZE;i++) {
+      float g;
+      float f = (float)i/FRAME_SIZE;
+      g = f*speech_gain + (1-f)*old_speech_gain;
+      x[i] *= g;
+    }
+    if (burg) {
+      float ceps[2*NB_BANDS];
+      burg_cepstral_analysis(ceps, x);
+      fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
+    }
+    preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+    for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
+    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
+    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
+    compute_frame_features(st, x, arch);
+
+    if (fpcm) {
+        compute_noise(noisebuf, noise_std);
+    }
+
+    if (pitch) {
+      /* Pitch mode: cross-correlation and IF features, scaled by 127 and stored as signed bytes. */
+      signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
+      for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+        pitch_features[i] = floor(.5 + 127.f*st->xcorr_features[i]);
+      }
+      for (i=0;i<PITCH_IF_FEATURES;i++) {
+        pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = floor(.5 + 127.f*st->if_features[i]);
+      }
+      fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
+    } else {
+      fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
+    }
+    /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
+    if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
+    /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
+    /* Carry the end of this frame over to the start of the next PCM frame. */
+    for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
+    old_speech_gain = speech_gain;
+    count++;
+  }
+  fclose(f1);
+  fclose(ffeat);
+  if (fpcm) fclose(fpcm);
+  if (fnoise) fclose(fnoise);
+  lpcnet_encoder_destroy(st);
+  return 0;
+}
--- a/dnn/dump_lpcnet_tables.c
+++ b/dnn/dump_lpcnet_tables.c
@ -0,0 +1,104 @@
+/* Copyright (c) 2017-2018 Mozilla
+   Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include "freq.h"
+#include "kiss_fft.h"
+
+
+/* Generates lpcnet_tables.c in the current directory. The generated file
+   holds the FFT state for WINDOW_SIZE (bit-reversal table, twiddles and the
+   kiss_fft_state itself), the analysis half window and the DCT table.
+   Returns 0 on success, 1 when the output file cannot be created or the FFT
+   state cannot be allocated. */
+int main(void) {
+  int i;
+  FILE *file;
+  kiss_fft_state *kfft;
+  float half_window[OVERLAP_SIZE];
+  float dct_table[NB_BANDS*NB_BANDS];
+
+  file=fopen("lpcnet_tables.c", "wb");
+  /* Every fprintf() below writes to this stream, so bail out if it is missing. */
+  if (file == NULL) {
+    fprintf(stderr, "Error opening output file: lpcnet_tables.c\n");
+    return 1;
+  }
+  fprintf(file, "/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/\n\n");
+  fprintf(file, "#ifdef HAVE_CONFIG_H\n");
+  fprintf(file, "#include \"config.h\"\n");
+  fprintf(file, "#endif\n");
+
+  fprintf(file, "#include \"kiss_fft.h\"\n\n");
+
+  kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
+  /* kfft is dereferenced right below. */
+  if (kfft == NULL) {
+    fprintf(stderr, "Error allocating FFT state\n");
+    fclose(file);
+    return 1;
+  }
+
+  fprintf(file, "static const arch_fft_state arch_fft = {0, NULL};\n\n");
+
+  fprintf (file, "static const opus_int16 fft_bitrev[%d] = {\n", kfft->nfft);
+  for (i=0;i<kfft->nfft;i++)
+    fprintf (file, "%d,%c", kfft->bitrev[i],(i+16)%15==0?'\n':' ');
+  fprintf (file, "};\n\n");
+
+  fprintf (file, "static const kiss_twiddle_cpx fft_twiddles[%d] = {\n", kfft->nfft);
+  for (i=0;i<kfft->nfft;i++)
+    fprintf (file, "{%#0.9gf, %#0.9gf},%c", kfft->twiddles[i].r, kfft->twiddles[i].i,(i+3)%2==0?'\n':' ');
+  fprintf (file, "};\n\n");
+
+
+  /* The state itself, pointing at the two tables emitted above. */
+  fprintf(file, "const kiss_fft_state kfft = {\n");
+  fprintf(file, "%d, /* nfft */\n", kfft->nfft);
+  fprintf(file, "%#0.8gf, /* scale */\n", kfft->scale);
+  fprintf(file, "%d, /* shift */\n", kfft->shift);
+  fprintf(file, "{");
+  for (i=0;i<2*MAXFACTORS;i++) {
+    fprintf(file, "%d, ", kfft->factors[i]);
+  }
+  fprintf(file, "}, /* factors */\n");
+  fprintf(file, "fft_bitrev, /* bitrev*/\n");
+  fprintf(file, "fft_twiddles, /* twiddles*/\n");
+  fprintf(file, "(arch_fft_state *)&arch_fft, /* arch_fft*/\n");
+
+  fprintf(file, "};\n\n");
+
+  /* Window: sin(pi/2 * sin^2(pi/2 * (i+.5)/OVERLAP_SIZE)). */
+  for (i=0;i<OVERLAP_SIZE;i++)
+    half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
+  fprintf(file, "const float half_window[] = {\n");
+  for (i=0;i<OVERLAP_SIZE;i++)
+    fprintf (file, "%#0.9gf,%c", half_window[i],(i+6)%5==0?'\n':' ');
+  fprintf(file, "};\n\n");
+
+  /* DCT table: cos((i+.5)*j*pi/NB_BANDS), with the j==0 column scaled by sqrt(.5). */
+  for (i=0;i<NB_BANDS;i++) {
+    int j;
+    for (j=0;j<NB_BANDS;j++) {
+      dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
+      if (j==0) dct_table[i*NB_BANDS + j] *= sqrt(.5);
+    }
+  }
+  fprintf(file, "const float dct_table[] = {\n");
+  for (i=0;i<NB_BANDS*NB_BANDS;i++)
+    fprintf (file, "%#0.9gf,%c", dct_table[i],(i+6)%5==0?'\n':' ');
+  fprintf(file, "};\n");
+
+  fclose(file);
+  return 0;
+}
--- a/dnn/fargan.c
+++ b/dnn/fargan.c
@ -0,0 +1,225 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "fargan.h"
+#include "os_support.h"
+#include "freq.h"
+#include "fargan_data.h"
+#include "lpcnet.h"
+#include "pitch.h"
+#include "nnet.h"
+#include "lpcnet_private.h"
+#include "cpu_support.h"
+
+#define FARGAN_FEATURES (NB_FEATURES)
+
+/* Computes the conditioning vector cond for one frame.
+   The frame features are concatenated with the row of the period embedding
+   table selected by period-32 (clamped to [0, 223]), then passed through
+   fdense1, the stateful fconv1 (state in st->cond_conv1_state) and fdense2,
+   all with tanh activation. */
+static void compute_fargan_cond(FARGANState *st, float *cond, const float *features, int period)
+{
+  float dense_in[NB_FEATURES+COND_NET_PEMBED_OUT_SIZE];
+  float conv1_in[COND_NET_FCONV1_IN_SIZE];
+  float fdense2_in[COND_NET_FCONV1_OUT_SIZE];
+  FARGAN *model = &st->model;
+  const int pembed_row = IMAX(0,IMIN(period-32, 223));
+  const float *pembed = &model->cond_net_pembed.float_weights[pembed_row*COND_NET_PEMBED_OUT_SIZE];
+
+  /* The compile-time sizes must agree with the loaded model. */
+  celt_assert(FARGAN_FEATURES+COND_NET_PEMBED_OUT_SIZE == model->cond_net_fdense1.nb_inputs);
+  celt_assert(COND_NET_FCONV1_IN_SIZE == model->cond_net_fdense1.nb_outputs);
+  celt_assert(COND_NET_FCONV1_OUT_SIZE == model->cond_net_fconv1.nb_outputs);
+
+  /* dense_in = [ features | period embedding ] */
+  OPUS_COPY(dense_in, features, NB_FEATURES);
+  OPUS_COPY(&dense_in[NB_FEATURES], pembed, COND_NET_PEMBED_OUT_SIZE);
+
+  compute_generic_dense(&model->cond_net_fdense1, conv1_in, dense_in, ACTIVATION_TANH, st->arch);
+  compute_generic_conv1d(&model->cond_net_fconv1, fdense2_in, st->cond_conv1_state, conv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
+  compute_generic_dense(&model->cond_net_fdense2, cond, fdense2_in, ACTIVATION_TANH, st->arch);
+}
+
+/* In-place de-emphasis of one subframe: each sample gets FARGAN_DEEMPHASIS
+   times the previous output added to it. *deemph_mem holds the previous
+   output on entry and the last output of the subframe on return. */
+static void fargan_deemphasis(float *pcm, float *deemph_mem) {
+  float *sample = pcm;
+  float *const end = pcm + FARGAN_SUBFRAME_SIZE;
+  for (;sample != end;sample++) {
+    *sample += FARGAN_DEEMPHASIS * *deemph_mem;
+    *deemph_mem = *sample;
+  }
+}
+
+/* Synthesizes one subframe of FARGAN_SUBFRAME_SIZE samples into pcm.
+   st     : io, synthesis state (pitch buffer, conv/GRU states, de-emphasis memory)
+   pcm    : o,  FARGAN_SUBFRAME_SIZE output samples, after de-emphasis
+   cond   : i,  conditioning values for this subframe
+   period : i,  pitch period used to pick the prediction from the pitch buffer
+   Requires st->cont_initialized (asserted). */
+static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond, int period)
+{
+  int i, pos;
+  float fwc0_in[SIG_NET_INPUT_SIZE];
+  float gru1_in[SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
+  float gru2_in[SIG_NET_GRU1_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
+  float gru3_in[SIG_NET_GRU2_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
+  float pred[FARGAN_SUBFRAME_SIZE+4];
+  float prev[FARGAN_SUBFRAME_SIZE];
+  float pitch_gate[4];
+  float gain;
+  float gain_1;
+  /* NOTE(review): fixed 10000-float scratch buffer; the code below fills only its leading part. */
+  float skip_cat[10000];
+  float skip_out[SIG_NET_SKIP_DENSE_OUT_SIZE];
+  FARGAN *model;
+
+  celt_assert(st->cont_initialized);
+  model = &st->model;
+
+  /* Subframe gain from the conditioning (predicted in the log domain), and its inverse. */
+  compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch);
+  gain = exp(gain);
+  gain_1 = 1.f/(1e-5f + gain);
+
+  /* Pitch prediction: FARGAN_SUBFRAME_SIZE+4 samples read from the pitch buffer starting
+     period+2 samples before its end, gain-normalized and clamped to [-1, 1]. On reaching
+     the end of the buffer the read position steps back by one period. */
+  pos = PITCH_MAX_PERIOD-period-2;
+  for (i=0;i<FARGAN_SUBFRAME_SIZE+4;i++) {
+    pred[i] = MIN32(1.f, MAX32(-1.f, gain_1*st->pitch_buf[IMAX(0, pos)]));
+    pos++;
+    if (pos == PITCH_MAX_PERIOD) pos -= period;
+  }
+  /* Previous subframe: the last FARGAN_SUBFRAME_SIZE samples of the pitch buffer, normalized the same way. */
+  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) prev[i] = MAX32(-1.f, MIN16(1.f, gain_1*st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE+i]));
+
+  /* fwc0_in = [ cond | pred | prev ] */
+  OPUS_COPY(&fwc0_in[0], &cond[0], FARGAN_COND_SIZE);
+  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
+  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);
+
+  /* Conv layer followed by a GLU applied in place; the result is the head of gru1_in. */
+  compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
+  celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
+  compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);
+
+  /* Four sigmoid gates; each one scales the middle of pred (pred[2..]) where it is fed to a later layer. */
+  compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch);
+
+  /* GRU 1: input = [ fwc0 output | gated pred | prev ], GLU output goes to the head of gru2_in. */
+  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2];
+  OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+  compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
+  compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);
+
+  /* GRU 2: same layout, GLU output goes to the head of gru3_in. */
+  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
+  OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+  compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
+  compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);
+
+  /* GRU 3: its GLU output is written straight into its slot of skip_cat. */
+  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
+  OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+  compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);
+  compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch);
+
+  /* skip_cat = [ gru1 out | gru2 out | gru3 out | fwc0 out | gated pred | prev ] */
+  OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE);
+  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
+  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE], gru1_in, SIG_NET_FWC0_CONV_OUT_SIZE);
+  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
+  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+
+  compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
+  compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);
+
+  /* Output layer produces the gain-normalized signal; scale it back by the subframe gain. */
+  compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
+  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain;
+
+  /* The pitch buffer keeps the signal before de-emphasis: shift it and append the new
+     subframe, then de-emphasize pcm for the caller. */
+  OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE);
+  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], pcm, FARGAN_SUBFRAME_SIZE);
+  fargan_deemphasis(pcm, &st->deemph_mem);
+}
+
+/* Primes the synthesizer from existing audio so synthesis can continue it.
+   st        : io, state to prime; cont_initialized is set to 1
+   pcm0      : i,  FARGAN_CONT_SAMPLES samples of past audio
+   features0 : i,  5 consecutive feature frames of NB_FEATURES values each
+   The conditioning network is run over the 5 frames, the pitch buffer is
+   loaded with the pre-emphasized audio, and one frame of subframes is run
+   with its output discarded: after each subframe the end of the pitch buffer
+   is overwritten with the real pre-emphasized signal. */
+void fargan_cont(FARGANState *st, const float *pcm0, const float *features0)
+{
+  int frame, n, sub;
+  int period=0;
+  float cond[COND_NET_FDENSE2_OUT_SIZE];
+  float x0[FARGAN_CONT_SAMPLES];
+  float dummy[FARGAN_SUBFRAME_SIZE];
+  const float *real_signal;
+
+  /* Pre-load features. */
+  for (frame=0;frame<5;frame++) {
+    const float *features = features0 + frame*NB_FEATURES;
+    /* last_period lags one frame behind the period just computed. */
+    st->last_period = period;
+    period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
+    compute_fargan_cond(st, cond, features, period);
+  }
+
+  /* Pre-emphasis, the inverse of the de-emphasis applied at synthesis. */
+  x0[0] = 0;
+  n = 1;
+  while (n<FARGAN_CONT_SAMPLES) {
+    x0[n] = pcm0[n] - FARGAN_DEEMPHASIS*pcm0[n-1];
+    n++;
+  }
+
+  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_FRAME_SIZE], x0, FARGAN_FRAME_SIZE);
+  st->cont_initialized = 1;
+
+  /* Run the network to update its states, then replace what it generated with the real signal. */
+  real_signal = &x0[FARGAN_FRAME_SIZE];
+  for (sub=0;sub<FARGAN_NB_SUBFRAMES;sub++) {
+    run_fargan_subframe(st, dummy, &cond[sub*FARGAN_COND_SIZE], st->last_period);
+    OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], real_signal, FARGAN_SUBFRAME_SIZE);
+    real_signal += FARGAN_SUBFRAME_SIZE;
+  }
+  st->deemph_mem = pcm0[FARGAN_CONT_SAMPLES-1];
+}
+
+
+/* Zeroes the whole state and selects the CPU architecture. Unless
+   USE_WEIGHTS_FILE is defined, the model is also initialized from the
+   built-in fargan_arrays, and success is asserted. */
+void fargan_init(FARGANState *st)
+{
+  int ret = 0;
+  OPUS_CLEAR(st, 1);
+  st->arch = opus_select_arch();
+#ifndef USE_WEIGHTS_FILE
+  ret = init_fargan(&st->model, fargan_arrays);
+#endif
+  celt_assert(ret == 0);
+}
+
+/* Initializes st->model from a weights blob of len bytes at data.
+   Returns 0 when init_fargan() succeeds, -1 otherwise.
+   NOTE(review): the return value of parse_weights() is not checked here;
+   confirm what it leaves in `list` on malformed input. */
+int fargan_load_model(FARGANState *st, const unsigned char *data, int len) {
+  WeightArray *list;
+  int status;
+  parse_weights(&list, data, len);
+  status = init_fargan(&st->model, list);
+  /* The list is only needed while the model is being initialized. */
+  opus_free(list);
+  return (status == 0) ? 0 : -1;
+}
+
+/* Synthesizes one frame (FARGAN_NB_SUBFRAMES subframes) into pcm from one
+   feature vector. The conditioning is computed with the period derived from
+   this frame's features, but the subframes are run with st->last_period,
+   which is only updated once the frame is done. */
+static void fargan_synthesize_impl(FARGANState *st, float *pcm, const float *features)
+{
+  float cond[COND_NET_FDENSE2_OUT_SIZE];
+  float *out = pcm;
+  float *sub_cond = cond;
+  int remaining = FARGAN_NB_SUBFRAMES;
+  int period;
+  celt_assert(st->cont_initialized);
+
+  period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
+  compute_fargan_cond(st, cond, features, period);
+  while (remaining-- > 0) {
+    run_fargan_subframe(st, out, sub_cond, st->last_period);
+    out += FARGAN_SUBFRAME_SIZE;
+    sub_cond += FARGAN_COND_SIZE;
+  }
+  st->last_period = period;
+}
+
+/* Public float entry point: synthesizes one frame into pcm from one feature
+   vector by forwarding to fargan_synthesize_impl(). */
+void fargan_synthesize(FARGANState *st, float *pcm, const float *features)
+{
+  fargan_synthesize_impl(st, pcm, features);
+}
+
+/* 16-bit entry point: synthesizes one frame in float and converts it to
+   opus_int16. Samples are scaled by 32768, clamped to [-32767, 32767] and
+   rounded to nearest.
+   st       : io, synthesis state
+   pcm      : o,  FARGAN_FRAME_SIZE samples
+   features : i,  feature vector for the frame */
+void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features)
+{
+  int i;
+  float fpcm[FARGAN_FRAME_SIZE];
+  fargan_synthesize(st, fpcm, features);
+  /* Loop over the size of fpcm itself so the bound can never exceed the buffer. */
+  for (i=0;i<FARGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
+}
--- a/dnn/fargan.h
+++ b/dnn/fargan.h
@ -0,0 +1,68 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FARGAN_H
+#define FARGAN_H
+
+#include "freq.h"
+#include "fargan_data.h"
+#include "pitchdnn.h"
+
+/* Length of the continuation signal: fargan_cont() reads
+   pcm0[FARGAN_CONT_SAMPLES-1] as its last sample. */
+#define FARGAN_CONT_SAMPLES 320
+/* A frame is FARGAN_NB_SUBFRAMES subframes of FARGAN_SUBFRAME_SIZE samples. */
+#define FARGAN_NB_SUBFRAMES 4
+#define FARGAN_SUBFRAME_SIZE 40
+#define FARGAN_FRAME_SIZE (FARGAN_NB_SUBFRAMES*FARGAN_SUBFRAME_SIZE)
+/* Share of the conditioning network output that belongs to one subframe. */
+#define FARGAN_COND_SIZE (COND_NET_FDENSE2_OUT_SIZE/FARGAN_NB_SUBFRAMES)
+/* De-emphasis coefficient. */
+#define FARGAN_DEEMPHASIS 0.85f
+
+/* Sizes of the signal network input and of the fwc0 state buffer. */
+#define SIG_NET_INPUT_SIZE (FARGAN_COND_SIZE+2*FARGAN_SUBFRAME_SIZE+4)
+#define SIG_NET_FWC0_STATE_SIZE (2*SIG_NET_INPUT_SIZE)
+
+/* Alias for the GRU1 output size of the signal network. */
+#define FARGAN_MAX_RNN_NEURONS SIG_NET_GRU1_OUT_SIZE
+/* Complete state of one FARGAN synthesizer instance. fargan_init() zeroes the
+   whole structure before filling in the model and arch fields. */
+typedef struct {
+  /* Network weights, set up by init_fargan(). */
+  FARGAN model;
+  /* Value returned by opus_select_arch() at init time. */
+  int arch;
+  /* Set to 1 by fargan_cont(); synthesis asserts that it is non-zero. */
+  int cont_initialized;
+  /* fargan_cont() stores the last continuation sample here. */
+  float deemph_mem;
+  /* History buffer; fargan_cont() fills its tail from the continuation data. */
+  float pitch_buf[PITCH_MAX_PERIOD];
+  /* State buffers, sized by the corresponding *_STATE_SIZE constants. */
+  float cond_conv1_state[COND_NET_FCONV1_STATE_SIZE];
+  float fwc0_mem[SIG_NET_FWC0_STATE_SIZE];
+  float gru1_state[SIG_NET_GRU1_STATE_SIZE];
+  float gru2_state[SIG_NET_GRU2_STATE_SIZE];
+  float gru3_state[SIG_NET_GRU3_STATE_SIZE];
+  /* Period computed by the previous synthesis call; used for the subframes of
+     the next one. */
+  int last_period;
+} FARGANState;
+
+/* Zero the state and initialize the model (built-in weights unless
+   USE_WEIGHTS_FILE is defined). */
+void fargan_init(FARGANState *st);
+/* Initialize the model from a weight blob; returns 0 on success, -1 on error. */
+int fargan_load_model(FARGANState *st, const unsigned char *data, int len);
+
+/* Prime the state from a continuation signal (pcm0, FARGAN_CONT_SAMPLES
+   samples) and its features; sets cont_initialized, which synthesis requires. */
+void fargan_cont(FARGANState *st, const float *pcm0, const float *features0);
+
+/* Synthesize one frame of FARGAN_FRAME_SIZE float samples into pcm. */
+void fargan_synthesize(FARGANState *st, float *pcm, const float *features);
+/* Same as fargan_synthesize(), with the output converted to 16-bit samples. */
+void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features);
+
+
+#endif /* FARGAN_H */
--- a/dnn/freq.c
+++ b/dnn/freq.c
@ -0,0 +1,328 @@
+/* Copyright (c) 2017-2018 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include "burg.h"
+#include <assert.h>
+#include "os_support.h"
+
+/* Square of x. The argument is expanded twice, so it must be free of side
+   effects. */
+#define SQUARE(x) ((x)*(x))
+
+/* Band edges. Each entry is multiplied by WINDOW_SIZE_5MS to obtain a spectrum
+   bin index; the comment row gives the matching frequency labels. */
+static const opus_int16 eband5ms[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k*/
+  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40
+};
+
+/* Per-band factors applied to the band energies in lpc_from_cepstrum(). */
+static const float compensation[] = {
+    0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
+};
+
+
+/* FFT state and constant tables defined in another translation unit. */
+extern const kiss_fft_state kfft;
+extern const float half_window[OVERLAP_SIZE];
+extern const float dct_table[NB_BANDS*NB_BANDS];
+
+
+/* Per-band sum of inverse bin power. Every bin between two consecutive band
+   edges contributes 1/(|X|^2 + 1e-9), shared between the lower and the upper
+   band with linear weights (1-frac, frac). The first and last band totals are
+   doubled before the NB_BANDS results are written to bandE. */
+static void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
+  int band;
+  float acc[NB_BANDS] = {0};
+  for (band=0;band<NB_BANDS-1;band++)
+  {
+    int k;
+    const int first_bin = eband5ms[band]*WINDOW_SIZE_5MS;
+    const int width = (eband5ms[band+1]-eband5ms[band])*WINDOW_SIZE_5MS;
+    for (k=0;k<width;k++) {
+      float inv_power;
+      float frac = (float)k/width;
+      inv_power = SQUARE(X[first_bin + k].r);
+      inv_power += SQUARE(X[first_bin + k].i);
+      inv_power = 1.f/(inv_power + 1e-9);
+      acc[band] += (1-frac)*inv_power;
+      acc[band+1] += frac*inv_power;
+    }
+  }
+  acc[0] *= 2;
+  acc[NB_BANDS-1] *= 2;
+  OPUS_COPY(bandE, acc, NB_BANDS);
+}
+
+/* Derive LPC coefficients from autocorrelation values with a Levinson-Durbin
+   style recursion.
+     lpc: out, p coefficients (cleared first)
+     rc:  out, p reflection coefficients (cleared first)
+     ac:  in, autocorrelation values; indices 0..p are read
+     p:   prediction order
+   Returns the prediction error after the last completed iteration. When ac[0]
+   is zero the recursion is skipped, lpc and rc stay zero and ac[0] is returned.
+   NOTE(review): SHR32/SHL32/MULT32_32_Q31 are defined elsewhere; presumably
+   they reduce to plain float arithmetic in this build - confirm. */
+static float lpcn_lpc(
+      opus_val16 *lpc, /* out: [0...p-1] LPC coefficients      */
+      opus_val16 *rc,
+const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
+int          p
+)
+{
+   int i, j;
+   opus_val32 r;
+   opus_val32 error = ac[0];
+
+   OPUS_CLEAR(lpc, p);
+   OPUS_CLEAR(rc, p);
+   if (ac[0] != 0)
+   {
+      for (i = 0; i < p; i++) {
+         /* Sum up this iteration's reflection coefficient */
+         opus_val32 rr = 0;
+         for (j = 0; j < i; j++)
+            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+         rr += SHR32(ac[i + 1],3);
+         r = -SHL32(rr,3)/error;
+         rc[i] = r;
+         /*  Update LPC coefficients and total error */
+         lpc[i] = SHR32(r,3);
+         /* Update the coefficients in symmetric pairs (j, i-1-j); both old
+            values are read before either one is overwritten. */
+         for (j = 0; j < (i+1)>>1; j++)
+         {
+            opus_val32 tmp1, tmp2;
+            tmp1 = lpc[j];
+            tmp2 = lpc[i-1-j];
+            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
+            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+         }
+
+         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
+         /* Bail out once we get 30 dB gain */
+         if (error<.001f*ac[0])
+            break;
+      }
+   }
+   return error;
+}
+
+
+
+/* Per-band energy of spectrum X. Every bin between two consecutive band edges
+   contributes its power |X|^2, shared between the lower and the upper band
+   with linear weights (1-frac, frac). The first and last band totals are
+   doubled before the NB_BANDS results are written to bandE. */
+void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
+  int band;
+  float acc[NB_BANDS] = {0};
+  for (band=0;band<NB_BANDS-1;band++)
+  {
+    int k;
+    const int first_bin = eband5ms[band]*WINDOW_SIZE_5MS;
+    const int width = (eband5ms[band+1]-eband5ms[band])*WINDOW_SIZE_5MS;
+    for (k=0;k<width;k++) {
+      float power;
+      float frac = (float)k/width;
+      power = SQUARE(X[first_bin + k].r);
+      power += SQUARE(X[first_bin + k].i);
+      acc[band] += (1-frac)*power;
+      acc[band+1] += frac*power;
+    }
+  }
+  acc[0] *= 2;
+  acc[NB_BANDS-1] *= 2;
+  OPUS_COPY(bandE, acc, NB_BANDS);
+}
+
+/* Compute NB_BANDS cepstral coefficients for len samples of pcm from a Burg
+   analysis of the given order.
+     pcm:           in, len samples
+     burg_cepstrum: out, NB_BANDS coefficients
+     len:           number of input samples, at most FRAME_SIZE
+     order:         analysis order, at most LPC_ORDER */
+static void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) {
+  int i;
+  float burg_in[FRAME_SIZE];
+  float burg_lpc[LPC_ORDER];
+  float x[WINDOW_SIZE];
+  float Eburg[NB_BANDS];
+  float g;
+  kiss_fft_cpx LPC[FREQ_SIZE];
+  float Ly[NB_BANDS];
+  float logMax = -2;
+  float follow = -2;
+  assert(order <= LPC_ORDER);
+  assert(len <= FRAME_SIZE);
+  /* Pre-emphasized input; this yields len-1 samples. */
+  for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
+  g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
+  g /= len - 2*(order-1);
+  /* Build the sequence 1, -a[0]*.995, -a[1]*.995^2, ... zero-padded to
+     WINDOW_SIZE, and transform it. */
+  OPUS_CLEAR(x, WINDOW_SIZE);
+  x[0] = 1;
+  for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
+  forward_transform(LPC, x);
+  /* Band sums of the inverse power of that transform, scaled by g. */
+  compute_band_energy_inverse(Eburg, LPC);
+  for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
+  /* Log band energies, floored at 8 below the running maximum and at 2.5
+     below the running follower, which itself drops by 2.5 per band. */
+  for (i=0;i<NB_BANDS;i++) {
+    Ly[i] = log10(1e-2+Eburg[i]);
+    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
+    logMax = MAX16(logMax, Ly[i]);
+    follow = MAX16(follow-2.5, Ly[i]);
+  }
+  dct(burg_cepstrum, Ly);
+  /* Offset on the first coefficient; lpc_from_cepstrum() adds 4 back. */
+  burg_cepstrum[0] += - 4;
+}
+
+/* Burg cepstral analysis of one frame x (FRAME_SIZE samples), performed
+   separately on each half frame. On return ceps[0..NB_BANDS-1] holds the
+   average of the two half-frame cepstra and ceps[NB_BANDS..2*NB_BANDS-1] holds
+   the first-half cepstrum minus the second-half cepstrum. */
+void burg_cepstral_analysis(float *ceps, const float *x) {
+  int band;
+  const int half = FRAME_SIZE/2;
+  float *lo = ceps;
+  float *hi = ceps + NB_BANDS;
+  compute_burg_cepstrum(x, lo, half, LPC_ORDER);
+  compute_burg_cepstrum(x + half, hi, half, LPC_ORDER);
+  for (band=0;band<NB_BANDS;band++) {
+    const float first = lo[band];
+    const float second = hi[band];
+    lo[band] = .5*(first+second);
+    hi[band] = (first-second);
+  }
+}
+
+
+/* Expand NB_BANDS band values into a per-bin curve of FREQ_SIZE floats by
+   linear interpolation between consecutive bands.
+     g:     out, FREQ_SIZE values; bins not covered by any band are left at 0
+     bandE: in, NB_BANDS band values */
+static void interp_band_gain(float *g, const float *bandE) {
+  int i;
+  /* memset() counts bytes, so the size has to be scaled by the element size;
+     clearing only FREQ_SIZE bytes would leave most of the buffer untouched. */
+  memset(g, 0, FREQ_SIZE*sizeof(*g));
+  for (i=0;i<NB_BANDS-1;i++)
+  {
+    int j;
+    int band_size;
+    band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
+    for (j=0;j<band_size;j++) {
+      float frac = (float)j/band_size;
+      g[(eband5ms[i]*WINDOW_SIZE_5MS) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
+    }
+  }
+}
+
+
+/* Transform NB_BANDS input values with dct_table:
+   out[k] = sqrt(2/NB_BANDS) * sum over n of in[n]*dct_table[n*NB_BANDS + k]. */
+void dct(float *out, const float *in) {
+  int k;
+  for (k=0;k<NB_BANDS;k++) {
+    int n;
+    float acc = 0;
+    const float *column = &dct_table[k];
+    for (n=0;n<NB_BANDS;n++) acc += in[n] * column[n*NB_BANDS];
+    out[k] = acc*sqrt(2./NB_BANDS);
+  }
+}
+
+/* Counterpart of dct() that walks dct_table along the other axis:
+   out[k] = sqrt(2/NB_BANDS) * sum over n of in[n]*dct_table[k*NB_BANDS + n]. */
+static void idct(float *out, const float *in) {
+  int k;
+  for (k=0;k<NB_BANDS;k++) {
+    int n;
+    float acc = 0;
+    const float *row = &dct_table[k*NB_BANDS];
+    for (n=0;n<NB_BANDS;n++) acc += in[n] * row[n];
+    out[k] = acc*sqrt(2./NB_BANDS);
+  }
+}
+
+/* FFT of a real signal: in holds WINDOW_SIZE samples, out receives the first
+   FREQ_SIZE complex bins produced by opus_fft(). */
+void forward_transform(kiss_fft_cpx *out, const float *in) {
+  int n;
+  kiss_fft_cpx td[WINDOW_SIZE];
+  kiss_fft_cpx fd[WINDOW_SIZE];
+  /* Promote the real input to complex with a zero imaginary part. */
+  for (n=0;n<WINDOW_SIZE;n++) {
+    td[n].r = in[n];
+    td[n].i = 0;
+  }
+  opus_fft(&kfft, td, fd, 0);
+  OPUS_COPY(out, fd, FREQ_SIZE);
+}
+
+/* Inverse transform of a half spectrum: in holds FREQ_SIZE complex bins, out
+   receives WINDOW_SIZE real samples. The upper bins are rebuilt as the complex
+   conjugate mirror of the lower ones, opus_fft() is run on the result, and the
+   real parts are written out in reversed order, scaled by WINDOW_SIZE. */
+static void inverse_transform(float *out, const kiss_fft_cpx *in) {
+  int k;
+  kiss_fft_cpx spec[WINDOW_SIZE];
+  kiss_fft_cpx res[WINDOW_SIZE];
+  OPUS_COPY(spec, in, FREQ_SIZE);
+  for (k=FREQ_SIZE;k<WINDOW_SIZE;k++) {
+    const kiss_fft_cpx *mirror = &spec[WINDOW_SIZE - k];
+    spec[k].r = mirror->r;
+    spec[k].i = -mirror->i;
+  }
+  opus_fft(&kfft, spec, res, 0);
+  /* output in reverse order for IFFT. */
+  out[0] = WINDOW_SIZE*res[0].r;
+  for (k=1;k<WINDOW_SIZE;k++) {
+    out[WINDOW_SIZE - k] = WINDOW_SIZE*res[k].r;
+  }
+}
+
+/* Compute LPC_ORDER LPC coefficients from NB_BANDS band energies.
+     lpc: out, LPC_ORDER coefficients
+     Ex:  in, NB_BANDS band energies
+   Returns the prediction error reported by lpcn_lpc(). */
+static float lpc_from_bands(float *lpc, const float *Ex)
+{
+   int i;
+   float e;
+   float ac[LPC_ORDER+1];
+   float rc[LPC_ORDER];
+   float Xr[FREQ_SIZE];
+   kiss_fft_cpx X_auto[FREQ_SIZE];
+   float x_auto[WINDOW_SIZE];
+   /* Interpolate the band energies to one value per bin and force the last
+      bin to zero. */
+   interp_band_gain(Xr, Ex);
+   Xr[FREQ_SIZE-1] = 0;
+   /* Inverse-transform the purely real spectrum and keep the first
+      LPC_ORDER+1 output samples as autocorrelation values. */
+   OPUS_CLEAR(X_auto, FREQ_SIZE);
+   for (i=0;i<FREQ_SIZE;i++) X_auto[i].r = Xr[i];
+   inverse_transform(x_auto, X_auto);
+   for (i=0;i<LPC_ORDER+1;i++) ac[i] = x_auto[i];
+
+   /* -40 dB noise floor. */
+   /* NOTE(review): 320/12 is an integer division (26), so the constant term
+      is 26/38. rather than 320/(12*38.) - confirm this is intended. */
+   ac[0] += ac[0]*1e-4 + 320/12/38.;
+   /* Lag windowing. */
+   for (i=1;i<LPC_ORDER+1;i++) ac[i] *= (1 - 6e-5*i*i);
+   e = lpcn_lpc(lpc, rc, ac, LPC_ORDER);
+   return e;
+}
+
+/* Scale the LPC_ORDER coefficients in place: lpc[i] is multiplied by
+   gamma^(i+1), with the power built up by repeated multiplication. */
+void lpc_weighting(float *lpc, float gamma)
+{
+  float weight = gamma;
+  float *end = lpc + LPC_ORDER;
+  for (; lpc < end; lpc++)
+  {
+    *lpc *= weight;
+    weight *= gamma;
+  }
+}
+
+/* Compute LPC_ORDER LPC coefficients from NB_BANDS cepstral coefficients.
+   The first coefficient is raised by 4, the cepstrum is transformed back with
+   idct(), each band is mapped through 10^x and scaled by its compensation[]
+   entry, and the result goes to lpc_from_bands(), whose return value is
+   returned. The input cepstrum is not modified. */
+float lpc_from_cepstrum(float *lpc, const float *cepstrum)
+{
+   int band;
+   float shifted[NB_BANDS];
+   float band_energy[NB_BANDS];
+   OPUS_COPY(shifted, cepstrum, NB_BANDS);
+   shifted[0] += 4;
+   idct(band_energy, shifted);
+   for (band=0;band<NB_BANDS;band++) {
+      band_energy[band] = pow(10.f, band_energy[band])*compensation[band];
+   }
+   return lpc_from_bands(lpc, band_energy);
+}
+
+/* Window a WINDOW_SIZE buffer in place: the first OVERLAP_SIZE samples are
+   multiplied by half_window[], the last OVERLAP_SIZE samples by the same
+   table applied in mirrored order. */
+void apply_window(float *x) {
+  int i;
+  float *head = x;
+  float *tail = x + WINDOW_SIZE - 1;
+  for (i=0;i<OVERLAP_SIZE;i++) {
+    const float w = half_window[i];
+    *head++ *= w;
+    *tail-- *= w;
+  }
+}
--- a/dnn/freq.h
+++ b/dnn/freq.h
@ -0,0 +1,61 @@
+/* Copyright (c) 2017-2018 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FREQ_H
+#define FREQ_H
+
+#include "kiss_fft.h"
+
+/* Number of LPC coefficients used throughout the analysis code. */
+#define LPC_ORDER 16
+
+/* Pre-emphasis coefficient. */
+#define PREEMPHASIS (0.85f)
+
+/* Frame geometry expressed in 5 ms units. */
+#define FRAME_SIZE_5MS (2)
+#define OVERLAP_SIZE_5MS (2)
+#define TRAINING_OFFSET_5MS (1)
+
+#define WINDOW_SIZE_5MS (FRAME_SIZE_5MS + OVERLAP_SIZE_5MS)
+
+/* The same geometry in samples, at 80 samples per 5 ms unit. */
+#define FRAME_SIZE (80*FRAME_SIZE_5MS)
+#define OVERLAP_SIZE (80*OVERLAP_SIZE_5MS)
+#define TRAINING_OFFSET (80*TRAINING_OFFSET_5MS)
+#define WINDOW_SIZE (FRAME_SIZE + OVERLAP_SIZE)
+/* Number of bins kept from a WINDOW_SIZE-point transform of a real signal. */
+#define FREQ_SIZE (WINDOW_SIZE/2 + 1)
+
+/* Number of bands, and therefore of cepstral coefficients per analysis. */
+#define NB_BANDS 18
+#define NB_BANDS_1 (NB_BANDS - 1)
+
+/* Band energies of spectrum X; writes NB_BANDS values to bandE. */
+void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X);
+/* Burg cepstral analysis of one frame x; writes 2*NB_BANDS values to ceps. */
+void burg_cepstral_analysis(float *ceps, const float *x);
+
+/* Applies half_window[] to both ends of a WINDOW_SIZE buffer, in place. */
+void apply_window(float *x);
+/* Transforms NB_BANDS values with dct_table. */
+void dct(float *out, const float *in);
+/* FFT of WINDOW_SIZE real samples; writes FREQ_SIZE complex bins to out. */
+void forward_transform(kiss_fft_cpx *out, const float *in);
+/* LPC_ORDER coefficients from NB_BANDS cepstral coefficients; returns the
+   prediction error. */
+float lpc_from_cepstrum(float *lpc, const float *cepstrum);
+/* Multiplies lpc[i] by gamma^(i+1) for the LPC_ORDER coefficients, in place. */
+void lpc_weighting(float *lpc, float gamma);
+
+#endif
--- a/dnn/fwgan.c
+++ b/dnn/fwgan.c
@ -0,0 +1,322 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "fwgan.h"
+#include "os_support.h"
+#include "freq.h"
+#include "fwgan_data.h"
+#include "lpcnet.h"
+#include "pitch.h"
+#include "nnet.h"
+#include "lpcnet_private.h"
+
+/* Size of the per-subframe feat_in buffer: one quarter of the upsampler output
+   plus FWGAN_FRAME_SIZE/2 pitch embedding values. */
+#define FEAT_IN_SIZE (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 + FWGAN_FRAME_SIZE/2)
+
+/* Number of inputs of the upsampler layer: one fewer than NB_FEATURES. */
+#define FWGAN_FEATURES (NB_FEATURES-1)
+
+/* Generate the pitch embedding for one subframe by rotating a two-element
+   phase state once per sample.
+     pembed: out, 2*SUBFRAME_SIZE values; the first half receives phase[1] and
+             the second half phase[0] after each rotation
+     phase:  in/out, phase state (two floats), renormalized before returning
+     w0:     rotation per sample, in radians */
+static void pitch_embeddings(float *pembed, float *phase, double w0) {
+  int i;
+  float wreal, wimag;
+#if 1
+  /* This Taylor expansion should be good enough since w0 is always small. */
+  float w2 = w0*w0;
+  wreal = 1 - .5*w2*(1.f - 0.083333333f*w2);
+  wimag = w0*(1 - 0.166666667f*w2*(1.f - 0.05f*w2));
+#else
+  wreal = cos(w0);
+  wimag = sin(w0);
+#endif
+  /* Speed-up phase reference by making phase a unit-norm complex value and rotating it
+     by exp(-i*w0) each sample.  */
+  for (i=0;i<SUBFRAME_SIZE;i++) {
+    float tmp;
+    /* Complex multiply of (phase[0], phase[1]) by (wreal, wimag); tmp keeps
+       the new phase[0] until the old one has been used for phase[1]. */
+    tmp = phase[0]*wreal - phase[1]*wimag;
+    phase[1] = phase[0]*wimag + phase[1]*wreal;
+    phase[0] = tmp;
+    pembed[i] = phase[1];
+    pembed[SUBFRAME_SIZE+i] = phase[0];
+  }
+  /* Renormalize once per sub-frame, though we could probably do it even less frequently. */
+  {
+    float r = 1.f/sqrt(phase[0]*phase[0] + phase[1]*phase[1]);
+    phase[0] *= r;
+    phase[1] *= r;
+  }
+}
+
+/* Weighted LPC for one feature vector: derive LPC_ORDER coefficients from the
+   cepstrum at the start of features, then multiply coefficient i by
+   FWGAN_GAMMA^(i+1). */
+static void compute_wlpc(float lpc[LPC_ORDER], const float *features) {
+  lpc_from_cepstrum(lpc, features);
+  lpc_weighting(lpc, FWGAN_GAMMA);
+}
+
+/* Run the bfcc_with_corr_upsampler_fc dense layer (tanh activation) on the
+   FWGAN_FEATURES input features, writing
+   BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE conditioning values to cond. The layer
+   dimensions are checked against those constants with celt_assert(). */
+static void run_fwgan_upsampler(FWGANState *st, float *cond, const float *features)
+{
+  celt_assert(FWGAN_FEATURES == st->model.bfcc_with_corr_upsampler_fc.nb_inputs);
+  celt_assert(BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE == st->model.bfcc_with_corr_upsampler_fc.nb_outputs);
+  compute_generic_dense(&st->model.bfcc_with_corr_upsampler_fc, cond, features, ACTIVATION_TANH);
+}
+
+/* Forward declaration: fwgan_cont() runs one synthesis pass at its end. */
+static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features);
+/* Prime the synthesizer state from a continuation signal.
+     st:        state to initialize
+     pcm0:      in, CONT_PCM_INPUTS samples of continuation signal
+     features0: in, feature vector used for the LPC and the first frame
+   Sets the filter memories, runs the continuation network to fill the
+   recurrent and convolution states, marks the state as initialized and
+   pre-computes the samples that the first fwgan_synthesize() call returns. */
+void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
+{
+  int i;
+  float norm2, norm_1;
+  float wpcm0[CONT_PCM_INPUTS];
+  float cont_inputs[CONT_PCM_INPUTS+1];
+  float tmp1[MAX_CONT_SIZE];
+  float tmp2[MAX_CONT_SIZE];
+  float lpc[LPC_ORDER];
+  float new_pcm[FWGAN_FRAME_SIZE];
+  FWGAN *model;
+  st->embed_phase[0] = 1;
+  model = &st->model;
+  compute_wlpc(lpc, features0);
+  /* Deemphasis memory is just the last continuation sample. */
+  st->deemph_mem = pcm0[CONT_PCM_INPUTS-1];
+
+  /* Apply analysis filter, considering that the preemphasis and deemphasis filter
+     cancel each other in this case since the LPC filter is constant across that boundary.
+     */
+  for (i=LPC_ORDER;i<CONT_PCM_INPUTS;i++) {
+    int j;
+    wpcm0[i] = pcm0[i];
+    for (j=0;j<LPC_ORDER;j++) wpcm0[i] += lpc[j]*pcm0[i-j-1];
+  }
+  /* The first LPC_ORDER outputs lack filter history, so they are filled with
+     a copy of the first filtered sample. */
+  /* FIXME: Make this less stupid. */
+  for (i=0;i<LPC_ORDER;i++) wpcm0[i] = wpcm0[LPC_ORDER];
+
+  /* The memory of the pre-emphasis is the last sample of the weighted signal
+     (ignoring preemphasis+deemphasis combination). */
+  st->preemph_mem = wpcm0[CONT_PCM_INPUTS-1];
+  /* The memory of the synthesis filter is the pre-emphasized continuation. */
+  for (i=0;i<LPC_ORDER;i++) st->syn_mem[i] = pcm0[CONT_PCM_INPUTS-1-i] - FWGAN_DEEMPHASIS*pcm0[CONT_PCM_INPUTS-2-i];
+
+  /* Network input: the log of the signal norm in slot 0, followed by the
+     weighted signal scaled to (approximately) unit norm. */
+  norm2 = celt_inner_prod(wpcm0, wpcm0, CONT_PCM_INPUTS, st->arch);
+  norm_1 = 1.f/sqrt(1e-8f + norm2);
+  for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*wpcm0[i];
+  cont_inputs[0] = log(sqrt(norm2) + 1e-7f);
+
+  /* Continuation network */
+  compute_generic_dense(&model->cont_net_0, tmp1, cont_inputs, ACTIVATION_TANH);
+  compute_generic_dense(&model->cont_net_2, tmp2, tmp1, ACTIVATION_TANH);
+  compute_generic_dense(&model->cont_net_4, tmp1, tmp2, ACTIVATION_TANH);
+  compute_generic_dense(&model->cont_net_6, tmp2, tmp1, ACTIVATION_TANH);
+  compute_generic_dense(&model->cont_net_8, tmp1, tmp2, ACTIVATION_TANH);
+  celt_assert(CONT_NET_10_OUT_SIZE == model->cont_net_10.nb_outputs);
+  compute_generic_dense(&model->cont_net_10, st->cont, tmp1, ACTIVATION_TANH);
+
+  /* Computing continuation for each layer. */
+  celt_assert(RNN_GRU_STATE_SIZE == model->rnn_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->rnn_cont_fc_0, st->rnn_state, st->cont, ACTIVATION_TANH);
+
+  celt_assert(FWC1_STATE_SIZE == model->fwc1_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc1_cont_fc_0, st->fwc1_state, st->cont, ACTIVATION_TANH);
+  celt_assert(FWC2_STATE_SIZE == model->fwc2_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc2_cont_fc_0, st->fwc2_state, st->cont, ACTIVATION_TANH);
+  celt_assert(FWC3_STATE_SIZE == model->fwc3_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc3_cont_fc_0, st->fwc3_state, st->cont, ACTIVATION_TANH);
+  celt_assert(FWC4_STATE_SIZE == model->fwc4_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc4_cont_fc_0, st->fwc4_state, st->cont, ACTIVATION_TANH);
+  celt_assert(FWC5_STATE_SIZE == model->fwc5_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc5_cont_fc_0, st->fwc5_state, st->cont, ACTIVATION_TANH);
+  celt_assert(FWC6_STATE_SIZE == model->fwc6_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc6_cont_fc_0, st->fwc6_state, st->cont, ACTIVATION_TANH);
+  celt_assert(FWC7_STATE_SIZE == model->fwc7_cont_fc_0.nb_outputs);
+  compute_generic_dense(&model->fwc7_cont_fc_0, st->fwc7_state, st->cont, ACTIVATION_TANH);
+
+  st->cont_initialized = 1;
+  /* Process the first frame, discard the first subframe, and keep the rest for the first
+     synthesis call. */
+  fwgan_synthesize_impl(st, new_pcm, lpc, features0);
+  OPUS_COPY(st->pcm_buf, &new_pcm[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+}
+
+/* Scale one subframe (SUBFRAME_SIZE samples) by the gain stored in
+   *last_gain, then replace *last_gain with 10^(0.5*c0/sqrt(18)) so that the
+   gain derived from c0 takes effect on the next call. */
+static void apply_gain(float *pcm, float c0, float *last_gain) {
+  int n;
+  for (n=0;n<SUBFRAME_SIZE;n++) pcm[n] *= *last_gain;
+  *last_gain = pow(10.f, (0.5f*c0/sqrt(18.f)));
+}
+
+/* Run one subframe (SUBFRAME_SIZE samples) through the LPC synthesis filter,
+   in place.
+     pcm:      in/out, subframe samples
+     mem:      in/out, LPC_ORDER most recent outputs, newest first
+     lpc:      in, coefficients saved for the next call
+     last_lpc: in/out, coefficients used for this subframe; overwritten with
+               lpc before returning */
+static void fwgan_lpc_syn(float *pcm, float *mem, const float *lpc, float last_lpc[LPC_ORDER]) {
+  int i;
+  for (i=0;i<SUBFRAME_SIZE;i++) {
+    int j;
+    for (j=0;j<LPC_ORDER;j++) pcm[i] -= mem[j]*last_lpc[j];
+    /* Shift the history by one slot and insert the new output at the front. */
+    OPUS_MOVE(&mem[1], &mem[0], LPC_ORDER-1);
+    mem[0] = pcm[i];
+  }
+  OPUS_COPY(last_lpc, lpc, LPC_ORDER);
+}
+
+/* In-place pre-emphasis of one subframe (SUBFRAME_SIZE samples):
+   y[n] = x[n] - FWGAN_DEEMPHASIS*x[n-1], where *preemph_mem carries the
+   previous input sample across calls. */
+static void fwgan_preemphasis(float *pcm, float *preemph_mem) {
+  float *p;
+  float *end = pcm + SUBFRAME_SIZE;
+  for (p=pcm;p<end;p++) {
+    float in = *p;
+    *p -= FWGAN_DEEMPHASIS * *preemph_mem;
+    *preemph_mem = in;
+  }
+}
+
+/* In-place de-emphasis of one subframe (SUBFRAME_SIZE samples):
+   y[n] = x[n] + FWGAN_DEEMPHASIS*y[n-1], where *deemph_mem carries the
+   previous output sample across calls. */
+static void fwgan_deemphasis(float *pcm, float *deemph_mem) {
+  float *p;
+  float *end = pcm + SUBFRAME_SIZE;
+  for (p=pcm;p<end;p++) {
+    *p += FWGAN_DEEMPHASIS * *deemph_mem;
+    *deemph_mem = *p;
+  }
+}
+
+/* Generate one subframe (SUBFRAME_SIZE samples) of output.
+     st:   synthesizer state, updated in place
+     pcm:  out, SUBFRAME_SIZE samples
+     cond: in, this subframe's quarter of the upsampler output
+     w0:   per-sample phase increment for the pitch embedding
+     lpc:  in, LPC coefficients of the current frame
+     c0:   first feature, from which the output gain is derived
+   On the first call after fwgan_cont() only the input layer is run and the
+   output subframe is all zeros. */
+static void run_fwgan_subframe(FWGANState *st, float *pcm, const float *cond, double w0, const float *lpc, float c0)
+{
+  float tmp1[FWC1_FC_0_OUT_SIZE];
+  float tmp2[IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE)];
+  float feat_in[FEAT_IN_SIZE];
+  float rnn_in[FEAT_IN_CONV1_CONV_OUT_SIZE];
+  float pembed[FWGAN_FRAME_SIZE/2];
+  FWGAN *model;
+  model = &st->model;
+
+  pitch_embeddings(pembed, st->embed_phase, w0);
+  /* Interleave bfcc_cond and pembed for each subframe in feat_in. */
+  /* NOTE(review): pembed fills feat_in[0 .. FWGAN_FRAME_SIZE/2) while cond is
+     written at offset BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4; the two regions
+     tile feat_in exactly only if those two sizes are equal - confirm. */
+  OPUS_COPY(&feat_in[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], &cond[0], BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4);
+  OPUS_COPY(&feat_in[0], &pembed[0], FWGAN_FRAME_SIZE/2);
+
+  compute_generic_conv1d(&model->feat_in_conv1_conv, rnn_in, st->cont_conv1_mem, feat_in, FEAT_IN_CONV1_CONV_IN_SIZE, ACTIVATION_LINEAR);
+  celt_assert(FEAT_IN_NL1_GATE_OUT_SIZE == model->feat_in_nl1_gate.nb_outputs);
+  compute_gated_activation(&model->feat_in_nl1_gate, rnn_in, rnn_in, ACTIVATION_TANH);
+
+  if (st->cont_initialized == 1) {
+    /* On the very first subframe we stop here. We only want to run the feat_in layer since the
+       others are initialized via the continuation network. */
+    OPUS_CLEAR(pcm, SUBFRAME_SIZE);
+    st->cont_initialized = 2;
+    apply_gain(pcm, c0, &st->last_gain);
+    OPUS_COPY(st->last_lpc, lpc, LPC_ORDER);
+    return;
+  }
+
+  compute_generic_gru(&model->rnn_gru_input, &model->rnn_gru_recurrent, st->rnn_state, rnn_in);
+  celt_assert(IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE) >= model->rnn_nl_gate.nb_outputs);
+  compute_gated_activation(&model->rnn_nl_gate, tmp2, st->rnn_state, ACTIVATION_TANH);
+
+  /* Seven conv1d + gated-activation stages, ping-ponging between tmp1 and
+     tmp2; the last stage writes straight into pcm. */
+  compute_generic_conv1d(&model->fwc1_fc_0, tmp1, st->fwc1_state, tmp2, RNN_GRU_STATE_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc1_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+  compute_generic_conv1d(&model->fwc2_fc_0, tmp2, st->fwc2_state, tmp1, FWC1_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc2_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+  compute_generic_conv1d(&model->fwc3_fc_0, tmp1, st->fwc3_state, tmp2, FWC2_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc3_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+  compute_generic_conv1d(&model->fwc4_fc_0, tmp2, st->fwc4_state, tmp1, FWC3_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc4_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+  compute_generic_conv1d(&model->fwc5_fc_0, tmp1, st->fwc5_state, tmp2, FWC4_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc5_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+  compute_generic_conv1d(&model->fwc6_fc_0, tmp2, st->fwc6_state, tmp1, FWC5_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc6_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+  compute_generic_conv1d(&model->fwc7_fc_0, tmp1, st->fwc7_state, tmp2, FWC6_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+  compute_gated_activation(&model->fwc7_fc_1_gate, pcm, tmp1, ACTIVATION_TANH);
+
+  /* Post-processing chain: gain, pre-emphasis, LPC synthesis, de-emphasis. */
+  apply_gain(pcm, c0, &st->last_gain);
+  fwgan_preemphasis(pcm, &st->preemph_mem);
+  fwgan_lpc_syn(pcm, st->syn_mem, lpc, st->last_lpc);
+  fwgan_deemphasis(pcm, &st->deemph_mem);
+}
+
+/* Reset an FWGAN state: zero the whole structure and initialize st->model
+   from the fwgan_arrays weight table. A non-zero result from init_fwgan()
+   trips celt_assert(). st->arch is not set here and keeps the zero written by
+   OPUS_CLEAR(). */
+void fwgan_init(FWGANState *st)
+{
+  int status;
+  OPUS_CLEAR(st, 1);
+  /* FIXME: perform arch detection. */
+  status = init_fwgan(&st->model, fwgan_arrays);
+  celt_assert(status == 0);
+}
+
+/* Initialize st->model from a serialized weight blob (data, len bytes): the
+   blob is parsed into a weight list, the model is initialized from that list,
+   and the list is freed again.
+   Returns 0 when init_fwgan() reports success, -1 otherwise.
+   NOTE(review): the result of parse_weights() is not inspected here; confirm
+   that init_fwgan() copes with whatever list a failed parse leaves behind. */
+int fwgan_load_model(FWGANState *st, const unsigned char *data, int len) {
+  WeightArray *weights;
+  int status;
+  parse_weights(&weights, data, len);
+  status = init_fwgan(&st->model, weights);
+  opus_free(weights);
+  return (status == 0) ? 0 : -1;
+}
+
+/* Synthesize one frame (NB_SUBFRAMES subframes of SUBFRAME_SIZE samples) into
+   pcm, using the given LPC coefficients and feature vector.
+   st->cont_initialized must already be set (checked with celt_assert). */
+static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features)
+{
+  int sf;
+  int period;
+  double w0;
+  float frame_cond[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE];
+  float net_features[FWGAN_FEATURES];
+  celt_assert(st->cont_initialized);
+  /* Network input: the first NB_FEATURES-2 features unchanged, followed by
+     the last feature shifted by +.5; features[NB_FEATURES-2] is not used. */
+  OPUS_COPY(net_features, features, NB_FEATURES-2);
+  net_features[NB_FEATURES-2] = features[NB_FEATURES-1]+.5;
+
+  /* Period from the feature at index NB_BANDS, then the per-sample phase
+     increment for the pitch embedding. */
+  period = (int)floor(.1 + 50*features[NB_BANDS]+100);
+  w0 = 2*M_PI/period;
+  run_fwgan_upsampler(st, frame_cond, net_features);
+  for (sf=0;sf<NB_SUBFRAMES;sf++) {
+    run_fwgan_subframe(st, &pcm[sf*SUBFRAME_SIZE], &frame_cond[sf*BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], w0, lpc, features[0]);
+  }
+}
+
+/* Public float entry point: writes FWGAN_FRAME_SIZE samples to pcm. The
+   output is the FWGAN_FRAME_SIZE-SUBFRAME_SIZE samples buffered by the
+   previous call (or by fwgan_cont()) followed by the first subframe of the
+   newly synthesized frame; the rest of the new frame is buffered in
+   st->pcm_buf for the next call. */
+void fwgan_synthesize(FWGANState *st, float *pcm, const float *features)
+{
+  float frame_lpc[LPC_ORDER];
+  float fresh[FWGAN_FRAME_SIZE];
+  float *carry = st->pcm_buf;
+  compute_wlpc(frame_lpc, features);
+  fwgan_synthesize_impl(st, fresh, frame_lpc, features);
+  /* Handle buffering. */
+  OPUS_COPY(pcm, carry, FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+  OPUS_COPY(&pcm[FWGAN_FRAME_SIZE-SUBFRAME_SIZE], fresh, SUBFRAME_SIZE);
+  OPUS_COPY(carry, &fresh[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+}
+
+/* 16-bit entry point: synthesizes one float frame, then scales each sample by
+   32768, clamps it to [-32767, 32767] and rounds it to the nearest integer.
+   NOTE(review): the conversion loop runs over LPCNET_FRAME_SIZE samples while
+   the temporary buffer holds FWGAN_FRAME_SIZE; confirm the two are equal. */
+void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features)
+{
+  int n;
+  float frame[FWGAN_FRAME_SIZE];
+  fwgan_synthesize(st, frame, features);
+  for (n=0;n<LPCNET_FRAME_SIZE;n++) {
+    pcm[n] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*frame[n])));
+  }
+}
--- a/dnn/fwgan.h
+++ b/dnn/fwgan.h
@ -0,0 +1,83 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FWGAN_H
+#define FWGAN_H
+
+#include "freq.h"
+#include "fwgan_data.h"
+
+/* Continuation length in samples; CONT_PCM_INPUTS below has the same value. */
+#define FWGAN_CONT_SAMPLES 320
+/* A frame is NB_SUBFRAMES subframes of SUBFRAME_SIZE samples. */
+#define NB_SUBFRAMES 4
+#define SUBFRAME_SIZE 40
+#define FWGAN_FRAME_SIZE (NB_SUBFRAMES*SUBFRAME_SIZE)
+/* Number of continuation samples read by fwgan_cont(). */
+#define CONT_PCM_INPUTS 320
+/* Size of the scratch buffers used by the continuation network. */
+#define MAX_CONT_SIZE CONT_NET_0_OUT_SIZE
+/* Weighting factor applied to the LPC coefficients (gamma^(i+1)). */
+#define FWGAN_GAMMA 0.92f
+/* Pre-/de-emphasis coefficient. */
+#define FWGAN_DEEMPHASIS 0.85f
+
+/* FIXME: Derive those from the model rather than hardcoding. */
+/* State sizes of the seven fwc layers; fwgan_cont() asserts that they match
+   the output sizes of the corresponding continuation layers. */
+#define FWC1_STATE_SIZE 512
+#define FWC2_STATE_SIZE 512
+#define FWC3_STATE_SIZE 256
+#define FWC4_STATE_SIZE 256
+#define FWC5_STATE_SIZE 128
+#define FWC6_STATE_SIZE 128
+#define FWC7_STATE_SIZE 80
+
+/* Complete state of one FWGAN synthesizer instance. fwgan_init() zeroes the
+   whole structure before initializing the model. */
+typedef struct {
+  /* Network weights, set up by init_fwgan(). */
+  FWGAN model;
+  /* Passed to celt_inner_prod(); fwgan_init() leaves it at zero. */
+  int arch;
+  /* 0 after init, 1 after fwgan_cont(), 2 once the first subframe has run. */
+  int cont_initialized;
+  /* Two-element phase state rotated by pitch_embeddings(). */
+  float embed_phase[2];
+  /* Gain applied to the next subframe by apply_gain(). */
+  float last_gain;
+  /* LPC coefficients used by the synthesis filter for the next subframe. */
+  float last_lpc[LPC_ORDER];
+  /* Output history of the LPC synthesis filter, newest sample first. */
+  float syn_mem[LPC_ORDER];
+  /* Previous input sample of the pre-emphasis filter. */
+  float preemph_mem;
+  /* Previous output sample of the de-emphasis filter. */
+  float deemph_mem;
+  /* Samples already synthesized but not yet returned; only the first
+     FWGAN_FRAME_SIZE-SUBFRAME_SIZE entries are used. */
+  float pcm_buf[FWGAN_FRAME_SIZE];
+  /* Output of the continuation network. */
+  float cont[CONT_NET_10_OUT_SIZE];
+  /* State of the feat_in_conv1_conv layer. */
+  float cont_conv1_mem[FEAT_IN_CONV1_CONV_STATE_SIZE];
+  /* GRU state, seeded from cont by fwgan_cont(). */
+  float rnn_state[RNN_GRU_STATE_SIZE];
+  /* States of the seven fwc layers, seeded from cont by fwgan_cont(). */
+  float fwc1_state[FWC1_STATE_SIZE];
+  float fwc2_state[FWC2_STATE_SIZE];
+  float fwc3_state[FWC3_STATE_SIZE];
+  float fwc4_state[FWC4_STATE_SIZE];
+  float fwc5_state[FWC5_STATE_SIZE];
+  float fwc6_state[FWC6_STATE_SIZE];
+  float fwc7_state[FWC7_STATE_SIZE];
+} FWGANState;
+
+/* Zero the state and initialize the model from the built-in weights. */
+void fwgan_init(FWGANState *st);
+/* Initialize the model from a weight blob; returns 0 on success, -1 on error. */
+int fwgan_load_model(FWGANState *st, const unsigned char *data, int len);
+
+/* Prime the state from a continuation signal (pcm0, CONT_PCM_INPUTS samples)
+   and its features; must run before the synthesis functions, which assert
+   that the state has been initialized this way. */
+void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0);
+
+/* Synthesize one frame of FWGAN_FRAME_SIZE float samples into pcm. */
+void fwgan_synthesize(FWGANState *st, float *pcm, const float *features);
+/* Same as fwgan_synthesize(), with the output converted to 16-bit samples. */
+void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features);
+
+
+#endif /* FWGAN_H */
--- a/dnn/kiss99.c
+++ b/dnn/kiss99.c
@ -0,0 +1,81 @@
+/*Daala video codec
+Copyright (c) 2012 Daala project contributors.  All rights reserved.
+Author: Timothy B. Terriberry
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "kiss99.h"
+
+/*Seeds the generator from _ndata bytes of _data.
+  The state starts from fixed constants, then the seed bytes are XORed in
+   four at a time (one byte per state word), stepping the generator after
+   every complete group of four.
+  Any 1 to 3 leftover bytes are XORed into z, w and jsr without a final step.*/
+void kiss99_srand(kiss99_ctx *_this,const unsigned char *_data,int _ndata){
+  int pos;
+  int left;
+  _this->z=362436069;
+  _this->w=521288629;
+  _this->jsr=123456789;
+  _this->jcong=380116160;
+  /*Absorb every complete 4-byte group.*/
+  for(pos=0;pos+3<_ndata;pos+=4){
+    _this->z^=_data[pos];
+    _this->w^=_data[pos+1];
+    _this->jsr^=_data[pos+2];
+    _this->jcong^=_data[pos+3];
+    kiss99_rand(_this);
+  }
+  /*Absorb the tail; left is at most 3 here (and <=0 when nothing remains).*/
+  left=_ndata-pos;
+  if(left>0)_this->z^=_data[pos];
+  if(left>1)_this->w^=_data[pos+1];
+  if(left>2)_this->jsr^=_data[pos+2];
+  /*Nudge the state away from the values that would produce short cycles.
+    Given how the state is initialized these are unlikely, but they are
+     technically reachable, so rule them out explicitly.
+    See Gregory G. Rose: "KISS: A Bit Too Simple", Cryptographic Communications
+     No. 10, pp. 123---137, 2018.*/
+  if(_this->z==0||_this->z==0x9068FFFF)_this->z++;
+  if(_this->w==0||_this->w==0x464FFFFF)_this->w++;
+  if(_this->jsr==0)_this->jsr++;
+}
+
+/*Advances the generator by one step and returns the next 32-bit output.
+  The output combines three sub-generators: a pair of 16-bit
+   multiply-with-carry words (z, w), a 3-shift xorshift register (jsr) and a
+   linear congruential word (jcong).*/
+uint32_t kiss99_rand(kiss99_ctx *_this){
+  uint32_t z_next;
+  uint32_t w_next;
+  uint32_t xs;
+  uint32_t lcg;
+  /*Multiply-with-carry halves.*/
+  z_next=36969*(_this->z&0xFFFF)+(_this->z>>16);
+  w_next=18000*(_this->w&0xFFFF)+(_this->w>>16);
+  /*Xorshift with the 13 and 17 swapped relative to the original 1999
+     algorithm, which yields a single cycle of maximal length, matching
+     KISS11.
+    KISS11 itself is not used because the full algorithm needs an
+     impractically large (16 MB) internal state.*/
+  xs=_this->jsr;
+  xs^=xs<<13;
+  xs^=xs>>17;
+  xs^=xs<<5;
+  /*Linear congruential step.*/
+  lcg=69069*_this->jcong+1234567;
+  _this->z=z_next;
+  _this->w=w_next;
+  _this->jsr=xs;
+  _this->jcong=lcg;
+  return (((z_next<<16)+w_next)^lcg)+xs;
+}
--- a/dnn/kiss99.h
+++ b/dnn/kiss99.h
@ -0,0 +1,46 @@
+/*Daala video codec
+Copyright (c) 2012 Daala project contributors.  All rights reserved.
+Author: Timothy B. Terriberry
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_kiss99_H)
+# define _kiss99_H (1)
+# include <stdint.h>
+
+/*KISS PRNG from George Marsaglia (1999 version).
+  See https://en.wikipedia.org/wiki/KISS_(algorithm) for details.
+  This is suitable for simulations, but not for use in cryptographic contexts.*/
+
+typedef struct kiss99_ctx kiss99_ctx;
+
+struct kiss99_ctx{
+  uint32_t z;  /*First multiply-with-carry word (multiplier 36969 in kiss99_rand()).*/
+  uint32_t w;  /*Second multiply-with-carry word (multiplier 18000 in kiss99_rand()).*/
+  uint32_t jsr;  /*Xorshift register; kiss99_srand() never leaves it at 0.*/
+  uint32_t jcong;  /*Linear congruential word (69069*x+1234567 in kiss99_rand()).*/
+};
+
+void kiss99_srand(kiss99_ctx *_this,const unsigned char *_data,int _ndata);
+uint32_t kiss99_rand(kiss99_ctx *_this);
+
+#endif
--- a/dnn/lossgen.c
+++ b/dnn/lossgen.c
@ -0,0 +1,134 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arch.h"
+
+#include <math.h>
+#include "lossgen.h"
+#include "os_support.h"
+#include "nnet.h"
+#include "assert.h"
+
+/* Disable RTCD for this. */
+#define RTCD_ARCH c
+
+/* Override assert to avoid undefined/redefined symbols. */
+#undef celt_assert
+#define celt_assert assert
+
+/* Directly include the C files we need since the symbols won't be exposed if we link in a shared object. */
+#include "parse_lpcnet_weights.c"
+#include "nnet_arch.h"
+
+#undef compute_linear
+#undef compute_activation
+
+/* Force the C version since the SIMD versions may be hidden. */
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
+
+#define MAX_RNN_NEURONS_ALL IMAX(LOSSGEN_GRU1_STATE_SIZE, LOSSGEN_GRU2_STATE_SIZE)
+
+/* These two functions are copied from nnet.c to make sure we don't have linking issues. */
+void compute_generic_gru_lossgen(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)  /* One GRU step: reads in[], updates state[] in place. */
+{
+  int i;
+  int N;
+  float zrh[3*MAX_RNN_NEURONS_ALL];  /* Input-side pre-activations laid out as [z | r | h], N values each. */
+  float recur[3*MAX_RNN_NEURONS_ALL];  /* Recurrent-side pre-activations, same layout. */
+  float *z;
+  float *r;
+  float *h;
+  celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
+  celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
+  N = recurrent_weights->nb_inputs;  /* Number of GRU units. */
+  z = zrh;
+  r = &zrh[N];
+  h = &zrh[2*N];
+  celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);  /* Guards the fixed-size stack buffers above. */
+  celt_assert(in != state);
+  compute_linear(input_weights, zrh, in, arch);
+  compute_linear(recurrent_weights, recur, state, arch);
+  for (i=0;i<2*N;i++)  /* Only the z and r gates get the plain recurrent sum. */
+     zrh[i] += recur[i];
+  compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
+  for (i=0;i<N;i++)  /* Candidate: the recurrent term is scaled by the reset gate r. */
+     h[i] += recur[2*N+i]*r[i];
+  compute_activation(h, h, N, ACTIVATION_TANH, arch);
+  for (i=0;i<N;i++)  /* Blend previous state and candidate using the update gate z. */
+     h[i] = z[i]*state[i] + (1-z[i])*h[i];
+  for (i=0;i<N;i++)
+     state[i] = h[i];
+}
+
+
+void compute_generic_dense_lossgen(const LinearLayer *layer, float *output, const float *input, int activation, int arch)  /* Dense layer: linear transform, then activation. */
+{
+   compute_linear(layer, output, input, arch);
+   compute_activation(output, output, layer->nb_outputs, activation, arch);  /* Activation is applied in place on output. */
+}
+
+
+int sample_loss(
+    LossGenState *st,
+    float percent_loss)  /* NOTE(review): expected scale (0-1 vs 0-100) is not visible here -- confirm against the model's training code. */
+{
+  float input[2];
+  float tmp[LOSSGEN_DENSE_IN_OUT_SIZE];
+  float out;
+  int loss;
+  LossGen *model = &st->model;
+  input[0] = st->last_loss;  /* Feed the previous decision back into the network. */
+  input[1] = percent_loss;
+  compute_generic_dense_lossgen(&model->lossgen_dense_in, tmp, input, ACTIVATION_TANH, 0);
+  compute_generic_gru_lossgen(&model->lossgen_gru1_input, &model->lossgen_gru1_recurrent, st->gru1_state, tmp, 0);
+  compute_generic_gru_lossgen(&model->lossgen_gru2_input, &model->lossgen_gru2_recurrent, st->gru2_state, st->gru1_state, 0);
+  compute_generic_dense_lossgen(&model->lossgen_dense_out, &out, st->gru2_state, ACTIVATION_SIGMOID, 0);  /* Writes the single sigmoid output into out. */
+  loss = (float)rand()/RAND_MAX < out;  /* Random draw: 1 when a uniform rand() sample falls below the network output. */
+  st->last_loss = loss;
+  return loss;  /* 1 = frame lost, 0 = frame received (as implied by the name; 0/1 is all the code guarantees). */
+}
+
+
+void lossgen_init(LossGenState *st)  /* Zeroes the state and, unless USE_WEIGHTS_FILE is defined, loads the compiled-in weights. */
+{
+  int ret;
+  OPUS_CLEAR(st, 1);  /* Clears one LossGenState, i.e. the whole structure. */
+#ifndef USE_WEIGHTS_FILE
+  ret = init_lossgen(&st->model, lossgen_arrays);
+#else
+  ret = 0;  /* No built-in weights; presumably lossgen_load_model() is called afterwards -- confirm. */
+#endif
+  celt_assert(ret == 0);
+  (void)ret;  /* Presumably avoids an unused-variable warning when the assert compiles out. */
+}
+
+/* Loads the loss-generator weights from an in-memory blob of len bytes.
+   The blob is parsed into a temporary weight-array list, the model is
+   initialized from that list, and the list is freed again.
+   Returns 0 when init_lossgen() reports success, -1 otherwise. */
+int lossgen_load_model(LossGenState *st, const unsigned char *data, int len) {
+  WeightArray *arrays;
+  int status;
+  parse_weights(&arrays, data, len);
+  status = init_lossgen(&st->model, arrays);
+  opus_free(arrays);
+  return (status == 0) ? 0 : -1;
+}
+
+#if 0
+#include <stdio.h>
+int main(int argc, char **argv) {
+  int i, N;
+  float p;
+  LossGenState st;
+  if (argc!=3) {
+    fprintf(stderr, "usage: lossgen <percentage> <length>\n");
+    return 1;
+  }
+  lossgen_init(&st);
+  p = atof(argv[1]);
+  N = atoi(argv[2]);
+  for (i=0;i<N;i++) {
+    printf("%d\n", sample_loss(&st, p));
+  }
+}
+#endif
--- a/dnn/lossgen.h
+++ b/dnn/lossgen.h
@ -0,0 +1,28 @@
+#ifndef LOSSGEN_H
+#define LOSSGEN_H
+
+
+#include "lossgen_data.h"
+
+#define PITCH_MIN_PERIOD 32
+#define PITCH_MAX_PERIOD 256
+
+#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
+
+
+typedef struct {
+  LossGen model;  /* Network weights, set up by lossgen_init() or lossgen_load_model(). */
+  float gru1_state[LOSSGEN_GRU1_STATE_SIZE];  /* Recurrent state of the first GRU, updated by every sample_loss() call. */
+  float gru2_state[LOSSGEN_GRU2_STATE_SIZE];  /* Recurrent state of the second GRU, fed from gru1_state. */
+  int last_loss;  /* Previous sample_loss() result, fed back as a network input. */
+} LossGenState;
+
+
+void lossgen_init(LossGenState *st);
+int lossgen_load_model(LossGenState *st, const unsigned char *data, int len);
+
+int sample_loss(
+    LossGenState *st,
+    float percent_loss);
+
+#endif
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@ -0,0 +1,283 @@
+/* Copyright (c) 2018 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include "nnet_data.h"
+#include "nnet.h"
+#include "common.h"
+#include "arch.h"
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#include "os_support.h"
+
+#define PREEMPH 0.85f
+
+#define PDF_FLOOR 0.002
+
+#define FRAME_INPUT_SIZE (NB_FEATURES + EMBED_PITCH_OUT_SIZE)
+
+
+#if 0
+static void print_vector(float *x, int N)
+{
+    int i;
+    for (i=0;i<N;i++) printf("%f ", x[i]);
+    printf("\n");
+}
+#endif
+
+#ifdef END2END
+void rc2lpc(float *lpc, const float *rc)  /* Converts LPC_ORDER reflection coefficients rc[] into lpc[] by an order-by-order recursion. */
+{
+  int i, j, k;
+  float tmp[LPC_ORDER];
+  float ntmp[LPC_ORDER] = {0.0};
+  OPUS_COPY(tmp, rc, LPC_ORDER);  /* tmp[i] still holds rc[i] when order i is processed. */
+  for(i = 0; i < LPC_ORDER ; i++)
+    {
+        for(j = 0; j <= i-1; j++)  /* Update every lower-order coefficient using tmp[i] and the mirrored entry. */
+        {
+            ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1];
+        }
+        for(k = 0; k <= i-1; k++)  /* Commit only after the full pass so the update reads the old values. */
+        {
+            tmp[k] = ntmp[k];
+        }
+    }
+  for(i = 0; i < LPC_ORDER ; i++)
+  {
+    lpc[i] = tmp[i];
+  }
+}
+
+#endif
+
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features)  /* Per-frame network: fills both GRU conditioning vectors and lpc[] from one feature frame. */
+{
+    NNetState *net;
+    float condition[FEATURE_DENSE2_OUT_SIZE];
+    float in[FRAME_INPUT_SIZE];  /* NB_FEATURES features followed by the pitch embedding. */
+    float conv1_out[FEATURE_CONV1_OUT_SIZE];
+    float conv2_out[FEATURE_CONV2_OUT_SIZE];
+    float dense1_out[FEATURE_DENSE1_OUT_SIZE];
+    int pitch;
+    float rc[LPC_ORDER];
+    /* Matches the Python code -- the 0.1 avoids rounding issues. */
+    pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
+    pitch = IMIN(255, IMAX(33, pitch));  /* Clamp to [33, 255] before the embedding lookup. */
+    net = &lpcnet->nnet;
+    OPUS_COPY(in, features, NB_FEATURES);
+    compute_embedding(&lpcnet->model.embed_pitch, &in[NB_FEATURES], pitch);
+    compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, in);
+    if (lpcnet->frame_count < FEATURE_CONV1_DELAY) OPUS_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);  /* Zero the output during the first frames (conv state is still updated). */
+    compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
+    if (lpcnet->frame_count < FEATURES_DELAY) OPUS_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
+    _lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
+    _lpcnet_compute_dense(&lpcnet->model.feature_dense2, condition, dense1_out);
+    OPUS_COPY(rc, condition, LPC_ORDER);  /* First LPC_ORDER outputs; only consumed by the END2END branch below. */
+    _lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, condition);
+    _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);
+#ifdef END2END
+    rc2lpc(lpc, rc);
+#elif FEATURES_DELAY>0
+    memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));  /* Output the oldest stored LPC set. */
+    memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));  /* Shift the history by one frame. */
+    lpc_from_cepstrum(lpcnet->old_lpc[0], features);  /* Store the LPCs for the current frame as the newest entry. */
+#else
+    lpc_from_cepstrum(lpc, features);
+#endif
+#ifdef LPC_GAMMA
+    lpc_weighting(lpc, LPC_GAMMA);
+#endif
+    if (lpcnet->frame_count < 1000) lpcnet->frame_count++;  /* Saturating counter; in this file it is only compared against the delay constants. */
+}
+
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)  /* Queues one feature frame instead of running the frame network now. */
+{
+    int max_buffer_size = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;  /* Capacity, in frames. */
+    celt_assert(max_buffer_size <= MAX_FEATURE_BUFFER_SIZE);
+    if (lpcnet->feature_buffer_fill == max_buffer_size) {  /* Full: drop the oldest frame by shifting the rest down. */
+        OPUS_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES],  (max_buffer_size-1)*NB_FEATURES);
+    } else {
+      lpcnet->feature_buffer_fill++;
+    }
+    OPUS_COPY(&lpcnet->feature_buffer[(lpcnet->feature_buffer_fill-1)*NB_FEATURES], features, NB_FEATURES);  /* Append as the newest entry. */
+}
+
+void run_frame_network_flush(LPCNetState *lpcnet)  /* Runs the frame network over every queued frame, oldest first, then empties the queue. */
+{
+    int i;
+    for (i=0;i<lpcnet->feature_buffer_fill;i++) {
+        float lpc[LPC_ORDER];  /* Scratch outputs: discarded, only the state inside lpcnet is advanced. */
+        float gru_a_condition[3*GRU_A_STATE_SIZE];
+        float gru_b_condition[3*GRU_B_STATE_SIZE];
+        run_frame_network(lpcnet, gru_a_condition, gru_b_condition, lpc, &lpcnet->feature_buffer[i*NB_FEATURES]);
+    }
+    lpcnet->feature_buffer_fill = 0;
+}
+
+int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)  /* Per-sample network: returns the value drawn by sample_mdense(). */
+{
+    NNetState *net;
+    float gru_a_input[3*GRU_A_STATE_SIZE];
+    float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
+    float gru_b_input[3*GRU_B_STATE_SIZE];
+    net = &lpcnet->nnet;
+#if 1
+    compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
+#else
+    OPUS_COPY(gru_a_input, gru_a_condition, 3*GRU_A_STATE_SIZE);
+    accum_embedding(&lpcnet->model.gru_a_embed_sig, gru_a_input, last_sig);
+    accum_embedding(&lpcnet->model.gru_a_embed_pred, gru_a_input, pred);
+    accum_embedding(&lpcnet->model.gru_a_embed_exc, gru_a_input, last_exc);
+#endif
+    /*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
+    compute_sparse_gru(&lpcnet->model.sparse_gru_a, net->gru_a_state, gru_a_input);
+    OPUS_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);  /* Only the first GRU_A_STATE_SIZE entries of in_b are written here. */
+    OPUS_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
+    compute_gruB(&lpcnet->model.gru_b, gru_b_input, net->gru_b_state, in_b);
+    return sample_mdense(&lpcnet->model.dual_fc, net->gru_b_state, sampling_logit_table, rng);
+}
+
+/* Returns the size in bytes of an LPCNetState, for callers that allocate the
+   state themselves before calling lpcnet_init().
+   Defined with an explicit (void) parameter list so the definition is a
+   proper prototype matching the declaration in lpcnet.h. */
+int lpcnet_get_size(void)
+{
+    return sizeof(LPCNetState);
+}
+
+void lpcnet_reset(LPCNetState *lpcnet)  /* Clears the resettable part of the state and reseeds the RNG. */
+{
+    const char* rng_string="LPCNet";
+    OPUS_CLEAR((char*)&lpcnet->LPCNET_RESET_START,
+            sizeof(LPCNetState)-
+            ((char*)&lpcnet->LPCNET_RESET_START - (char*)lpcnet));  /* Byte count from LPCNET_RESET_START to the end of the struct; earlier members are kept. */
+    lpcnet->last_exc = lin2ulaw(0.f);
+    kiss99_srand(&lpcnet->rng, (const unsigned char *)rng_string, strlen(rng_string));  /* Fixed seed string, so the random sequence is the same after every reset. */
+}
+
+int lpcnet_init(LPCNetState *lpcnet)  /* Builds the sampling table, loads built-in weights when available, resets the state; returns the model-init status. */
+{
+    int i;
+    int ret;
+    for (i=0;i<256;i++) {
+        float prob = .025f+.95f*i/255.f;  /* Evenly spaced from 0.025 (i=0) to 0.975 (i=255). */
+        lpcnet->sampling_logit_table[i] = -log((1-prob)/prob);  /* Equals log(prob/(1-prob)), the logit of prob. */
+    }
+#ifndef USE_WEIGHTS_FILE
+    ret = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
+#else
+    ret = 0;  /* No built-in weights; presumably lpcnet_load_model() supplies them later -- confirm. */
+#endif
+    lpcnet_reset(lpcnet);
+    celt_assert(ret == 0);
+    return ret;
+}
+
+/* Loads the LPCNet weights from an in-memory blob of len bytes.
+   The blob is parsed into a temporary weight-array list, the model is
+   initialized from that list, and the list is freed again.
+   Returns 0 when init_lpcnet_model() reports success, -1 otherwise. */
+int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len) {
+  WeightArray *arrays;
+  int status;
+  parse_weights(&arrays, data, len);
+  status = init_lpcnet_model(&st->model, arrays);
+  opus_free(arrays);
+  return (status == 0) ? 0 : -1;
+}
+
+
+/* Allocates and initializes a synthesis state.
+   Returns the new state, or NULL if the allocation fails. The caller releases
+   it with lpcnet_destroy().
+   Defined with an explicit (void) parameter list to match lpcnet.h. */
+LPCNetState *lpcnet_create(void)
+{
+    LPCNetState *lpcnet;
+    lpcnet = (LPCNetState *)opus_alloc(lpcnet_get_size(), 1);
+    /* Never clear or initialize through a failed allocation. */
+    if (lpcnet == NULL) return NULL;
+    OPUS_CLEAR(lpcnet, 1);
+    lpcnet_init(lpcnet);
+    return lpcnet;
+}
+
+void lpcnet_destroy(LPCNetState *lpcnet)  /* Frees a state obtained from lpcnet_create(). */
+{
+    opus_free(lpcnet);
+}
+
+void lpcnet_reset_signal(LPCNetState *lpcnet)  /* Clears only the sample-level memories; the frame-network state, model and RNG are not touched here. */
+{
+    lpcnet->deemph_mem = 0;
+    lpcnet->last_exc = lin2ulaw(0.f);
+    OPUS_CLEAR(lpcnet->last_sig, LPC_ORDER);
+    OPUS_CLEAR(lpcnet->nnet.gru_a_state, GRU_A_STATE_SIZE);
+    OPUS_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
+}
+
+void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload)  /* Produces N samples in output[]; the first `preload` samples are read from output[] rather than generated. */
+{
+    int i;
+
+    if (lpcnet->frame_count <= FEATURES_DELAY)
+    {
+        OPUS_CLEAR(output, N);  /* Too few frames processed so far: emit zeros. */
+        return;
+    }
+    for (i=0;i<N;i++)
+    {
+        int j;
+        float pcm;
+        int exc;
+        int last_sig_ulaw;
+        int pred_ulaw;
+        float pred = 0;
+        for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpcnet->lpc[j];  /* Linear prediction from the last LPC_ORDER samples. */
+        last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
+        pred_ulaw = lin2ulaw(pred);
+        exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);  /* Always run, so the network state advances during preload too. */
+        if (i < preload) {
+          exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);  /* Replace the sampled excitation with the one implied by the supplied sample. */
+          pcm = output[i]-PREEMPH*lpcnet->deemph_mem;
+        } else {
+          pcm = pred + ulaw2lin(exc);
+        }
+        OPUS_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);  /* Shift the signal history by one sample. */
+        lpcnet->last_sig[0] = pcm;
+        lpcnet->last_exc = exc;
+        pcm += PREEMPH*lpcnet->deemph_mem;
+        lpcnet->deemph_mem = pcm;  /* Filter memory keeps the value from before clamping. */
+        if (pcm<-32767) pcm = -32767;
+        if (pcm>32767) pcm = 32767;
+        if (i >= preload) output[i] = (int)floor(.5 + pcm);  /* Round to nearest; preloaded samples are left as supplied. */
+    }
+}
+
+void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload)  /* Frame network on `features`, then N samples of synthesis. */
+{
+    run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features);  /* Results are stored in the state for the sample loop. */
+    lpcnet_synthesize_tail_impl(lpcnet, output, N, preload);
+}
+
+void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N) {  /* Public entry point: synthesis with no preloaded samples. */
+    lpcnet_synthesize_impl(lpcnet, features, output, N, 0);
+}
--- a/dnn/lpcnet.h
+++ b/dnn/lpcnet.h
@ -0,0 +1,183 @@
+/* Copyright (c) 2018 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef LPCNET_H_
+#define LPCNET_H_
+
+#include "opus_types.h"
+
+#define NB_FEATURES 20
+#define NB_TOTAL_FEATURES 36
+
+/** Number of audio samples in a feature frame (not for encoding/decoding). */
+#define LPCNET_FRAME_SIZE (160)
+
+typedef struct LPCNetState LPCNetState;
+
+typedef struct LPCNetDecState LPCNetDecState;
+
+typedef struct LPCNetEncState LPCNetEncState;
+
+typedef struct LPCNetPLCState LPCNetPLCState;
+
+
+/** Gets the size of an <code>LPCNetDecState</code> structure.
+  * @returns The size in bytes.
+  */
+int lpcnet_decoder_get_size(void);
+
+/** Initializes a previously allocated decoder state
+  * The memory pointed to by st must be at least the size returned by lpcnet_decoder_get_size().
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see lpcnet_decoder_create(),lpcnet_decoder_get_size()
+  * @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
+  * @retval 0 Success
+  */
+int lpcnet_decoder_init(LPCNetDecState *st);
+
+void lpcnet_reset(LPCNetState *lpcnet);
+
+/** Allocates and initializes a decoder state.
+  *  @returns The newly created state
+  */
+LPCNetDecState *lpcnet_decoder_create(void);
+
+/** Frees an <code>LPCNetDecState</code> allocated by lpcnet_decoder_create().
+  * @param[in] st <tt>LPCNetDecState*</tt>: State to be freed.
+  */
+void lpcnet_decoder_destroy(LPCNetDecState *st);
+
+/** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640).
+  * @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
+  * @param [in] buf <tt>const unsigned char *</tt>: Compressed packet
+  * @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio
+  * @retval 0 Success
+  */
+int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm);
+
+
+
+/** Gets the size of an <code>LPCNetEncState</code> structure.
+  * @returns The size in bytes.
+  */
+int lpcnet_encoder_get_size(void);
+
+/** Initializes a previously allocated encoder state
+  * The memory pointed to by st must be at least the size returned by lpcnet_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see lpcnet_encoder_create(),lpcnet_encoder_get_size()
+  * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+  * @retval 0 Success
+  */
+int lpcnet_encoder_init(LPCNetEncState *st);
+
+int lpcnet_encoder_load_model(LPCNetEncState *st, const unsigned char *data, int len);
+
+/** Allocates and initializes an encoder state.
+  *  @returns The newly created state
+  */
+LPCNetEncState *lpcnet_encoder_create(void);
+
+/** Frees an <code>LPCNetEncState</code> allocated by lpcnet_encoder_create().
+  * @param[in] st <tt>LPCNetEncState*</tt>: State to be freed.
+  */
+void lpcnet_encoder_destroy(LPCNetEncState *st);
+
+/** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8).
+  * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+  * @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be encoded
+  * @param [out] buf <tt>unsigned char *</tt>: Compressed packet
+  * @retval 0 Success
+  */
+int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf);
+
+/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
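+  * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+  * @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be analyzed
+  * @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Features computed for the frame
+  * @param [in] arch <tt>int</tt>: Architecture selector (lpcnet_demo.c passes the result of opus_select_arch())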
+  * @retval 0 Success
+  */
+int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch);
+
+
+/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
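+  * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+  * @param [in] pcm <tt>const float *</tt>: Input speech to be analyzed
+  * @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Features computed for the frame
+  * @param [in] arch <tt>int</tt>: Architecture selector (lpcnet_demo.c passes the result of opus_select_arch())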
+  * @retval 0 Success
+  */
+int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch);
+
+/** Gets the size of an <code>LPCNetState</code> structure.
+  * @returns The size in bytes.
+  */
+int lpcnet_get_size(void);
+
+/** Initializes a previously allocated synthesis state
+  * The memory pointed to by st must be at least the size returned by lpcnet_get_size().
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see lpcnet_create(),lpcnet_get_size()
+  * @param [in] st <tt>LPCNetState*</tt>: Synthesis state
+  * @retval 0 Success
+  */
+int lpcnet_init(LPCNetState *st);
+
+/** Allocates and initializes a synthesis state.
+  *  @returns The newly created state
+  */
+LPCNetState *lpcnet_create(void);
+
+/** Frees an <code>LPCNetState</code> allocated by lpcnet_create().
+  * @param[in] st <tt>LPCNetState*</tt>: State to be freed.
+  */
+void lpcnet_destroy(LPCNetState *st);
+
+/** Synthesizes speech from an LPCNet feature vector.
+  * @param [in] st <tt>LPCNetState*</tt>: Synthesis state
+  * @param [in] features <tt>const float *</tt>: Feature vector for the frame to synthesize
+  * @param [out] output <tt>opus_int16 *</tt>: Synthesized speech (N samples)
+  * @param [in] N <tt>int</tt>: Number of samples to generate
+  * This function does not return a value.
+  */
+void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N);
+
+
+
+int lpcnet_plc_init(LPCNetPLCState *st);
+void lpcnet_plc_reset(LPCNetPLCState *st);
+
+int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm);
+
+int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm);
+
+void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
+
+void lpcnet_plc_fec_clear(LPCNetPLCState *st);
+
+int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len);
+int lpcnet_plc_load_model(LPCNetPLCState *st, const unsigned char *data, int len);
+
+#endif
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@ -0,0 +1,193 @@
+/* Copyright (c) 2018 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "arch.h"
+#include "lpcnet.h"
+#include "freq.h"
+#include "os_support.h"
+#include "fargan.h"
+#include "cpu_support.h"
+
+#ifdef USE_WEIGHTS_FILE
+# if __unix__
+#  include <fcntl.h>
+#  include <sys/mman.h>
+#  include <unistd.h>
+#  include <sys/stat.h>
+/* When available, mmap() is preferable to reading the file, as it leads to
+   better resource utilization, especially if multiple processes are using the same
+   file (mapping will be shared in cache). */
+/* Maps `filename` read-only into memory and returns a pointer to its contents.
+   On success *len receives the file size in bytes and the mapping must be
+   released with free_blob().
+   On any failure (file missing or unreadable, empty file, mmap() error)
+   *len is set to 0 and NULL is returned, the same failure convention as the
+   non-mmap fallback below. */
+unsigned char *load_blob(const char *filename, int *len) {
+  int fd;
+  void *data;
+  struct stat st;
+  *len = 0;
+  fd = open(filename, O_RDONLY);
+  if (fd < 0) return NULL;
+  /* Query the size through the open descriptor so that the size and the
+     mapping are guaranteed to refer to the same file. */
+  if (fstat(fd, &st) != 0 || st.st_size <= 0) {
+    close(fd);
+    return NULL;
+  }
+  data = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+  /* The mapping remains valid after the descriptor is closed. */
+  close(fd);
+  /* mmap() signals failure with MAP_FAILED, not NULL. */
+  if (data == MAP_FAILED) return NULL;
+  *len = st.st_size;
+  return (unsigned char *)data;
+}
+void free_blob(unsigned char *blob, int len) {  /* Releases a mapping created by the mmap()-based load_blob(); len must be the length it reported. */
+  munmap(blob, len);
+}
+# else
+/* Fallback loader for platforms without mmap(): reads the whole file into a
+   malloc()ed buffer.
+   On success returns the buffer and stores the number of bytes actually read
+   in *len; release it with free_blob().
+   On any failure (cannot open, empty or unseekable file, out of memory,
+   nothing read) *len is set to 0 and NULL is returned. */
+unsigned char *load_blob(const char *filename, int *len) {
+  FILE *file;
+  unsigned char *data;
+  long size;
+  *len = 0;
+  /* The blob is binary data: "rb" prevents newline translation and early
+     end-of-file detection on platforms that distinguish text mode. */
+  file = fopen(filename, "rb");
+  if (file == NULL) return NULL;
+  size = -1;
+  if (fseek(file, 0L, SEEK_END) == 0) size = ftell(file);
+  if (size <= 0 || fseek(file, 0L, SEEK_SET) != 0) {
+    fclose(file);
+    return NULL;
+  }
+  data = (unsigned char *)malloc(size);
+  if (data != NULL) {
+    *len = (int)fread(data, 1, size, file);
+    if (*len <= 0) {
+      free(data);
+      data = NULL;
+      *len = 0;
+    }
+  }
+  /* The stream is no longer needed once the contents are in memory. */
+  fclose(file);
+  return data;
+}
+void free_blob(unsigned char *blob, int len) {  /* Releases a buffer returned by the malloc()-based load_blob(). */
+  free(blob);
+  (void)len;  /* Unused here; kept so both free_blob() variants share one signature. */
+}
+# endif
+#endif
+
+#define MODE_FEATURES 2
+/*#define MODE_SYNTHESIS 3*/
+#define MODE_ADDLPC 5
+#define MODE_FWGAN_SYNTHESIS 6
+#define MODE_FARGAN_SYNTHESIS 7
+
+/* Prints the command-line synopsis to stderr and terminates the process with
+   exit status 1.
+   The option names shown here must match the strings compared in main();
+   the synthesis option is spelled "-fargan-synthesis" (with a hyphen). */
+void usage(void) {
+    fprintf(stderr, "usage: lpcnet_demo -features <input.pcm> <features.f32>\n");
+    fprintf(stderr, "       lpcnet_demo -fargan-synthesis <features.f32> <output.pcm>\n");
+    fprintf(stderr, "       lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n");
+    fprintf(stderr, "  plc_options:\n");
+    fprintf(stderr, "       causal:       normal (causal) PLC\n");
+    fprintf(stderr, "       codec:        normal (causal) PLC without cross-fade (will glitch)\n");
+    exit(1);
+}
+
+int main(int argc, char **argv) {  /* lpcnet_demo <mode> <input> <output>: feature extraction, FARGAN synthesis, or LPC insertion. */
+    int mode=0;
+    int arch;
+    FILE *fin, *fout;
+#ifdef USE_WEIGHTS_FILE
+    int len;
+    unsigned char *data;
+    const char *filename = "weights_blob.bin";
+#endif
+    arch = opus_select_arch();
+    if (argc < 4) usage();  /* usage() exits, so argv[1..3] exist from here on. */
+    if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
+    else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
+    else if (strcmp(argv[1], "-addlpc") == 0){
+        mode=MODE_ADDLPC;
+    } else {
+        usage();
+    }
+    if (argc != 4) usage();  /* Every supported mode takes exactly one input and one output path. */
+    fin = fopen(argv[2], "rb");
+    if (fin == NULL) {
+        fprintf(stderr, "Can't open %s\n", argv[2]);
+        exit(1);
+    }
+
+    fout = fopen(argv[3], "wb");
+    if (fout == NULL) {
+        fprintf(stderr, "Can't open %s\n", argv[3]);
+        exit(1);
+    }
+#ifdef USE_WEIGHTS_FILE
+    data = load_blob(filename, &len);  /* NOTE(review): the result is not checked before being used below. */
+#endif
+    if (mode == MODE_FEATURES) {
+        LPCNetEncState *net;
+        net = lpcnet_encoder_create();
+        while (1) {
+            float features[NB_TOTAL_FEATURES];
+            opus_int16 pcm[LPCNET_FRAME_SIZE];
+            size_t ret;
+            ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
+            if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;  /* Stop at EOF or on a partial frame. */
+            lpcnet_compute_single_frame_features(net, pcm, features, arch);
+            fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
+        }
+        lpcnet_encoder_destroy(net);
+    } else if (mode == MODE_FARGAN_SYNTHESIS) {
+        FARGANState fargan;
+        size_t ret, i;
+        float in_features[5*NB_TOTAL_FEATURES];
+        float zeros[320] = {0};
+        fargan_init(&fargan);
+#ifdef USE_WEIGHTS_FILE
+        fargan_load_model(&fargan, data, len);
+#endif
+        /* uncomment the following to align with Python code */
+        /*ret = fread(&in_features[0], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);*/
+        for (i=0;i<5;i++) {  /* Each read fetches NB_TOTAL_FEATURES floats but advances by NB_FEATURES, so the next read overwrites the extra values. */
+          ret = fread(&in_features[i*NB_FEATURES], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);
+        }
+        fargan_cont(&fargan, zeros, in_features);  /* Prime the synthesizer with an all-zero PCM buffer and the first five frames. */
+        while (1) {
+            float features[NB_FEATURES];
+            float fpcm[LPCNET_FRAME_SIZE];
+            opus_int16 pcm[LPCNET_FRAME_SIZE];
+            ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
+            if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
+            OPUS_COPY(features, in_features, NB_FEATURES);  /* Only the first NB_FEATURES values of each record are used. */
+            fargan_synthesize(&fargan, fpcm, features);
+            for (i=0;i<LPCNET_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));  /* Scale by 32768, clamp to +/-32767, round to nearest. */
+            fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
+        }
+    } else if (mode == MODE_ADDLPC) {
+        float features[36];
+        size_t ret;
+
+        while (1) {
+            ret = fread(features, sizeof(features[0]), 36, fin);
+            if (ret != 36 || feof(fin)) break;
+            lpc_from_cepstrum(&features[20], &features[0]);  /* Writes LPCs starting at features[20], computed from the values starting at features[0]. */
+            fwrite(features, sizeof(features[0]), 36, fout);
+        }
+
+    } else {
+        fprintf(stderr, "unknown action\n");
+    }
+    fclose(fin);
+    fclose(fout);
+#ifdef USE_WEIGHTS_FILE
+    free_blob(data, len);
+#endif
+    return 0;
+}
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@ -0,0 +1,230 @@
+/* Copyright (c) 2017-2019 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include <assert.h>
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+#include "os_support.h"
+#include "_kiss_fft_guts.h"
+#include "celt_lpc.h"
+#include "mathops.h"
+
+
+/* Returns the number of bytes needed to hold one LPCNetEncState. */
+int lpcnet_encoder_get_size(void) {
+  const int state_bytes = (int)sizeof(LPCNetEncState);
+  return state_bytes;
+}
+
+/* Puts an encoder state into its initial condition: every byte of the
+   struct is zeroed, then the embedded pitch DNN is initialised.
+   Always returns 0. */
+int lpcnet_encoder_init(LPCNetEncState *st) {
+  memset(st, 0, sizeof(LPCNetEncState));
+  pitchdnn_init(&st->pitchdnn);
+  return 0;
+}
+
+/* Loads the pitch DNN weights from the `len`-byte blob at `data` and
+   returns whatever status pitchdnn_load_model() reports. */
+int lpcnet_encoder_load_model(LPCNetEncState *st, const unsigned char *data, int len) {
+  int status;
+  status = pitchdnn_load_model(&st->pitchdnn, data, len);
+  return status;
+}
+
+/* Allocates and initialises a new encoder state.
+   Returns the new state, or NULL when the allocation fails. The caller
+   releases it with lpcnet_encoder_destroy(). */
+LPCNetEncState *lpcnet_encoder_create(void) {
+  LPCNetEncState *st;
+  st = opus_alloc(lpcnet_encoder_get_size());
+  /* lpcnet_encoder_init() memsets the state, so it must never see NULL. */
+  if (st != NULL) lpcnet_encoder_init(st);
+  return st;
+}
+
+/* Releases an encoder state by handing it to opus_free(); the counterpart
+   of lpcnet_encoder_create(), which obtains the state from opus_alloc(). */
+void lpcnet_encoder_destroy(LPCNetEncState *st) {
+  opus_free(st);
+}
+
+/* Builds one analysis window from the saved overlap plus the new frame,
+   then produces its spectrum (X) and band energies (Ex).
+   st->analysis_mem is refreshed with the last OVERLAP_SIZE samples of `in`
+   so the next call can prepend them. */
+static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
+  float win_buf[WINDOW_SIZE];
+  float *overlap = st->analysis_mem;
+  /* Window layout: [previous overlap | current frame]. */
+  OPUS_COPY(win_buf, overlap, OVERLAP_SIZE);
+  OPUS_COPY(win_buf + OVERLAP_SIZE, in, FRAME_SIZE);
+  /* Keep the tail of this frame for the next window. */
+  OPUS_COPY(overlap, in + (FRAME_SIZE-OVERLAP_SIZE), OVERLAP_SIZE);
+  apply_window(win_buf);
+  forward_transform(X, win_buf);
+  lpcn_compute_band_energy(Ex, X);
+}
+
+/* Second-order recursive filter applied to N samples.
+   y    - output samples
+   mem  - two-element filter state, read on entry and written back on exit
+   x    - input samples
+   b, a - two coefficients each, combined as shown in the "Original code"
+          comment below
+   N    - number of samples to process
+   Each x[i] is loaded into xi before y[i] is stored, so y may be the same
+   buffer as x (the call in compute_frame_features() filters in place). */
+static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
+  int i;
+  float mem0, mem1;
+  /* Work on local copies of the state; they are stored back after the loop. */
+  mem0 = mem[0];
+  mem1 = mem[1];
+  for (i=0;i<N;i++) {
+    float xi, yi, mem00;
+    xi = x[i];
+    yi = x[i] + mem0;
+    /* Keep the old mem0: the mem1 update below needs it after mem0 changes. */
+    mem00 = mem0;
+    /* Original code:
+    mem0 = mem1 + (b[0]*xi - a[0]*yi);
+    mem1 = (b[1]*xi - a[1]*yi);
+    Modified to reduce dependency chains: (the +1e-30f forces the ordering and has no effect on the output)
+    */
+    /* Same recurrence with yi replaced by xi + (old mem0). */
+    mem0 = (b[0]-a[0])*xi + mem1 - a[0]*mem0;
+    mem1 = (b[1]-a[1])*xi + 1e-30f - a[1]*mem00;
+    y[i] = yi;
+  }
+  mem[0] = mem0;
+  mem[1] = mem1;
+}
+
+/* Base-10 logarithm expressed through celt_log2(): 0.3010299957 is log10(2). */
+#define celt_log10(x) (0.3010299957f*celt_log2(x))
+
+/* Analyses one frame and fills st->features.
+   st   - encoder state; the analysis memories, pitch buffers and feature
+          arrays inside it are all updated
+   in   - FRAME_SIZE input samples (the caller in this file applies
+          preemphasis() first)
+   arch - value forwarded to the celt_* kernels and compute_pitchdnn()
+   Layout written to st->features:
+     [0, NB_BANDS)          output of dct() on the log band energies,
+                            with 4 subtracted from element 0
+     [NB_BANDS]             st->dnn_pitch
+     [NB_BANDS+1]           frame correlation measure minus 0.5
+     [NB_BANDS+2, +LPC_ORDER) the LPC coefficients from lpc_from_cepstrum() */
+void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
+  float aligned_in[FRAME_SIZE];
+  int i;
+  float Ly[NB_BANDS];
+  float follow, logMax;
+  kiss_fft_cpx X[FREQ_SIZE];
+  float Ex[NB_BANDS];
+  float xcorr[PITCH_MAX_PERIOD];
+  float ener0;
+  float ener;
+  float x[FRAME_SIZE+LPC_ORDER];
+  float frame_corr;
+  float xy, xx, yy;
+  int pitch;
+  float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
+  /* [b,a]=ellip(2, 2, 20, 1200/8000); */
+  static const float lp_b[2] = {-0.84946f, 1.f};
+  static const float lp_a[2] = {-1.54220f, 0.70781f};
+  /* Grab the last TRAINING_OFFSET samples of the previous frame now:
+     frame_analysis() below overwrites st->analysis_mem. */
+  OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
+  frame_analysis(st, X, Ex, in);
+  /* Bin 0 contributes a single feature: its log power, scaled and clamped
+     to [-1, 1]. Only the real part is used. */
+  st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
+  /* Every other bin contributes three features: the real and imaginary
+     parts of a unit-magnitude product of this frame's bin with the
+     previous frame's bin, plus the clamped log power.
+     NOTE(review): C_MULC presumably conjugates its last operand - confirm. */
+  for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
+    kiss_fft_cpx prod;
+    float norm_1;
+    C_MULC(prod, X[i], st->prev_if[i]);
+    /* 1e-15f keeps the division finite when the product is zero. */
+    norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
+    C_MULBYSCALAR(prod, norm_1);
+    st->if_features[3*i-2] = prod.r;
+    st->if_features[3*i-1] = prod.i;
+    st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
+  }
+  /* Remember this frame's low bins for the next call. */
+  OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
+  /*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
+  logMax = -2;
+  follow = -2;
+  /* Log band energies with two floors: no band may sit more than 8 below
+     the running maximum, nor more than 2.5 below the follower carried
+     over from the previous band. */
+  for (i=0;i<NB_BANDS;i++) {
+    Ly[i] = celt_log10(1e-2f+Ex[i]);
+    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
+    logMax = MAX16(logMax, Ly[i]);
+    follow = MAX16(follow-2.5f, Ly[i]);
+  }
+  dct(st->features, Ly);
+  st->features[0] -= 4;
+  lpc_from_cepstrum(st->lpc, st->features);
+  for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
+  /* Slide both pitch buffers left by one frame; the new frame is written
+     at offset PITCH_MAX_PERIOD below. */
+  OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
+  OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
+  /* Complete aligned_in: the input delayed by TRAINING_OFFSET samples. */
+  OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
+  /* x = LPC_ORDER samples of history followed by the aligned frame. */
+  OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
+  OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
+  OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
+  /* Filter the aligned frame with the LPC coefficients into lp_buf. */
+  celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
+  /* exc_buf = lp_buf plus 0.7 times the previous lp_buf sample;
+     st->pitch_filt carries that previous sample across frames. */
+  for (i=0;i<FRAME_SIZE;i++) {
+    st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
+    st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
+    /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
+  }
+  /* Filter the new lp_buf frame in place, after exc_buf has been derived
+     from the unfiltered values. */
+  biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
+  {
+    double ener1;
+    float *buf = st->exc_buf;
+    /* Correlate the newest frame against PITCH_MAX_PERIOD-PITCH_MIN_PERIOD
+       positions starting at buf[0]. */
+    celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
+    ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
+    ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
+    /*printf("%f\n", st->frame_weight[sub]);*/
+    for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+      ener = 1 + ener0 + ener1;
+      st->xcorr_features[i] = 2*xcorr[i];
+      ener_norm[i] = ener;
+      /* Slide the FRAME_SIZE-long energy window one sample forward:
+         add the sample entering, drop the one leaving. */
+      ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
+      /*printf("%f ", st->xcorr_features[i]);*/
+    }
+    /* Split in a separate loop so the compiler can vectorize it */
+    for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+      st->xcorr_features[i] /= ener_norm[i];
+    }
+    /*printf("\n");*/
+  }
+  st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
+  /* Convert the DNN output to a lag in samples: 256 / 2^(dnn_pitch+1.5),
+     rounded to nearest.
+     NOTE(review): the lag indexes lp_buf backwards from PITCH_MAX_PERIOD and
+     is not clamped here - confirm compute_pitchdnn() bounds its output. */
+  pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
+  /* Energies of the current frame (xx), of the frame one lag earlier (yy),
+     and their cross term (xy), all on the filtered lp_buf. */
+  xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
+  yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
+  xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
+  /*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
+  /* Normalised correlation; the +1 keeps the denominator non-zero. */
+  frame_corr = xy/sqrt(1+xx*yy);
+  /* Soft compression, scaled so that an input of 1 maps to 1. */
+  frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
+  st->features[NB_BANDS] = st->dnn_pitch;
+  st->features[NB_BANDS + 1] = frame_corr-.5f;
+}
+
+/* First-order pre-emphasis: y[i] = x[i] - coef*x[i-1].
+   *mem holds -coef times the previous input sample and is updated so the
+   filter can continue on the next call. Every x[i] is read before y[i]
+   is written, so y and x may be the same buffer. */
+void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
+  int n = 0;
+  while (n < N) {
+    const float sample = x[n];
+    const float filtered = sample + *mem;
+    *mem = -coef*sample;
+    y[n] = filtered;
+    n++;
+  }
+}
+
+/* Shared back end of the two public feature extractors.
+   x is pre-emphasised in place (so the caller's buffer is modified),
+   analysed, and the resulting NB_TOTAL_FEATURES values are copied from
+   the encoder state into `features`. Always returns 0. */
+static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
+  float *const frame = x;
+  preemphasis(frame, &st->mem_preemph, frame, PREEMPHASIS, FRAME_SIZE);
+  compute_frame_features(st, frame, arch);
+  OPUS_COPY(features, st->features, NB_TOTAL_FEATURES);
+  return 0;
+}
+
+/* Computes the features of one frame of 16-bit PCM.
+   The FRAME_SIZE samples are widened to float without rescaling and handed
+   to the shared implementation. Always returns 0. */
+int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
+  float x[FRAME_SIZE];
+  int n = 0;
+  while (n < FRAME_SIZE) {
+    x[n] = pcm[n];
+    n++;
+  }
+  (void)lpcnet_compute_single_frame_features_impl(st, x, features, arch);
+  return 0;
+}
+
+/* Computes the features of one frame of float PCM.
+   The input is copied to a scratch buffer first because the shared
+   implementation pre-emphasises its buffer in place. Always returns 0. */
+int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
+  float x[FRAME_SIZE];
+  OPUS_COPY(x, pcm, FRAME_SIZE);
+  (void)lpcnet_compute_single_frame_features_impl(st, x, features, arch);
+  return 0;
+}
--- a/dnn/lpcnet_plc.c
+++ b/dnn/lpcnet_plc.c
@ -0,0 +1,211 @@
+/* Copyright (c) 2021 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+#include "plc_data.h"
+#include "os_support.h"
+#include "common.h"
+#include "cpu_support.h"
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+/* Comment this out to have LPCNet update its state on every good packet (slow). */
+#define PLC_SKIP_UPDATES
+
+/* Returns the PLC to its start-up condition without touching the loaded
+   model: everything from the LPCNET_PLC_RESET_START member to the end of
+   the struct is zeroed, the feature encoder is re-initialised, and the
+   buffer positions are set to "nothing analysed yet". */
+void lpcnet_plc_reset(LPCNetPLCState *st) {
+  char *state_base = (char*)st;
+  char *reset_from = (char*)&st->LPCNET_PLC_RESET_START;
+  OPUS_CLEAR(reset_from, sizeof(LPCNetPLCState)-(reset_from - state_base));
+  lpcnet_encoder_init(&st->enc);
+  OPUS_CLEAR(st->pcm, PLC_BUF_SIZE);
+  st->analysis_pos = PLC_BUF_SIZE;
+  st->predict_pos = PLC_BUF_SIZE;
+  st->analysis_gap = 1;
+  st->loss_count = 0;
+  st->blend = 0;
+}
+
+/* One-time initialisation of a PLC state.
+   Selects the CPU architecture, initialises the FARGAN synthesiser and the
+   feature encoder, and - unless USE_WEIGHTS_FILE is defined - loads the
+   built-in PLC model. st->loaded tells whether a model is available.
+   Returns 0 on success, otherwise the init_plcmodel() error. */
+int lpcnet_plc_init(LPCNetPLCState *st) {
+  int ret = 0;
+  st->arch = opus_select_arch();
+  fargan_init(&st->fargan);
+  lpcnet_encoder_init(&st->enc);
+  st->loaded = 0;
+#ifndef USE_WEIGHTS_FILE
+  ret = init_plcmodel(&st->model, plcmodel_arrays);
+  st->loaded = (ret == 0) ? 1 : 0;
+#endif
+  celt_assert(ret == 0);
+  lpcnet_plc_reset(st);
+  return ret;
+}
+
+/* Loads the PLC, encoder (pitch) and FARGAN models from one weight blob.
+   st   - PLC state to load into; st->loaded is set to 1 only when all
+          three loads succeed
+   data - weight blob
+   len  - size of the blob in bytes
+   Returns 0 on success and non-zero on failure. */
+int lpcnet_plc_load_model(LPCNetPLCState *st, const unsigned char *data, int len) {
+  /* Start from NULL so a parse that fails before assigning the list cannot
+     leave an indeterminate pointer behind. */
+  WeightArray *list = NULL;
+  int ret;
+  parse_weights(&list, data, len);
+  /* NOTE(review): parse_weights() is not visible here; a NULL list is
+     treated as "parse failed" whatever its return convention is. */
+  if (list == NULL) return -1;
+  ret = init_plcmodel(&st->model, list);
+  opus_free(list);
+  if (ret == 0) {
+    ret = lpcnet_encoder_load_model(&st->enc, data, len);
+  }
+  if (ret == 0) {
+    ret = fargan_load_model(&st->fargan, data, len);
+  }
+  if (ret == 0) st->loaded = 1;
+  return ret;
+}
+
+/* Appends one FEC feature vector to the queue.
+   features - NB_FEATURES values to queue, or NULL to record a missing
+              frame (fec_skip is incremented and nothing is stored)
+   When the write position reaches the end of the array, the unread entries
+   are moved to the front first. If none have been read yet the queue is
+   genuinely full and the new vector is dropped, since storing it would
+   write past st->fec. */
+void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
+  if (features == NULL) {
+    st->fec_skip++;
+    return;
+  }
+  if (st->fec_fill_pos == PLC_MAX_FEC) {
+    /* Compact: slide the unread entries down to index 0. */
+    int pending = st->fec_fill_pos-st->fec_read_pos;
+    OPUS_MOVE(&st->fec[0][0], &st->fec[st->fec_read_pos][0], pending*NB_FEATURES);
+    st->fec_fill_pos = pending;
+    st->fec_read_pos = 0;
+  }
+  /* Compaction frees nothing when fec_read_pos was already 0. */
+  if (st->fec_fill_pos >= PLC_MAX_FEC) return;
+  OPUS_COPY(&st->fec[st->fec_fill_pos][0], features, NB_FEATURES);
+  st->fec_fill_pos++;
+}
+
+/* Empties the FEC queue and forgets any pending skipped frames. */
+void lpcnet_plc_fec_clear(LPCNetPLCState *st) {
+  st->fec_skip = 0;
+  st->fec_fill_pos = 0;
+  st->fec_read_pos = 0;
+}
+
+
+/* Runs one step of the PLC prediction network:
+   dense (tanh) -> GRU 1 -> GRU 2 -> dense (linear).
+   The GRU states in st->plc_net are updated; `out` receives the final
+   dense layer's output. A model must have been loaded. */
+static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
+  float dense_out[PLC_DENSE_IN_OUT_SIZE];
+  celt_assert(st->loaded);
+  compute_generic_dense(&st->model.plc_dense_in, dense_out, in, ACTIVATION_TANH, 0);
+  compute_generic_gru(&st->model.plc_gru1_input, &st->model.plc_gru1_recurrent, st->plc_net.gru1_state, dense_out, 0);
+  compute_generic_gru(&st->model.plc_gru2_input, &st->model.plc_gru2_recurrent, st->plc_net.gru2_state, st->plc_net.gru1_state, 0);
+  compute_generic_dense(&st->model.plc_dense_out, out, st->plc_net.gru2_state, ACTIVATION_LINEAR, 0);
+}
+
+/* Produces the next feature vector in `out`.
+   Returns 1 when it came from the FEC queue and 0 when it had to be
+   predicted. FEC is used only if the queue is non-empty and no skipped
+   frame is pending; otherwise the network predicts from an all-zero input
+   and one pending skip (if any) is consumed. */
+static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
+  const int fec_available = (st->fec_read_pos != st->fec_fill_pos);
+  if (!fec_available || st->fec_skip != 0) {
+    float no_input[2*NB_BANDS+NB_FEATURES+1] = {0};
+    compute_plc_pred(st, out, no_input);
+    if (st->fec_skip > 0) st->fec_skip--;
+    return 0;
+  } else {
+    float plc_input[2*NB_BANDS+NB_FEATURES+1] = {0};
+    float unused_pred[NB_FEATURES];
+    const float *fec_frame = &st->fec[st->fec_read_pos][0];
+    OPUS_COPY(out, fec_frame, NB_FEATURES);
+    st->fec_read_pos++;
+    /* Update PLC state using FEC, so without Burg features. */
+    OPUS_COPY(&plc_input[2*NB_BANDS], out, NB_FEATURES);
+    plc_input[2*NB_BANDS+NB_FEATURES] = -1;
+    compute_plc_pred(st, unused_pred, plc_input);
+    return 1;
+  }
+}
+
+/* Pushes one feature vector into the CONT_VECTORS-deep history used by
+   fargan_cont(): the oldest vector is dropped and the new one is stored
+   in the last slot. */
+static void queue_features(LPCNetPLCState *st, const float *features) {
+  float *history = st->cont_features;
+  float *newest = history + (CONT_VECTORS-1)*NB_FEATURES;
+  OPUS_MOVE(history, history + NB_FEATURES, (CONT_VECTORS-1)*NB_FEATURES);
+  OPUS_COPY(newest, features, NB_FEATURES);
+}
+
+/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
+   needs to generate two feature vectors to conceal the first lost packet.*/
+
+/* Feeds one correctly received frame into the PLC history.
+   The PCM buffer is shifted by one frame, the new samples (scaled by
+   1/32768) are appended, the analysis and prediction positions move back
+   with the data, and the loss bookkeeping is cleared. Always returns 0. */
+int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) {
+  int i;
+  float *tail;
+  if (st->analysis_pos >= FRAME_SIZE) st->analysis_pos -= FRAME_SIZE;
+  else st->analysis_gap = 1;
+  if (st->predict_pos >= FRAME_SIZE) st->predict_pos -= FRAME_SIZE;
+  OPUS_MOVE(st->pcm, st->pcm + FRAME_SIZE, PLC_BUF_SIZE-FRAME_SIZE);
+  tail = st->pcm + (PLC_BUF_SIZE-FRAME_SIZE);
+  for (i=0;i<FRAME_SIZE;i++) tail[i] = (1.f/32768.f)*pcm[i];
+  st->blend = 0;
+  st->loss_count = 0;
+  return 0;
+}
+
+/* Offset added to features[0] of a concealed frame, indexed by the number
+   of consecutive predicted frames (st->loss_count); see the end of
+   lpcnet_plc_conceal() for what happens beyond the table. */
+static const float att_table[10] = {0, 0,  -.2, -.2,  -.4, -.4,  -.8, -.8, -1.6, -1.6};
+/* Synthesises one frame to replace a lost packet.
+   st  - PLC state; a model must be loaded
+   pcm - receives the FRAME_SIZE concealed samples
+   On the first loss after good audio (st->blend == 0) the buffered PCM is
+   analysed to bring the prediction network and FARGAN up to date; later
+   calls only generate the next frame. Always returns 0. */
+int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
+  int i;
+  celt_assert(st->loaded);
+  if (st->blend == 0) {
+    int count = 0;
+    /* Roll the prediction network back to the older of the two saved
+       snapshots before catching up. */
+    st->plc_net = st->plc_bak[0];
+    /* Analyse every complete frame in the PCM buffer from analysis_pos on. */
+    while (st->analysis_pos + FRAME_SIZE <= PLC_BUF_SIZE) {
+      float x[FRAME_SIZE];
+      float plc_features[2*NB_BANDS+NB_FEATURES+1];
+      celt_assert(st->analysis_pos >= 0);
+      /* st->pcm is stored scaled by 1/32768; undo that for analysis. */
+      for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i];
+      burg_cepstral_analysis(plc_features, x);
+      lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch);
+      /* The first frame after an analysis gap is skipped, as is any frame
+         located before predict_pos. */
+      if ((!st->analysis_gap || count>0) && st->analysis_pos >= st->predict_pos) {
+        queue_features(st, st->features);
+        OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES);
+        /* Last input element is 1 here; get_fec_or_pred() uses -1 for FEC
+           frames and 0 for pure prediction. */
+        plc_features[2*NB_BANDS+NB_FEATURES] = 1;
+        /* Save a snapshot of the network before each update so that two
+           steps of history are kept in plc_bak. */
+        st->plc_bak[0] = st->plc_bak[1];
+        st->plc_bak[1] = st->plc_net;
+        compute_plc_pred(st, st->features, plc_features);
+      }
+      st->analysis_pos += FRAME_SIZE;
+      count++;
+    }
+    /* Generate two feature vectors (from FEC or by prediction) before
+       priming the synthesiser, as the comment above this function's
+       sibling lpcnet_plc_update() explains for the causal version. */
+    st->plc_bak[0] = st->plc_bak[1];
+    st->plc_bak[1] = st->plc_net;
+    get_fec_or_pred(st, st->features);
+    queue_features(st, st->features);
+    st->plc_bak[0] = st->plc_bak[1];
+    st->plc_bak[1] = st->plc_net;
+    get_fec_or_pred(st, st->features);
+    queue_features(st, st->features);
+    /* Prime FARGAN with the newest FARGAN_CONT_SAMPLES of PCM and the
+       queued feature history. */
+    fargan_cont(&st->fargan, &st->pcm[PLC_BUF_SIZE-FARGAN_CONT_SAMPLES], st->cont_features);
+    st->analysis_gap = 0;
+  }
+  st->plc_bak[0] = st->plc_bak[1];
+  st->plc_bak[1] = st->plc_net;
+  /* A frame backed by FEC resets the loss counter; a predicted one
+     increases it. */
+  if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
+  else st->loss_count++;
+  /* Lower features[0] as the loss gets longer: table values for the first
+     frames, then a further 2 per frame, never going below -10. */
+  if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
+  else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
+  fargan_synthesize_int(&st->fargan, pcm, &st->features[0]);
+  queue_features(st, st->features);
+  if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE;
+  else st->analysis_gap = 1;
+  st->predict_pos = PLC_BUF_SIZE;
+  /* Append the concealed frame to the PCM history, scaled like real input. */
+  OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
+  for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i];
+  st->blend = 1;
+  return 0;
+}
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@ -0,0 +1,90 @@
+#ifndef LPCNET_PRIVATE_H
+#define LPCNET_PRIVATE_H
+
+#include <stdio.h>
+#include "freq.h"
+#include "lpcnet.h"
+#include "plc_data.h"
+#include "pitchdnn.h"
+#include "fargan.h"
+
+
+#define PITCH_FRAME_SIZE 320
+#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
+
+#define PLC_MAX_FEC 100
+#define MAX_FEATURE_BUFFER_SIZE 4
+
+#define PITCH_IF_MAX_FREQ 30
+#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2)
+
+#define CONT_VECTORS 5
+
+#define FEATURES_DELAY 1
+
+/* State of the feature extractor implemented in lpcnet_enc.c.
+   lpcnet_encoder_init() zeroes the whole struct and then initialises
+   pitchdnn. */
+struct LPCNetEncState{
+  PitchDNNState pitchdnn;                /* pitch estimation network */
+  float analysis_mem[OVERLAP_SIZE];      /* tail of the previous frame, prepended to the next analysis window */
+  float mem_preemph;                     /* preemphasis() filter state */
+  kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ]; /* low spectrum bins of the previous frame */
+  float if_features[PITCH_IF_FEATURES];  /* first input to compute_pitchdnn() */
+  float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD]; /* second input to compute_pitchdnn() */
+  float dnn_pitch;                       /* last output of compute_pitchdnn() */
+  float pitch_mem[LPC_ORDER];            /* last LPC_ORDER samples of the delayed input, history for celt_fir() */
+  float pitch_filt;                      /* previous lp_buf sample used when deriving exc_buf */
+  float exc_buf[PITCH_BUF_SIZE];         /* signal fed to the pitch cross-correlation */
+  float lp_buf[PITCH_BUF_SIZE];          /* biquad-filtered signal used for the frame correlation */
+  float lp_mem[4];                       /* biquad() state; biquad() itself only touches the first two entries */
+  float lpc[LPC_ORDER];                  /* LPC coefficients from lpc_from_cepstrum() */
+  float features[NB_TOTAL_FEATURES];     /* feature vector of the last analysed frame */
+  float sig_mem[LPC_ORDER];              /* NOTE(review): not referenced in lpcnet_enc.c - confirm it is still needed */
+  float burg_cepstrum[2*NB_BANDS];       /* NOTE(review): not referenced in lpcnet_enc.c - confirm it is still needed */
+};
+
+/* Recurrent state of the PLC prediction network: one state vector per GRU
+   layer. Kept as a plain struct so snapshots can be taken by assignment. */
+typedef struct {
+  float gru1_state[PLC_GRU1_STATE_SIZE];
+  float gru2_state[PLC_GRU2_STATE_SIZE];
+} PLCNetState;
+
+/* Length in samples of the PCM history kept by the PLC. */
+#define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE)
+/* Complete packet-loss-concealment state.
+   Members above LPCNET_PLC_RESET_START survive lpcnet_plc_reset(); that
+   member and everything after it is zeroed by the reset. */
+struct LPCNetPLCState {
+  PLCModel model;      /* weights of the prediction network */
+  FARGANState fargan;  /* synthesiser that produces the concealed audio */
+  LPCNetEncState enc;  /* feature extractor run on buffered PCM */
+  int loaded;          /* non-zero once a model has been loaded */
+  int arch;            /* result of opus_select_arch() */
+
+/* First member cleared by lpcnet_plc_reset(). */
+#define LPCNET_PLC_RESET_START fec
+  float fec[PLC_MAX_FEC][NB_FEATURES]; /* queue of FEC feature vectors */
+  int analysis_gap;    /* set when the analysis position fell off the start of the PCM buffer */
+  int fec_read_pos;    /* next FEC entry to consume */
+  int fec_fill_pos;    /* next FEC entry to write */
+  int fec_skip;        /* frames reported as missing (NULL) by lpcnet_plc_fec_add() and not yet consumed */
+  int analysis_pos;    /* offset in pcm of the first sample not yet analysed */
+  int predict_pos;     /* frames located before this offset are not fed to the prediction network */
+  float pcm[PLC_BUF_SIZE]; /* recent output history, scaled by 1/32768 */
+  int blend;           /* 0 after a good frame, 1 after a concealed one */
+  float features[NB_TOTAL_FEATURES]; /* working feature vector */
+  float cont_features[CONT_VECTORS*NB_FEATURES]; /* last CONT_VECTORS feature vectors, passed to fargan_cont() */
+  int loss_count;      /* consecutive frames produced by prediction rather than FEC */
+  PLCNetState plc_net; /* live state of the prediction network */
+  PLCNetState plc_bak[2]; /* two most recent snapshots of plc_net */
+};
+
+/* First-order pre-emphasis filter; y and x may be the same buffer. */
+void preemphasis(float *y, float *mem, const float *x, float coef, int N);
+
+/* Analyses one frame and stores the result in st->features. */
+void compute_frame_features(LPCNetEncState *st, const float *in, int arch);
+
+void lpcnet_reset_signal(LPCNetState *lpcnet);
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
+void run_frame_network_flush(LPCNetState *lpcnet);
+
+
+void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload);
+void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
+void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
+
+#endif
--- a/dnn/lpcnet_tables.c
+++ b/dnn/lpcnet_tables.c
@ -0,0 +1,307 @@
+/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "kiss_fft.h"
+
+static const arch_fft_state arch_fft = {0, NULL};
+
+static const opus_int16 fft_bitrev[320] = {
+0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
+48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
+36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
+24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
+12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
+60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
+33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
+21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
+9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
+57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
+45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
+18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
+6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
+54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
+42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
+30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
+3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
+51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
+39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
+27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
+15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
+63, 127, 191, 255, 319, };
+
+static const kiss_twiddle_cpx fft_twiddles[320] = {
+{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},
+{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},
+{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},
+{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},
+{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},
+{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},
+{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},
+{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},
+{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},
+{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},
+{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},
+{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},
+{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},
+{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},
+{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},
+{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},
+{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},
+{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},
+{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},
+{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},
+{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},
+{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},
+{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},
+{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},
+{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},
+{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},
+{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},
+{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},
+{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},
+{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},
+{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},
+{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},
+{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},
+{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},
+{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},
+{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},
+{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},
+{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},
+{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},
+{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},
+{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},
+{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},
+{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},
+{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},
+{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},
+{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},
+{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},
+{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},
+{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},
+{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},
+{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},
+{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},
+{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},
+{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},
+{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},
+{-0.555570245f, -0.831469595f}, {-0.571787953f, -0.820401430f},
+{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},
+{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},
+{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},
+{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},
+{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},
+{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},
+{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},
+{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},
+{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},
+{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},
+{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},
+{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},
+{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},
+{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},
+{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},
+{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},
+{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},
+{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},
+{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},
+{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},
+{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},
+{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},
+{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},
+{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},
+{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},
+{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},
+{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},
+{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},
+{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},
+{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},
+{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},
+{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},
+{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},
+{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},
+{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},
+{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},
+{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},
+{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},
+{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},
+{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},
+{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},
+{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},
+{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},
+{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},
+{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},
+{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},
+{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},
+{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},
+{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},
+{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},
+{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},
+{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},
+{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},
+{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},
+{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},
+{-0.346117049f, 0.938191354f}, {-0.327630192f, 0.944806039f},
+{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},
+{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},
+{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},
+{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},
+{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},
+{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},
+{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},
+{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},
+{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},
+{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},
+{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},
+{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},
+{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},
+{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},
+{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},
+{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},
+{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},
+{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},
+{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},
+{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},
+{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},
+{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},
+{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},
+{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},
+{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},
+{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},
+{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},
+{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},
+{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},
+{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},
+{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},
+{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},
+{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},
+{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},
+{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},
+{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},
+{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},
+{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},
+{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},
+{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},
+{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},
+{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},
+{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},
+{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},
+{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},
+{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},
+{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},
+{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},
+};
+
+/* FFT configuration for the 320-point transform: it points at the
+   320-entry fft_bitrev and fft_twiddles tables defined in this file.
+   The scale value 0.003125 equals 1/320. */
+const kiss_fft_state kfft = {
+320, /* nfft */
+0.0031250000f, /* scale */
+-1, /* shift */
+{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev, /* bitrev*/
+fft_twiddles, /* twiddles*/
+(arch_fft_state *)&arch_fft, /* arch_fft*/
+};
+
+const float half_window[] = {
+3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
+0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
+0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
+0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
+0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
+0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
+0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
+0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
+0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
+0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
+0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
+0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
+0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
+0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
+0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
+0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
+0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
+0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
+0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
+0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
+0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
+0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
+0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
+0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
+0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
+0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
+0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
+0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
+0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
+0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
+0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
+0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
+};
+
+const float dct_table[] = {
+0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
+0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
+0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
+0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
+0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
+-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
+-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
+-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
+-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
+-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
+0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
+0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
+-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
+0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
+-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
+-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
+1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
+-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
+0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
+0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
+-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
+-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
+-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
+-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
+0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
+0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
+0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
+0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
+0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
+0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
+-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
+-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
+0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
+0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
+0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
+-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
+0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
+-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
+0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
+-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
+-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
+0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
+0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
+-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
+-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
+0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
+0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
+-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
+5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
+0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
+1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
+0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
+0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
+0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
+0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
+0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
+-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
+-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
+0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
+0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
+-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
+0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
+0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
+-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
+0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };
--- a/dnn/meson.build
+++ b/dnn/meson.build
@ -0,0 +1,64 @@
+# Build definition for the DNN code. Deep PLC sources always form the base
+# list; DRED and OSCE sources are appended when their options are enabled.
+dnn_sources = sources['DEEP_PLC_SOURCES']
+dred_sources = sources['DRED_SOURCES']
+osce_sources = sources['OSCE_SOURCES']
+
+if opt_enable_dred
+  dnn_sources += dred_sources
+endif
+if opt_enable_osce
+  dnn_sources += osce_sources
+endif
+
+# Per-instruction-set source lists. The variable suffixes must match the names
+# iterated in the foreach loop below, which looks them up via get_variable().
+dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
+dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
+dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
+dnn_sources_neon_intr = sources['DNN_SOURCES_NEON']
+dnn_sources_dotprod_intr = sources['DNN_SOURCES_DOTPROD']
+
+dnn_includes = [opus_includes]
+dnn_static_libs = []
+
+# Run-time CPU detection sources, only added when OPUS_HAVE_RTCD is configured.
+if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
+  dnn_sources += sources['DNN_SOURCES_X86_RTCD']
+endif
+if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm and opus_conf.has('OPUS_HAVE_RTCD')
+  dnn_sources += sources['DNN_SOURCES_ARM_RTCD']
+endif
+
+# One helper static library per available instruction set, each compiled with
+# that instruction set's flags (opus_<name>_args, empty when not defined).
+foreach intr_name : ['sse2', 'sse4_1', 'avx2', 'neon_intr', 'dotprod_intr']
+  have_intr = get_variable('have_' + intr_name)
+  if have_intr
+    intr_sources = get_variable('dnn_sources_' + intr_name)
+    intr_args = get_variable('opus_@0@_args'.format(intr_name), [])
+    dnn_static_libs += static_library('dnn_' + intr_name, intr_sources,
+      c_args: intr_args,
+      include_directories: dnn_includes,
+      install: false)
+  endif
+endforeach
+
+# Windows builds get -DDLL_EXPORT.
+dnn_c_args = host_machine.system() == 'windows' ? ['-DDLL_EXPORT'] : []
+
+# The DNN library itself is only built when deep PLC is enabled; otherwise
+# dnn_lib is an empty list.
+dnn_lib = []
+if opt_enable_deep_plc
+  dnn_lib = static_library('opus-dnn',
+    dnn_sources,
+    c_args: dnn_c_args,
+    include_directories: dnn_includes,
+    link_whole: [dnn_static_libs],
+    dependencies: libm,
+    install: false)
+endif
--- a/dnn/nndsp.c
+++ b/dnn/nndsp.c
@ -0,0 +1,416 @@
+/* Copyright (c) 2023 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "nndsp.h"
+#include "arch.h"
+#include "nnet.h"
+#include "os_support.h"
+#include "pitch.h"
+
+#include <math.h>
+
+#ifndef M_PI
+#define M_PI 3.141592653589793f
+#endif
+
+#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
+/* Resets an AdaConvState by clearing the single struct with OPUS_CLEAR, so
+   history, last_kernel and last_gain all start out cleared (assuming
+   OPUS_CLEAR zero-fills, as its name implies). */
+void init_adaconv_state(AdaConvState *hAdaConv)
+{
+    OPUS_CLEAR(hAdaConv, 1);
+}
+/* Resets an AdaCombState by clearing the single struct with OPUS_CLEAR, so
+   history, last_kernel, last_global_gain and last_pitch_lag all start out
+   cleared (assuming OPUS_CLEAR zero-fills, as its name implies). */
+void init_adacomb_state(AdaCombState *hAdaComb)
+{
+    OPUS_CLEAR(hAdaComb, 1);
+}
+/* Resets an AdaShapeState by clearing the single struct with OPUS_CLEAR, so
+   the three convolution memories start out cleared (assuming OPUS_CLEAR
+   zero-fills, as its name implies). */
+void init_adashape_state(AdaShapeState *hAdaShape)
+{
+    OPUS_CLEAR(hAdaShape, 1);
+}
+
+/* Fills window[0..overlap_size-1] with a half raised cosine,
+   0.5 + 0.5*cos(pi*(n + 0.5)/overlap_size), which decays from just below 1
+   to just above 0 across the overlap region. */
+void compute_overlap_window(float *window, int overlap_size)
+{
+    int n = 0;
+
+    while (n < overlap_size)
+    {
+        window[n] = 0.5f + 0.5f * cos(M_PI * (n + 0.5f) / overlap_size);
+        n++;
+    }
+}
+
+#ifdef DEBUG_NNDSP
+/* Debug helper, only built when DEBUG_NNDSP is defined: prints each of the
+   `length` elements of vec on its own line as "name[index]: value". */
+void print_float_vector(const char* name, const float *vec, int length)
+{
+    /* Declared up front like every other loop index in this file, instead of
+       using a C99 declaration inside the for statement. */
+    int i;
+
+    for (i = 0; i < length; i ++)
+    {
+        printf("%s[%d]: %f\n", name, i, vec[i]);
+    }
+}
+#endif
+
+/* Rescales a kernel in place, one output channel at a time: the taps of an
+   output channel (over all input channels and kernel positions) are divided
+   by their Euclidean norm plus 1e-6 and multiplied by that channel's entry
+   in gain[]. Taps are addressed through KERNEL_INDEX. */
+static void scale_kernel(
+    float *kernel,
+    int in_channels,
+    int out_channels,
+    int kernel_size,
+    float *gain
+)
+{
+    int oc;
+
+    for (oc = 0; oc < out_channels; oc++)
+    {
+        int ic, tap;
+        float energy = 0;
+        float inv_norm;
+
+        /* sum of squares of every tap feeding this output channel */
+        for (ic = 0; ic < in_channels; ic++)
+        {
+            for (tap = 0; tap < kernel_size; tap++)
+            {
+                const float coef = kernel[KERNEL_INDEX(oc, ic, tap)];
+                energy += coef * coef;
+            }
+        }
+#ifdef DEBUG_NNDSP
+        printf("kernel norm: %f, %f\n", energy, sqrt(energy));
+#endif
+        /* the small offset keeps the division finite for an all-zero kernel */
+        inv_norm = 1.f / (1e-6f + sqrt(energy));
+
+        for (ic = 0; ic < in_channels; ic++)
+        {
+            for (tap = 0; tap < kernel_size; tap++)
+            {
+                kernel[KERNEL_INDEX(oc, ic, tap)] *= inv_norm * gain[oc];
+            }
+        }
+    }
+}
+
+/* Maps each of the num_gains raw values in gains[] to
+   exp(filter_gain_a * value + filter_gain_b), in place. */
+static void transform_gains(
+    float *gains,
+    int num_gains,
+    float filter_gain_a,
+    float filter_gain_b
+)
+{
+    float *g = gains;
+    int remaining;
+
+    for (remaining = num_gains; remaining > 0; remaining--)
+    {
+        *g = exp(filter_gain_a * *g + filter_gain_b);
+        g++;
+    }
+}
+/* Processes one frame through the adaptive convolution (AdaConv) module.
+   A new kernel (kernel_layer, linear activation) and one gain per output
+   channel (gain_layer, tanh, then exp(filter_gain_a * g + filter_gain_b)) are
+   computed from features; scale_kernel() then normalizes the kernel and
+   applies the gains. Every input channel is filtered with both the previous
+   frame's kernel and the new one: over the first overlap_size samples the two
+   results are cross-faded (old kernel weighted by window, new kernel by
+   1 - window), afterwards only the new kernel is used. The contributions of
+   all input channels are summed per output channel.
+   x_in holds in_channels blocks of frame_size samples and x_out receives
+   out_channels blocks of frame_size samples. hAdaConv supplies the last
+   kernel_size input samples of each channel plus the previous kernel and is
+   updated before returning. Only shape_gain == 1,
+   left_padding == kernel_size - 1 and kernel_size < frame_size are supported
+   (asserted below). */
+void adaconv_process_frame(
+    AdaConvState* hAdaConv,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *kernel_layer,
+    const LinearLayer *gain_layer,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int in_channels,
+    int out_channels,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float shape_gain,
+    float *window,
+    int arch
+)
+{
+    float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
+    float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
+    float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
+    float kernel0[ADACONV_MAX_KERNEL_SIZE];
+    float kernel1[ADACONV_MAX_KERNEL_SIZE];
+    float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
+    float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
+    float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
+    float *p_input;
+    int i_in_channels, i_out_channels, i_sample;
+
+    (void) feature_dim; /* ToDo: figure out whether we might need this information */
+
+    celt_assert(shape_gain == 1);
+    celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
+    celt_assert(kernel_size < frame_size);
+
+    OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
+    OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
+    OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("x_in", x_in, in_channels * frame_size);
+#endif
+
+    /* prepare input: each channel occupies kernel_size + frame_size samples,
+       the kernel_size history samples first, then the new frame */
+    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+    {
+        OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
+        OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
+    }
+    p_input = input_buffer + kernel_size; /* first sample of channel 0's current frame */
+
+
+    /* calculate new kernel and new gain */
+    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+    compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+    print_float_vector("features", features, feature_dim);
+    print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
+    print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
+#endif
+    transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
+    scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
+    print_float_vector("adaconv_gain", gain_buffer, out_channels);
+#endif
+
+    /* calculate overlapping part using kernel from last frame.
+       For every (output, input) channel pair the input is correlated with the
+       previous kernel over the overlap region (channel_buffer0) and with the
+       new kernel over the whole frame (channel_buffer1). Both kernels are
+       copied into buffers cleared to ADACONV_MAX_KERNEL_SIZE entries because
+       that fixed length is what gets passed to celt_pitch_xcorr. */
+
+    for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
+    {
+        for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
+        {
+            OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
+            OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
+
+            OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
+            OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
+            celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
+            celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
+            for (i_sample = 0; i_sample < overlap_size; i_sample++)
+            {
+                output_buffer[i_sample + i_out_channels * frame_size] +=  window[i_sample] * channel_buffer0[i_sample];
+                output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
+            }
+            for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+            {
+                output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
+            }
+        }
+    }
+
+    OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("x_out", x_out, out_channels * frame_size);
+#endif
+
+    /* buffer update: keep the last kernel_size samples of every input channel
+       and the kernel just used, for the next frame */
+    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+    {
+        OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
+    }
+    OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
+}
+
+/* Processes one frame through the adaptive comb filter (AdaComb) module.
+   From features the function computes a kernel (kernel_layer, linear
+   activation), a kernel gain (gain_layer, ReLU, mapped to
+   exp(log_gain_limit - g)) and a global gain (global_gain_layer, tanh, mapped
+   to exp(filter_gain_a * g + filter_gain_b)). scale_kernel() normalizes the
+   kernel and applies the kernel gain. The input, delayed by
+   pitch_lag + left_padding samples, is then correlated with the kernel and
+   added to the undelayed input; the sum is scaled by the global gain.
+   Over the first overlap_size samples the result is cross-faded with the one
+   obtained from the previous frame's kernel, pitch lag and global gain
+   (previous weighted by window, current by 1 - window).
+   x_in and x_out hold frame_size samples. hAdaComb supplies the last
+   kernel_size + ADACOMB_MAX_LAG input samples together with the previous
+   kernel, pitch lag and global gain, and is updated before returning.
+   feature_dim is only used for debug printing. */
+void adacomb_process_frame(
+    AdaCombState* hAdaComb,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *kernel_layer,
+    const LinearLayer *gain_layer,
+    const LinearLayer *global_gain_layer,
+    int pitch_lag,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float log_gain_limit,
+    float *window,
+    int arch
+)
+{
+    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
+    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
+    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
+    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
+    float gain, global_gain;
+    float *p_input;
+    int i_sample;
+    /* Padded kernel copies handed to celt_pitch_xcorr. They are sized with
+       the same macro that is used to clear them and that is passed as the
+       correlation length, so the three can never disagree. */
+    float kernel[ADACOMB_MAX_KERNEL_SIZE];
+    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
+
+    (void) feature_dim; /* ToDo: figure out whether we might need this information */
+
+    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
+    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
+
+    /* prepare input: kernel_size + ADACOMB_MAX_LAG history samples followed by
+       the new frame; p_input points at the first sample of the new frame */
+    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
+    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
+    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
+
+    /* calculate new kernel and new gain */
+    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
+    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+    print_float_vector("features", features, feature_dim);
+    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
+    print_float_vector("adacomb_gain_raw", &gain, 1);
+    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
+#endif
+    gain = exp(log_gain_limit - gain);
+    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
+    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
+    print_float_vector("adacomb_gain", &gain, 1);
+#endif
+
+    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_COPY(kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
+
+    /* previous kernel at the previous pitch lag, needed for the overlap region only */
+    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
+
+    /* new kernel at the new pitch lag, for the whole frame */
+    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
+    /* overlap region: cross-fade the two comb outputs, each scaled by its own global gain */
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+      output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
+    }
+
+    /* overlap region: add the direct signal, scaled by the cross-faded global gain */
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+      output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
+    }
+
+    /* rest of the frame: new kernel and new global gain only */
+    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+    {
+      output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
+    }
+    OPUS_COPY(x_out, output_buffer, frame_size);
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("x_out", x_out, frame_size);
+#endif
+
+    /* buffer update: keep kernel, the last kernel_size + ADACOMB_MAX_LAG input
+       samples, pitch lag and global gain for the next frame */
+    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
+    hAdaComb->last_pitch_lag = pitch_lag;
+    hAdaComb->last_global_gain = global_gain;
+}
+
+/* Processes one frame through the AdaShape module.
+   A log-domain temporal envelope of x_in is computed from the mean absolute
+   value of each block of avg_pool_k samples; its mean is removed and appended
+   as one extra element. features are passed through conv layer alpha1f and
+   the envelope through conv layer alpha1t; the sum of both outputs goes
+   through a leaky ReLU (slope 0.2) and then conv layer alpha2. The output is
+   x_out[i] = exp(alpha2_out[i]) * x_in[i].
+   frame_size must be a multiple of avg_pool_k and
+   feature_dim + frame_size / avg_pool_k + 1 must stay below
+   ADASHAPE_MAX_INPUT_DIM (both asserted). hAdaShape provides the memories
+   handed to the three compute_generic_conv1d() calls. */
+void adashape_process_frame(
+    AdaShapeState *hAdaShape,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *alpha1f,
+    const LinearLayer *alpha1t,
+    const LinearLayer *alpha2,
+    int feature_dim,
+    int frame_size,
+    int avg_pool_k,
+    int arch
+)
+{
+    float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
+    float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
+    float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
+    int i, k;
+    int tenv_size;
+    float mean;
+    float *tenv;
+
+    celt_assert(frame_size % avg_pool_k == 0);
+    celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
+
+    tenv_size = frame_size / avg_pool_k;
+    tenv = in_buffer + feature_dim; /* envelope is stored right behind the features in in_buffer */
+    OPUS_CLEAR(tenv, tenv_size + 1);
+
+    OPUS_COPY(in_buffer, features, feature_dim);
+
+    /* calculate temporal envelope: log of the mean absolute value of each block
+       of avg_pool_k samples (the 2^-16 offset keeps the log finite for silent
+       blocks), then remove the mean and store it as the last element */
+    mean = 0;
+    for (i = 0; i < tenv_size; i++)
+    {
+        for (k = 0; k < avg_pool_k; k++)
+        {
+            tenv[i] += fabs(x_in[i * avg_pool_k + k]);
+        }
+        tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
+        mean += tenv[i];
+    }
+    mean /= tenv_size;
+    for (i = 0; i < tenv_size; i++)
+    {
+        tenv[i] -= mean;
+    }
+    tenv[tenv_size] = mean;
+#ifdef DEBUG_NNDSP
+    print_float_vector("tenv", tenv, tenv_size + 1);
+#endif
+
+    /* calculate temporal weights: alpha1f sees the feature_dim features,
+       alpha1t the tenv_size + 1 envelope values */
+#ifdef DEBUG_NNDSP
+    print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
+#endif
+    compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
+    compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);
+#ifdef DEBUG_NNDSP
+    print_float_vector("alpha1_out", out_buffer, frame_size);
+#endif
+    /* compute leaky ReLU by hand. ToDo: try tanh activation.
+       in_buffer is reused here as the input of alpha2. */
+    for (i = 0; i < frame_size; i ++)
+    {
+        float tmp = out_buffer[i] + tmp_buffer[i];
+        in_buffer[i] = tmp >= 0 ? tmp : 0.2 * tmp;
+    }
+#ifdef DEBUG_NNDSP
+    print_float_vector("post_alpha1", in_buffer, frame_size);
+#endif
+    compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
+
+    /* shape signal: per-sample gain exp(out_buffer[i]) applied to the input */
+    for (i = 0; i < frame_size; i ++)
+    {
+        x_out[i] = exp(out_buffer[i]) * x_in[i];
+    }
+
+}
--- a/dnn/nndsp.h
+++ b/dnn/nndsp.h
@ -0,0 +1,143 @@
+/* Copyright (c) 2023 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNDSP_H
+#define NNDSP_H
+
+#include "opus_types.h"
+#include "nnet.h"
+#include <string.h>
+
+/* Compile-time upper bounds. The state structs below size their arrays with
+   them and nndsp.c sizes its on-stack work buffers with them, so the kernel,
+   frame and overlap sizes and the channel counts passed at run time must not
+   exceed these values. */
+#define ADACONV_MAX_KERNEL_SIZE 16
+#define ADACONV_MAX_INPUT_CHANNELS 2
+#define ADACONV_MAX_OUTPUT_CHANNELS 2
+#define ADACONV_MAX_FRAME_SIZE 80
+#define ADACONV_MAX_OVERLAP_SIZE 40
+
+#define ADACOMB_MAX_LAG 300
+#define ADACOMB_MAX_KERNEL_SIZE 16
+#define ADACOMB_MAX_FRAME_SIZE 80
+#define ADACOMB_MAX_OVERLAP_SIZE 40
+
+#define ADASHAPE_MAX_INPUT_DIM 512
+#define ADASHAPE_MAX_FRAME_SIZE 160
+
+/*#define DEBUG_NNDSP*/
+#ifdef DEBUG_NNDSP
+#include <stdio.h>
+#endif
+
+
+void print_float_vector(const char* name, const float *vec, int length);
+/* Persistent AdaConv state carried from one adaconv_process_frame() call to
+   the next. */
+typedef struct {
+    float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS]; /* last kernel_size input samples of each input channel */
+    float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS]; /* scaled kernel of the previous frame */
+    float last_gain; /* NOTE(review): not referenced by adaconv_process_frame() -- confirm whether it is still needed */
+} AdaConvState;
+
+/* Persistent AdaComb state carried from one adacomb_process_frame() call to
+   the next. */
+typedef struct {
+    float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG]; /* last kernel_size + ADACOMB_MAX_LAG input samples */
+    float last_kernel[ADACOMB_MAX_KERNEL_SIZE]; /* scaled kernel of the previous frame */
+    float last_global_gain; /* global gain of the previous frame, used in the overlap cross-fade */
+    int last_pitch_lag; /* pitch lag of the previous frame, used in the overlap cross-fade */
+} AdaCombState;
+
+/* Persistent AdaShape state: the memories that adashape_process_frame() hands
+   to compute_generic_conv1d() for its three conv layers. */
+typedef struct {
+    float conv_alpha1f_state[ADASHAPE_MAX_INPUT_DIM]; /* memory of the alpha1f (feature) layer */
+    float conv_alpha1t_state[ADASHAPE_MAX_INPUT_DIM]; /* memory of the alpha1t (temporal envelope) layer */
+    float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE]; /* memory of the alpha2 layer */
+} AdaShapeState;
+
+void init_adaconv_state(AdaConvState *hAdaConv);
+
+void init_adacomb_state(AdaCombState *hAdaComb);
+
+void init_adashape_state(AdaShapeState *hAdaShape);
+
+void compute_overlap_window(float *window, int overlap_size);
+
+void adaconv_process_frame(
+    AdaConvState* hAdaConv,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *kernel_layer,
+    const LinearLayer *gain_layer,
+    int feature_dim, /* not strictly necessary */
+    int frame_size,
+    int overlap_size,
+    int in_channels,
+    int out_channels,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float shape_gain,
+    float *window,
+    int arch
+);
+
+void adacomb_process_frame(
+    AdaCombState* hAdaComb,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *kernel_layer,
+    const LinearLayer *gain_layer,
+    const LinearLayer *global_gain_layer,
+    int pitch_lag,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float log_gain_limit,
+    float *window,
+    int arch
+);
+
+void adashape_process_frame(
+    AdaShapeState *hAdaShape,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *alpha1f,
+    const LinearLayer *alpha1t,
+    const LinearLayer *alpha2,
+    int feature_dim,
+    int frame_size,
+    int avg_pool_k,
+    int arch
+);
+
+#endif
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@ -0,0 +1,149 @@
+/* Copyright (c) 2018 Mozilla
+                 2008-2011 Octasic Inc.
+                 2012-2017 Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include "opus_types.h"
+#include "arch.h"
+#include "nnet.h"
+#include "dred_rdovae_constants.h"
+#include "plc_data.h"
+#include "fargan.h"
+#include "os_support.h"
+#include "vec.h"
+
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
+#ifdef NO_OPTIMIZATIONS
+#if defined(_MSC_VER)
+#pragma message ("Compiling without any vectorization. This code will be very slow")
+#else
+#warning Compiling without any vectorization. This code will be very slow
+#endif
+#endif
+
+
+#define SOFTMAX_HACK
+
+/* Dense layer: runs compute_linear() for layer on input, then applies the
+   requested activation in place to the layer->nb_outputs values in output. */
+void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
+{
+   compute_linear(layer, output, input, arch);
+   compute_activation(output, output, layer->nb_outputs, activation, arch);
+}
+
+#ifdef ENABLE_OSCE
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
+#else
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
+#endif
+
+/* One GRU step, updating state in place. The input and recurrent layers each
+   produce three blocks of N = recurrent_weights->nb_inputs values (update
+   gate, reset gate, candidate). The gates are the sigmoid of the summed input
+   and recurrent parts; the candidate is tanh(input part + reset * recurrent
+   part); the new state is update * state + (1 - update) * candidate.
+   in must not alias state. */
+void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
+{
+  float gates[3*MAX_RNN_NEURONS_ALL];
+  float recur[3*MAX_RNN_NEURONS_ALL];
+  float *update;
+  float *reset;
+  float *cand;
+  const float *recur_cand;
+  int units;
+  int k;
+
+  celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
+  celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
+  celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
+  celt_assert(in != state);
+
+  units = recurrent_weights->nb_inputs;
+  update = gates;
+  reset = gates + units;
+  cand = gates + 2*units;
+  recur_cand = recur + 2*units;
+
+  compute_linear(input_weights, gates, in, arch);
+  compute_linear(recurrent_weights, recur, state, arch);
+
+  /* update and reset gates */
+  for (k=0;k<2*units;k++)
+  {
+     gates[k] += recur[k];
+  }
+  compute_activation(gates, gates, 2*units, ACTIVATION_SIGMOID, arch);
+
+  /* candidate state: the recurrent contribution is gated by the reset gate */
+  for (k=0;k<units;k++)
+  {
+     cand[k] += recur_cand[k]*reset[k];
+  }
+  compute_activation(cand, cand, units, ACTIVATION_TANH, arch);
+
+  /* blend old state and candidate, then store the result as the new state */
+  for (k=0;k<units;k++)
+  {
+     cand[k] = update[k]*state[k] + (1-update[k])*cand[k];
+     state[k] = cand[k];
+  }
+}
+
+/* Gated linear unit: output[i] = input[i] * sigmoid(linear(input)[i]).
+   The layer must have as many outputs as inputs; output may be the same
+   buffer as input. */
+void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
+{
+   float gate[MAX_INPUTS];
+   int k;
+
+   celt_assert(layer->nb_inputs == layer->nb_outputs);
+   compute_linear(layer, gate, input, arch);
+   compute_activation(gate, gate, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
+
+   if (input != output) {
+     for (k=0;k<layer->nb_outputs;k++) {
+       output[k] = input[k]*gate[k];
+     }
+   } else {
+     /* Separate in-place loop: gives the compiler a vectorization hint. */
+     for (k=0;k<layer->nb_outputs;k++) {
+       output[k] *= gate[k];
+     }
+   }
+}
+
+#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS
+
+/* One step of a 1-D convolution implemented as a linear layer. The layer input
+   is the layer->nb_inputs - input_size values kept in mem followed by the
+   input_size new values; afterwards mem is refreshed with everything but the
+   oldest input_size values. input must not alias output. */
+void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
+{
+   float conv_in[MAX_CONV_INPUTS_ALL];
+   int hist_len;
+
+   celt_assert(input != output);
+   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
+
+   /* number of values carried over from previous calls */
+   hist_len = layer->nb_inputs - input_size;
+
+   if (hist_len != 0) {
+      OPUS_COPY(conv_in, mem, hist_len);
+   }
+   OPUS_COPY(conv_in + hist_len, input, input_size);
+
+   compute_linear(layer, output, conv_in, arch);
+   compute_activation(output, output, layer->nb_outputs, activation, arch);
+
+   if (hist_len != 0) {
+      OPUS_COPY(mem, conv_in + input_size, hist_len);
+   }
+}
+
+/* One step of a dilated 1-D convolution implemented as a linear layer.
+   The kernel spans ksize = layer->nb_inputs / input_size frames of input_size
+   values. With dilation == 1, mem holds the previous ksize - 1 frames
+   back to back. Otherwise mem holds input_size * dilation * (ksize - 1)
+   values and the layer input takes one frame every `dilation` frames from
+   it. In both cases the new input frame is the last frame of the layer input
+   and mem is advanced by one frame afterwards. input must not alias
+   output. */
+void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
+{
+   float tmp[MAX_CONV_INPUTS_ALL];
+   int ksize = layer->nb_inputs/input_size;
+   int i;
+   celt_assert(input != output);
+   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
+   /* gather the past frames, then append the new one */
+   if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
+   else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
+   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
+   compute_linear(layer, output, tmp, arch);
+   compute_activation(output, output, layer->nb_outputs, activation, arch);
+   if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
+   else {
+     /* Shift the history down by one frame. Source and destination are
+        overlapping parts of mem, so this has to be a memmove-style
+        OPUS_MOVE; a memcpy-style OPUS_COPY is undefined for overlapping
+        regions. */
+     OPUS_MOVE(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
+     OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
+   }
+}
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@ -0,0 +1,163 @@
+/* Copyright (c) 2018 Mozilla
+   Copyright (c) 2017 Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNET_H_
+#define NNET_H_
+
+#include <stddef.h>
+#include "opus_types.h"
+
+/* Activation function identifiers accepted by the `activation` argument of
+   compute_activation() and of the layer helpers declared below. */
+#define ACTIVATION_LINEAR  0
+#define ACTIVATION_SIGMOID 1
+#define ACTIVATION_TANH    2
+#define ACTIVATION_RELU    3
+#define ACTIVATION_SOFTMAX 4
+#define ACTIVATION_SWISH   5
+
+#define WEIGHT_BLOB_VERSION 0
+#define WEIGHT_BLOCK_SIZE 64
+/* One named weight table: `data` points to the payload while `name`, `type`
+   and `size` describe it.
+   NOTE(review): `type` presumably holds one of the WEIGHT_TYPE_* codes and
+   `size` is presumably a byte count -- confirm against parse_weights(). */
+typedef struct {
+  const char *name;
+  int type;
+  int size;
+  const void *data;
+} WeightArray;
+
+#define WEIGHT_TYPE_float 0
+#define WEIGHT_TYPE_int 1
+#define WEIGHT_TYPE_qweight 2
+#define WEIGHT_TYPE_int8 3
+
+/* Fixed-size header record carrying the same type/size/name information as a
+   WeightArray entry, plus a 4-character tag, a version and a block size.
+   NOTE(review): presumably the per-array header read by parse_weights(); with a
+   4-byte int the struct is 64 bytes, i.e. WEIGHT_BLOCK_SIZE -- confirm this is intended. */
+typedef struct {
+  char head[4];
+  int version;
+  int type;
+  int size;
+  int block_size;
+  char name[44];
+} WeightHead;
+
+/* Generic (optionally sparse) affine transformation.
+   compute_linear() uses float_weights when it is non-NULL, otherwise the int8
+   weights; when both are NULL the output is cleared before the bias is added. */
+typedef struct {
+  /* Added to every output when non-NULL. */
+  const float *bias;
+  /* Used instead of bias for int8 weights when USE_SU_BIAS is defined. */
+  const float *subias;
+  /* 8-bit weights, passed to the int8 kernels together with scale. */
+  const opus_int8 *weights;
+  /* Float weights; take precedence over weights. */
+  const float *float_weights;
+  /* When non-NULL, the sparse matrix-vector kernels are used. */
+  const int *weights_idx;
+  /* Optional diagonal terms, only used for GRU recurrent weights
+     (compute_linear() then requires nb_outputs == 3*nb_inputs). */
+  const float *diag;
+  /* Passed to the int8 kernels alongside weights. */
+  const float *scale;
+  int nb_inputs;
+  int nb_outputs;
+} LinearLayer;
+
+/* 2-D convolution layer with float weights, evaluated one frame at a time
+   by compute_conv2d(). */
+typedef struct {
+  /* One value per output channel, added to every output of that channel; may be NULL. */
+  const float *bias;
+  /* Kernel, laid out as [ out_channels x in_channels x ktime x kheight ]. */
+  const float *float_weights;
+  int in_channels;
+  int out_channels;
+  /* Kernel size along the time (frame) axis. */
+  int ktime;
+  /* Kernel size along the height axis. */
+  int kheight;
+} Conv2dLayer;
+
+
+void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
+void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch);
+void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch);
+void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch);
+void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch);
+void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
+
+
+int parse_weights(WeightArray **list, const unsigned char *data, int len);
+
+
+extern const WeightArray lpcnet_arrays[];
+extern const WeightArray plcmodel_arrays[];
+extern const WeightArray rdovaeenc_arrays[];
+extern const WeightArray rdovaedec_arrays[];
+extern const WeightArray fwgan_arrays[];
+extern const WeightArray fargan_arrays[];
+extern const WeightArray pitchdnn_arrays[];
+extern const WeightArray lossgen_arrays[];
+
+int linear_init(LinearLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *subias,
+  const char *weights,
+  const char *float_weights,
+  const char *weights_idx,
+  const char *diag,
+  const char *scale,
+  int nb_inputs,
+  int nb_outputs);
+
+int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *float_weights,
+  int in_channels,
+  int out_channels,
+  int ktime,
+  int kheight);
+
+
+void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_c(float *output, const float *input, int N, int activation);
+void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+
+
+#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#include "arm/dnn_arm.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2)
+#include "x86/dnn_x86.h"
+#endif
+
+/* Default dispatch: unless the matching OVERRIDE_* macro is defined (presumably by
+   one of the architecture headers included above), each compute_*() entry point
+   ignores `arch` and calls the plain C implementation. */
+#ifndef OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
+#endif
+
+#ifndef OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
+#endif
+
+#ifndef OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_c(conv, out, mem, in, height, hstride, activation))
+#endif
+
+#if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
+#if defined(_MSC_VER)
+#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
+#else
+#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
+#endif
+#endif
+
+
+
+#endif /* NNET_H_ */
--- a/dnn/nnet_arch.h
+++ b/dnn/nnet_arch.h
@ -0,0 +1,247 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNET_ARCH_H
+#define NNET_ARCH_H
+
+#include "nnet.h"
+#include "arch.h"
+#include "os_support.h"
+#include "vec.h"
+
+/* Token-pasting helpers. The extra level of indirection makes sure RTCD_ARCH is
+   macro-expanded before being pasted, so RTCD_SUF(compute_linear_) becomes
+   compute_linear_ followed by the value of RTCD_ARCH (e.g. compute_linear_c).
+   NOTE(review): RTCD_ARCH is not defined in this header; presumably every file
+   that includes it defines RTCD_ARCH first -- confirm. */
+#define CAT_SUFFIX2(a,b) a ## b
+#define CAT_SUFFIX(a,b) CAT_SUFFIX2(a, b)
+
+#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
+
+/* Force vectorization on for DNN code because some of the loops rely on
+   compiler vectorization rather than explicitly using intrinsics. */
+#if OPUS_GNUC_PREREQ(5,1)
+#define GCC_POP_OPTIONS
+#pragma GCC push_options
+#pragma GCC optimize("tree-vectorize")
+#endif
+
+
+#define MAX_ACTIVATIONS (4096)
+
+/* Swish activation: y[k] = x[k]*sigmoid(x[k]) for the first N values.
+   The sigmoid is computed into a stack scratch buffer first, and each x[k] is
+   read before y[k] is written, which is what lets callers run it in place.
+   N must not exceed MAX_ACTIVATIONS. */
+static OPUS_INLINE void vec_swish(float *y, const float *x, int N)
+{
+   float sig[MAX_ACTIVATIONS];
+   int k;
+   celt_assert(N <= MAX_ACTIVATIONS);
+   vec_sigmoid(sig, x, N);
+   for (k=0;k<N;k++) {
+      y[k] = x[k]*sig[k];
+   }
+}
+
+/* Rectified linear unit: negative inputs map to 0, anything else is returned unchanged. */
+static OPUS_INLINE float relu(float x)
+{
+   if (x < 0) return 0;
+   return x;
+}
+
+/*#define HIGH_ACCURACY */
+
+/* Applies the activation selected by `activation` (one of the ACTIVATION_* ids)
+   to the N values of input and stores the result in output.
+   Softmax is normalized so the outputs sum to 1, unless SOFTMAX_HACK is defined,
+   in which case the input is copied through untouched.
+   Any id that is not handled explicitly must be ACTIVATION_LINEAR (asserted),
+   which is a plain copy skipped when input and output are the same buffer. */
+void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
+{
+   int i;
+   switch (activation) {
+   case ACTIVATION_SIGMOID:
+#ifdef HIGH_ACCURACY
+      for (int n=0; n<N; n++)
+      {
+         output[n] = 1.f  / (1 + exp(-input[n]));
+      }
+#else
+      vec_sigmoid(output, input, N);
+#endif
+      break;
+   case ACTIVATION_TANH:
+#ifdef HIGH_ACCURACY
+      for (int n=0; n<N; n++)
+      {
+         output[n] = tanh(input[n]);
+      }
+#else
+      vec_tanh(output, input, N);
+#endif
+      break;
+   case ACTIVATION_SWISH:
+      vec_swish(output, input, N);
+      break;
+   case ACTIVATION_RELU:
+      for (i=0;i<N;i++) {
+         output[i] = relu(input[i]);
+      }
+      break;
+   case ACTIVATION_SOFTMAX:
+#ifdef SOFTMAX_HACK
+      OPUS_COPY(output, input, N);
+#else
+      {
+         float norm;
+         float sum = 0;
+         softmax(output, input, N);
+         for (i=0;i<N;i++) {
+            sum += output[i];
+         }
+         /* The tiny offset keeps the division finite when every term is 0. */
+         norm = 1.f/(sum+1e-30);
+         for (i=0;i<N;i++) {
+            output[i] = norm*output[i];
+         }
+      }
+#endif
+      break;
+   default:
+      celt_assert(activation == ACTIVATION_LINEAR);
+      if (input != output) {
+         for (i=0;i<N;i++) {
+            output[i] = input[i];
+         }
+      }
+      break;
+   }
+}
+
+
+/* Evaluates the affine layer `linear` on `in` and writes nb_outputs values to `out`.
+   Float weights are preferred when present, then int8 weights; with neither,
+   the output starts from zero. The bias (if any) is added next, followed by the
+   optional diagonal terms. in and out must not alias. */
+void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const float *in)
+{
+   int k;
+   int n_in, n_out;
+   const float *bias;
+   celt_assert(in != out);
+   n_in = linear->nb_inputs;
+   n_out = linear->nb_outputs;
+   bias = linear->bias;
+   if (linear->float_weights != NULL) {
+      if (linear->weights_idx == NULL) {
+         sgemv(out, linear->float_weights, n_out, n_in, n_out, in);
+      } else {
+         sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, n_out, in);
+      }
+   } else if (linear->weights != NULL) {
+      if (linear->weights_idx == NULL) {
+         cgemv8x4(out, linear->weights, linear->scale, n_out, n_in, in);
+      } else {
+         sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, n_out, n_in, in);
+      }
+      /* Only use SU biases for integer matrices on SU archs. */
+#ifdef USE_SU_BIAS
+      bias = linear->subias;
+#endif
+   } else {
+      OPUS_CLEAR(out, n_out);
+   }
+   if (bias != NULL) {
+      for (k=0;k<n_out;k++) {
+         out[k] += bias[k];
+      }
+   }
+   if (linear->diag != NULL) {
+      /* Diag is only used for GRU recurrent weights: three gates of n_in outputs each. */
+      celt_assert(3*n_in == n_out);
+      for (k=0;k<n_in;k++) {
+         out[k] += linear->diag[k]*in[k];
+         out[k+n_in] += linear->diag[k+n_in]*in[k];
+         out[k+2*n_in] += linear->diag[k+2*n_in]*in[k];
+      }
+   }
+}
+
+/* Generic non-padded 2-D convolution producing a single output frame.
+   in      : [ ktime x in_channels x (height+kheight-1) ]
+   weights : [ out_channels x in_channels x ktime x kheight ]
+   out     : out_channels rows of height values, consecutive rows hstride floats apart.
+   Each output row is cleared first, then accumulated in
+   (input channel, time tap, height tap) order. */
+static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
+{
+   int oc;
+   const int in_stride = height+kheight-1;
+   for (oc=0;oc<out_channels;oc++) {
+      int ic;
+      float *out_row = &out[oc*hstride];
+      OPUS_CLEAR(out_row, height);
+      for (ic=0;ic<in_channels;ic++) {
+         int t;
+         for (t=0;t<ktime;t++) {
+            int h;
+            for (h=0;h<kheight;h++) {
+               int j;
+               /* One kernel tap and the matching input row, shifted by h. */
+               const float *tap = &weights[oc*in_channels*ktime*kheight + ic*ktime*kheight + t*kheight + h];
+               const float *in_row = &in[t*in_channels*in_stride + ic*in_stride + h];
+               for (j=0;j<height;j++) {
+                  out_row[j] += (*tap) * in_row[j];
+               }
+            }
+         }
+      }
+   }
+}
+
+/* Specialization of conv2d_float() for a 3x3 kernel (ktime == kheight == 3), with the
+   two kernel loops unrolled by hand; buffer layouts are the same as in conv2d_float().
+   There are no intrinsics in this function (or the one above) because the gcc (and hopefully
+   other compilers') auto-vectorizer is smart enough to produce the right code by itself
+   based on the compile flags. */
+static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
+{
+   int i;
+   int in_stride;
+   int kheight, ktime;
+   kheight = ktime = 3;
+   in_stride = height+kheight-1;
+   for (i=0;i<out_channels;i++) {
+      int m;
+      /* Start each output row from zero, then accumulate one input channel at a time. */
+      OPUS_CLEAR(&out[i*hstride], height);
+      for (m=0;m<in_channels;m++) {
+         int j;
+         for (j=0;j<height;j++) {
+            /* Unrolled version of previous function -- compiler will figure out the indexing simplifications. */
+            out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 0]*in[0*in_channels*in_stride + m*in_stride + j + 0]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 1]*in[0*in_channels*in_stride + m*in_stride + j + 1]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 2]*in[0*in_channels*in_stride + m*in_stride + j + 2]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 0]*in[1*in_channels*in_stride + m*in_stride + j + 0]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 1]*in[1*in_channels*in_stride + m*in_stride + j + 1]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 2]*in[1*in_channels*in_stride + m*in_stride + j + 2]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 0]*in[2*in_channels*in_stride + m*in_stride + j + 0]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 1]*in[2*in_channels*in_stride + m*in_stride + j + 1]
+                                + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 2]*in[2*in_channels*in_stride + m*in_stride + j + 2];
+               }
+      }
+   }
+}
+
+#define MAX_CONV2D_INPUTS 8192
+
+/* Streaming 2-D convolution for one frame.
+   The ktime-1 previous frames kept in mem are concatenated with the new frame `in`
+   (in_channels*(height+kheight-1) floats) in a stack buffer, mem is advanced by
+   one frame, and the convolution, per-channel bias and activation are applied.
+   out holds out_channels rows of height values, hstride floats apart.
+   in and out must not alias, and ktime frames must fit in MAX_CONV2D_INPUTS floats. */
+void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation)
+{
+   int ch;
+   int frame_size;
+   int history_size;
+   const float *bias;
+   float window[MAX_CONV2D_INPUTS];
+   celt_assert(in != out);
+   frame_size = conv->in_channels*(height+conv->kheight-1);
+   celt_assert(conv->ktime*frame_size <= MAX_CONV2D_INPUTS);
+   history_size = (conv->ktime-1)*frame_size;
+   /* window = [ history | new frame ]; the history then slides forward by one frame. */
+   OPUS_COPY(window, mem, history_size);
+   OPUS_COPY(&window[history_size], in, frame_size);
+   OPUS_COPY(mem, &window[frame_size], history_size);
+   bias = conv->bias;
+   if (conv->ktime != 3 || conv->kheight != 3) {
+      conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, window, height, hstride);
+   } else {
+      /* Hand-unrolled fast path for 3x3 kernels. */
+      conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, window, height, hstride);
+   }
+   if (bias != NULL) {
+      for (ch=0;ch<conv->out_channels;ch++) {
+         int j;
+         float *row = &out[ch*hstride];
+         for (j=0;j<height;j++) {
+            row[j] += bias[ch];
+         }
+      }
+   }
+   /* The activation is applied in place, one output channel row at a time. */
+   for (ch=0;ch<conv->out_channels;ch++) {
+      float *row = &out[ch*hstride];
+      RTCD_SUF(compute_activation_)(row, row, height, activation);
+   }
+}
+
+#ifdef GCC_POP_OPTIONS
+#pragma GCC pop_options
+#endif
+
+#endif
--- a/Show More
+++ b/Show More