webrtc/api/audio_codecs/audio_decoder.h
Miriam Zimmerman 31a3397e96 Merge remote-tracking branch 'upstream/branch-heads/7778' into mutexlox/webrtc-148
Merge conflicts resolved in:
* api/neteq/neteq.h
* audio/channel_receive.cc
* audio/channel_receive.h
* media/engine/simulcast_encoder_adapter.cc
* modules/audio_coding/BUILD.gn
* modules/audio_coding/acm2/acm_resampler.cc
* modules/audio_coding/acm2/acm_resampler_unittest.cc
* modules/audio_processing/audio_processing_impl.h
* modules/video_coding/BUILD.gn
* p2p/base/p2p_transport_channel_unittest.cc
* pc/jsep_transport_controller.cc
* pc/rtp_transport.h
* pc/rtp_transport_internal.h
* unsafe_buffers_paths.txt
2026-05-08 13:42:18 -04:00

229 lines
8.9 KiB
C++

/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
#define API_AUDIO_CODECS_AUDIO_DECODER_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <optional>
#include <span>
#include <vector>
#include "api/audio/audio_view.h"
#include "rtc_base/buffer.h"
// RingRTC Change to configure opus
#include "rtc_base/logging.h"
namespace webrtc {
class AudioDecoder {
public:
enum SpeechType {
kSpeech = 1,
kComfortNoise = 2,
};
// Used by PacketDuration below. Save the value -1 for errors.
enum { kNotImplemented = -2 };
AudioDecoder() = default;
virtual ~AudioDecoder() = default;
AudioDecoder(const AudioDecoder&) = delete;
AudioDecoder& operator=(const AudioDecoder&) = delete;
class EncodedAudioFrame {
public:
struct DecodeResult {
size_t num_decoded_samples;
SpeechType speech_type;
};
virtual ~EncodedAudioFrame() = default;
// Returns the duration in samples-per-channel of this audio frame.
// If no duration can be ascertained, returns zero.
virtual size_t Duration() const = 0;
// Returns true if this packet contains DTX.
virtual bool IsDtxPacket() const;
// Decodes this frame of audio and writes the result in `decoded`.
// `decoded` must be large enough to store as many samples as indicated by a
// call to Duration() . On success, returns an std::optional containing the
// total number of samples across all channels, as well as whether the
// decoder produced comfort noise or speech. On failure, returns an empty
// std::optional. Decode may be called at most once per frame object.
virtual std::optional<DecodeResult> Decode(
std::span<int16_t> decoded) const = 0;
};
struct ParseResult {
ParseResult();
ParseResult(uint32_t timestamp,
int priority,
std::unique_ptr<EncodedAudioFrame> frame);
ParseResult(ParseResult&& b);
~ParseResult();
ParseResult& operator=(ParseResult&& b);
// The timestamp of the frame is in samples per channel.
uint32_t timestamp;
// The relative priority of the frame compared to other frames of the same
// payload and the same timeframe. A higher value means a lower priority.
// The highest priority is zero - negative values are not allowed.
int priority;
std::unique_ptr<EncodedAudioFrame> frame;
};
// Let the decoder parse this payload and prepare zero or more decodable
// frames. Each frame must be between 10 ms and 120 ms long. The caller must
// ensure that the AudioDecoder object outlives any frame objects returned by
// this call. The decoder is free to swap or move the data from the `payload`
// buffer. `timestamp` is the input timestamp, in samples, corresponding to
// the start of the payload.
virtual std::vector<ParseResult> ParsePayload(Buffer&& payload,
uint32_t timestamp);
// RingRTC change to support Opus DRED
#if WEBRTC_OPUS_SUPPORT_DRED
virtual std::vector<ParseResult> ParsePayloadRedundancy(
Buffer&& payload,
uint32_t timestamp,
uint32_t recovery_timestamp_offset);
#endif
// TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are
// obsolete; callers should call ParsePayload instead. For now, subclasses
// must still implement DecodeInternal.
// Decodes `encode_len` bytes from `encoded` and writes the result in
// `decoded`. The maximum bytes allowed to be written into `decoded` is
// `max_decoded_bytes`. Returns the total number of samples across all
// channels. If the decoder produced comfort noise, `speech_type`
// is set to kComfortNoise, otherwise it is kSpeech. The desired output
// sample rate is provided in `sample_rate_hz`, which must be valid for the
// codec at hand.
int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type);
// Same as Decode(), but interfaces to the decoders redundant decode function.
// The default implementation simply calls the regular Decode() method.
int DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type);
// Indicates if the decoder implements the DecodePlc method.
virtual bool HasDecodePlc() const;
// Calls the packet-loss concealment of the decoder to update the state after
// one or several lost packets. The caller has to make sure that the
// memory allocated in `decoded` should accommodate `num_frames` frames.
virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
// Asks the decoder to generate packet-loss concealment and append it to the
// end of `concealment_audio`. The concealment audio should be in
// channel-interleaved format, with as many channels as the last decoded
// packet produced. The implementation must produce at least
// requested_samples_per_channel, or nothing at all. This is a signal to the
// caller to conceal the loss with other means. If the implementation provides
// concealment samples, it is also responsible for "stitching" it together
// with the decoded audio on either side of the concealment.
// Note: The default implementation of GeneratePlc will be deleted soon. All
// implementations must provide their own, which can be a simple as a no-op.
// TODO(bugs.webrtc.org/9676): Remove default implementation.
virtual void GeneratePlc(size_t requested_samples_per_channel,
BufferT<int16_t>* concealment_audio);
// Resets the decoder state (empty buffers etc.).
virtual void Reset() = 0;
// Returns the last error code from the decoder.
virtual int ErrorCode();
// Returns the duration in samples-per-channel of the payload in `encoded`
// which is `encoded_len` bytes long. Returns kNotImplemented if no duration
// estimate is available, or -1 in case of an error.
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
// Returns the duration in samples-per-channel of the redandant payload in
// `encoded` which is `encoded_len` bytes long. Returns kNotImplemented if no
// duration estimate is available, or -1 in case of an error.
virtual int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const;
// Detects whether a packet has forward error correction. The packet is
// comprised of the samples in `encoded` which is `encoded_len` bytes long.
// Returns true if the packet has FEC and false otherwise.
virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
// Returns the actual sample rate of the decoder's output. This value may not
// change during the lifetime of the decoder.
virtual int SampleRateHz() const = 0;
// The number of channels in the decoder's output. This value may not change
// during the lifetime of the decoder.
virtual size_t Channels() const = 0;
// The maximum number of audio channels supported by WebRTC decoders.
static constexpr int kMaxNumberOfChannels = kMaxNumberOfAudioChannels;
// RingRTC change to configure opus
// Very OPUS-specific
struct Config {
// DNN weights blob
const void* dnn_weights_data = nullptr;
int32_t dnn_weights_length = 0;
// Decoder complexity:
// -1: Use NetEq PLC (default)
// 0: Use Opus PLC
// 4: Use Opus BWE (not supported)
// 5: Use Opus Deep PLC
// 6: Use Opus Deep PLC and LACE (not supported)
// 7: Use Opus Deep PLC and NoLACE (not supported)
int32_t complexity = -1;
};
virtual bool Configure(const Config& config) {
RTC_LOG(LS_WARNING) << "Default AudioDecoder::Configure(...) does nothing!";
return false;
}
// end RingRTC change to configure opus
protected:
static SpeechType ConvertSpeechType(int16_t type);
virtual int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) = 0;
virtual int DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
};
} // namespace webrtc
#endif // API_AUDIO_CODECS_AUDIO_DECODER_H_