1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276

media / base / audio_converter.cc [blame]

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// AudioConverter implementation.  Uses MultiChannelSincResampler for resampling
// audio, ChannelMixer for channel mixing, and AudioPullFifo for buffering.
//
// Delay estimates are provided to InputCallbacks based on the frame delay
// information reported via the resampler and FIFO units.

#include "media/base/audio_converter.h"

#include <memory>

#include "base/containers/contains.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/logging.h"
#include "base/trace_event/trace_event.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_pull_fifo.h"
#include "media/base/channel_mixer.h"
#include "media/base/multi_channel_resampler.h"
#include "media/base/vector_math.h"

namespace media {

AudioConverter::AudioConverter(const AudioParameters& input_params,
                               const AudioParameters& output_params,
                               bool disable_fifo)
    : chunk_size_(input_params.frames_per_buffer()),
      downmix_early_(false),
      initial_frames_delayed_(0),
      resampler_frames_delayed_(0),
      io_sample_rate_ratio_(input_params.sample_rate() /
                            static_cast<double>(output_params.sample_rate())),
      input_channel_count_(input_params.channels()) {
  CHECK(input_params.IsValid());
  CHECK(output_params.IsValid());

  // Handle different input and output channel layouts.
  if (input_params.channel_layout() != output_params.channel_layout() ||
      input_params.channels() != output_params.channels()) {
    DVLOG(1) << "Remixing channel layout from " << input_params.channel_layout()
             << " to " << output_params.channel_layout() << "; from "
             << input_params.channels() << " channels to "
             << output_params.channels() << " channels.";
    channel_mixer_ =
        std::make_unique<ChannelMixer>(input_params, output_params);

    // Pare off data as early as we can for efficiency.
    downmix_early_ = input_params.channels() > output_params.channels();
  }

  // Only resample if necessary since it's expensive.
  if (input_params.sample_rate() != output_params.sample_rate()) {
    DVLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
             << output_params.sample_rate();
    const int request_size = disable_fifo ? SincResampler::kDefaultRequestSize
                                          : input_params.frames_per_buffer();
    resampler_ = std::make_unique<MultiChannelResampler>(
        downmix_early_ ? output_params.channels() : input_params.channels(),
        io_sample_rate_ratio_, request_size,
        base::BindRepeating(&AudioConverter::ProvideInput,
                            base::Unretained(this)));
  }

  // The resampler can be configured to work with a specific request size, so a
  // FIFO is not necessary when resampling.
  if (disable_fifo || resampler_)
    return;

  // Since the output device may want a different buffer size than the caller
  // asked for, we need to use a FIFO to ensure that both sides read in chunk
  // sizes they're configured for.
  if (input_params.frames_per_buffer() != output_params.frames_per_buffer()) {
    DVLOG(1) << "Rebuffering from " << input_params.frames_per_buffer()
             << " to " << output_params.frames_per_buffer();
    chunk_size_ = input_params.frames_per_buffer();
    audio_fifo_ = std::make_unique<AudioPullFifo>(
        downmix_early_ ? output_params.channels() : input_params.channels(),
        chunk_size_,
        base::BindRepeating(&AudioConverter::SourceCallback,
                            base::Unretained(this)));
  }
}

AudioConverter::~AudioConverter() = default;

void AudioConverter::AddInput(InputCallback* input) {
  DCHECK(!base::Contains(transform_inputs_, input));
  transform_inputs_.push_back(input);
}

void AudioConverter::RemoveInput(InputCallback* input) {
  DCHECK(base::Contains(transform_inputs_, input));
  transform_inputs_.remove(input);

  if (transform_inputs_.empty())
    Reset();
}

void AudioConverter::Reset() {
  if (audio_fifo_)
    audio_fifo_->Clear();
  if (resampler_)
    resampler_->Flush();
}

int AudioConverter::ChunkSize() const {
  if (!resampler_)
    return chunk_size_;
  return resampler_->ChunkSize();
}

void AudioConverter::PrimeWithSilence() {
  if (resampler_) {
    resampler_->PrimeWithSilence();
  }
}

int AudioConverter::GetMaxInputFramesRequested(int output_frames_requested) {
  return resampler_
             ? resampler_->GetMaxInputFramesRequested(output_frames_requested)
             : output_frames_requested;
}

void AudioConverter::ConvertWithInfo(uint32_t initial_frames_delayed,
                                     const AudioGlitchInfo& glitch_info,
                                     AudioBus* dest) {
  TRACE_EVENT("audio", "AudioConverter::Convert", "sample rate ratio",
              io_sample_rate_ratio_, "delay (frames)", initial_frames_delayed);
  initial_frames_delayed_ = initial_frames_delayed;
  glitch_info_accumulator_.Add(glitch_info);

  if (transform_inputs_.empty()) {
    dest->Zero();
    return;
  }

  // Determine if channel mixing should be done and if it should be done before
  // or after resampling.  If it's possible to reduce the channel count prior to
  // resampling we can save a lot of processing time.  Vice versa, we don't want
  // to increase the channel count prior to resampling for the same reason.
  bool needs_mixing = channel_mixer_ && !downmix_early_;

  if (needs_mixing)
    CreateUnmixedAudioIfNecessary(dest->frames());

  AudioBus* temp_dest = needs_mixing ? unmixed_audio_.get() : dest;
  DCHECK(temp_dest);

  // Figure out which method to call based on whether we're resampling and
  // rebuffering, just resampling, or just mixing.  We want to avoid any extra
  // steps when possible since we may be converting audio data in real time.
  if (!resampler_ && !audio_fifo_) {
    SourceCallback(0, temp_dest);
  } else {
    if (resampler_)
      resampler_->Resample(temp_dest->frames(), temp_dest);
    else
      ProvideInput(0, temp_dest);
  }

  // Finally upmix the channels if we didn't do so earlier.
  if (needs_mixing) {
    DCHECK_EQ(temp_dest->frames(), dest->frames());
    channel_mixer_->Transform(temp_dest, dest);
  }
}

void AudioConverter::Convert(AudioBus* dest) {
  ConvertWithInfo(0, {}, dest);
}

void AudioConverter::SourceCallback(int fifo_frame_delay, AudioBus* dest) {
  TRACE_EVENT("audio", "AudioConverter::SourceCallback", "delay (frames)",
              fifo_frame_delay);
  const bool needs_downmix = channel_mixer_ && downmix_early_;

  if (!mixer_input_audio_bus_ ||
      mixer_input_audio_bus_->frames() != dest->frames()) {
    mixer_input_audio_bus_ =
        AudioBus::Create(input_channel_count_, dest->frames());
  }

  // If we're downmixing early we need a temporary AudioBus which matches
  // the the input channel count and input frame size since we're passing
  // |unmixed_audio_| directly to the |source_callback_|.
  if (needs_downmix)
    CreateUnmixedAudioIfNecessary(dest->frames());

  AudioBus* const temp_dest = needs_downmix ? unmixed_audio_.get() : dest;

  // Sanity check our inputs.
  DCHECK_EQ(temp_dest->frames(), mixer_input_audio_bus_->frames());
  DCHECK_EQ(temp_dest->channels(), mixer_input_audio_bus_->channels());

  // |total_frames_delayed| is reported to the *input* source in terms of the
  // *input* sample rate. |initial_frames_delayed_| is given in terms of the
  // output sample rate, so we scale by sample rate ratio (in/out).
  uint32_t total_frames_delayed =
      std::round(initial_frames_delayed_ * io_sample_rate_ratio_);
  if (resampler_) {
    // |resampler_frames_delayed_| tallies frames queued up inside the resampler
    // that are already converted to the output format. Scale by ratio to get
    // delay in terms of input sample rate.
    total_frames_delayed +=
        std::round(resampler_frames_delayed_ * io_sample_rate_ratio_);
  }
  if (audio_fifo_) {
    total_frames_delayed += fifo_frame_delay;
  }

  // If we only have a single input, avoid an extra copy.
  AudioBus* const provide_input_dest =
      transform_inputs_.size() == 1 ? temp_dest : mixer_input_audio_bus_.get();

  // The glitch info that should be propagated to all inputs.
  AudioGlitchInfo glitch_info = glitch_info_accumulator_.GetAndReset();

  // Have each mixer render its data into an output buffer then mix the result.
  for (InputCallback* input : transform_inputs_) {
    const float volume = input->ProvideInput(provide_input_dest,
                                             total_frames_delayed, glitch_info);
    // Optimize the most common single input, full volume case.
    if (input == transform_inputs_.front()) {
      if (volume == 1.0f) {
        if (temp_dest != provide_input_dest)
          provide_input_dest->CopyTo(temp_dest);
      } else if (volume > 0) {
        for (int i = 0; i < provide_input_dest->channels(); ++i) {
          vector_math::FMUL(provide_input_dest->channel(i), volume,
                            provide_input_dest->frames(),
                            temp_dest->channel(i));
        }
      } else {
        // Zero |temp_dest| otherwise, so we're mixing into a clean buffer.
        temp_dest->Zero();
      }

      continue;
    }

    // Volume adjust and mix each mixer input into |temp_dest| after rendering.
    if (volume > 0) {
      for (int i = 0; i < mixer_input_audio_bus_->channels(); ++i) {
        vector_math::FMAC(mixer_input_audio_bus_->channel(i), volume,
                          mixer_input_audio_bus_->frames(),
                          temp_dest->channel(i));
      }
    }
  }

  if (needs_downmix) {
    DCHECK_EQ(temp_dest->frames(), dest->frames());
    channel_mixer_->Transform(temp_dest, dest);
  }
}

void AudioConverter::ProvideInput(int resampler_frame_delay, AudioBus* dest) {
  TRACE_EVENT1("audio", "AudioConverter::ProvideInput", "delay (frames)",
               resampler_frame_delay);
  resampler_frames_delayed_ = resampler_frame_delay;
  if (audio_fifo_)
    audio_fifo_->Consume(dest, dest->frames());
  else
    SourceCallback(0, dest);
}

void AudioConverter::CreateUnmixedAudioIfNecessary(int frames) {
  if (!unmixed_audio_ || unmixed_audio_->frames() != frames)
    unmixed_audio_ = AudioBus::Create(input_channel_count_, frames);
}

}  // namespace media