1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
media / filters / audio_file_reader.cc [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#include "media/filters/audio_file_reader.h"
#include <stddef.h>
#include <cmath>
#include <memory>
#include <vector>
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_sample_types.h"
#include "media/base/media_switches.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/ffmpeg/ffmpeg_decoding_loop.h"
#include "media/ffmpeg/scoped_av_packet.h"
#include "media/formats/mpeg/mpeg1_audio_stream_parser.h"
namespace media {
// Frame counts specific to AAC (M4A) decoding. GetDuration() adds their sum,
// converted to microseconds, to the duration estimate of AAC streams.
static constexpr int kAACPrimingFrameCount = 2112;
static constexpr int kAACRemainderFrameCount = 519;
// Stores |protocol| for later use by OpenDemuxer() and initializes every
// cached stream property to zero / AudioCodec::kUnknown. Nothing is opened
// here; the cached properties are filled in by OpenDemuxer()/OpenDecoder().
AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
: stream_index_(0),
protocol_(protocol),
audio_codec_(AudioCodec::kUnknown),
channels_(0),
sample_rate_(0),
av_sample_format_(0) {}
// Tears the reader down by delegating to Close().
AudioFileReader::~AudioFileReader() {
// Close() resets both |codec_context_| and |glue_|.
Close();
}
// Opens the demuxer and then the decoder. Returns true only when both steps
// succeed; the decoder is not attempted when the demuxer fails to open.
bool AudioFileReader::Open() {
  if (!OpenDemuxer())
    return false;
  return OpenDecoder();
}
// Builds the FFmpeg glue around |protocol_|, opens the container, probes its
// streams and selects the first audio stream found.
//
// On success |stream_index_| holds the index of that stream and
// |codec_context_| holds the codec context derived from it. Returns false if
// the container cannot be opened, stream probing fails, no audio stream
// exists, or no codec context could be created for the selected stream.
bool AudioFileReader::OpenDemuxer() {
  glue_ = std::make_unique<FFmpegGlue>(protocol_);
  AVFormatContext* format_context = glue_->format_context();

  // Open FFmpeg AVFormatContext.
  if (!glue_->OpenContext()) {
    DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
    return false;
  }

  // No per-stream options are supplied, hence the null options argument.
  const int result = avformat_find_stream_info(format_context, nullptr);
  if (result < 0) {
    DLOG(WARNING)
        << "AudioFileReader::Open() : error in avformat_find_stream_info()";
    return false;
  }

  // Calling avformat_find_stream_info can uncover new streams. We wait till now
  // to find the first audio stream, if any.
  codec_context_.reset();
  bool found_stream = false;
  for (size_t i = 0; i < format_context->nb_streams; ++i) {
    if (format_context->streams[i]->codecpar->codec_type ==
        AVMEDIA_TYPE_AUDIO) {
      // Make the narrowing from the loop index explicit; the stored index is
      // later compared against AVPacket::stream_index in ReadPacket().
      stream_index_ = static_cast<int>(i);
      found_stream = true;
      break;
    }
  }

  if (!found_stream)
    return false;

  // Get the codec context.
  codec_context_ =
      AVStreamToAVCodecContext(format_context->streams[stream_index_]);
  if (!codec_context_)
    return false;

  DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
  return true;
}
bool AudioFileReader::OpenDecoder() {
const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
if (codec) {
// MP3 decodes to S16P which we don't support, tell it to use S16 instead.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
if (result < 0) {
DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
<< " result: " << result;
return false;
}
// Ensure avcodec_open2() respected our format request.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
<< " supported sample format - "
<< codec_context_->sample_fmt;
return false;
}
} else {
DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
return false;
}
// Verify the channel layout is supported by Chrome. Acts as a sanity check
// against invalid files. See http://crbug.com/171962
if (ChannelLayoutToChromeChannelLayout(
codec_context_->ch_layout.u.mask,
codec_context_->ch_layout.nb_channels) ==
CHANNEL_LAYOUT_UNSUPPORTED) {
return false;
}
// Store initial values to guard against midstream configuration changes.
channels_ = codec_context_->ch_layout.nb_channels;
audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
sample_rate_ = codec_context_->sample_rate;
av_sample_format_ = codec_context_->sample_fmt;
return true;
}
// Returns true when the format context reports a duration, i.e. its duration
// field is anything other than AV_NOPTS_VALUE.
bool AudioFileReader::HasKnownDuration() const {
  const auto container_duration = glue_->format_context()->duration;
  return container_duration != AV_NOPTS_VALUE;
}
// Releases the codec context and then the FFmpeg glue. The destructor calls
// this as well.
// NOTE(review): the codec context is presumably released first because it was
// derived from a stream belonging to the glue's format context - confirm
// before reordering.
void AudioFileReader::Close() {
codec_context_.reset();
glue_.reset();
}
// Reads and decodes up to |packets_to_read| packets from the selected audio
// stream. Every decoded frame is handed to OnNewFrame(), which appends an
// AudioBus to |decoded_audio_packets|.
//
// Stops early when no further packet can be read or when the decoding loop
// reports anything other than kOkay. Returns the number of sample-frames
// accumulated by OnNewFrame() during this call.
int AudioFileReader::Read(
    std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
    int packets_to_read) {
  DCHECK(glue_ && codec_context_)
      << "AudioFileReader::Read() : reader is not opened!";

  FFmpegDecodingLoop decode_loop(codec_context_.get());

  // OnNewFrame() accumulates into |total_frames| through the bound pointer.
  int total_frames = 0;
  auto on_frame_decoded =
      base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
                          &total_frames, decoded_audio_packets);

  auto packet = ScopedAVPacket::Allocate();
  for (int packets_read = 0; packets_read < packets_to_read; ++packets_read) {
    if (!ReadPacket(packet.get()))
      break;

    const auto decode_status =
        decode_loop.DecodePacket(packet.get(), on_frame_decoded);
    // The packet is reused for the next read, so drop its payload first.
    av_packet_unref(packet.get());

    if (decode_status != FFmpegDecodingLoop::DecodeStatus::kOkay)
      break;
  }

  return total_frames;
}
// Returns the container's duration estimate with a small upward adjustment.
// The duration must be known (see HasKnownDuration()); this is DCHECKed.
// The adjusted value is computed with checked arithmetic and ValueOrDie().
base::TimeDelta AudioFileReader::GetDuration() const {
  DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);

  base::CheckedNumeric<int64_t> duration_us = glue_->format_context()->duration;

  if (audio_codec_ != AudioCodec::kAAC) {
    // Add one microsecond to avoid rounding-down errors which can occur when
    // the duration has been calculated from an exact number of sample-frames.
    // One microsecond is much less than the time of a single sample-frame at
    // any real-world sample-rate.
    duration_us += 1;
  } else {
    // For certain AAC-encoded files, FFmpeg's estimated frame count might not
    // be sufficient to capture the entire audio content that we want. This is
    // especially noticeable for short files (< 10ms), resulting in silence
    // throughout the decoded buffer. Therefore the priming frames and the
    // remainder frames are added to the estimate.
    // (See: crbug.com/513178)
    const double extra_frames =
        static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount);
    duration_us += std::ceil(1000000.0 * extra_frames / sample_rate());
  }

  const AVRational duration_time_base = {1, AV_TIME_BASE};
  return ConvertFromTimeBase(duration_time_base, duration_us.ValueOrDie());
}
// Converts GetDuration() into a sample-frame count at sample_rate(), rounding
// up and clamping the result to the range of int.
int AudioFileReader::GetNumberOfFrames() const {
  const double duration_seconds = GetDuration().InSecondsF();
  return base::ClampCeil(duration_seconds * sample_rate());
}
// Test-only entry point that forwards to OpenDemuxer() and returns its
// result; the decoder is not opened.
bool AudioFileReader::OpenDemuxerForTesting() {
return OpenDemuxer();
}
// Test-only entry point that forwards |output_packet| to ReadPacket() and
// returns its result.
bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
return ReadPacket(output_packet);
}
// Reads packets from the container until one belonging to |stream_index_| is
// found and stores it in |output_packet|. For MP3 containers, packets that do
// not start with a parseable MPEG-1 audio header are dropped as well.
//
// Returns true when |output_packet| holds an acceptable packet, false once
// av_read_frame() reports an error or the end of the input.
bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
  // FFmpeg may return garbage packets for MP3 stream containers, so we need
  // to drop these to avoid decoder errors. The ffmpeg team maintains that
  // this behavior isn't ideal, but have asked for a significant refactoring
  // of the AVParser infrastructure to fix this, which is overkill for now.
  // See http://crbug.com/794782.
  const auto starts_with_mp3_header = [](const AVPacket* packet) -> bool {
    // MP3 packets may be zero-padded according to ffmpeg, so skip leading
    // zero bytes before looking for the header.
    uint8_t* const packet_end = packet->data + packet->size;
    uint8_t* cursor = packet->data;
    for (; cursor < packet_end && !*cursor; ++cursor) {
    }

    if (packet_end - cursor < MPEG1AudioStreamParser::kHeaderSize)
      return false;
    return MPEG1AudioStreamParser::ParseHeader(nullptr, nullptr, cursor,
                                               nullptr);
  };

  while (av_read_frame(glue_->format_context(), output_packet) >= 0) {
    // Packets from other streams are never returned.
    const bool from_selected_stream =
        output_packet->stream_index == stream_index_;
    if (from_selected_stream &&
        (!IsMp3File() || starts_with_mp3_header(output_packet))) {
      return true;
    }

    // Rejected packet: release its payload and try the next one.
    av_packet_unref(output_packet);
  }

  return false;
}
bool AudioFileReader::OnNewFrame(
int* total_frames,
std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
AVFrame* frame) {
int frames_read = frame->nb_samples;
if (frames_read < 0)
return false;
const int channels = frame->ch_layout.nb_channels;
if (frame->sample_rate != sample_rate_ || channels != channels_ ||
frame->format != av_sample_format_) {
DLOG(ERROR) << "Unsupported midstream configuration change!"
<< " Sample Rate: " << frame->sample_rate << " vs "
<< sample_rate_ << ", Channels: " << channels << " vs "
<< channels_ << ", Sample Format: " << frame->format << " vs "
<< av_sample_format_;
// This is an unrecoverable error, so bail out. We'll return
// whatever we've decoded up to this point.
return false;
}
// AAC decoding doesn't properly trim the last packet in a stream, so if we
// have duration information, use it to set the correct length to avoid extra
// silence from being output. In the case where we are also discarding some
// portion of the packet (as indicated by a negative pts), we further want to
// adjust the duration downward by however much exists before zero.
if (audio_codec_ == AudioCodec::kAAC && frame->duration) {
const base::TimeDelta pkt_duration = ConvertFromTimeBase(
glue_->format_context()->streams[stream_index_]->time_base,
frame->duration + std::min(static_cast<int64_t>(0), frame->pts));
const base::TimeDelta frame_duration =
base::Seconds(frames_read / static_cast<double>(sample_rate_));
if (pkt_duration < frame_duration && pkt_duration.is_positive()) {
const int new_frames_read =
base::ClampFloor(frames_read * (pkt_duration / frame_duration));
DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to "
<< new_frames_read << " based on packet duration.";
frames_read = new_frames_read;
// The above process may delete the entire packet.
if (!frames_read)
return true;
}
}
// Deinterleave each channel and convert to 32bit floating-point with
// nominal range -1.0 -> +1.0. If the output is already in float planar
// format, just copy it into the AudioBus.
decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
AudioBus* audio_bus = decoded_audio_packets->back().get();
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
audio_bus->FromInterleaved<Float32SampleTypeTraits>(
reinterpret_cast<float*>(frame->data[0]), frames_read);
} else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
for (int ch = 0; ch < audio_bus->channels(); ++ch) {
memcpy(audio_bus->channel(ch), frame->extended_data[ch],
sizeof(float) * frames_read);
}
} else {
int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt);
switch (bytes_per_sample) {
case 1:
audio_bus->FromInterleaved<UnsignedInt8SampleTypeTraits>(
reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read);
break;
case 2:
audio_bus->FromInterleaved<SignedInt16SampleTypeTraits>(
reinterpret_cast<const int16_t*>(frame->data[0]), frames_read);
break;
case 4:
audio_bus->FromInterleaved<SignedInt32SampleTypeTraits>(
reinterpret_cast<const int32_t*>(frame->data[0]), frames_read);
break;
default:
NOTREACHED() << "Unsupported bytes per sample encountered: "
<< bytes_per_sample;
}
}
(*total_frames) += frames_read;
return true;
}
// Returns true when the glue identified the input container as MP3.
bool AudioFileReader::IsMp3File() {
  const auto container = glue_->container();
  return container == container_names::MediaContainerName::kContainerMP3;
}
// Test-only helper that seeks the selected stream to |seek_time| using
// AVSEEK_FLAG_BACKWARD. Returns true when av_seek_frame() reports success.
bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
  // The AVStream's time_base is used for the conversion, since
  // |codec_context_| does not have time_base populated until after
  // OpenDecoder().
  const auto target_timestamp =
      ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time);

  const int seek_result =
      av_seek_frame(glue_->format_context(), stream_index_, target_timestamp,
                    AVSEEK_FLAG_BACKWARD);
  return seek_result >= 0;
}
// Test-only accessor for the AVStream at |stream_index_| in the glue's format
// context.
const AVStream* AudioFileReader::GetAVStreamForTesting() const {
  const auto* format_context = glue_->format_context();
  return format_context->streams[stream_index_];
}
} // namespace media