// media/filters/audio_timestamp_validator.cc
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/filters/audio_timestamp_validator.h"
#include <memory>
namespace media {
// Threshold, in milliseconds, for the difference between a DecoderBuffer
// timestamp and the expected timestamp beyond which CheckForTimestampGap()
// warns the user. 50 is small enough to surface problems early, yet large
// enough to stay quiet for containers such as WebM that default to low
// granularity timestamp precision.
constexpr int kGapWarningThresholdMsec = 50;

// Cap on how many times CheckForTimestampGap() may shift the base timestamp of
// the decoded-output timestamp helper while trying to reach a stable state,
// i.e. one where gaps between encoded timestamps match decoded output
// intervals.
constexpr int kLimitTriesForStableTiming = 5;

// Timestamp expectations are considered "stable" when the difference between
// expected and actual timestamps, in whole milliseconds, is strictly below this
// value. 1 is used because some containers (WebM) default to millisecond
// timestamp precision. See CheckForTimestampGap(), which refers to this
// constant by its current spelling.
constexpr int kStableTimeGapThrsholdMsec = 1;

// Upper bound on the number of timestamp gap warnings sent to MediaLog.
constexpr int kMaxTimestampGapWarnings = 10;
// Creates a validator for the audio stream described by |decoder_config|.
// Warnings emitted by CheckForTimestampGap() are sent to |media_log|.
// |decoder_config| is expected to be valid (DCHECKed below).
AudioTimestampValidator::AudioTimestampValidator(
const AudioDecoderConfig& decoder_config,
MediaLog* media_log)
// A positive codec delay means timestamps are not expected to be stable from
// the very first buffer; see CheckForTimestampGap().
: has_codec_delay_(decoder_config.codec_delay() > 0),
media_log_(media_log),
// kNoTimestamp marks that no encoded buffer has been seen yet.
audio_base_ts_(kNoTimestamp),
reached_stable_state_(false),
num_unstable_audio_tries_(0),
// May be lowered to 0 by CheckForTimestampGap() for streams that should be
// stable from the start.
limit_unstable_audio_tries_(kLimitTriesForStableTiming),
// Starting threshold; CheckForTimestampGap() raises it after each warning to
// limit log spam.
drift_warning_threshold_msec_(kGapWarningThresholdMsec) {
DCHECK(decoder_config.IsValidConfig());
}
// Defaulted destructor, defined out of line in this translation unit. No
// manual cleanup is performed here; members release themselves.
AudioTimestampValidator::~AudioTimestampValidator() = default;
void AudioTimestampValidator::CheckForTimestampGap(
const DecoderBuffer& buffer) {
if (buffer.end_of_stream())
return;
DCHECK_NE(kNoTimestamp, buffer.timestamp());
// If audio_base_ts_ == kNoTimestamp, we are processing our first buffer.
// If stream has neither codec delay nor discard padding, we should expect
// timestamps and output durations to line up from the start (i.e. be stable).
if (audio_base_ts_ == kNoTimestamp && !has_codec_delay_ &&
buffer.discard_padding().first == base::TimeDelta() &&
buffer.discard_padding().second == base::TimeDelta()) {
DVLOG(3) << __func__ << " Expecting stable timestamps - stream has neither "
<< "codec delay nor discard padding.";
limit_unstable_audio_tries_ = 0;
}
// Don't continue checking timestamps if we've exhausted tries to reach stable
// state. This suggests the media's encoded timestamps are way off.
if (num_unstable_audio_tries_ > limit_unstable_audio_tries_)
return;
// Keep resetting encode base ts until we start getting decode output. Some
// codecs/containers (e.g. chained Ogg) will take several encoded buffers
// before producing the first decoded output.
if (!audio_output_ts_helper_) {
audio_base_ts_ = buffer.timestamp();
DVLOG(3) << __func__
<< " setting audio_base:" << audio_base_ts_.InMicroseconds();
return;
}
// If we have `audio_output_ts_helper_` we must have a base timestamp.
DCHECK(audio_output_ts_helper_->base_timestamp());
base::TimeDelta expected_ts = audio_output_ts_helper_->GetTimestamp();
base::TimeDelta ts_delta = buffer.timestamp() - expected_ts;
// Reconciling encoded buffer timestamps with decoded output often requires
// adjusting expectations by some offset. This accounts for varied (and at
// this point unknown) handling of front trimming and codec delay. Codec delay
// and skip trimming may or may not be accounted for in the encoded timestamps
// depending on the codec (e.g. MP3 vs Opus) and demuxers used (e.g. FFmpeg
// vs MSE stream parsers).
if (!reached_stable_state_) {
if (std::abs(ts_delta.InMilliseconds()) < kStableTimeGapThrsholdMsec) {
reached_stable_state_ = true;
DVLOG(3) << __func__ << " stabilized! tries:" << num_unstable_audio_tries_
<< " offset:"
<< audio_output_ts_helper_->base_timestamp()->InMicroseconds();
} else {
base::TimeDelta orig_offset = *audio_output_ts_helper_->base_timestamp();
// Save since this gets reset when we set new base time.
int64_t decoded_frame_count = audio_output_ts_helper_->frame_count();
audio_output_ts_helper_->SetBaseTimestamp(orig_offset + ts_delta);
audio_output_ts_helper_->AddFrames(decoded_frame_count);
DVLOG(3) << __func__
<< " NOT stabilized. tries:" << num_unstable_audio_tries_
<< " offset was:" << orig_offset.InMicroseconds() << " now:"
<< audio_output_ts_helper_->base_timestamp()->InMicroseconds();
num_unstable_audio_tries_++;
// Let developers know if their files timestamps are way off from
if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) {
MEDIA_LOG(WARNING, media_log_)
<< "Failed to reconcile encoded audio times with decoded output.";
}
}
// Don't bother with further checking until we reach stable state.
return;
}
if (std::abs(ts_delta.InMilliseconds()) > drift_warning_threshold_msec_) {
LIMITED_MEDIA_LOG(WARNING, media_log_, num_timestamp_gap_warnings_,
kMaxTimestampGapWarnings)
<< " Large timestamp gap detected; may cause AV sync to drift."
<< " time:" << buffer.timestamp().InMicroseconds() << "us"
<< " expected:" << expected_ts.InMicroseconds() << "us"
<< " delta:" << ts_delta.InMicroseconds() << "us";
// Increase threshold to avoid log spam but, let us know if gap widens.
drift_warning_threshold_msec_ = std::abs(ts_delta.InMilliseconds());
}
DVLOG(3) << __func__ << " delta:" << ts_delta.InMicroseconds()
<< " expected_ts:" << expected_ts.InMicroseconds()
<< " actual_ts:" << buffer.timestamp().InMicroseconds()
<< " audio_ts_offset:"
<< audio_output_ts_helper_->base_timestamp()->InMicroseconds();
}
// Accounts for the frames of a decoded |audio_buffer|. The output timestamp
// helper is created on the first call, based at the most recently recorded
// encoded timestamp (|audio_base_ts_|), which must already be set.
void AudioTimestampValidator::RecordOutputDuration(
    const AudioBuffer& audio_buffer) {
  const auto decoded_frames = audio_buffer.frame_count();

  if (!audio_output_ts_helper_) {
    DCHECK_NE(audio_base_ts_, kNoTimestamp);

    // SUBTLE: the helper is intentionally built with the sample rate of the
    // output buffer, since the demuxer stream config is potentially stale for
    // implicit AAC.
    audio_output_ts_helper_ =
        std::make_unique<AudioTimestampHelper>(audio_buffer.sample_rate());
    audio_output_ts_helper_->SetBaseTimestamp(audio_base_ts_);
  }

  DVLOG(3) << __func__ << " " << decoded_frames << " frames";
  audio_output_ts_helper_->AddFrames(decoded_frames);
}
} // namespace media