1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
media / audio / win / audio_low_latency_input_win.h [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Implementation of AudioInputStream for Windows using Windows Core Audio
// WASAPI for low latency capturing.
//
// Overview of operation:
//
// - An object of WASAPIAudioInputStream is created by the AudioManager
// factory.
// - Next some thread will call Open(), at that point the underlying
// Core Audio APIs are utilized to create two WASAPI interfaces called
// IAudioClient and IAudioCaptureClient.
// - Then some thread will call Start(sink).
// A thread called "wasapi_capture_thread" is started and this thread listens
// on an event signal which is set periodically by the audio engine for
// each recorded data packet. As a result, data samples will be provided
// to the registered sink.
// - At some point, a thread will call Stop(), which stops and joins the
// capture thread and at the same time stops audio streaming.
// - The same thread that called stop will call Close() where we cleanup
// and notify the audio manager, which likely will destroy this object.
//
// Implementation notes:
//
// - The minimum supported client is Windows Vista.
// - This implementation is single-threaded, hence:
// o Construction and destruction must take place from the same thread.
// o It is recommended to call all APIs from the same thread as well.
// - It is recommended to first acquire the native sample rate of the default
// input device and then use the same rate when creating this object. Use
// WASAPIAudioInputStream::HardwareSampleRate() to retrieve the sample rate.
// - Calling Close() also leads to self destruction.
//
// Core Audio API details:
//
// - Utilized MMDevice interfaces:
// o IMMDeviceEnumerator
// o IMMDevice
// - Utilized WASAPI interfaces:
// o IAudioClient
// o IAudioCaptureClient
// - The stream is initialized in shared mode and the processing of the
// audio buffer is event driven.
// - The Multimedia Class Scheduler service (MMCSS) is utilized to boost
// the priority of the capture thread.
// - Audio applications that use the MMDevice API and WASAPI typically use
// the ISimpleAudioVolume interface to manage stream volume levels on a
// per-session basis. It is also possible to use of the IAudioEndpointVolume
// interface to control the master volume level of an audio endpoint device.
// This implementation is using the ISimpleAudioVolume interface.
// MSDN states that "In rare cases, a specialized audio application might
// require the use of the IAudioEndpointVolume".
//
#ifndef MEDIA_AUDIO_WIN_AUDIO_LOW_LATENCY_INPUT_WIN_H_
#define MEDIA_AUDIO_WIN_AUDIO_LOW_LATENCY_INPUT_WIN_H_
#include <Audioclient.h>
#include <MMDeviceAPI.h>
#include <endpointvolume.h>
#include <stddef.h>
#include <stdint.h>
#include <windows.media.effects.h>
#include <wrl/client.h>
#include <memory>
#include <string>
#include <vector>
#include "base/compiler_specific.h"
#include "base/memory/raw_ptr.h"
#include "base/sequence_checker.h"
#include "base/threading/platform_thread.h"
#include "base/threading/simple_thread.h"
#include "base/time/time.h"
#include "base/win/scoped_co_mem.h"
#include "base/win/scoped_com_initializer.h"
#include "base/win/scoped_handle.h"
#include "media/audio/agc_audio_stream.h"
#include "media/audio/system_glitch_reporter.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/base/amplitude_peak_detector.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_glitch_info.h"
#include "media/base/audio_parameters.h"
#include "media/base/media_export.h"
namespace media {
class AudioBlockFifo;
class AudioBus;
// AudioInputStream implementation using Windows Core Audio APIs.
class MEDIA_EXPORT WASAPIAudioInputStream
: public AgcAudioStream<AudioInputStream>,
public base::DelegateSimpleThread::Delegate,
public AudioConverter::InputCallback {
public:
// Used to track down where we fail during initialization which at the
// moment seems to be happening frequently and we're not sure why.
// The reason might be expected (e.g. trying to open "default" on a machine
// that has no audio devices).
// Note: This enum is used to record a histogram value and should not be
// re-ordered.
enum StreamOpenResult {
OPEN_RESULT_OK = 0,
OPEN_RESULT_CREATE_INSTANCE = 1,
OPEN_RESULT_NO_ENDPOINT = 2,
OPEN_RESULT_NO_STATE = 3,
OPEN_RESULT_DEVICE_NOT_ACTIVE = 4,
OPEN_RESULT_ACTIVATION_FAILED = 5,
OPEN_RESULT_FORMAT_NOT_SUPPORTED = 6,
OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED = 7,
OPEN_RESULT_GET_BUFFER_SIZE_FAILED = 8,
OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED = 9,
OPEN_RESULT_LOOPBACK_INIT_FAILED = 10,
OPEN_RESULT_SET_EVENT_HANDLE = 11,
OPEN_RESULT_NO_CAPTURE_CLIENT = 12,
OPEN_RESULT_NO_AUDIO_VOLUME = 13,
OPEN_RESULT_OK_WITH_RESAMPLING = 14,
OPEN_RESULT_MAX = OPEN_RESULT_OK_WITH_RESAMPLING
};
// The ctor takes all the usual parameters, plus |manager| which is the
// the audio manager who is creating this object.
WASAPIAudioInputStream(AudioManagerWin* manager,
const AudioParameters& params,
const std::string& device_id,
AudioManager::LogCallback log_callback);
WASAPIAudioInputStream(const WASAPIAudioInputStream&) = delete;
WASAPIAudioInputStream& operator=(const WASAPIAudioInputStream&) = delete;
// The dtor is typically called by the AudioManager only and it is usually
// triggered by calling AudioInputStream::Close().
~WASAPIAudioInputStream() override;
// Implementation of AudioInputStream.
AudioInputStream::OpenOutcome Open() override;
void Start(AudioInputCallback* callback) override;
void Stop() override;
void Close() override;
double GetMaxVolume() override;
void SetVolume(double volume) override;
double GetVolume() override;
bool IsMuted() override;
void SetOutputDeviceForAec(const std::string& output_device_id) override;
bool started() const { return started_; }
private:
class DataDiscontinuityReporter;
PRINTF_FORMAT(2, 3) void SendLogMessage(const char* format, ...);
// DelegateSimpleThread::Delegate implementation.
void Run() override;
// Pulls capture data from the endpoint device and pushes it to the sink.
void PullCaptureDataAndPushToSink();
// Issues the OnError() callback to the |sink_|.
void HandleError(HRESULT err);
// The Open() method is divided into these sub methods.
HRESULT SetCaptureDevice();
// Returns whether raw audio processing is supported or not for the selected
// capture device.
bool RawProcessingSupported();
// The Windows.Media.Effects.AudioEffectsManager UWP API contains a method
// called CreateAudioCaptureEffectsManagerWithMode() which is needed to
// enumerate active audio effects on the capture stream. This UWP method
// needs a device ID which differs from what can be derived from the default
// Win32 API in CoreAudio. The GetUWPDeviceId() method builds up the required
// device ID that the audio effects manager needs. Note that it is also
// possible to get the ID directly from the Windows.Devices.Enumeration UWP
// API but that is rather complex and requires use of asynchronous methods.
std::string GetUWPDeviceId();
// For the selected |uwp_device_id|, generate two lists of enabled audio
// effects and store them in |default_effect_types_| and |raw_effect_types_|.
HRESULT GetAudioCaptureEffects(const std::string& uwp_device_id);
// Returns the native number of channels that the audio engine uses for its
// internal processing of shared-mode streams.
HRESULT GetAudioEngineNumChannels(WORD* channels);
// Sets communications policy and excludes any built-in audio processing,
// i.e., activates raw capture mode.
// Raw capture mode is only enabled if the native number of input channels is
// less than |media::kMaxConcurrentChannels| (8).
HRESULT SetCommunicationsCategoryAndMaybeRawCaptureMode(WORD channels);
// Returns whether the desired format is supported or not and writes the
// result of a failing system call to |*hr|, or S_OK if successful. If this
// function returns false with |*hr| == S_FALSE, the OS supports a closest
// match but we don't support conversion to it.
bool DesiredFormatIsSupported(HRESULT* hr);
void SetupConverterAndStoreFormatInfo();
HRESULT InitializeAudioEngine();
void ReportOpenResult(HRESULT hr);
// Reports stats for format related audio client initialization
// (IAudioClient::Initialize) errors, that is if |hr| is an error related to
// the format.
void MaybeReportFormatRelatedInitError(HRESULT hr) const;
// AudioConverter::InputCallback implementation.
double ProvideInput(AudioBus* audio_bus,
uint32_t frames_delayed,
const AudioGlitchInfo& glitch_info) override;
// Reports glitch stats and resets associated variables.
void ReportAndResetGlitchStats();
// Our creator, the audio manager needs to be notified when we close.
const raw_ptr<AudioManagerWin> manager_;
// Used to aggregate and report glitch metrics to UMA (periodically) and to
// text logs (when a stream ends).
SystemGlitchReporter glitch_reporter_;
// Accumulates glitch info to be passed on to OnData().
media::AudioGlitchInfo::Accumulator glitch_accumulator_;
AmplitudePeakDetector peak_detector_;
// Used to track and log data discontinuity warnings from
// IAudioCaptureClient::GetBuffer.
std::unique_ptr<DataDiscontinuityReporter> data_discontinuity_reporter_;
// Capturing is driven by this thread (which has no message loop).
// All OnData() callbacks will be called from this thread.
std::unique_ptr<base::DelegateSimpleThread> capture_thread_;
// Contains the desired output audio format which is set up at construction
// and then never modified. It is the audio format this class will output
// data to the sink in, or equivalently, the format after the converter if
// such is needed. Does not need the extended version since we only support
// max stereo at this stage.
WAVEFORMATEX output_format_;
// Contains the audio format we get data from the audio engine in. Initially
// set to |output_format_| at construction but it might be changed to a close
// match if the audio engine doesn't support the originally set format. Note
// that, this is also the format after the FIFO, i.e. the input format to the
// converter if any.
WAVEFORMATEXTENSIBLE input_format_;
bool opened_ = false;
bool started_ = false;
StreamOpenResult open_result_ = OPEN_RESULT_OK;
// Size in bytes of each audio frame before the converter (4 bytes for 16-bit
// stereo PCM). Note that this is the same before and after the fifo.
size_t frame_size_bytes_ = 0;
// Size in audio frames of each audio packet (buffer) after the fifo but
// before the converter.
size_t packet_size_frames_ = 0;
// Size in bytes of each audio packet (buffer) after the fifo but before the
// converter.
size_t packet_size_bytes_ = 0;
// Length of the audio endpoint buffer, i.e. the buffer size before the fifo.
uint32_t endpoint_buffer_size_frames_ = 0;
// Contains the unique name of the selected endpoint device.
// Note that AudioDeviceDescription::kDefaultDeviceId represents the default
// device role and is not a valid ID as such.
std::string device_id_;
// Pointer to the object that will receive the recorded audio samples.
raw_ptr<AudioInputCallback> sink_ = nullptr;
// Windows Multimedia Device (MMDevice) API interfaces.
// An IMMDevice interface which represents an audio endpoint device.
Microsoft::WRL::ComPtr<IMMDevice> endpoint_device_;
// Windows Audio Session API (WASAPI) interfaces.
// An IAudioClient interface which enables a client to create and initialize
// an audio stream between an audio application and the audio engine.
Microsoft::WRL::ComPtr<IAudioClient> audio_client_;
// Loopback IAudioClient doesn't support event-driven mode, so a separate
// IAudioClient is needed to receive notifications when data is available in
// the buffer. For loopback input |audio_client_| is used to receive data,
// while |audio_render_client_for_loopback_| is used to get notifications
// when a new buffer is ready. See comment in InitializeAudioEngine() for
// details.
Microsoft::WRL::ComPtr<IAudioClient> audio_render_client_for_loopback_;
// The IAudioCaptureClient interface enables a client to read input data
// from a capture endpoint buffer.
Microsoft::WRL::ComPtr<IAudioCaptureClient> audio_capture_client_;
// The ISimpleAudioVolume interface enables a client to control the
// master volume level of an audio session.
// The volume-level is a value in the range 0.0 to 1.0.
// This interface does only work with shared-mode streams.
Microsoft::WRL::ComPtr<ISimpleAudioVolume> simple_audio_volume_;
// The IAudioEndpointVolume allows a client to control the volume level of
// the whole system.
Microsoft::WRL::ComPtr<IAudioEndpointVolume> system_audio_volume_;
// The audio engine will signal this event each time a buffer has been
// recorded.
base::win::ScopedHandle audio_samples_ready_event_;
// This event will be signaled when capturing shall stop.
base::win::ScopedHandle stop_capture_event_;
// Never set it through external API. Only used when |device_id_| ==
// kLoopbackWithMuteDeviceId.
// True, if we have muted the system audio for the stream capturing, and
// indicates that we need to unmute the system audio when stopping capturing.
bool mute_done_ = false;
// Used for the captured audio on the callback thread.
std::unique_ptr<AudioBlockFifo> fifo_;
// If the caller requires resampling (should only be in exceptional cases and
// ideally, never), we support using an AudioConverter.
std::unique_ptr<AudioConverter> converter_;
std::unique_ptr<AudioBus> convert_bus_;
bool imperfect_buffer_size_conversion_ = false;
// Callback to send log messages to registered clients.
AudioManager::LogCallback log_callback_;
// For detecting and reporting glitches.
UINT64 expected_next_device_position_ = 0;
// Tracks error messages from IAudioCaptureClient::GetBuffer.
UINT64 num_timestamp_errors_ = 0;
base::TimeTicks record_start_time_;
base::TimeDelta time_until_first_timestamp_error_;
// Contains the last capture timestamp from IAudioCaptureClient::GetBuffer.
base::TimeTicks last_capture_time_;
// Max and min of difference in time between two successive timestamps.
// |min_timestamp_diff_| should always be larger than or equal to one micro-
// second.
base::TimeDelta max_timestamp_diff_;
base::TimeDelta min_timestamp_diff_;
// Set to true if the selected audio device supports raw audio capture.
// Also added to a UMS histogram.
bool raw_processing_supported_ = false;
// List of supported and active capture effects for the selected device in
// default (normal) audio processing mode.
std::vector<ABI::Windows::Media::Effects::AudioEffectType>
default_effect_types_;
// List of supported and active capture effects for the selected device in
// raw (minimal) audio processing mode. Will be empty in most cases.
std::vector<ABI::Windows::Media::Effects::AudioEffectType> raw_effect_types_;
// Set to true if the absolute difference between a QPC timestamp converted
// into a TimeTick value and a default base::TimeTicks::Now() is larger than
// 500 msec. A true return value should trigger usage of "fake" audio
// timestamps instead of default which are QPC based.
std::optional<bool> use_fake_audio_capture_timestamps_;
SEQUENCE_CHECKER(sequence_checker_);
};
} // namespace media
#endif // MEDIA_AUDIO_WIN_AUDIO_LOW_LATENCY_INPUT_WIN_H_