1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272

media / audio / win / core_audio_util_win.h [blame]

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Utility methods for the Core Audio API on Windows.
// Always ensure that Core Audio is supported before using these methods.
// Use media::CoreAudioUtil::IsSupported() for this purpose.
// Also, all methods must be called on a valid COM thread. This can be done
// by using the base::win::ScopedCOMInitializer helper class.

#ifndef MEDIA_AUDIO_WIN_CORE_AUDIO_UTIL_WIN_H_
#define MEDIA_AUDIO_WIN_CORE_AUDIO_UTIL_WIN_H_

#include <audioclient.h>
#include <mmdeviceapi.h>
#include <stdint.h>
#include <wrl/client.h>

#include <string>

#include "base/memory/raw_ptr.h"
#include "base/time/time.h"
#include "media/audio/audio_device_name.h"
#include "media/base/audio_parameters.h"
#include "media/base/media_export.h"

namespace media {

using ChannelConfig = uint32_t;

class MEDIA_EXPORT CoreAudioUtil {
 public:
  // Helper class which automates casting between WAVEFORMATEX and
  // WAVEFORMATEXTENSIBLE raw pointers using implicit constructors and
  // operator overloading. Note that, no memory is allocated by this utility
  // structure. It only serves as a handle (or a wrapper) of the structure
  // provided to it at construction. The wrapped pointer must therefore
  // outlive any WaveFormatWrapper referring to it.
  class MEDIA_EXPORT WaveFormatWrapper {
   public:
    // Constructors are intentionally non-explicit: implicit conversion from
    // either raw format pointer type is the purpose of this wrapper.
    WaveFormatWrapper(WAVEFORMATEXTENSIBLE* p)
        : ptr_(reinterpret_cast<WAVEFORMATEX*>(p)) {}
    WaveFormatWrapper(WAVEFORMATEX* p) : ptr_(p) {}
    ~WaveFormatWrapper() = default;

    operator WAVEFORMATEX*() const { return ptr_; }
    WAVEFORMATEX* operator->() const { return ptr_; }
    WAVEFORMATEX* get() const { return ptr_; }
    // Returns the wrapped pointer viewed as WAVEFORMATEXTENSIBLE.
    // Defined out-of-line; presumably only valid when IsExtensible() is
    // true — confirm against the implementation.
    WAVEFORMATEXTENSIBLE* GetExtensible() const;

    bool IsExtensible() const;
    bool IsPcm() const;
    bool IsFloat() const;
    // Size in bytes of the wrapped structure (including any extension).
    size_t size() const;

   private:
    raw_ptr<WAVEFORMATEX> ptr_;
  };

  // Static-only utility class: instantiation and copying are disallowed.
  CoreAudioUtil() = delete;
  CoreAudioUtil(const CoreAudioUtil&) = delete;
  CoreAudioUtil& operator=(const CoreAudioUtil&) = delete;

  // Returns true if Windows Core Audio is supported.
  // Always verify that this method returns true before using any of the
  // methods in this class.
  // WARNING: This function must be called once from the main thread before
  // it is safe to call from other threads.
  static bool IsSupported();

  // Converts a COM error into a human-readable string.
  static std::string ErrorToString(HRESULT hresult);

  // Prints/logs all fields of the format structure in |format|.
  // Also supports extended versions (WAVEFORMATEXTENSIBLE).
  static std::string WaveFormatToString(WaveFormatWrapper format);

  // Converts between reference time to base::TimeDelta.
  // One reference-time unit is 100 nanoseconds.
  // Example: double s = ReferenceTimeToTimeDelta(t).InMillisecondsF();
  static base::TimeDelta ReferenceTimeToTimeDelta(REFERENCE_TIME time);

  // Returns AUDCLNT_SHAREMODE_EXCLUSIVE if --enable-exclusive-mode is used
  // as command-line flag and AUDCLNT_SHAREMODE_SHARED otherwise (default).
  static AUDCLNT_SHAREMODE GetShareMode();

  // The Windows Multimedia Device (MMDevice) API enables audio clients to
  // discover audio endpoint devices and determine their capabilities.

  // Number of active audio devices in the specified data flow direction.
  // Set |data_flow| to eAll to retrieve the total number of active audio
  // devices.
  static int NumberOfActiveDevices(EDataFlow data_flow);

  // Creates an IMMDeviceEnumerator interface which provides methods for
  // enumerating audio endpoint devices.
  static Microsoft::WRL::ComPtr<IMMDeviceEnumerator> CreateDeviceEnumerator();

  // Create an endpoint device specified by |device_id| or a default device
  // specified by data-flow direction and role if
  // AudioDeviceDescription::IsDefaultDevice(|device_id|).
  static Microsoft::WRL::ComPtr<IMMDevice>
  CreateDevice(const std::string& device_id, EDataFlow data_flow, ERole role);

  // These functions return the device id of the default or communications
  // input/output device, or an empty string if no such device exists or if the
  // device has been disabled.
  static std::string GetDefaultInputDeviceID();
  static std::string GetDefaultOutputDeviceID();
  static std::string GetCommunicationsInputDeviceID();
  static std::string GetCommunicationsOutputDeviceID();

  // Returns the unique ID and user-friendly name of a given endpoint device.
  // Example: "{0.0.1.00000000}.{8db6020f-18e3-4f25-b6f5-7726c9122574}", and
  //          "Microphone (Realtek High Definition Audio)".
  static HRESULT GetDeviceName(IMMDevice* device, AudioDeviceName* name);

  // Returns the device ID/path of the controller (a.k.a. the physical device)
  // that |device| is connected to.  This ID will be the same for all devices
  // from the same controller so it is useful for doing things like determining
  // whether a set of output and input devices belong to the same controller.
  // The device enumerator is required as well as the device itself since
  // looking at the device topology is required and we need to open up
  // associated devices to determine the controller id.
  // If the ID could not be determined for some reason, an empty string is
  // returned.
  static std::string GetAudioControllerID(IMMDevice* device,
      IMMDeviceEnumerator* enumerator);

  // Accepts an id of an input device and finds a matching output device id.
  // If the associated hardware does not have an audio output device (e.g.
  // a webcam with a mic), an empty string is returned.
  static std::string GetMatchingOutputDeviceID(
      const std::string& input_device_id);

  // Gets the user-friendly name of the endpoint device which is represented
  // by a unique id in |device_id|.
  static std::string GetFriendlyName(const std::string& device_id,
                                     EDataFlow data_flow,
                                     ERole role);

  // Query if the audio device is a rendering device or a capture device.
  static EDataFlow GetDataFlow(IMMDevice* device);

  // The Windows Audio Session API (WASAPI) enables client applications to
  // manage the flow of audio data between the application and an audio endpoint
  // device.

  // Create an IAudioClient instance for a specific device or the default
  // device if AudioDeviceDescription::IsDefaultDevice(device_id).
  static Microsoft::WRL::ComPtr<IAudioClient>
  CreateClient(const std::string& device_id, EDataFlow data_flow, ERole role);
  static Microsoft::WRL::ComPtr<IAudioClient3>
  CreateClient3(const std::string& device_id, EDataFlow data_flow, ERole role);

  // Get the mix format that the audio engine uses internally for processing
  // of shared-mode streams. This format is not necessarily a format that the
  // audio endpoint device supports. The WAVEFORMATEXTENSIBLE structure can
  // specify both the mapping of channels to speakers and the number of bits of
  // precision in each sample. The first member of the WAVEFORMATEXTENSIBLE
  // structure is a WAVEFORMATEX structure and its wFormatTag will be set to
  // WAVE_FORMAT_EXTENSIBLE if the output structure is extended.
  // FormatIsExtensible() can be used to determine if that is the case or not.
  static HRESULT GetSharedModeMixFormat(IAudioClient* client,
                                        WAVEFORMATEXTENSIBLE* format);

  // Returns true if the specified |client| supports the format in |format|
  // for the given |share_mode| (shared or exclusive).
  static bool IsFormatSupported(IAudioClient* client,
                                AUDCLNT_SHAREMODE share_mode,
                                WaveFormatWrapper format);

  // Returns true if the specified |channel_layout| is supported for the
  // default IMMDevice where flow direction and role are defined by |data_flow|
  // and |role|. If this method returns true for a certain channel layout, it
  // means that SharedModeInitialize() will succeed using a format based on
  // the preferred format where the channel layout has been modified.
  static bool IsChannelLayoutSupported(const std::string& device_id,
                                       EDataFlow data_flow,
                                       ERole role,
                                       ChannelLayout channel_layout);

  // For a shared-mode stream, the audio engine periodically processes the
  // data in the endpoint buffer at the period obtained in |device_period|.
  // For an exclusive mode stream, |device_period| corresponds to the minimum
  // time interval between successive processing by the endpoint device.
  // This period plus the stream latency between the buffer and endpoint device
  // represents the minimum possible latency that an audio application can
  // achieve. The time in |device_period| is expressed in 100-nanosecond units.
  static HRESULT GetDevicePeriod(IAudioClient* client,
                                 AUDCLNT_SHAREMODE share_mode,
                                 REFERENCE_TIME* device_period);

  // Get the preferred audio parameters for the given |device_id|. The acquired
  // values should only be utilized for shared mode streams since there are no
  // preferred settings for an exclusive mode stream.
  static HRESULT GetPreferredAudioParameters(const std::string& device_id,
                                             bool is_output_device,
                                             AudioParameters* params,
                                             bool is_offload_stream = false);

  // Retrieves an integer mask which corresponds to the channel layout the
  // audio engine uses for its internal processing/mixing of shared-mode
  // streams. This mask indicates which channels are present in the multi-
  // channel stream. The least significant bit corresponds with the Front Left
  // speaker, the next least significant bit corresponds to the Front Right
  // speaker, and so on, continuing in the order defined in KsMedia.h.
  // See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
  // for more details.
  static ChannelConfig GetChannelConfig(const std::string& device_id,
                                        EDataFlow data_flow);

  // After activating an IAudioClient interface on an audio endpoint device,
  // the client must initialize it once, and only once, to initialize the audio
  // stream between the client and the device. In shared mode, the client
  // connects indirectly through the audio engine which does the mixing.
  // In exclusive mode, the client connects directly to the audio hardware.
  // If a valid event is provided in |event_handle|, the client will be
  // initialized for event-driven buffer handling. If |event_handle| is set to
  // NULL, event-driven buffer handling is not utilized.
  // If |enable_audio_offload| is true, the buffer will be set to a larger one
  // as required by audio offloading feature.
  // This function will initialize the audio client as part of the default
  // audio session if NULL is passed for |session_guid|, otherwise the client
  // will be associated with the specified session.
  static HRESULT SharedModeInitialize(IAudioClient* client,
                                      WaveFormatWrapper format,
                                      HANDLE event_handle,
                                      uint32_t requested_buffer_size,
                                      uint32_t* endpoint_buffer_size,
                                      const GUID* session_guid,
                                      bool enable_audio_offload = false);

  // Create an IAudioRenderClient client for an existing IAudioClient given by
  // |client|. The IAudioRenderClient interface enables a client to write
  // output data to a rendering endpoint buffer.
  static Microsoft::WRL::ComPtr<IAudioRenderClient> CreateRenderClient(
      IAudioClient* client);

  // Create an IAudioCaptureClient client for an existing IAudioClient given by
  // |client|. The IAudioCaptureClient interface enables a client to read
  // input data from a capture endpoint buffer.
  static Microsoft::WRL::ComPtr<IAudioCaptureClient> CreateCaptureClient(
      IAudioClient* client);

  // Fills up the endpoint rendering buffer with silence for an existing
  // IAudioClient given by |client| and a corresponding IAudioRenderClient
  // given by |render_client|.
  static bool FillRenderEndpointBufferWithSilence(
      IAudioClient* client,
      IAudioRenderClient* render_client);

  // Enable audio offload on the client if supported. Returns true only when
  // the client supports audio offload, and at the same time the offload pin
  // for client's output is selected. For more details of audio offload, refer
  // to:
  // https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/hardware-offloaded-audio-processing
  static bool EnableOffloadForClient(IAudioClient* client);

  // Check if audio offload can be enabled for client.
  static bool IsAudioOffloadSupported(IAudioClient* client);
};

// The special audio session identifier we use when opening up the default
// communication device.  Using it causes a separate volume control to be
// shown in the system's volume mixer, which makes it easier to control
// ducking and to visually observe its behavior.
// Use with |SharedModeInitialize|.
extern const GUID kCommunicationsSessionId;

}  // namespace media

#endif  // MEDIA_AUDIO_WIN_CORE_AUDIO_UTIL_WIN_H_