1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282

content / public / browser / tts_controller.h [blame]

// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_PUBLIC_BROWSER_TTS_CONTROLLER_H_
#define CONTENT_PUBLIC_BROWSER_TTS_CONTROLLER_H_

#include <memory>
#include <queue>
#include <set>
#include <string>
#include <vector>

#include "base/functional/callback_forward.h"
#include "base/memory/singleton.h"
#include "base/observer_list_types.h"
#include "base/scoped_observation_traits.h"
#include "build/chromeos_buildflags.h"
#include "content/common/content_export.h"
#include "content/public/browser/tts_utterance.h"
#include "url/gurl.h"

namespace content {
class BrowserContext;
class TtsPlatform;

// Information about one voice.
struct CONTENT_EXPORT VoiceData {
  VoiceData();
  VoiceData(const VoiceData& other);
  ~VoiceData();

  std::string name;
  std::string lang;
  std::string engine_id;
  std::set<TtsEventType> events;

  // If true, the synthesis engine is a remote network resource.
  // It may be higher latency and may incur bandwidth costs.
  bool remote;

  // If true, this is implemented by this platform's subclass of
  // TtsPlatformImpl. If false, this is implemented in a content embedder.
  bool native;
  std::string native_voice_identifier;

  // If true, the voice is provided by a remote TTS engine.
  bool from_remote_tts_engine = false;
};

enum class LanguageInstallStatus {
  NOT_INSTALLED,
  INSTALLING,
  INSTALLED,
  FAILED,
  UNKNOWN
};

// Interface that delegates TTS requests to engines in content embedders.
class CONTENT_EXPORT TtsEngineDelegate {
 public:
  virtual ~TtsEngineDelegate() {}

  // Return a list of all available voices registered. |source_url| will be used
  // for policy decisions by engines to determine which voices to return.
  virtual void GetVoices(BrowserContext* browser_context,
                         const GURL& source_url,
                         std::vector<VoiceData>* out_voices) = 0;

  // Speak the given utterance by sending an event to the given TTS engine.
  virtual void Speak(TtsUtterance* utterance, const VoiceData& voice) = 0;

  // Stop speaking the given utterance by sending an event to the target
  // associated with this utterance.
  virtual void Stop(TtsUtterance* utterance) = 0;
  // Pause in the middle of speaking this utterance.
  virtual void Pause(TtsUtterance* utterance) = 0;
  // Resume speaking this utterance.
  virtual void Resume(TtsUtterance* utterance) = 0;
  // Sends an InstallLanguageRequest event to extensions.
  virtual void InstallLanguageRequest(BrowserContext* browser_context,
                                      const std::string& lang,
                                      const std::string& client_id,
                                      int source) = 0;
  // Load the built-in TTS engine.
  virtual void LoadBuiltInTtsEngine(BrowserContext* browser_context) = 0;

  // Returns whether the built in engine is initialized.
  virtual bool IsBuiltInTtsEngineInitialized(
      BrowserContext* browser_context) = 0;
};

// Interface that delegates TTS requests to a remote engine from another browser
// process.
class CONTENT_EXPORT RemoteTtsEngineDelegate {
 public:
  virtual ~RemoteTtsEngineDelegate() = default;

  // Returns a list of voices from remote tts engine for |browser_context|.
  virtual void GetVoices(BrowserContext* browser_context,
                         std::vector<VoiceData>* out_voices) = 0;

  // Requests the given remote TTS engine to speak |utterance| with |voice|.
  virtual void Speak(TtsUtterance* utterance, const VoiceData& voice) = 0;

  // Requests the remote TTS engine associated with |utterance| to stop
  // speaking the |utterance|.
  virtual void Stop(TtsUtterance* utterance) = 0;

  // Requests the remote TTS engine associated with |utterance| to pause
  // speaking the |utterance|.
  virtual void Pause(TtsUtterance* utterance) = 0;

  // Requests the remote TTS engine associated with |utterance| to resume
  // speaking the |utterance|.
  virtual void Resume(TtsUtterance* utterance) = 0;
};

// Class that wants to be notified when the set of
// voices has changed.
class CONTENT_EXPORT VoicesChangedDelegate : public base::CheckedObserver {
 public:
  virtual void OnVoicesChanged() = 0;
};

// Class that wants to be notified when a language status changes.
class CONTENT_EXPORT UpdateLanguageStatusDelegate
    : public base::CheckedObserver {
 public:
  virtual void OnUpdateLanguageStatus(const std::string& lang,
                                      LanguageInstallStatus install_status,
                                      const std::string& error) = 0;
};

// Singleton class that manages text-to-speech for all TTS engines and
// APIs, maintaining a queue of pending utterances and keeping
// track of all state.
class CONTENT_EXPORT TtsController {
 public:
  // Get the single instance of this class.
  static TtsController* GetInstance();

  static void SkipAddNetworkChangeObserverForTests(bool enabled);

  // Returns true if we're currently speaking an utterance.
  virtual bool IsSpeaking() = 0;

  // Speak the given utterance. If the utterance's should_flush_queue flag is
  // true, clears the speech queue including the currently speaking utterance
  // (if one exists), and starts processing the speech queue by speaking the new
  // utterance immediately. Otherwise, enqueues the new utterance and triggers
  // continued processing of the speech queue.
  virtual void SpeakOrEnqueue(std::unique_ptr<TtsUtterance> utterance) = 0;

  // Stop all utterances and flush the queue. Implies leaving pause mode
  // as well.
  virtual void Stop() = 0;

  // Stops the current utterance if it matches the given |source_url|.
  virtual void Stop(const GURL& source_url) = 0;

  // Pause the speech queue. Some engines may support pausing in the middle
  // of an utterance.
  virtual void Pause() = 0;

  // Resume speaking.
  virtual void Resume() = 0;

  // Called by the content embedder when the status of a voice for a language
  // has changed.
  virtual void UpdateLanguageStatus(const std::string& lang,
                                    LanguageInstallStatus install_status,
                                    const std::string& error) = 0;

  // Add a delegate that wants to be notified when the set of voices changes.
  virtual void AddUpdateLanguageStatusDelegate(
      UpdateLanguageStatusDelegate* delegate) = 0;

  // Remove delegate that wants to be notified when the set of voices changes.
  virtual void RemoveUpdateLanguageStatusDelegate(
      UpdateLanguageStatusDelegate* delegate) = 0;

  // Requests to install a new voice for the language. For example, Reading Mode
  // manages voice installation by sending an InstallLanguageRequest event to
  // extensions, who can subscribe to this event and attempt to download a voice
  // for this language.
  // The "source" param can be defined by delegates and embedders. For example,
  // Reading Mode uses the tts_engine_events::TtsClientSource
  virtual void InstallLanguageRequest(BrowserContext* browser_context,
                                      const std::string& lang,
                                      const std::string& client_id,
                                      int source) = 0;

  // Handle events received from the speech engine. Events are forwarded to
  // the callback function, and in addition, completion and error events
  // trigger finishing the current utterance and starting the next one, if
  // any. If the |char_index| or |length| are not available, the speech engine
  // should pass -1.
  virtual void OnTtsEvent(int utterance_id,
                          TtsEventType event_type,
                          int char_index,
                          int length,
                          const std::string& error_message) = 0;

  // Called when the utterance with |utterance_id| becomes invalid.
  // For example, when the WebContents associated with the utterance
  // living in a standalone browser is destroyed, the utterance becomes
  // invalid and should not be spoken.
  virtual void OnTtsUtteranceBecameInvalid(int utterance_id) = 0;

  // Return a list of all available voices, including the native voice,
  // if supported, and all voices registered by engines. |source_url|
  // will be used for policy decisions by engines to determine which
  // voices to return.
  virtual void GetVoices(BrowserContext* browser_context,
                         const GURL& source_url,
                         std::vector<VoiceData>* out_voices) = 0;

  // Called by the content embedder or platform implementation when the
  // list of voices may have changed and should be re-queried.
  virtual void VoicesChanged() = 0;

  // Add a delegate that wants to be notified when the set of voices changes.
  virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;

  // Remove delegate that wants to be notified when the set of voices changes.
  virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;

  // Remove delegate that wants to be notified when an utterance fires an event.
  // Note: this cancels speech from any utterance with this delegate, and
  // removes any utterances with this delegate from the queue.
  virtual void RemoveUtteranceEventDelegate(
      UtteranceEventDelegate* delegate) = 0;

  // Set the delegate that processes TTS requests with engines in a content
  // embedder.
  virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;

  // Sets the delegate that processes TTS requests with the remote enigne.
  virtual void SetRemoteTtsEngineDelegate(
      RemoteTtsEngineDelegate* delegate) = 0;

  // Get the delegate that processes TTS requests with engines in a content
  // embedder.
  virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;

  // Triggers the TtsPlatform to update its list of voices and relay that update
  // through VoicesChanged.
  virtual void RefreshVoices() = 0;

  // Visible for testing.
  virtual void SetTtsPlatform(TtsPlatform* tts_platform) = 0;
  virtual int QueueSize() = 0;

  virtual void StripSSML(
      const std::string& utterance,
      base::OnceCallback<void(const std::string&)> callback) = 0;

 protected:
  virtual ~TtsController() {}
};

}  // namespace content

namespace base {

template <>
struct ScopedObservationTraits<content::TtsController,
                               content::VoicesChangedDelegate> {
  static void AddObserver(content::TtsController* source,
                          content::VoicesChangedDelegate* observer) {
    source->AddVoicesChangedDelegate(observer);
  }
  static void RemoveObserver(content::TtsController* source,
                             content::VoicesChangedDelegate* observer) {
    source->RemoveVoicesChangedDelegate(observer);
  }
};

}  // namespace base

#endif  // CONTENT_PUBLIC_BROWSER_TTS_CONTROLLER_H_