1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97

content / public / browser / speech_recognition_manager.h [blame]

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_

#include <optional>

#include "base/functional/callback.h"
#include "content/common/content_export.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "media/mojo/mojom/speech_recognition_audio_forwarder.mojom.h"
#include "media/mojo/mojom/speech_recognizer.mojom-forward.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h"

namespace content {

// Forward declarations. This header only names these types in comments and in
// the signatures of function declarations (no function bodies that use them
// are defined here), so their full definitions are not needed at this point.
struct SpeechRecognitionAudioForwarderConfig;
class SpeechRecognitionEventListener;
struct SpeechRecognitionSessionConfig;
struct SpeechRecognitionSessionContext;

// The SpeechRecognitionManager (SRM) is a singleton class that handles SR
// functionalities within Chrome. Everyone that needs to perform SR should
// interface exclusively with the SRM, receiving events through the callback
// interface SpeechRecognitionEventListener.
// Since many different sources can use SR at different times (some overlapping
// is allowed while waiting for results), the SRM has the further responsibility
// of handling them separately and reliably (also taking into account call
// sequences that might not make sense, e.g., two subsequent AbortSession
// calls).
// In this sense a session, within the SRM, models the ongoing evolution of a
// SR request from the viewpoint of the end-user, abstracting all the concrete
// operations that must be carried out, that will be handled by inner classes.
class SpeechRecognitionManager {
 public:
  // Session id value reserved to denote an invalid session.
  // NOTE(review): presumably never returned by a successful CreateSession()
  // call -- confirm against the implementation.
  enum { kSessionIDInvalid = 0 };

  // Returns the singleton instance.
  static CONTENT_EXPORT SpeechRecognitionManager* GetInstance();

  // Singleton manager setter useful for tests.
  static CONTENT_EXPORT void SetManagerForTesting(
      SpeechRecognitionManager* manager);

  // Creates a new recognition session described by |config|.
  // NOTE(review): the returned int is presumably the id of the new session,
  // i.e. the value accepted as |session_id| by the methods below -- confirm.
  virtual int CreateSession(const SpeechRecognitionSessionConfig& config) = 0;

  // Creates a new recognition session. If the session mojo remotes are not
  // null, speech recognition session will be managed by the speech recognition
  // service, otherwise the session will be managed by the browser. If the audio
  // forwarder config is not null, the audio forwarder will be used to receive
  // audio, otherwise the audio will be received from the microphone.
  virtual int CreateSession(
      const SpeechRecognitionSessionConfig& config,
      mojo::PendingReceiver<media::mojom::SpeechRecognitionSession>
          session_receiver,
      mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient>
          client_remote,
      std::optional<SpeechRecognitionAudioForwarderConfig>
          audio_forwarder_config) = 0;

  // Starts/restarts recognition for an existing session, after performing a
  // preliminary check on the delegate (CheckRecognitionIsAllowed).
  virtual void StartSession(int session_id) = 0;

  // Aborts recognition for an existing session, without providing any result.
  virtual void AbortSession(int session_id) = 0;

  // Aborts all sessions for a given RenderFrame, without providing any result.
  virtual void AbortAllSessionsForRenderFrame(int render_process_id,
                                              int render_frame_id) = 0;

  // Stops audio capture for an existing session. The audio captured before the
  // call will be processed, possibly ending up with a result.
  virtual void StopAudioCaptureForSession(int session_id) = 0;

  // Retrieves the configuration of a session, as provided by the caller
  // upon CreateSession.
  virtual const SpeechRecognitionSessionConfig& GetSessionConfig(
      int session_id) = 0;

  // Retrieves the context associated to a session.
  virtual SpeechRecognitionSessionContext GetSessionContext(int session_id) = 0;

  // NOTE(review): presumably reports whether a session created from |config|
  // would use on-device speech recognition -- the criteria are not visible in
  // this header; confirm against the implementation.
  virtual bool UseOnDeviceSpeechRecognition(
      const SpeechRecognitionSessionConfig& config) = 0;

 protected:
  // Protected so that code outside the class hierarchy cannot destroy a
  // manager through a SpeechRecognitionManager pointer.
  virtual ~SpeechRecognitionManager() = default;

 private:
  // Declared here and defined out of line.
  // NOTE(review): presumably holds the instance installed through
  // SetManagerForTesting() -- confirm in the implementation file.
  static SpeechRecognitionManager* manager_for_tests_;
};

}  // namespace content

#endif  // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_