media / mojo / mojom / speech_recognition_service.mojom [blame]

// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module media.mojom;

import "media/mojo/mojom/audio_data.mojom";
import "media/mojo/mojom/audio_parameters.mojom";
import "media/mojo/mojom/audio_stream_factory.mojom";
import "media/mojo/mojom/media_types.mojom";
import "media/mojo/mojom/speech_recognition.mojom";
import "mojo/public/mojom/base/file_path.mojom";
import "sandbox/policy/mojom/context.mojom";
import "sandbox/policy/mojom/sandbox.mojom";

// Like a SpeechRecognitionContext, except that it binds AudioSourceFetcher
// speech recognizer objects rather than SpeechRecognitionRecognizer objects.
// In Chrome OS features like Dictation and Projector, every
// OnDeviceSpeechRecognizer can own a
// Remote<AudioSourceSpeechRecognitionContext>.
//
// The interface and its only method are both tagged with the browser sandbox
// context (sandbox.mojom.Context.kBrowser).
// NOTE(review): see the Mojom IDL attribute documentation for exactly what
// [RequireContext] / [AllowedContext] make the bindings generator enforce.
[RequireContext=sandbox.mojom.Context.kBrowser]
interface AudioSourceSpeechRecognitionContext {
  // Prepares microphone audio to be captured from within the
  // SpeechRecognitionService process, with results passed back to the
  // SpeechRecognitionRecognizerClient.
  //
  // |fetcher_receiver| is the receiving end of an AudioSourceFetcher pipe;
  // Start()/Stop() calls made on the matching remote are served by whatever
  // gets bound to this receiver.
  // |client| is the remote on which recognition results are delivered.
  // |options| configures the recognition session. SpeechRecognitionOptions is
  // not declared in this file (presumably it comes from
  // speech_recognition.mojom -- TODO confirm).
  //
  // Replies with |is_multichannel_supported|. Presumably this reports whether
  // the bound recognizer can accept multichannel audio -- TODO confirm against
  // the implementation.
  [AllowedContext=sandbox.mojom.Context.kBrowser]
  BindAudioSourceFetcher(
      pending_receiver<AudioSourceFetcher> fetcher_receiver,
      pending_remote<SpeechRecognitionRecognizerClient> client,
      SpeechRecognitionOptions options)
      => (bool is_multichannel_supported);
};

// The main interface to a speech recognition service process.
// Used by the browser to issue top-level control requests to the service,
// acquired during process launch.
//
// The attributes below tag the interface with the kSpeechRecognition service
// sandbox and the browser context.
// NOTE(review): see the Mojom IDL attribute documentation for the exact
// semantics of [ServiceSandbox] and [RequireContext].
[ServiceSandbox=sandbox.mojom.Sandbox.kSpeechRecognition,
 RequireContext=sandbox.mojom.Context.kBrowser]
interface SpeechRecognitionService {
  // Binds a new SpeechRecognitionContext hosted in the service process.
  // |context| is the receiver end to bind; the caller uses the matching
  // remote to talk to the new context.
  BindSpeechRecognitionContext(
      pending_receiver<SpeechRecognitionContext> context);

  // Binds a new AudioSourceSpeechRecognitionContext hosted in the service
  // process. |context| is the receiver end to bind.
  [AllowedContext=sandbox.mojom.Context.kBrowser]
  BindAudioSourceSpeechRecognitionContext(
      pending_receiver<AudioSourceSpeechRecognitionContext> context);

  // Sets the file path to the Speech On-Device API (SODA) binary, the config
  // file paths for the language packs, and the primary language name.
  //
  // |binary_path| is the path to the SODA binary.
  // |config_paths| maps a string key to the config file path of a language
  // pack. Presumably the key is the language name -- TODO confirm against
  // callers.
  // |primary_language_name| names the primary language. Presumably it matches
  // one of the keys of |config_paths| -- TODO confirm.
  SetSodaPaths(mojo_base.mojom.FilePath binary_path,
              map<string, mojo_base.mojom.FilePath> config_paths,
              string primary_language_name);

  // Sets various params to the Speech On-Device API (SODA) binary.
  // |mask_offensive_words| is currently the only parameter. Presumably, when
  // true, offensive words are masked in recognition results -- TODO confirm.
  SetSodaParams(bool mask_offensive_words);

  // Sets the config file paths for the language packs. Used to notify the
  // service when new language packs are installed.
  // |config_paths| has the same type as the argument of the same name in
  // SetSodaPaths().
  // NOTE(review): whether this replaces or merges with previously supplied
  // paths is not visible here -- confirm in the implementation.
  SetSodaConfigPaths(map<string, mojo_base.mojom.FilePath> config_paths);
};

// The interface used to start and stop fetching audio from the microphone
// for speech recognition. Provided by the browser to the speech recognition
// service.
//
// The receiver end of this interface is passed in through
// AudioSourceSpeechRecognitionContext.BindAudioSourceFetcher.
// NOTE(review): "provided by the browser" presumably refers to the browser
// handing that receiver to the service, which would mean the implementation
// lives on the service side -- confirm and reword if so.
//
// The interface and Start() are tagged with the kPrivilegedUtility context.
// See the Mojom IDL attribute documentation for the exact semantics of
// [RequireContext] / [AllowedContext].
[RequireContext=sandbox.mojom.Context.kPrivilegedUtility]
interface AudioSourceFetcher {
  // Begin fetching audio. Results will be returned using the
  // Remote<SpeechRecognitionRecognizerClient> which was passed in
  // BindAudioSourceFetcher.
  //
  // |factory| is a remote to an AudioStreamFactory. Presumably it is used to
  // create the input stream the audio is read from -- TODO confirm.
  // |device_id| presumably identifies the audio input device to capture
  // from -- TODO confirm.
  // |audio_parameters| carries the media.mojom.AudioParameters for the
  // capture.
  [AllowedContext=sandbox.mojom.Context.kPrivilegedUtility]
  Start(pending_remote<AudioStreamFactory> factory, string device_id,
        media.mojom.AudioParameters audio_parameters);

  // Stops audio fetching.
  // NOTE(review): whether Start() may be called again after Stop() is not
  // specified here -- confirm in the implementation.
  Stop();
};