1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204

ash / webui / recorder_app_ui / mojom / recorder_app.mojom [blame]

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module ash.recorder_app.mojom;

import "chromeos/services/machine_learning/public/mojom/soda.mojom";
import "mojo/public/mojom/base/string16.mojom";
import "mojo/public/mojom/base/uuid.mojom";
import "services/on_device_model/public/mojom/on_device_model.mojom";
import "services/on_device_model/public/mojom/on_device_model_service.mojom";

// Installation state of a SODA model or an on-device model.
enum ModelStateType {
  // The model isn't available on this platform.
  kUnavailable,

  // The model is available but isn't fully installed yet. For SODA, either
  // the language pack or the library is not installed; for on-device models,
  // either the base model or the LoRA layer is not installed.
  kNotInstalled,

  // The model is currently downloading and installing.
  kInstalling,

  // The model is installed and ready to be used.
  kInstalled,

  // There was an error while trying to install the model.
  kError,
};

// The state of a model installation, including download progress.
struct ModelState {
  // Current state of the model installation.
  ModelStateType type;

  // The install progress. Only available when `type` is `kInstalling`.
  // Progress is the weighted average of the combined download percentage,
  // in the range [0, 100].
  uint8? progress;
};

// Interface for listening to SODA or on-device model installation state.
interface ModelStateMonitor {
  // Called when the state of the model changes.
  // The initial state is returned by the AddSodaMonitor/AddModelMonitor call
  // when the `ModelStateMonitor` is added, so this method is only called on
  // subsequent state updates.
  Update(ModelState state);
};

// Information about an on-device model.
struct ModelInfo {
  // Associated Downloadable Content (DLC) ID. The ID is used to request the
  // DLC service to download the corresponding system feature on demand at
  // run time.
  mojo_base.mojom.Uuid model_id;
  // The maximum input token size.
  uint32 input_token_limit;
};

// Information retrieved for a microphone.
struct MicrophoneInfo {
  // Whether the microphone is the system default microphone.
  bool is_default;
  // Whether the microphone is an internal microphone.
  bool is_internal;
};

// Interface for listening to the system quiet mode state.
interface QuietModeMonitor {
  // Called when the system quiet mode changes.
  // The initial state is returned by the `AddQuietModeMonitor` call when the
  // `QuietModeMonitor` is added, so this method is only called on subsequent
  // state updates.
  Update(bool in_quiet_mode);
};

// Information about a SODA language pack.
struct LangPackInfo {
  // Localized language in BCP-47 format, e.g. "en-US".
  string language_code;
  // Language name in the application locale.
  mojo_base.mojom.String16 display_name;
  // Whether the language is supported by summarization and title suggestion.
  bool is_gen_ai_supported;
  // Whether the language is supported by speaker labeling. False if the large
  // SODA model, which supports speaker labels, is unavailable on the device.
  bool is_speaker_label_supported;
};

// Primary interface for the chrome://recorder-app WebUI.
interface PageHandler {
  // Returns model information for the given format feature type.
  // `feature` should be either `kAudioTitle` or `kAudioSummary`.
  GetModelInfo(on_device_model.mojom.FormatFeature feature)
      => (ModelInfo model_info);

  // Binds a new OnDeviceModel interface, if possible, using the model from the
  // DLC specified by `model_id`.
  // The possible IDs are listed in core/platform_handler.ts and correspond to
  // the DLC IDs on the platform side.
  LoadModel(mojo_base.mojom.Uuid model_id,
            pending_receiver<on_device_model.mojom.OnDeviceModel> model)
      => (on_device_model.mojom.LoadModelResult result);

  // Formats the input fields for the given model uuid and feature combination.
  // `fields` holds the input key-value pairs required by that model uuid and
  // feature combination.
  // `result` can be nullopt if the model uuid and feature combination is
  // invalid or not supported.
  //
  // For example, a feature may need "name", "something" and "action" to
  // complete the following string: "I'm {name}, I have {something}, please
  // {action} for me.". Different features may require different sets of
  // key-value pairs.
  FormatModelInput(mojo_base.mojom.Uuid uuid,
                   on_device_model.mojom.FormatFeature feature,
                   map<string, string> fields) => (string? result);

  // Validates whether `text` is safe for the given `safety_feature` and
  // `safety_info`. `safety_info` should be the result returned by
  // `ClassifyTextSafety()` from `on_device_model.mojom.OnDeviceModel`. Returns
  // true if `text` is safe.
  ValidateSafetyResult(on_device_model.mojom.SafetyFeature safety_feature,
                       string text,
                       on_device_model.mojom.SafetyInfo safety_info)
      => (bool is_safe);

  // Adds a new monitor for model state changes.
  // Returns the current state.
  AddModelMonitor(mojo_base.mojom.Uuid model_id,
                  pending_remote<ModelStateMonitor> monitor)
      => (ModelState state);

  // Returns information about the available language packs.
  GetAvailableLangPacks() => (array<LangPackInfo> lang_packs);

  // Adds a new monitor for SODA state changes. `language` is a localized
  // language in BCP-47 format, e.g. "en-US".
  // Returns the current state.
  AddSodaMonitor(string language, pending_remote<ModelStateMonitor> monitor)
      => (ModelState state);

  // Requests installation of the SODA library and the language pack.
  // `language` is a localized language in BCP-47 format, e.g. "en-US".
  InstallSoda(string language) => ();

  // Loads the speech recognizer with the given SodaClient and SodaRecognizer
  // interfaces. `language` is a localized language in BCP-47 format, e.g.
  // "en-US". Returns false when there's an error loading the speech
  // recognizer.
  //
  // SODA should be installed before calling this, otherwise false will be
  // returned.
  //
  // The return value is a boolean instead of
  // chromeos.machine_learning.mojom.LoadModelResult, to avoid exposing
  // implementation details to the JS binding.
  // TODO(pihsun): Move the enum to a separate mojom file and return the full
  // enum, or have our own enum if different errors need to be treated
  // differently in the frontend.
  LoadSpeechRecognizer(
      string language,
      pending_remote<chromeos.machine_learning.mojom.SodaClient> soda_client,
      pending_receiver<chromeos.machine_learning.mojom.SodaRecognizer>
          soda_recognizer) => (bool result);

  // Opens the Chrome feedback dialog for AI and shows `description_template`
  // in the description field.
  OpenAiFeedbackDialog(string description_template);

  // Returns the info of the microphone with the deviceId `source_id`, which is
  // the ID returned from `enumerateDevices()` in JavaScript.
  GetMicrophoneInfo(string source_id) => (MicrophoneInfo? info);

  // Adds a new monitor for system quiet mode changes. Returns the current
  // state.
  AddQuietModeMonitor(pending_remote<QuietModeMonitor> monitor)
      => (bool in_quiet_mode);

  // Sets the system quiet mode.
  SetQuietMode(bool quiet_mode);

  // Gets whether speaker labels can be used.
  // Note that this method only queries whether the current profile has the
  // capabilities needed to use speaker labels and whether the device supports
  // a SODA language pack that supports speaker labels. The caller still needs
  // to ensure that SODA is available and installed before using speaker
  // labels.
  CanUseSpeakerLabel() => (bool supported);

  // Records the user's consent for speaker labels.
  // Since the frontend doesn't have info about resource IDs, the given
  // `consent_description_names` and `consent_confirmation_name` correspond to
  // the names (camelCase, without the IDS_RECORDER prefix) in resources.h, and
  // will be converted back to the corresponding numbered resource IDs.
  RecordSpeakerLabelConsent(bool consent_given,
                            array<string> consent_description_names,
                            string consent_confirmation_name);

  // Gets whether the system audio will be captured by audio loopback.
  // If this returns true, users can use a UI switch to include/exclude system
  // audio via `getDisplayMedia`. Otherwise, microphone echo cancellation
  // should be turned off to capture the system audio.
  CanCaptureSystemAudioWithLoopback() => (bool supported);
};