1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
ash / webui / recorder_app_ui / mojom / recorder_app.mojom [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module ash.recorder_app.mojom;
import "chromeos/services/machine_learning/public/mojom/soda.mojom";
import "mojo/public/mojom/base/string16.mojom";
import "mojo/public/mojom/base/uuid.mojom";
import "services/on_device_model/public/mojom/on_device_model.mojom";
import "services/on_device_model/public/mojom/on_device_model_service.mojom";
// Installation state of a SODA model or an on-device model.
enum ModelStateType {
// The model isn't available on this platform.
kUnavailable,
// The model is available but isn't fully installed yet. This can mean either
// that the language pack / library is not installed (for SODA), or that the
// base model / LoRA layer is not installed (for on-device models).
kNotInstalled,
// The model is currently downloading and installing.
kInstalling,
// The model is installed and ready to be used.
kInstalled,
// An error occurred while trying to install the model.
kError,
};
// Snapshot of a model's installation state, including download progress.
struct ModelState {
// Current state of the model installation.
ModelStateType type;
// The install progress. Only available when `type` is `kInstalling`.
// Progress is the weighted average of the combined download percentage,
// in the range [0, 100].
uint8? progress;
};
// Observer interface for SODA or on-device model installation state changes.
interface ModelStateMonitor {
// Called when the state of the model changes.
// The initial state is returned by the `AddSodaMonitor` / `AddModelMonitor`
// call that registers this `ModelStateMonitor`, so this method is only called
// for subsequent state updates.
Update(ModelState state);
};
// Information about an on-device model.
struct ModelInfo {
// ID of the associated Downloadable Content (DLC). The ID is used to ask the
// DLC service to download the corresponding system feature on demand at
// run time.
mojo_base.mojom.Uuid model_id;
// The maximum input size, in tokens.
uint32 input_token_limit;
};
// Information retrieved for a single microphone (see
// `PageHandler.GetMicrophoneInfo`).
struct MicrophoneInfo {
// Whether the microphone is the system default microphone.
bool is_default;
// Whether the microphone is an internal microphone.
bool is_internal;
};
// Observer interface for system quiet mode state changes.
interface QuietModeMonitor {
// Called when the system quiet mode changes.
// The initial state is returned by the `AddQuietModeMonitor` call that
// registers this `QuietModeMonitor`, so this method is only called for
// subsequent state updates.
Update(bool in_quiet_mode);
};
// Information about a SODA language pack.
struct LangPackInfo {
// Localized language in BCP-47 format, e.g. "en-US".
string language_code;
// Language name, in the application locale.
mojo_base.mojom.String16 display_name;
// Whether the language is supported by summarization and title suggestion.
bool is_gen_ai_supported;
// Whether the language is supported by speaker labeling. False if the large
// SODA model, which supports speaker labels, is unavailable on the device.
bool is_speaker_label_supported;
};
// Primary interface for the chrome://recorder-app WebUI.
interface PageHandler {
// Returns the model information for the given format feature type.
// `feature` should be either `kAudioTitle` or `kAudioSummary`.
GetModelInfo(on_device_model.mojom.FormatFeature feature)
=> (ModelInfo model_info);
// Binds a new OnDeviceModel interface, if possible, using the model from the
// DLC specified by `model_id`.
// The possible IDs are listed in core/platform_handler.ts and correspond to
// the DLC IDs on the platform side.
LoadModel(mojo_base.mojom.Uuid model_id,
pending_receiver<on_device_model.mojom.OnDeviceModel> model)
=> (on_device_model.mojom.LoadModelResult result);
// Formats the input fields for the given model uuid and feature combination.
// `fields` holds the input key/value pairs expected by that model uuid and
// feature combination.
// `result` can be nullopt if the model uuid and feature combination is
// invalid or not supported.
//
// For example, a feature may need "name", "something" and "action" to
// complete the following string: "I'm {name}, I have {something}, please
// {action} for me.". Different features may require different sets of
// key/value pairs.
FormatModelInput(mojo_base.mojom.Uuid uuid,
on_device_model.mojom.FormatFeature feature,
map<string, string> fields) => (string? result);
// Validates whether `text` is safe, given `safety_feature` and
// `safety_info`. `safety_info` should be the result returned by
// `ClassifyTextSafety()` from `on_device_model.mojom.OnDeviceModel`. Returns
// true if `text` is safe.
ValidateSafetyResult(on_device_model.mojom.SafetyFeature safety_feature,
string text,
on_device_model.mojom.SafetyInfo safety_info)
=> (bool is_safe);
// Adds a new monitor for state changes of the model given by `model_id`.
// Returns the current state.
AddModelMonitor(mojo_base.mojom.Uuid model_id,
pending_remote<ModelStateMonitor> monitor)
=> (ModelState state);
// Returns information about the available language packs.
GetAvailableLangPacks() => (array<LangPackInfo> lang_packs);
// Adds a new monitor for SODA state changes. `language` is a localized
// language in BCP-47 format, e.g. "en-US".
// Returns the current state.
AddSodaMonitor(string language, pending_remote<ModelStateMonitor> monitor)
=> (ModelState state);
// Requests installation of the SODA library and the language pack.
// `language` is a localized language in BCP-47 format, e.g. "en-US".
InstallSoda(string language) => ();
// Loads the speech recognizer with the given SodaClient and SodaRecognizer
// interfaces. `language` is a localized language in BCP-47 format, e.g.
// "en-US". Returns false when there's an error loading the speech
// recognizer.
//
// SODA should be installed before calling this; otherwise false is
// returned.
//
// The return value is a boolean instead of
// chromeos.machine_learning.mojom.LoadModelResult, to avoid exposing
// implementation details to the JS binding.
// TODO(pihsun): Move the enum to a separate mojom file and return the full
// enum, or have our own enum if different errors need to be treated
// differently in the frontend.
LoadSpeechRecognizer(
string language,
pending_remote<chromeos.machine_learning.mojom.SodaClient> soda_client,
pending_receiver<chromeos.machine_learning.mojom.SodaRecognizer>
soda_recognizer) => (bool result);
// Opens the chrome feedback dialog for AI and shows `description_template`
// in the description field.
OpenAiFeedbackDialog(string description_template);
// Returns the info of the microphone with the deviceId `source_id`, which is
// the ID returned from `enumerateDevices()` in JavaScript.
GetMicrophoneInfo(string source_id) => (MicrophoneInfo? info);
// Adds a new monitor for system quiet mode changes. Returns the current
// state.
AddQuietModeMonitor(pending_remote<QuietModeMonitor> monitor)
=> (bool in_quiet_mode);
// Sets the system quiet mode.
SetQuietMode(bool quiet_mode);
// Gets whether speaker label can be used.
// Note that this method only queries whether the current profile has the
// capabilities needed to use speaker label and whether the device supports a
// SODA language pack that supports speaker label. The caller still needs to
// ensure that SODA is available and installed before using speaker label.
CanUseSpeakerLabel() => (bool supported);
// Records the user's consent for speaker label.
// Since the frontend doesn't have info about resource IDs, the given
// `consent_description_names` and `consent_confirmation_name` correspond to
// the names (camelCase, without the IDS_RECORDER prefix) in resources.h, and
// will be converted back to the corresponding numbered resource IDs.
RecordSpeakerLabelConsent(bool consent_given,
array<string> consent_description_names,
string consent_confirmation_name);
// Gets whether the system audio will be captured by audio loopback.
// If this returns true, users can use a UI switch to include/exclude system
// audio via `getDisplayMedia`. Otherwise, microphone echo cancellation
// should be turned off to capture the system audio.
CanCaptureSystemAudioWithLoopback() => (bool supported);
};