1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205

ash / webui / media_app_ui / media_app_ui_untrusted.mojom [blame]

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module ash.media_app_ui.mojom;

import "chromeos/ash/components/mantis/mojom/mantis_processor.mojom";
import "chromeos/ash/components/mantis/mojom/mantis_service.mojom";
import "skia/public/mojom/bitmap.mojom";
import "ui/gfx/geometry/mojom/geometry.mojom";

// The result of `CreateMantisUntrustedService`. Being a union, exactly one of
// the fields below is populated: `service` on success, `error` on failure.
union MantisUntrustedServiceResult {
  // Implies success. The remote endpoint through which the untrusted page
  // calls `MantisUntrustedService` methods.
  pending_remote<MantisUntrustedService> service;
  // Implies failure. Carries the `mantis.mojom.InitializeResult` value
  // describing the outcome (declared in the imported Mantis mojom files).
  mantis.mojom.InitializeResult error;
};

// Factory interface to create a new untrusted Mojo connection (for the
// chrome-untrusted:// frame) communicating directly with the browser. Each
// Create* method sets up the connection for one feature (OCR, Mahi, Mantis).
interface UntrustedServiceFactory {
  // Exposes OCR related APIs between the untrusted frame and the browser.
  //   receiver: Receiving end of `OcrUntrustedService`, the interface called
  //             by the untrusted page.
  //   page: Remote for `OcrUntrustedPage`, the interface called by the
  //         browser.
  CreateOcrUntrustedService(pending_receiver<OcrUntrustedService> receiver,
                            pending_remote<OcrUntrustedPage> page);

  // Exposes Mahi related APIs between the untrusted frame and the browser.
  //   receiver: Receiving end of `MahiUntrustedService`, the interface called
  //             by the untrusted page.
  //   page: Remote for `MahiUntrustedPage`, the interface called by the
  //         browser.
  //   file_name: NOTE(review): undocumented; presumably the name of the PDF
  //              file being opened (see
  //              `MahiUntrustedService.OnPdfFileNameUpdated`) - TODO confirm.
  CreateMahiUntrustedService(pending_receiver<MahiUntrustedService> receiver,
                             pending_remote<MahiUntrustedPage> page,
                             string file_name);

  // Checks Mantis feature availability, which can be restricted based on
  // device, user type, and others.
  IsMantisAvailable() => (bool result);

  // Exposes Mantis related APIs between the untrusted frame and the browser.
  // Unlike the OCR and Mahi variants, no receiver is passed in: on success the
  // `MantisUntrustedService` remote is returned inside `result`, and on
  // failure `result` holds the error instead. `page` is the remote the browser
  // uses to call `MantisUntrustedPage`.
  //
  // When called multiple times, the previous connection will be closed.
  // Therefore, it should be called once per WebUIController lifetime, or the
  // previous connection should be closed manually before calling this method
  // a second time (e.g. when opening a new file in the same window).
  CreateMantisUntrustedService(pending_remote<MantisUntrustedPage> page)
      => (MantisUntrustedServiceResult result);
};

// Metadata for a single page of the document, sent to the browser via
// `OcrUntrustedService.PageMetadataUpdated`.
struct PageMetadata {
  // Opaque ID uniquely identifying a page.
  string id;
  // Coordinate bounds of the page within the document.
  // NOTE(review): the units / coordinate space are not stated in this file -
  // confirm against the implementation.
  gfx.mojom.RectF rect;
};

// Interface implemented in the browser process and called by the
// chrome-untrusted:// Media App page. Carries document, page and viewport
// change notifications used for OCR.
interface OcrUntrustedService {
  // Provides metadata about the document. Page metadata is always provided for
  // all pages. Called in the following cases:
  //   a) A page is rotated (only the rotated page is dirty, but potentially all
  //      pages will get updated metadata). This case is slightly special as
  //      PageContentsUpdated() will also be called with the rotated page's ID.
  //      This method will always be called before PageContentsUpdated() for a
  //      rotated page.
  //   b) A page is deleted. The deleted page will be omitted from the provided
  //      page metadata.
  //   c) A deleted page is re-added (undo page delete). The deleted page's ID
  //      will be added back into the page metadata in the correct array
  //      location.
  //   d) Pages are re-ordered. The array will be re-ordered to match.
  //
  //   page_metadata: An array of the new coordinate bounds for each page in the
  //                  document, attached to each page's unique identifier.
  PageMetadataUpdated(array<PageMetadata> page_metadata);

  // Called when a page in the document becomes dirty, i.e. it needs to go
  // through OCR again. This includes when:
  //   a) A page is rotated. This is called in conjunction with
  //      PageMetadataUpdated(), because rotations also cause location
  //      changes.
  //   b) A page is edited. This could be a new annotation (freehand or text),
  //      or an edited form field.
  //
  //   dirty_page_id: A string containing the unique identifier of the dirty
  //                  page.
  PageContentsUpdated(string dirty_page_id);

  // Called whenever the viewport changes, e.g. due to scrolling, zooming,
  // resizing the window, or opening and closing toolbars/panels.
  //   viewport_box: The new bounding box of the viewport.
  //   scale_factor: The ratio between CSS pixels (i.e. ignoring browser and
  //       pinch zoom) and ink units. Larger numbers indicate the document is
  //       more zoomed in.
  ViewportUpdated(gfx.mojom.RectF viewport_box, float scale_factor);
};

// Interface implemented in JavaScript by the chrome-untrusted:// page for Media
// App and called by browser process code. OCR counterpart of
// `OcrUntrustedService`.
interface OcrUntrustedPage {
  // Requests the bitmap information for a page identified by its ID.
  //   requestedPageId: ID of the page whose bitmap is wanted.
  //   page: The page bitmap. The type is nullable; presumably null when no
  //         bitmap can be produced for the ID - TODO confirm.
  // NOTE(review): `requestedPageId` is camelCase while every other parameter
  // in this file is snake_case.
  RequestBitmap(string requestedPageId) => (skia.mojom.BitmapN32? page);

  // If a document is currently loaded, scrolls and zooms to the given viewport.
  // This allows certain AXActions to be implemented such as
  // `kScrollToMakeVisible` or `kSetScrollOffset`.
  //   viewport_box: The bounding box representing the new camera position.
  SetViewport(gfx.mojom.RectF viewport_box);

  // Tells the MediaApp when the OCR service has been turned on or off (either
  // due to the PDF OCR setting being changed, or the accessibility service
  // being toggled on or off).
  //   enabled: True when OCR has been turned on, false when turned off.
  SetPdfOcrEnabled(bool enabled);
};

// Interfaces for Mahi support

// Interface implemented in the browser process and called by the
// chrome-untrusted:// Media App page. Carries PDF-related notifications used
// by Mahi.
interface MahiUntrustedService {
  // Notifies when media app finishes loading a PDF file. This may be used for
  // Mahi refresh availability.
  OnPdfLoaded();

  // Notifies when the file name of the loaded PDF file is updated (e.g. after
  // rename or save as), so that the Mahi UI (e.g. title on the result panel and
  // the refresh banner) can show the fresh file name.
  //   new_name: The updated file name.
  OnPdfFileNameUpdated(string new_name);

  // Notifies when the PDF context menu is shown. This may be used to show the
  // Mahi widget card.
  //   anchor: NOTE(review): undocumented; presumably the bounds the context
  //           menu / widget card is anchored to - TODO confirm, including the
  //           coordinate space.
  //   selected_text: Any currently highlighted / selected text, or an empty
  //                  string if there is no selected text.
  OnPdfContextMenuShow(gfx.mojom.RectF anchor, string selected_text);

  // Notifies when the PDF context menu is hidden. This may be used to hide the
  // Mahi widget card.
  OnPdfContextMenuHide();
};

// Interface implemented in JavaScript by the chrome-untrusted:// page for Media
// App and called by browser process code. Mahi counterpart of
// `MahiUntrustedService`.
interface MahiUntrustedPage {
  // Hides the PDF context menu, if currently shown.
  HidePdfContextMenu();

  // Requests the text content of the PDF file.
  //   limit: NOTE(review): undocumented; presumably an upper bound on the
  //          amount of text returned (unit not stated here) - TODO confirm.
  //   content: The text content. The type is nullable; presumably null when
  //            no content is available - TODO confirm.
  GetPdfContent(int32 limit) => (string? content);
};

// Interface implemented in the browser process and called by the
// chrome-untrusted:// Media App page. This interface is used after Mantis
// initialization is done and Media App is sending image inference requests.
//
// All images and masks are passed as byte arrays containing an encoded image.
// This allows images to be passed in the various codecs (PNG, JPEG, WebP, ...)
// supported by Mantis. Differently encoded images have different headers,
// bodies, and lengths. The request is passed to a sandboxed process in CrOS,
// which checks the image header and uses the appropriate decoder.
interface MantisUntrustedService {
  // Performs image segmentation on the `image` based on the prior `selection`.
  // The `image` and `selection` are byte arrays containing the encoded format
  // of an image (e.g., PNG, JPEG) with the same dimensions.
  // For more info about segmentation, refer to the "Make a sticker" section of
  // http://go/mantis-bl-dd#heading=h.pbhc0hda5vkz.
  // TODO(http://crbug.com/370820145): change the opaque `array<uint8>` type.
  SegmentImage(array<uint8> image, array<uint8> selection)
      => (mantis.mojom.MantisResult result);

  // Fills the image generatively based on the `text` and `seed`. Pass the same
  // `seed` across method calls to get an identical result. The `image` and
  // `mask` are byte arrays containing the encoded format of an image (e.g.,
  // PNG, JPEG) with the same dimensions. For more info about generative fill,
  // refer to the "Reimagine" section of
  // http://go/mantis-bl-dd#heading=h.3xkb38njxpz1.
  // TODO(http://crbug.com/370820145): change the opaque `array<uint8>` type.
  GenerativeFillImage(
      array<uint8> image, array<uint8> mask, string text, uint32 seed)
      => (mantis.mojom.MantisResult result);

  // Inpaints the image based on the `mask` and `seed`. Pass the same `seed`
  // across method calls to get an identical result. The `image` and `mask` are
  // byte arrays containing the encoded format of an image (e.g., PNG, JPEG)
  // with the same dimensions. Inpainting is similar to Generative Fill without
  // a `text` prompt: it tries to fill the image by reconstructing the "lost"
  // part of the `image`. Also refer to the "Expand background" section, which
  // is a special case of inpainting where the mask is at the boundaries of the
  // `image`: http://go/mantis-bl-dd#heading=h.usc6s0rg999w.
  // TODO(http://crbug.com/370820145): change the opaque `array<uint8>` type.
  InpaintImage(array<uint8> image, array<uint8> mask, uint32 seed)
      => (mantis.mojom.MantisResult result);

  // Classifies the `image` for Trust & Safety checking. `image` is a byte
  // array containing an encoded image, as described on this interface. Refer
  // to the "Trust & safety check" subsection of "Entry point" for more details:
  // http://go/mantis-bl-dd#heading=h.dqo8t5p2bpij.
  // TODO(http://crbug.com/370820145): change the opaque `array<uint8>` type.
  ClassifyImageSafety(array<uint8> image)
      => (mantis.mojom.SafetyClassifierVerdict verdict);
};

// Interface implemented in JavaScript by the chrome-untrusted:// page for Media
// App and called by browser process code. Passed to
// `UntrustedServiceFactory.CreateMantisUntrustedService`.
interface MantisUntrustedPage {
  // Reports Mantis creation progress between 0.0 and 1.0 (inclusive), primarily
  // to monitor the first-time DLC download. The associated
  // `MantisUntrustedServiceResult` is returned only upon completion
  // (1.0 progress) or if an error occurs. If the DLC has been previously
  // downloaded, the progress will immediately be reported as `1.0`.
  //   progress: Fraction of creation completed, in the range [0.0, 1.0].
  ReportMantisProgress(double progress);
};