// ash/projector/projector_metadata_model.h

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_
#define ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_

#include <memory>
#include <string>
#include <vector>

#include "ash/ash_export.h"
#include "base/time/time.h"
#include "base/values.h"
#include "media/mojo/mojom/speech_recognition_result.h"

namespace ash {

// State of speech recognition recorded alongside the metadata. Every
// enumerator carries an explicit integer value.
enum class ASH_EXPORT RecognitionStatus : int {
  // Recognition had not finished by the time the metadata was written.
  kIncomplete = 0,
  // Recognition had finished by the time the metadata was written to file.
  kComplete = 1,
  // Recognition ran into an error.
  kError = 2,
};

// Version number attached to a ProjectorMetadata instance through
// ProjectorMetadata::SetMetadataVersionNumber().
// NOTE(review): presumably identifies the layout of the serialized metadata;
// the differences between kV1 and kV2 are not visible in this header --
// confirm against the implementation file.
enum class ASH_EXPORT MetadataVersionNumber : int {
  // No version has been specified.
  kUnknown = 0,
  kV1 = 1,
  kV2 = 2,
};

// Base class to describe a metadata item.
class MetadataItem {
 public:
  MetadataItem(const base::TimeDelta start_time,
               const base::TimeDelta end_time,
               const std::string& text);
  MetadataItem(const MetadataItem&) = delete;
  MetadataItem& operator=(const MetadataItem&) = delete;
  virtual ~MetadataItem();

  base::TimeDelta& start_time() { return start_time_; }

  base::TimeDelta& end_time() { return end_time_; }

  std::string& text() { return text_; }

  // Return the serialized metadata item. This is used for storage.
  virtual base::Value::Dict ToJson() = 0;

 protected:
  // The start time of the metadata item from the start of the recording
  // session.
  base::TimeDelta start_time_;
  // The end time of the metadata item from the start of the recording session.
  base::TimeDelta end_time_;
  // Text data of the metadata item.
  std::string text_;
};

// A key idea: a MetadataItem whose text is optional and defaults to an empty
// string when the caller supplies only the timing. Not copyable.
class ASH_EXPORT ProjectorKeyIdea : public MetadataItem {
 public:
  ProjectorKeyIdea(base::TimeDelta start_time,
                   base::TimeDelta end_time,
                   const std::string& text = std::string());
  ProjectorKeyIdea(const ProjectorKeyIdea&) = delete;
  ProjectorKeyIdea& operator=(const ProjectorKeyIdea&) = delete;
  ~ProjectorKeyIdea() override;

  // MetadataItem:
  // Returns the serialized form of this key idea.
  base::Value::Dict ToJson() override;
};

// Class to describe a transcription: a MetadataItem that additionally carries
// a group id and the hypothesis parts the text was built from. Not copyable.
class ASH_EXPORT ProjectorTranscript : public MetadataItem {
 public:
  // `hypothesis_parts` is taken by const reference; the transcript keeps its
  // own vector of parts.
  ProjectorTranscript(
      base::TimeDelta start_time,
      base::TimeDelta end_time,
      int group_id,
      const std::string& text,
      const std::vector<media::HypothesisParts>& hypothesis_parts);
  ProjectorTranscript(const ProjectorTranscript&) = delete;
  ProjectorTranscript& operator=(const ProjectorTranscript&) = delete;
  ~ProjectorTranscript() override;

  // MetadataItem:
  // Returns the serialized form of this transcript.
  base::Value::Dict ToJson() override;

  // Access to the stored hypothesis parts. The non-const overload allows
  // in-place modification; the const overload allows reading through a const
  // transcript.
  std::vector<media::HypothesisParts>& hypothesis_parts() {
    return hypothesis_parts_;
  }
  const std::vector<media::HypothesisParts>& hypothesis_parts() const {
    return hypothesis_parts_;
  }

 private:
  // Group identifier supplied at construction; immutable afterwards.
  // NOTE(review): what a "group" denotes is not visible in this header --
  // confirm against the callers.
  const int group_id_;
  // Hypothesis parts associated with this transcript.
  std::vector<media::HypothesisParts> hypothesis_parts_;
};

// Class to describe the projector metadata of a screencast session, including
// the caption language, transcriptions, key ideas, the speech recognition
// status and the metadata version number. Not copyable.
class ASH_EXPORT ProjectorMetadata {
 public:
  ProjectorMetadata();
  ProjectorMetadata(const ProjectorMetadata&) = delete;
  ProjectorMetadata& operator=(const ProjectorMetadata&) = delete;
  ~ProjectorMetadata();

  // Sets the language of the transcript.
  void SetCaptionLanguage(const std::string& language);

  // Adds the transcript to the metadata. Takes ownership of `transcript`.
  void AddTranscript(std::unique_ptr<ProjectorTranscript> transcript);

  // Records the current speech recognition status (incomplete, complete or
  // error).
  void SetSpeechRecognitionStatus(RecognitionStatus status);

  // Sets the version number of the metadata.
  void SetMetadataVersionNumber(MetadataVersionNumber version);

  // Marks a beginning of a key idea. The timing info of the next transcript
  // will be used as the timing of the key idea.
  void MarkKeyIdea();

  // Serializes the metadata for storage.
  std::string Serialize();

  // Returns the number of transcripts.
  size_t GetTranscriptsCount() const { return transcripts_.size(); }

 private:
  // Builds the dictionary representation of the metadata.
  // NOTE(review): presumably the input to Serialize() -- confirm in the
  // implementation file.
  base::Value::Dict ToJson();

  // Add sentence transcripts to the metadata.
  void AddSentenceTranscripts(
      std::vector<std::unique_ptr<ProjectorTranscript>> sentence_transcripts);

  // Transcripts and key ideas owned by this metadata.
  std::vector<std::unique_ptr<ProjectorTranscript>> transcripts_;
  std::vector<std::unique_ptr<ProjectorKeyIdea>> key_ideas_;

  // Language of the transcript, as set by SetCaptionLanguage().
  std::string caption_language_;

  // True if user mark the transcript as a key idea. It will be reset to false
  // when the final recognition result is received and recorded as a key idea.
  bool should_mark_key_idea_ = false;

  // The speech recognition status.
  RecognitionStatus speech_recognition_status_ = RecognitionStatus::kIncomplete;

  // The metadata version number. Defaults to kUnknown so the member never
  // holds an indeterminate value when SetMetadataVersionNumber() has not been
  // called.
  MetadataVersionNumber metadata_version_number_ =
      MetadataVersionNumber::kUnknown;
};

}  // namespace ash
#endif  // ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_