1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// ash/projector/projector_metadata_model.h
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_
#define ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_
#include <memory>
#include <string>
#include <vector>
#include "ash/ash_export.h"
#include "base/time/time.h"
#include "base/values.h"
#include "media/mojo/mojom/speech_recognition_result.h"
namespace ash {
// Status of speech recognition as recorded in the metadata.
enum class ASH_EXPORT RecognitionStatus : int {
  // Recognition had not finished by the time the metadata was written.
  kIncomplete = 0,

  // Recognition had finished by the time the metadata was written to file.
  kComplete = 1,

  // Recognition encountered an error.
  kError = 2,
};
// Version number that can be attached to a ProjectorMetadata via
// ProjectorMetadata::SetMetadataVersionNumber().
// NOTE(review): what differs between versions is not visible in this header.
enum class ASH_EXPORT MetadataVersionNumber : int {
  kUnknown = 0,
  kV1 = 1,
  kV2 = 2,
};
// Base class to describe a metadata item: a piece of text together with the
// start and end times it covers, both measured from the start of the recording
// session. Not copyable; concrete items implement ToJson().
class MetadataItem {
 public:
  MetadataItem(const base::TimeDelta start_time,
               const base::TimeDelta end_time,
               const std::string& text);
  MetadataItem(const MetadataItem&) = delete;
  MetadataItem& operator=(const MetadataItem&) = delete;
  virtual ~MetadataItem();

  // Mutable accessors. Each returns a reference to the stored field, so the
  // caller may modify it in place.
  base::TimeDelta& start_time() { return start_time_; }
  base::TimeDelta& end_time() { return end_time_; }
  std::string& text() { return text_; }

  // Read-only accessors, so the fields can also be inspected through a
  // const MetadataItem (reference or pointer).
  const base::TimeDelta& start_time() const { return start_time_; }
  const base::TimeDelta& end_time() const { return end_time_; }
  const std::string& text() const { return text_; }

  // Return the serialized metadata item. This is used for storage.
  virtual base::Value::Dict ToJson() = 0;

 protected:
  // The start time of the metadata item from the start of the recording
  // session.
  base::TimeDelta start_time_;

  // The end time of the metadata item from the start of the recording session.
  base::TimeDelta end_time_;

  // Text data of the metadata item.
  std::string text_;
};
// A key idea: a MetadataItem whose text may be omitted at construction, in
// which case it defaults to the empty string.
class ASH_EXPORT ProjectorKeyIdea : public MetadataItem {
 public:
  ProjectorKeyIdea(const base::TimeDelta start_time,
                   const base::TimeDelta end_time,
                   const std::string& text = std::string());

  // Neither copy-constructible nor copy-assignable.
  ProjectorKeyIdea(const ProjectorKeyIdea&) = delete;
  ProjectorKeyIdea& operator=(const ProjectorKeyIdea&) = delete;

  ~ProjectorKeyIdea() override;

  // MetadataItem:
  base::Value::Dict ToJson() override;
};
// Class to describe a transcription: a MetadataItem that additionally stores
// a group id and the hypothesis parts of the recognized text.
class ASH_EXPORT ProjectorTranscript : public MetadataItem {
 public:
  // NOTE(review): the meaning of `group_id` is not visible in this header; it
  // is stored in `group_id_` and has no accessor here.
  ProjectorTranscript(
      const base::TimeDelta start_time,
      const base::TimeDelta end_time,
      int group_id,
      const std::string& text,
      const std::vector<media::HypothesisParts>& hypothesis_parts);
  ProjectorTranscript(const ProjectorTranscript&) = delete;
  ProjectorTranscript& operator=(const ProjectorTranscript&) = delete;
  ~ProjectorTranscript() override;

  // MetadataItem:
  base::Value::Dict ToJson() override;

  // Mutable access to the stored hypothesis parts.
  std::vector<media::HypothesisParts>& hypothesis_parts() {
    return hypothesis_parts_;
  }

  // Read-only access, usable through a const ProjectorTranscript.
  const std::vector<media::HypothesisParts>& hypothesis_parts() const {
    return hypothesis_parts_;
  }

 private:
  const int group_id_;
  std::vector<media::HypothesisParts> hypothesis_parts_;
};
// Class to describe a projector metadata of a screencast session, including
// name, transcriptions, key_ideas, etc. Not copyable.
class ASH_EXPORT ProjectorMetadata {
 public:
  ProjectorMetadata();
  ProjectorMetadata(const ProjectorMetadata&) = delete;
  ProjectorMetadata& operator=(const ProjectorMetadata&) = delete;
  ~ProjectorMetadata();

  // Sets the language of the transcript.
  void SetCaptionLanguage(const std::string& language);

  // Adds the transcript to the metadata.
  void AddTranscript(std::unique_ptr<ProjectorTranscript> transcript);

  // Sets the speech recognition status recorded in the metadata.
  void SetSpeechRecognitionStatus(RecognitionStatus status);

  // Sets the version number recorded in the metadata.
  void SetMetadataVersionNumber(MetadataVersionNumber version);

  // Marks a beginning of a key idea. The timing info of the next transcript
  // will be used as the timing of the key idea.
  void MarkKeyIdea();

  // Serializes the metadata for storage.
  std::string Serialize();

  // Returns the number of transcripts.
  size_t GetTranscriptsCount() const { return transcripts_.size(); }

 private:
  // Returns the metadata as a dictionary.
  // NOTE(review): presumably the input to Serialize(); confirm in the .cc.
  base::Value::Dict ToJson();

  // Add sentence transcripts to the metadata.
  void AddSentenceTranscripts(
      std::vector<std::unique_ptr<ProjectorTranscript>> sentence_transcripts);

  std::vector<std::unique_ptr<ProjectorTranscript>> transcripts_;
  std::vector<std::unique_ptr<ProjectorKeyIdea>> key_ideas_;
  std::string caption_language_;

  // True if the user has marked the transcript as a key idea. It will be reset
  // to false when the final recognition result is received and recorded as a
  // key idea.
  bool should_mark_key_idea_ = false;

  // The speech recognition status.
  RecognitionStatus speech_recognition_status_ = RecognitionStatus::kIncomplete;

  // The metadata version number. Defaults to kUnknown so the member never
  // holds an indeterminate value when SetMetadataVersionNumber() has not been
  // called.
  MetadataVersionNumber metadata_version_number_ =
      MetadataVersionNumber::kUnknown;
};
} // namespace ash
#endif // ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_