1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
content / browser / speech / soda_speech_recognition_engine_impl.h [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_
#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "media/mojo/mojom/audio_data.mojom.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"
namespace content {
// Forward declaration: this header only refers to the delegate through a
// pointer parameter, so the full definition is not needed here.
class SpeechRecognitionManagerDelegate;
// This is the on-device implementation for `SpeechRecognitionEngine`.
//
// This class establishes a connection to the on-device speech recognition
// service using the content::SpeechRecognitionManagerDelegate. It will bind to
// the speech::CrosSpeechRecognitionService in ChromeOS-Ash. On LaCrOS, it will
// forward to Ash. On other platforms, it will bind to the
// speech::ChromeSpeechRecognitionService if the on-device speech recognition
// service is available. This class will be in the speech recognition available
// state when successfully bound.
//
// Besides implementing the engine interface, the class is also the
// media::mojom::SpeechRecognitionRecognizerClient, i.e. it receives the
// recognition events, errors and stop notifications declared below.
class CONTENT_EXPORT SodaSpeechRecognitionEngineImpl
: public SpeechRecognitionEngine,
public media::mojom::SpeechRecognitionRecognizerClient {
public:
// Repeating callback type that takes ownership of one
// media::mojom::AudioDataS16Ptr per invocation.
using SendAudioToSpeechRecognitionServiceCallback =
base::RepeatingCallback<void(media::mojom::AudioDataS16Ptr audio_data)>;
// Creates an engine for the session described by `config`.
// NOTE(review): presumably `config` is copied into `config_` -- confirm in
// the .cc file.
explicit SodaSpeechRecognitionEngineImpl(
const SpeechRecognitionSessionConfig& config);
~SodaSpeechRecognitionEngineImpl() override;
// Not copyable or copy-assignable.
SodaSpeechRecognitionEngineImpl(const SodaSpeechRecognitionEngineImpl&) =
delete;
SodaSpeechRecognitionEngineImpl& operator=(
const SodaSpeechRecognitionEngineImpl&) = delete;
// Sets the delegate for tests. This is static, so the override is not tied
// to a particular engine instance.
static void SetSpeechRecognitionManagerDelegateForTesting(
SpeechRecognitionManagerDelegate*);
// Sets up the engine before use.
// NOTE(review): presumably binds to the speech recognition service and
// returns false on failure -- confirm against the .cc file.
bool Initialize();
// Registers a one-shot callback.
// NOTE(review): presumably run once the recognizer is bound and ready --
// confirm against the .cc file.
void SetOnReadyCallback(base::OnceCallback<void()> callback);
// content::SpeechRecognitionEngine:
void StartRecognition() override;
void EndRecognition() override;
void TakeAudioChunk(const AudioChunk& data) override;
void AudioChunksEnded() override;
int GetDesiredAudioChunkDurationMs() const override;
// media::mojom::SpeechRecognitionRecognizerClient:
void OnSpeechRecognitionRecognitionEvent(
const media::SpeechRecognitionResult& result,
OnSpeechRecognitionRecognitionEventCallback reply) override;
void OnSpeechRecognitionError() override;
void OnLanguageIdentificationEvent(
media::mojom::LanguageIdentificationEventPtr event) override;
void OnSpeechRecognitionStopped() override;
private:
// Callback executed when the recognizer is bound. Sets the flag indicating
// whether the speech recognition service supports multichannel audio.
void OnRecognizerBound(bool is_multichannel_supported);
// Called when the speech recognition context or the speech recognition
// recognizer is disconnected. Sends an error message to the UI and halts
// future transcriptions.
void OnRecognizerDisconnected();
// Passes `audio_data` on to the speech recognition service. Its signature
// matches SendAudioToSpeechRecognitionServiceCallback.
void SendAudioToSpeechRecognitionService(
media::mojom::AudioDataS16Ptr audio_data);
// NOTE(review): presumably signals the service that no more audio will be
// sent -- confirm against the .cc file.
void MarkDone();
// Aborts recognition, reporting `error`.
// NOTE(review): where the error is reported is not visible in this header.
void Abort(media::mojom::SpeechRecognitionErrorCode error);
// Converts an engine AudioChunk into the mojom AudioDataS16 representation.
media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(const AudioChunk& data);
// One-shot callback stored until the engine becomes ready (see
// SetOnReadyCallback()).
base::OnceCallback<void()> on_ready_callback_;
// Callback used to send audio to the speech recognition service; it is meant
// to run on the main thread.
SendAudioToSpeechRecognitionServiceCallback send_audio_callback_;
// NOTE(review): presumably bound to MarkDone() -- confirm in the .cc file.
base::RepeatingCallback<void()> mark_done_callback_;
// Mojo remote for the speech recognition context.
mojo::Remote<media::mojom::SpeechRecognitionContext>
speech_recognition_context_;
// Mojo remote for the recognizer itself.
mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
speech_recognition_recognizer_;
// Receiver through which the service calls the
// SpeechRecognitionRecognizerClient methods implemented by this object.
mojo::Receiver<media::mojom::SpeechRecognitionRecognizerClient>
speech_recognition_recognizer_client_{this};
// Configuration of the recognition session this engine serves.
SpeechRecognitionSessionConfig config_;
// Sequence checker for the main sequence.
SEQUENCE_CHECKER(main_sequence_checker_);
// A flag indicating the recognition state. Starts out false.
bool is_start_recognition_ = false;
// Kept as the last data member so it is destroyed first, which invalidates
// outstanding weak pointers before the other members are torn down.
base::WeakPtrFactory<SodaSpeechRecognitionEngineImpl> weak_factory_{this};
};
} // namespace content
#endif // CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_