1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// media/learning/impl/learning_task_controller_impl.h
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef MEDIA_LEARNING_IMPL_LEARNING_TASK_CONTROLLER_IMPL_H_
#define MEDIA_LEARNING_IMPL_LEARNING_TASK_CONTROLLER_IMPL_H_
#include <memory>
#include <optional>
#include <set>

#include "base/component_export.h"
#include "base/functional/callback.h"
#include "base/memory/weak_ptr.h"
#include "media/learning/common/learning_task_controller.h"
#include "media/learning/impl/distribution_reporter.h"
#include "media/learning/impl/feature_provider.h"
#include "media/learning/impl/learning_task_controller_helper.h"
#include "media/learning/impl/random_number_generator.h"
#include "media/learning/impl/training_algorithm.h"
namespace media {
namespace learning {
// Forward declarations.  DistributionReporter is only held through
// std::unique_ptr in this header.  LearningTaskControllerImplTest is named
// here so that LearningTaskControllerImpl can declare it a friend.
class DistributionReporter;
class LearningTaskControllerImplTest;
// Controller for a single learning task.  Accepts training examples and
// forwards them to the underlying learner(s).  Its responsibilities include:
//  - Managing the underlying learner(s) based on the learning task
//  - Feature subset selection
//  - UMA reporting on accuracy / feature importance
//
// Intended use: create a LearningTask, hand it to a LearningTaskControllerImpl
// (LTCI), and let the LTCI do the work of building / evaluating the model from
// the training examples that are provided to it.
class COMPONENT_EXPORT(LEARNING_IMPL) LearningTaskControllerImpl final
    : public LearningTaskController,
      public HasRandomNumberGenerator {
 public:
  // |task| describes the learning task to control.  |reporter| is optional and
  // defaults to null.  |feature_provider| defaults to a default-constructed
  // SequenceBoundFeatureProvider.
  // NOTE(review): callable with a single argument but not `explicit`; left
  // as-is so that existing callers are unaffected.
  LearningTaskControllerImpl(
      const LearningTask& task,
      std::unique_ptr<DistributionReporter> reporter = nullptr,
      SequenceBoundFeatureProvider feature_provider =
          SequenceBoundFeatureProvider());
  ~LearningTaskControllerImpl() override;

  // LearningTaskController implementation.
  //
  // Note that we don't support |default_target|, since destroying us destroys
  // everything.  One might argue that only the mojo client / service should
  // support default values, but it's much more convenient if they're part of
  // the base api.  Since clients shouldn't be dealing with us directly (see
  // LearningSessionImpl), that's okay.
  void BeginObservation(base::UnguessableToken id,
                        const FeatureVector& features,
                        const std::optional<TargetValue>& default_target,
                        const std::optional<ukm::SourceId>& source_id) override;
  void CompleteObservation(base::UnguessableToken id,
                           const ObservationCompletion& completion) override;
  void CancelObservation(base::UnguessableToken id) override;
  void UpdateDefaultTarget(
      base::UnguessableToken id,
      const std::optional<TargetValue>& default_target) override;
  const LearningTask& GetLearningTask() override;
  void PredictDistribution(const FeatureVector& features,
                           PredictionCB callback) override;

 private:
  // Adds |example| to the training data, and processes it.
  void AddFinishedExample(LabelledExample example, ukm::SourceId source_id);

  // Called by |training_cb_| when the model is trained.  |training_weight| and
  // |training_size| are the training set's total weight and number of
  // examples.
  // NOTE(review): |training_size| is an int while |last_training_size_| is a
  // size_t; the signature is kept unchanged to match the out-of-line
  // definition.
  void OnModelTrained(double training_weight,
                      int training_size,
                      std::unique_ptr<Model> model);

  // Test-only hook (reachable via the friend test class below); presumably
  // replaces |trainer_| with |trainer| -- confirm against the definition.
  void SetTrainerForTesting(std::unique_ptr<TrainingAlgorithm> trainer);

  // Updates |task_| to reflect a randomly chosen subset of features.
  void DoFeatureSubsetSelection();

  LearningTask task_;

  // Current batch of examples.
  std::unique_ptr<TrainingData> training_data_;

  // Most recently trained model, or null.
  std::unique_ptr<Model> model_;

  // We don't want to have multiple models in flight.
  bool training_is_in_progress_ = false;

  // Number of examples in |training_data_| that haven't been used for
  // training.  This helps us decide when to train a new model.
  int num_untrained_examples_ = 0;

  // Total weight and number of examples in the most recently trained model.
  double last_training_weight_ = 0.;
  size_t last_training_size_ = 0u;

  // Training algorithm that we'll use.
  std::unique_ptr<TrainingAlgorithm> trainer_;

  // Optional reporter for training accuracy.
  std::unique_ptr<DistributionReporter> reporter_;

  // Helper that we use to handle deferred examples.
  std::unique_ptr<LearningTaskControllerHelper> helper_;

  // If the task specifies feature importance measurement, then this is the
  // randomly chosen subset of features.
  std::set<int> feature_indices_;

  // Number of features that we expect in each observation.  Zero-initialized
  // here, like the other scalar members, so that it never holds an
  // indeterminate value; the constructor (defined out of line) is expected to
  // set the real count.
  size_t expected_feature_count_ = 0u;

  // Kept as the last data member so that it is destroyed first (members are
  // destroyed in reverse declaration order).
  base::WeakPtrFactory<LearningTaskControllerImpl> weak_ptr_factory_{this};

  // Grants the test fixture access to the private members above.
  friend class LearningTaskControllerImplTest;
};
} // namespace learning
} // namespace media
#endif // MEDIA_LEARNING_IMPL_LEARNING_TASK_CONTROLLER_IMPL_H_