1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// media/learning/impl/one_hot.h
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef MEDIA_LEARNING_IMPL_ONE_HOT_H_
#define MEDIA_LEARNING_IMPL_ONE_HOT_H_
#include <map>
#include <memory>
#include <optional>
#include <vector>
#include "base/component_export.h"
#include "media/learning/common/labelled_example.h"
#include "media/learning/common/learning_task.h"
#include "media/learning/common/value.h"
#include "media/learning/impl/model.h"
namespace media {
namespace learning {
// Converter class that memorizes a mapping from nominal features to numeric
// features with a one-hot encoding.
//
// The class is neither copyable nor assignable. Both Convert() overloads are
// const member functions, and no member is declared mutable, so converting
// data does not alter the memorized mapping.
class COMPONENT_EXPORT(LEARNING_IMPL) OneHotConverter {
public:
// Build a one-hot converter for all nominal features in |task|, using the
// values found in |training_data|.
// NOTE(review): how Convert() treats a nominal value that never occurred in
// |training_data| is decided in the implementation file -- confirm there.
OneHotConverter(const LearningTask& task, const TrainingData& training_data);
OneHotConverter(const OneHotConverter&) = delete;
OneHotConverter& operator=(const OneHotConverter&) = delete;
~OneHotConverter();
// Return the LearningTask adjusted for the one-hot model, i.e. the task in
// which nominal features have been replaced by their numeric one-hot
// encodings. The reference remains owned by this converter.
const LearningTask& converted_task() const { return converted_task_; }
// Convert |training_data| to match the one-hot model. The result is returned
// by value; |training_data| is taken by const reference.
TrainingData Convert(const TrainingData& training_data) const;
// Convert |feature_vector| to match the one-hot model. The result is returned
// by value; |feature_vector| is taken by const reference.
FeatureVector Convert(const FeatureVector& feature_vector) const;
private:
// Build a converter for original feature |index|, whose description in the
// original task is |original_description|.
// NOTE(review): presumably the distinct values of that feature are gathered
// from |training_data| and recorded in |converters_| -- verify against the
// definition.
void ProcessOneFeature(
size_t index,
const LearningTask::ValueDescription& original_description,
const TrainingData& training_data);
// Learning task with the feature descriptions adjusted for the one-hot model.
// This is what converted_task() exposes.
LearningTask converted_task_;
// Maps a feature value to the index, within that feature's one-hot vector,
// that should be set to 1: [value] == vector index.
using ValueVectorIndexMap = std::map<Value, size_t>;
// [original task feature index] = optional converter for it. If the feature
// was kNumeric to begin with, then there will be no converter (the optional
// is empty for that index).
std::vector<std::optional<ValueVectorIndexMap>> converters_;
};
// Model that uses a |OneHotConverter| to convert instances before sending them
// to the underlying model.
//
// The class is neither copyable nor assignable.
class COMPONENT_EXPORT(LEARNING_IMPL) ConvertingModel : public Model {
public:
// Takes ownership of both |converter| and |model| (each is passed as a
// std::unique_ptr by value).
// NOTE(review): presumably |model| was trained on data produced by
// |converter| -- confirm at the call sites.
ConvertingModel(std::unique_ptr<OneHotConverter> converter,
std::unique_ptr<Model> model);
ConvertingModel(const ConvertingModel&) = delete;
ConvertingModel& operator=(const ConvertingModel&) = delete;
~ConvertingModel() override;
// Model implementation.
// Per the class description, |instance| is expected to be converted with
// |converter_| and the prediction delegated to |model_|; the definition lives
// in the implementation file.
TargetHistogram PredictDistribution(const FeatureVector& instance) override;
private:
// Converter applied to incoming instances. Owned by this object.
std::unique_ptr<OneHotConverter> converter_;
// Underlying model that receives the converted instances. Owned by this
// object.
std::unique_ptr<Model> model_;
};
} // namespace learning
} // namespace media
#endif // MEDIA_LEARNING_IMPL_ONE_HOT_H_