1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85

media / learning / impl / one_hot.h [blame]

// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_LEARNING_IMPL_ONE_HOT_H_
#define MEDIA_LEARNING_IMPL_ONE_HOT_H_

#include <map>
#include <memory>
#include <optional>
#include <vector>

#include "base/component_export.h"
#include "media/learning/common/labelled_example.h"
#include "media/learning/common/learning_task.h"
#include "media/learning/common/value.h"
#include "media/learning/impl/model.h"

namespace media {
namespace learning {

// Converter class that memorizes a mapping from nominal features to numeric
// features with a one-hot encoding.
//
// The constructor takes the task and the training data from which the mapping
// is derived; both Convert() overloads are const and do not modify the
// converter.  Instances are neither copy-constructible nor copy-assignable.
class COMPONENT_EXPORT(LEARNING_IMPL) OneHotConverter {
 public:
  // Build a one-hot converter for all nominal features of |task|, using the
  // values found in |training_data|.
  OneHotConverter(const LearningTask& task, const TrainingData& training_data);

  OneHotConverter(const OneHotConverter&) = delete;
  OneHotConverter& operator=(const OneHotConverter&) = delete;

  ~OneHotConverter();

  // Return the LearningTask whose feature descriptions have been adjusted for
  // the one-hot model.  The reference stays valid only as long as this
  // converter is alive, since it refers to a member.
  // NOTE(review): an earlier version of this comment said the task "has only
  // nominal features"; given that the converter maps nominal features to
  // numeric ones, it presumably has only numeric features -- confirm against
  // the implementation.
  const LearningTask& converted_task() const { return converted_task_; }

  // Return a TrainingData built from |training_data|, converted to match the
  // one-hot model.  |training_data| itself is not modified.
  TrainingData Convert(const TrainingData& training_data) const;

  // Return a FeatureVector built from |feature_vector|, converted to match the
  // one-hot model.  |feature_vector| itself is not modified.
  FeatureVector Convert(const FeatureVector& feature_vector) const;

 private:
  // Build a converter for original feature |index|.  |original_description| is
  // the description of that feature in the original task, and |training_data|
  // supplies the examples to process.
  // NOTE(review): presumably called once per nominal feature from the
  // constructor -- confirm against the implementation.
  void ProcessOneFeature(
      size_t index,
      const LearningTask::ValueDescription& original_description,
      const TrainingData& training_data);

  // Learning task with the feature descriptions adjusted for the one-hot model.
  // Exposed read-only through converted_task().
  LearningTask converted_task_;

  // Maps a feature value to the vector index that should be 1 in the one-hot
  // vector, i.e. [value] == index of the "hot" element.
  using ValueVectorIndexMap = std::map<Value, size_t>;

  // [original task feature index] = optional converter for it.  If the feature
  // was kNumeric to begin with, then there will be no converter (the optional
  // is empty).
  std::vector<std::optional<ValueVectorIndexMap>> converters_;
};

// Model that uses a OneHotConverter to convert instances before sending them
// to the underlying model.  Instances are neither copy-constructible nor
// copy-assignable.
class COMPONENT_EXPORT(LEARNING_IMPL) ConvertingModel : public Model {
 public:
  // Takes ownership of both |converter| and |model|.  |model| is the
  // underlying model that receives the converted instances.
  ConvertingModel(std::unique_ptr<OneHotConverter> converter,
                  std::unique_ptr<Model> model);

  ConvertingModel(const ConvertingModel&) = delete;
  ConvertingModel& operator=(const ConvertingModel&) = delete;

  ~ConvertingModel() override;

  // Model
  // NOTE(review): per the class comment, this presumably converts |instance|
  // with |converter_| and returns |model_|'s prediction for the converted
  // instance -- confirm against the implementation.
  TargetHistogram PredictDistribution(const FeatureVector& instance) override;

 private:
  // Converter used on incoming instances.  Owned.
  std::unique_ptr<OneHotConverter> converter_;

  // Underlying model that the converted instances are sent to.  Owned.
  std::unique_ptr<Model> model_;
};

}  // namespace learning
}  // namespace media

#endif  // MEDIA_LEARNING_IMPL_ONE_HOT_H_