1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170

content / browser / preloading / preloading_prediction.h [blame]

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_PRELOADING_PRELOADING_PREDICTION_H_
#define CONTENT_BROWSER_PRELOADING_PRELOADING_PREDICTION_H_

#include <optional>
#include <string_view>

#include "base/timer/elapsed_timer.h"
#include "content/browser/preloading/preloading_confidence.h"
#include "content/public/browser/preloading_data.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "url/gurl.h"

namespace content {

// PreloadingPrediction keeps track of every preloading prediction associated
// with the various predictors defined in content/public/browser/preloading.h
// (see that header for more details), together with whether the prediction
// turned out to be accurate and the confidence the predictor reported for it.
class PreloadingPrediction {
 public:
  // `url_match_predicate` is spelled with the `PreloadingURLMatchCallback`
  // alias so that this signature agrees with the type of the
  // `url_match_predicate_` member and with the constructors of the other
  // prediction classes declared in this file.
  PreloadingPrediction(PreloadingPredictor predictor,
                       PreloadingConfidence confidence,
                       ukm::SourceId triggered_primary_page_source_id,
                       PreloadingURLMatchCallback url_match_predicate);
  ~PreloadingPrediction();

  // Move-only: copy construction and copy assignment are disallowed.
  PreloadingPrediction(const PreloadingPrediction&) = delete;
  PreloadingPrediction& operator=(const PreloadingPrediction&) = delete;
  PreloadingPrediction(PreloadingPrediction&&);
  PreloadingPrediction& operator=(PreloadingPrediction&&);

  // Records both UKMs Preloading_Prediction and
  // Preloading_Prediction_PreviousPrimaryPage. The metrics recorded for both
  // are the same; the only difference is that the
  // Preloading_Prediction_PreviousPrimaryPage UKM is associated with the
  // WebContents primary page that triggered the preloading prediction. This
  // makes it easy to analyze the impact of the preloading prediction on the
  // primary visible page.
  void RecordPreloadingPredictionUKMs(
      ukm::SourceId navigated_page_source_id,
      std::optional<double> sampling_likelihood);

  // Sets `is_accurate_prediction_` to true if `navigated_url` matches the URL
  // predicate. It also records `time_to_next_navigation_`.
  void SetIsAccuratePrediction(const GURL& navigated_url);

  // Returns the current value of `is_accurate_prediction_`.
  bool IsAccuratePrediction() const { return is_accurate_prediction_; }

  // Called by the `PreloadingDataImpl` that owns this prediction, to check the
  // validity of `predictor_type_`.
  PreloadingPredictor predictor_type() const { return predictor_type_; }

 private:
  // NOTE: the order of the data members below is kept as-is on purpose; the
  // out-of-line constructors initialize them in declaration order.

  // Preloading predictor of this preloading prediction.
  PreloadingPredictor predictor_type_;

  // ukm::SourceId of the primary page that triggered the preloading operation.
  ukm::SourceId triggered_primary_page_source_id_;

  // Triggers can specify their own predicate to judge whether two URLs are
  // considered as pointing to the same destination, as this varies between
  // predictors.
  PreloadingURLMatchCallback url_match_predicate_;

  // Confidence percentage of the predictor's preloading prediction. This value
  // should be between 0 - 100.
  PreloadingConfidence confidence_;

  // Set to true when the preloading prediction was correct, i.e. when the
  // navigation happens to the same predicted URL.
  bool is_accurate_prediction_ = false;

  // Records when the preloading prediction was first recorded.
  base::ElapsedTimer elapsed_timer_;

  // The time between the creation of the prediction and the start of the next
  // navigation, whether accurate or not. The latency is reported as standard
  // buckets, of 1.15 spacing. Empty until it has been recorded.
  std::optional<base::TimeDelta> time_to_next_navigation_;
};

// The output of many predictors is a logit/probability score. To use this score
// for binary classification, we compare it to a threshold. If the score is
// above the threshold, we classify the instance as positive; otherwise, we
// classify it as negative. Threshold choice affects classifier precision and
// recall, and there is a trade-off between the two. If we set the threshold
// too low, we will have high recall but low precision. If we set the threshold
// too high, we will have high precision but low recall. To choose the best
// threshold, we can use ROC curves, precision-recall curves, or
// logit-precision and logit-recall curves. `ExperimentalPreloadingPrediction`
// helps us collect the UMA data required to achieve this.
class ExperimentalPreloadingPrediction {
 public:
  // There is no default constructor: a prediction is always created from a
  // predictor `name`, a URL match predicate, a raw `score` together with the
  // `min_score` / `max_score` values used to normalize it, and the number of
  // UMA `buckets`.
  ExperimentalPreloadingPrediction() = delete;
  ExperimentalPreloadingPrediction(
      std::string_view name,
      PreloadingURLMatchCallback url_match_predicate,
      float score,
      float min_score,
      float max_score,
      size_t buckets);
  ~ExperimentalPreloadingPrediction();

  // Move-only: copy construction and copy assignment are disallowed.
  ExperimentalPreloadingPrediction(const ExperimentalPreloadingPrediction&) =
      delete;
  ExperimentalPreloadingPrediction& operator=(
      const ExperimentalPreloadingPrediction&) = delete;
  ExperimentalPreloadingPrediction(ExperimentalPreloadingPrediction&&);
  ExperimentalPreloadingPrediction& operator=(
      ExperimentalPreloadingPrediction&&);

  // Updates the accuracy state of this prediction for a navigation to
  // `navigated_url`.
  // NOTE(review): presumably evaluates `url_match_predicate_`; the definition
  // is outside this header - confirm there.
  void SetIsAccuratePrediction(const GURL& navigated_url);

  // Reports this prediction to UMA. Does not modify the prediction.
  void RecordToUMA() const;

  // Read-only accessors.
  bool IsAccuratePrediction() const { return is_accurate_prediction_; }
  std::string_view PredictorName() const { return name_; }

 private:
  // Name of the experimental predictor.
  // NOTE(review): this is a non-owning view, so the characters it refers to
  // must outlive this object; presumably callers pass string constants -
  // verify against the call sites.
  std::string_view name_;

  // True once the preloading prediction is known to be correct, i.e. the
  // navigation went to the predicted URL.
  bool is_accurate_prediction_ = false;

  // Number of buckets used for UMA aggregation. It must be less than 101.
  uint8_t buckets_;

  // Logit or probability score produced by the predictor model, normalized
  // based on the min and max score values.
  float normalized_score_;

  // Callback used to verify that the navigated URL is a match.
  PreloadingURLMatchCallback url_match_predicate_;
};

// Stores data relating to a prediction made by the preloading ML model. Once
// the outcome of whether the prediction is accurate is known, the provided
// callback is invoked.
class ModelPredictionTrainingData {
 public:
  // Callback type that receives the outcome of a prediction: an optional
  // sampling likelihood and whether the prediction was accurate.
  using OutcomeCallback =
      base::OnceCallback<void(std::optional<double> sampling_likelihood,
                              bool is_accurate_prediction)>;

  ModelPredictionTrainingData(OutcomeCallback on_record_outcome,
                              PreloadingURLMatchCallback url_match_predicate);
  ~ModelPredictionTrainingData();

  // Move-only: copy construction and copy assignment are disallowed.
  ModelPredictionTrainingData(const ModelPredictionTrainingData&) = delete;
  ModelPredictionTrainingData& operator=(const ModelPredictionTrainingData&) =
      delete;
  ModelPredictionTrainingData(ModelPredictionTrainingData&&);
  ModelPredictionTrainingData& operator=(ModelPredictionTrainingData&&);

  // Records the outcome together with `sampling_likelihood`.
  // NOTE(review): presumably this is where `on_record_outcome_` is run; the
  // definition is outside this header - confirm there.
  void Record(std::optional<double> sampling_likelihood);

  // Updates the accuracy state of this prediction for a navigation to
  // `navigated_url`.
  void SetIsAccuratePrediction(const GURL& navigated_url);

 private:
  // One-shot callback supplied at construction to receive the outcome.
  OutcomeCallback on_record_outcome_;

  // Predicate supplied at construction for matching navigated URLs.
  PreloadingURLMatchCallback url_match_predicate_;

  // Whether the prediction is considered accurate; false until set otherwise.
  bool is_accurate_prediction_ = false;
};

}  // namespace content

#endif  // CONTENT_BROWSER_PRELOADING_PRELOADING_PREDICTION_H_