1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116

media / gpu / svc_layers.h [blame]

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_GPU_SVC_LAYERS_H_
#define MEDIA_GPU_SVC_LAYERS_H_

#include <stdint.h>

#include <vector>

#include "media/base/svc_scalability_mode.h"
#include "media/gpu/media_gpu_export.h"
#include "media/parsers/vp9_parser.h"
#include "media/video/video_encode_accelerator.h"
#include "third_party/abseil-cpp/absl/types/variant.h"
#include "ui/gfx/geometry/size.h"

namespace media {

class MEDIA_GPU_EXPORT SVCLayers {
 public:
  constexpr static size_t kMaxTemporalLayers = 3u;
  constexpr static size_t kMaxSpatialLayers = 3u;

  // Config is the SVC configuration used in SVCLayers. It cannot be changed
  // after creation. In other words, if active spatial layers or the number of
  // temporal layers are changed, a client needs to recreate SVCLayers with
  // the new Config.
  struct MEDIA_GPU_EXPORT Config {
    Config(const std::vector<gfx::Size>& spatial_layer_resolutions,
           size_t begin_active_layer,
           size_t end_active_layer,
           size_t num_temporal_layers,
           SVCInterLayerPredMode inter_layer_pred);
    ~Config();
    Config(const Config& config);

    // The resolutions in spatial layers. This includes resolutions in inactive
    // resolutions.
    std::vector<gfx::Size> spatial_layer_resolutions;
    // The active spatial layer resolutions index.
    // [begin_active_layer, end_active_layer - 1].
    size_t begin_active_layer = 0;
    size_t end_active_layer = 0;
    size_t num_temporal_layers = 0;
    // The active spatial layer resolutions, i.e., resolutions from
    // spatial_layer_resolutions[begin_active_layer] to
    // spatial_layer_resolutions[end_active_layer - 1].
    std::vector<gfx::Size> active_spatial_layer_resolutions;
    SVCInterLayerPredMode inter_layer_pred = SVCInterLayerPredMode::kOnKeyPic;
  };

  // The parameters used for encoding the current frame.
  struct MEDIA_GPU_EXPORT PictureParam {
    PictureParam();
    ~PictureParam();
    PictureParam(const PictureParam&);

    bool key_frame = false;
    gfx::Size frame_size;
    uint8_t refresh_frame_flags = 0;
    std::vector<uint8_t> reference_frame_indices;
  };

  explicit SVCLayers(const Config& config);

  std::pair<bool, std::optional<std::unique_ptr<SVCLayers>>>
  RecreateSVCLayersIfNeeded(VideoBitrateAllocation& bitrate_allocation);

  // These functions are constant functions. Unless Reset() or PostEncode() is
  // called, these functions returns the same result as the previous result.
  // Returns whether the current frame shall encoded as key frame.
  bool IsKeyFrame() const;
  // Gets PictureParam and fill the metadata for the current frame.
  void GetPictureParamAndMetadata(
      PictureParam& picture_param,
      absl::variant<Vp9Metadata*, SVCGenericMetadata*> metadata) const;

  // Resets the current spatial layer stream.
  // This can be called before encoding the SVC frame.
  void Reset();
  // Moves to the next frame by changing |spatial_index_| and |frame_num_| and
  // updates |frame_num_ref_frames_| by |refresh_frame_flags|.
  void PostEncode(uint8_t refresh_frame_flags);

  // Returns the currently used configuration and the spatial index and the
  // frame number for the current frame.
  const Config& config() const { return config_; }
  size_t spatial_idx() const { return spatial_idx_; }
  size_t frame_num() const { return frame_num_; }

 private:
  // Fill metadata for the first frame, i.e. frame_num=0.
  void FillMetadataForFirstFrame(
      absl::variant<Vp9Metadata*, SVCGenericMetadata*> metadata,
      bool& key_frame,
      uint8_t& refresh_frame_flags,
      std::vector<uint8_t>& reference_frame_indices) const;
  // Fill metadata for the second and later frames, i.e. frame_num != 0.
  void FillMetadataForNonFirstFrame(
      absl::variant<Vp9Metadata*, SVCGenericMetadata*> metadata,
      uint8_t& refresh_frame_flags,
      std::vector<uint8_t>& reference_frame_indices) const;

  const Config config_;

  size_t spatial_idx_ = 0;
  // The frame number since the last frame containing keyframe.
  // It is incremented after PostEncode() on the top spatial index.
  size_t frame_num_ = 0;
  // The |frame_num| of reference frames. This is used to compute |p_diff|.
  std::array<size_t, kVp9NumRefFrames> frame_num_ref_frames_;
};
}  // namespace media
#endif  // MEDIA_GPU_SVC_LAYERS_H_