1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
  359
  360
  361
  362
  363
  364
  365
  366
  367
  368
  369
  370
  371
  372
  373
  374
  375
  376
  377
  378
  379
  380
  381
  382
  383
  384
  385
  386
  387
  388
  389
  390
  391
  392
  393
  394
  395
  396
  397
  398
  399
  400
  401
  402
  403
  404
  405
  406
  407
  408
  409
  410
  411
  412
  413
  414
  415
  416
  417
  418
  419
  420
  421
  422
  423
  424
  425
  426
  427
  428
  429
  430
  431
  432
  433
  434
  435
  436
  437
  438
  439
  440
  441
  442
  443
  444
  445
  446
  447
  448
  449
  450
  451
  452
  453
  454
  455
  456
  457
  458
  459
  460
  461
  462
  463
  464
  465
  466
  467
  468
  469
  470
  471
  472
  473
  474
  475
  476
  477
  478

media / parsers / vp9_parser.h [blame]

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of a VP9 bitstream parser. The main
// purpose of this parser is to support hardware decode acceleration. Some
// accelerators, e.g. libva which implements VA-API, require the caller
// (chrome) to feed them parsed VP9 frame header.
//
// See media::VP9Decoder for example usage.
//

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#ifndef MEDIA_PARSERS_VP9_PARSER_H_
#define MEDIA_PARSERS_VP9_PARSER_H_

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

#include <memory>

#include "base/containers/circular_deque.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_ptr_exclusion.h"
#include "media/base/decrypt_config.h"
#include "media/base/media_export.h"
#include "media/base/video_color_space.h"
#include "ui/gfx/geometry/size.h"

namespace media {

const int kVp9MaxProfile = 4;
const int kVp9NumRefFramesLog2 = 3;
const size_t kVp9NumRefFrames = 1 << kVp9NumRefFramesLog2;
const uint8_t kVp9MaxProb = 255;
const size_t kVp9NumRefsPerFrame = 3;
const size_t kVp9NumFrameContextsLog2 = 2;
const size_t kVp9NumFrameContexts = 1 << kVp9NumFrameContextsLog2;

using Vp9Prob = uint8_t;

enum class Vp9ColorSpace {
  UNKNOWN = 0,
  BT_601 = 1,
  BT_709 = 2,
  SMPTE_170 = 3,
  SMPTE_240 = 4,
  BT_2020 = 5,
  RESERVED = 6,
  SRGB = 7,
};

enum Vp9InterpolationFilter {
  EIGHTTAP = 0,
  EIGHTTAP_SMOOTH = 1,
  EIGHTTAP_SHARP = 2,
  BILINEAR = 3,
  SWITCHABLE = 4,
};

enum Vp9RefType {
  VP9_FRAME_INTRA = 0,
  VP9_FRAME_LAST = 1,
  VP9_FRAME_GOLDEN = 2,
  VP9_FRAME_ALTREF = 3,
  VP9_FRAME_MAX = 4,
};

enum Vp9ReferenceMode {
  SINGLE_REFERENCE = 0,
  COMPOUND_REFERENCE = 1,
  REFERENCE_MODE_SELECT = 2,
};

struct MEDIA_EXPORT Vp9SegmentationParams {
  static const size_t kNumSegments = 8;
  static const size_t kNumTreeProbs = kNumSegments - 1;
  static const size_t kNumPredictionProbs = 3;
  enum SegmentLevelFeature {
    SEG_LVL_ALT_Q = 0,
    SEG_LVL_ALT_LF = 1,
    SEG_LVL_REF_FRAME = 2,
    SEG_LVL_SKIP = 3,
    SEG_LVL_MAX
  };

  bool enabled;

  bool update_map;
  uint8_t tree_probs[kNumTreeProbs];
  bool temporal_update;
  uint8_t pred_probs[kNumPredictionProbs];

  bool update_data;
  bool abs_or_delta_update;
  bool feature_enabled[kNumSegments][SEG_LVL_MAX];
  int16_t feature_data[kNumSegments][SEG_LVL_MAX];

  int16_t y_dequant[kNumSegments][2];
  int16_t uv_dequant[kNumSegments][2];

  bool FeatureEnabled(size_t seg_id, SegmentLevelFeature feature) const {
    return feature_enabled[seg_id][feature];
  }

  int16_t FeatureData(size_t seg_id, SegmentLevelFeature feature) const {
    return feature_data[seg_id][feature];
  }
};

struct MEDIA_EXPORT Vp9LoopFilterParams {
  static const size_t kNumModeDeltas = 2;

  uint8_t level;
  uint8_t sharpness;

  bool delta_enabled;
  bool delta_update;
  bool update_ref_deltas[VP9_FRAME_MAX];
  int8_t ref_deltas[VP9_FRAME_MAX];
  bool update_mode_deltas[kNumModeDeltas];
  int8_t mode_deltas[kNumModeDeltas];

  // Calculated from above fields.
  uint8_t lvl[Vp9SegmentationParams::kNumSegments][VP9_FRAME_MAX]
             [kNumModeDeltas];
};

// Members of Vp9FrameHeader will be 0-initialized by Vp9Parser::ParseNextFrame.
struct MEDIA_EXPORT Vp9QuantizationParams {
  bool IsLossless() const {
    return base_q_idx == 0 && delta_q_y_dc == 0 && delta_q_uv_dc == 0 &&
           delta_q_uv_ac == 0;
  }

  uint8_t base_q_idx;
  int8_t delta_q_y_dc;
  int8_t delta_q_uv_dc;
  int8_t delta_q_uv_ac;
};

// Entropy context for frame parsing
struct MEDIA_EXPORT Vp9FrameContext {
  bool IsValid() const;

  Vp9Prob tx_probs_8x8[2][1];
  Vp9Prob tx_probs_16x16[2][2];
  Vp9Prob tx_probs_32x32[2][3];

  Vp9Prob coef_probs[4][2][2][6][6][3];
  Vp9Prob skip_prob[3];
  Vp9Prob inter_mode_probs[7][3];
  Vp9Prob interp_filter_probs[4][2];
  Vp9Prob is_inter_prob[4];

  Vp9Prob comp_mode_prob[5];
  Vp9Prob single_ref_prob[5][2];
  Vp9Prob comp_ref_prob[5];

  Vp9Prob y_mode_probs[4][9];
  Vp9Prob uv_mode_probs[10][9];
  Vp9Prob partition_probs[16][3];

  Vp9Prob mv_joint_probs[3];
  Vp9Prob mv_sign_prob[2];
  Vp9Prob mv_class_probs[2][10];
  Vp9Prob mv_class0_bit_prob[2];
  Vp9Prob mv_bits_prob[2][10];
  Vp9Prob mv_class0_fr_probs[2][2][3];
  Vp9Prob mv_fr_probs[2][3];
  Vp9Prob mv_class0_hp_prob[2];
  Vp9Prob mv_hp_prob[2];
};

struct MEDIA_EXPORT Vp9CompressedHeader {
  enum Vp9TxMode {
    ONLY_4X4 = 0,
    ALLOW_8X8 = 1,
    ALLOW_16X16 = 2,
    ALLOW_32X32 = 3,
    TX_MODE_SELECT = 4,
    TX_MODES = 5,
  };

  Vp9TxMode tx_mode;
  Vp9ReferenceMode reference_mode;
};

// VP9 frame header.
struct MEDIA_EXPORT Vp9FrameHeader {
  enum FrameType {
    KEYFRAME = 0,
    INTERFRAME = 1,
  };

  Vp9FrameHeader();
  Vp9FrameHeader(const Vp9FrameHeader&);
  Vp9FrameHeader(Vp9FrameHeader&&);
  Vp9FrameHeader& operator=(const Vp9FrameHeader&);
  Vp9FrameHeader& operator=(Vp9FrameHeader&&);
  ~Vp9FrameHeader();

  bool IsKeyframe() const;
  bool IsIntra() const;
  bool RefreshFlag(size_t i) const {
    return !!(refresh_frame_flags & (1u << i));
  }
  VideoColorSpace GetColorSpace() const;

  uint8_t profile = 0;

  bool show_existing_frame = false;
  uint8_t frame_to_show_map_idx = 0;

  FrameType frame_type{KEYFRAME};

  bool show_frame = false;
  bool error_resilient_mode = false;

  uint8_t bit_depth = 0;
  Vp9ColorSpace color_space{Vp9ColorSpace::UNKNOWN};
  bool color_range = false;
  uint8_t subsampling_x = 0;
  uint8_t subsampling_y = 0;

  // The range of frame_width and frame_height is 1..2^16.
  uint32_t frame_width = 0;
  uint32_t frame_height = 0;
  uint32_t render_width = 0;
  uint32_t render_height = 0;

  bool intra_only = false;
  uint8_t reset_frame_context = 0;
  uint8_t refresh_frame_flags = 0;
  uint8_t ref_frame_idx[kVp9NumRefsPerFrame] = {};
  bool ref_frame_sign_bias[Vp9RefType::VP9_FRAME_MAX] = {false};
  bool allow_high_precision_mv = false;
  Vp9InterpolationFilter interpolation_filter{Vp9InterpolationFilter::EIGHTTAP};

  bool refresh_frame_context = false;
  bool frame_parallel_decoding_mode = false;
  uint8_t frame_context_idx = 0;
  // |frame_context_idx_to_save_probs| is to be used by save_probs() only, and
  // |frame_context_idx| otherwise.
  uint8_t frame_context_idx_to_save_probs = 0;

  Vp9QuantizationParams quant_params = {};

  uint8_t tile_cols_log2 = 0;
  uint8_t tile_rows_log2 = 0;

  // Frame data. It is a responsibility of the client of the Vp9Parser to
  // maintain validity of this data while it is being used outside of that
  // class.
  // TODO(367764863) Rewrite to base::raw_span.
  RAW_PTR_EXCLUSION base::span<const uint8_t> data;

  // Size of compressed header in bytes.
  size_t header_size_in_bytes = 0;

  // Size of uncompressed header in bytes.
  size_t uncompressed_header_size = 0;

  Vp9CompressedHeader compressed_header = {};

  // Current frame entropy context after header parsing.
  Vp9FrameContext frame_context = {};

  // Segmentation and loop filter params from uncompressed header
  Vp9SegmentationParams segmentation = {};
  Vp9LoopFilterParams loop_filter = {};
};

// A parser for VP9 bitstream.
class MEDIA_EXPORT Vp9Parser {
 public:
  // ParseNextFrame() return values. See documentation for ParseNextFrame().
  enum Result {
    kOk,
    kInvalidStream,
    kEOStream,
  };

  // The parsing context to keep track of references.
  struct ReferenceSlot {
    bool initialized;
    uint32_t frame_width;
    uint32_t frame_height;
    uint8_t subsampling_x;
    uint8_t subsampling_y;
    uint8_t bit_depth;

    // More fields for consistency checking.
    uint8_t profile;
    Vp9ColorSpace color_space;
  };

  // The parsing context that persists across frames.
  class Context {
   public:
    void Reset();

    // Return ReferenceSlot for frame at |ref_idx|.
    const ReferenceSlot& GetRefSlot(size_t ref_idx) const;

    // Update contents of ReferenceSlot at |ref_idx| with the contents of
    // |ref_slot|.
    void UpdateRefSlot(size_t ref_idx, const ReferenceSlot& ref_slot);

    const Vp9SegmentationParams& segmentation() const { return segmentation_; }

    const Vp9LoopFilterParams& loop_filter() const { return loop_filter_; }

   private:
    friend class Vp9UncompressedHeaderParser;
    friend class Vp9Parser;
    friend class Vp9ParserTest;

    // Segmentation and loop filter state.
    Vp9SegmentationParams segmentation_;
    Vp9LoopFilterParams loop_filter_;

    // Frame references.
    ReferenceSlot ref_slots_[kVp9NumRefFrames];
  };

  // Stores start pointer and size of each frame within the current superframe.
  struct FrameInfo {
    FrameInfo();
    FrameInfo(const uint8_t* ptr, off_t size);
    FrameInfo(FrameInfo&& other);
    FrameInfo& operator=(FrameInfo&& other);

    // Move-only type. Copying would require manual duplication of
    // `other.decrypt_config`.
    FrameInfo(const FrameInfo& other) = delete;
    FrameInfo& operator=(const FrameInfo& other) = delete;

    ~FrameInfo();

    bool IsValid() const { return ptr != nullptr; }
    void Reset() { ptr = nullptr; }

    // Starting address of the frame.
    raw_ptr<const uint8_t, AllowPtrArithmetic> ptr = nullptr;

    // Size of the frame in bytes.
    off_t size = 0;

    // Necessary height and width to decode the frame.
    // This is filled only if the stream is SVC.
    gfx::Size allocate_size;

    std::unique_ptr<DecryptConfig> decrypt_config;
  };

  // See homonymous member variable for information on the parameter.
  explicit Vp9Parser(bool parsing_compressed_header);

  Vp9Parser(const Vp9Parser&) = delete;
  Vp9Parser& operator=(const Vp9Parser&) = delete;

  ~Vp9Parser();

  // Set a new stream buffer to read from, starting at |stream| and of size
  // |stream_size| in bytes. |stream| must point to the beginning of a single
  // frame or a single superframe, is owned by caller and must remain valid
  // until the next call to SetStream(). |spatial_layer_frame_size| may be
  // filled if the parsed stream is VP9 SVC. It stands for frame sizes of
  // spatial layers. SVC frame might have multiple frames without superframe
  // index. The info helps Vp9Parser detecting the beginning of each frame.
  void SetStream(const uint8_t* stream,
                 off_t stream_size,
                 const std::vector<uint32_t>& spatial_layer_frame_size,
                 std::unique_ptr<DecryptConfig> stream_config);

  void SetStream(const uint8_t* stream,
                 off_t stream_size,
                 std::unique_ptr<DecryptConfig> stream_config);

  // Parse the next frame in the current stream buffer, filling |fhdr| with
  // the parsed frame header and updating current segmentation and loop filter
  // state. The necessary frame size to decode |fhdr| fills in |allocate_size|.
  // The size can be larger than frame size of |fhdr| in the case of SVC stream.
  // Also fills |frame_decrypt_config| _if_ the parser was set to use a super
  // frame decrypt config.
  // Return kOk if a frame has successfully been parsed,
  //        kEOStream if there is no more data in the current stream buffer,
  //        kInvalidStream on error.
  Result ParseNextFrame(Vp9FrameHeader* fhdr,
                        gfx::Size* allocate_size,
                        std::unique_ptr<DecryptConfig>* frame_decrypt_config);

  // Perform the same superframe parsing logic, but don't attempt to parse
  // the normal frame headers afterwards, and then only return the decrypt
  // config, since the frame itself isn't useful for the testing.
  // Returns |true| if a frame would have been sent to |ParseUncompressedHeader|
  //         |false| if there was an error parsing the superframe.
  std::unique_ptr<DecryptConfig> NextFrameDecryptContextForTesting();
  std::string IncrementIVForTesting(std::string_view iv, uint32_t by);

  // Return current parsing context.
  const Context& context() const { return context_; }

  // Clear parser state and return to an initialized state.
  void Reset();

  // Determines if the passed in VP9 frame data contains a superframe or not.
  static bool IsSuperframe(const uint8_t* stream,
                           off_t stream_size,
                           const DecryptConfig* decrypt_config);

  // Extracts the frame information for a frame, if this is a superframe then
  // the returned list will contain each of the frames in decode order. An empty
  // list will be returned in the error case.
  static base::circular_deque<FrameInfo> ExtractFrames(
      const uint8_t* stream,
      off_t stream_size,
      const DecryptConfig* decrypt_config);

 private:
  base::circular_deque<FrameInfo> ParseSuperframe();
  // Parses a frame in SVC stream with |spatial_layer_frame_size_|.
  base::circular_deque<FrameInfo> ParseSVCFrame();

  // Returns true and populates |result| with the parsing result if parsing of
  // current frame is finished (possibly unsuccessfully). |fhdr| will only be
  // populated and valid if |result| is kOk. Otherwise return false, indicating
  // that the compressed header must be parsed next.
  bool ParseUncompressedHeader(const FrameInfo& frame_info,
                               Vp9FrameHeader* fhdr,
                               Result* result,
                               Vp9Parser::Context* context);

  // Returns true if parsing of current frame is finished and |result| will be
  // populated with value of parsing result. Otherwise, needs to continue setup
  // current frame.
  bool ParseCompressedHeader(const FrameInfo& frame_info, Result* result);

  int64_t GetQIndex(const Vp9QuantizationParams& quant, size_t segid) const;
  // Returns true if the setup to |context_| succeeded.
  bool SetupSegmentationDequant();
  void SetupLoopFilter();
  // Returns true if the setup to |context| succeeded.
  void UpdateSlots(Vp9Parser::Context* context);

  // Current address in the bitstream buffer.
  raw_ptr<const uint8_t> stream_;

  // Remaining bytes in stream_.
  off_t bytes_left_;

  // Set on ctor if the client needs VP9Parser to also parse compressed headers,
  // otherwise they'll be skipped.
  const bool parsing_compressed_header_;

  // FrameInfo for the remaining frames in the current superframe to be parsed.
  base::circular_deque<FrameInfo> frames_;

  Context context_;

  // Encrypted stream info.
  std::unique_ptr<DecryptConfig> stream_decrypt_config_;

  // The frame size of each spatial layer.
  std::vector<uint32_t> spatial_layer_frame_size_;

  FrameInfo curr_frame_info_;
  Vp9FrameHeader curr_frame_header_;
};

}  // namespace media

#endif  // MEDIA_PARSERS_VP9_PARSER_H_