// media/parsers/vp9_parser.h
// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of a VP9 bitstream parser. The main
// purpose of this parser is to support hardware decode acceleration. Some
// accelerators, e.g. libva which implements VA-API, require the caller
// (chrome) to feed them parsed VP9 frame header.
//
// See media::VP9Decoder for example usage.
//
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#ifndef MEDIA_PARSERS_VP9_PARSER_H_
#define MEDIA_PARSERS_VP9_PARSER_H_
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include "base/containers/circular_deque.h"
#include "base/containers/span.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_ptr_exclusion.h"
#include "media/base/decrypt_config.h"
#include "media/base/media_export.h"
#include "media/base/video_color_space.h"
#include "ui/gfx/geometry/size.h"
namespace media {
// Limits and table sizes shared by the VP9 parsing structures in this file.
constexpr int kVp9MaxProfile = 4;

// Log2 of, and the number of, reference slots (the extent of
// Vp9Parser::Context's reference slot array).
constexpr int kVp9NumRefFramesLog2 = 3;
constexpr size_t kVp9NumRefFrames = 1 << kVp9NumRefFramesLog2;

constexpr uint8_t kVp9MaxProb = 255;

// Extent of Vp9FrameHeader::ref_frame_idx.
constexpr size_t kVp9NumRefsPerFrame = 3;

// Log2 of, and the number of, frame contexts.
constexpr size_t kVp9NumFrameContextsLog2 = 2;
constexpr size_t kVp9NumFrameContexts = 1 << kVp9NumFrameContextsLog2;

// Element type of the probability tables in Vp9FrameContext.
using Vp9Prob = uint8_t;
// Color space identifier, stored in Vp9FrameHeader::color_space and in
// Vp9Parser::ReferenceSlot::color_space. The numeric values are spelled out
// explicitly and must not be renumbered.
enum class Vp9ColorSpace {
  UNKNOWN = 0,
  BT_601 = 1,
  BT_709 = 2,
  SMPTE_170 = 3,
  SMPTE_240 = 4,
  BT_2020 = 5,
  RESERVED = 6,
  SRGB = 7,
};
// Interpolation filter selection, stored in
// Vp9FrameHeader::interpolation_filter.
enum Vp9InterpolationFilter {
  EIGHTTAP = 0,
  EIGHTTAP_SMOOTH = 1,
  EIGHTTAP_SHARP = 2,
  BILINEAR = 3,
  SWITCHABLE = 4,
};
// Reference frame types. VP9_FRAME_MAX is the number of types and is used as
// an array extent (e.g. Vp9FrameHeader::ref_frame_sign_bias and
// Vp9LoopFilterParams::ref_deltas).
enum Vp9RefType {
  VP9_FRAME_INTRA = 0,
  VP9_FRAME_LAST = 1,
  VP9_FRAME_GOLDEN = 2,
  VP9_FRAME_ALTREF = 3,
  VP9_FRAME_MAX = 4,
};
// Reference mode, stored in Vp9CompressedHeader::reference_mode.
enum Vp9ReferenceMode {
  SINGLE_REFERENCE = 0,
  COMPOUND_REFERENCE = 1,
  REFERENCE_MODE_SELECT = 2,
};
struct MEDIA_EXPORT Vp9SegmentationParams {
  // Table extents used by the arrays below.
  static const size_t kNumSegments = 8;
  static const size_t kNumTreeProbs = kNumSegments - 1;
  static const size_t kNumPredictionProbs = 3;

  // Per-segment features. The values index the second dimension of
  // |feature_enabled| and |feature_data|; SEG_LVL_MAX is the feature count.
  enum SegmentLevelFeature {
    SEG_LVL_ALT_Q = 0,
    SEG_LVL_ALT_LF = 1,
    SEG_LVL_REF_FRAME = 2,
    SEG_LVL_SKIP = 3,
    SEG_LVL_MAX
  };

  bool enabled;

  bool update_map;
  uint8_t tree_probs[kNumTreeProbs];
  bool temporal_update;
  uint8_t pred_probs[kNumPredictionProbs];

  bool update_data;
  bool abs_or_delta_update;
  bool feature_enabled[kNumSegments][SEG_LVL_MAX];
  int16_t feature_data[kNumSegments][SEG_LVL_MAX];

  // Dequantization values, two entries per segment.
  int16_t y_dequant[kNumSegments][2];
  int16_t uv_dequant[kNumSegments][2];

  // Returns the |feature_enabled| entry for segment |seg_id|. No bounds
  // checking is done: |seg_id| must be less than kNumSegments.
  bool FeatureEnabled(size_t seg_id, SegmentLevelFeature feature) const {
    const bool* const segment_flags = feature_enabled[seg_id];
    return segment_flags[feature];
  }

  // Returns the |feature_data| entry for segment |seg_id|. No bounds
  // checking is done: |seg_id| must be less than kNumSegments.
  int16_t FeatureData(size_t seg_id, SegmentLevelFeature feature) const {
    const int16_t* const segment_data = feature_data[seg_id];
    return segment_data[feature];
  }
};
struct MEDIA_EXPORT Vp9LoopFilterParams {
  // Extent of the mode delta arrays below.
  static const size_t kNumModeDeltas = 2;

  uint8_t level;
  uint8_t sharpness;

  bool delta_enabled;
  bool delta_update;

  // Deltas per reference type, with a matching "update" flag for each.
  bool update_ref_deltas[VP9_FRAME_MAX];
  int8_t ref_deltas[VP9_FRAME_MAX];

  // Deltas per mode, with a matching "update" flag for each.
  bool update_mode_deltas[kNumModeDeltas];
  int8_t mode_deltas[kNumModeDeltas];

  // Derived from the fields above; indexed by segment, reference type and
  // mode delta.
  uint8_t lvl[Vp9SegmentationParams::kNumSegments][VP9_FRAME_MAX]
             [kNumModeDeltas];
};
// Quantization parameters, stored in Vp9FrameHeader::quant_params. Like all
// members of Vp9FrameHeader, they will be 0-initialized by
// Vp9Parser::ParseNextFrame.
struct MEDIA_EXPORT Vp9QuantizationParams {
  // True only when the base index and all three deltas are zero.
  bool IsLossless() const {
    if (base_q_idx != 0) {
      return false;
    }
    return delta_q_y_dc == 0 && delta_q_uv_dc == 0 && delta_q_uv_ac == 0;
  }

  uint8_t base_q_idx;
  int8_t delta_q_y_dc;
  int8_t delta_q_uv_dc;
  int8_t delta_q_uv_ac;
};
// Entropy context for frame parsing: a collection of fixed-size probability
// tables. Vp9FrameHeader::frame_context holds the context that is current
// after header parsing.
struct MEDIA_EXPORT Vp9FrameContext {
  // Defined out of line.
  bool IsValid() const;

  // Transform size probabilities.
  Vp9Prob tx_probs_8x8[2][1];
  Vp9Prob tx_probs_16x16[2][2];
  Vp9Prob tx_probs_32x32[2][3];

  // Coefficient and skip probabilities.
  Vp9Prob coef_probs[4][2][2][6][6][3];
  Vp9Prob skip_prob[3];

  // Inter prediction, interpolation filter and reference probabilities.
  Vp9Prob inter_mode_probs[7][3];
  Vp9Prob interp_filter_probs[4][2];
  Vp9Prob is_inter_prob[4];
  Vp9Prob comp_mode_prob[5];
  Vp9Prob single_ref_prob[5][2];
  Vp9Prob comp_ref_prob[5];

  // Intra mode and partition probabilities.
  Vp9Prob y_mode_probs[4][9];
  Vp9Prob uv_mode_probs[10][9];
  Vp9Prob partition_probs[16][3];

  // Motion vector probabilities.
  Vp9Prob mv_joint_probs[3];
  Vp9Prob mv_sign_prob[2];
  Vp9Prob mv_class_probs[2][10];
  Vp9Prob mv_class0_bit_prob[2];
  Vp9Prob mv_bits_prob[2][10];
  Vp9Prob mv_class0_fr_probs[2][2][3];
  Vp9Prob mv_fr_probs[2][3];
  Vp9Prob mv_class0_hp_prob[2];
  Vp9Prob mv_hp_prob[2];
};
// Values kept from the compressed header; stored in
// Vp9FrameHeader::compressed_header.
struct MEDIA_EXPORT Vp9CompressedHeader {
  // Transform mode. TX_MODES is the number of modes.
  enum Vp9TxMode {
    ONLY_4X4 = 0,
    ALLOW_8X8 = 1,
    ALLOW_16X16 = 2,
    ALLOW_32X32 = 3,
    TX_MODE_SELECT = 4,
    TX_MODES = 5,
  };

  Vp9TxMode tx_mode;
  Vp9ReferenceMode reference_mode;
};
// VP9 frame header.
struct MEDIA_EXPORT Vp9FrameHeader {
enum FrameType {
KEYFRAME = 0,
INTERFRAME = 1,
};
Vp9FrameHeader();
Vp9FrameHeader(const Vp9FrameHeader&);
Vp9FrameHeader(Vp9FrameHeader&&);
Vp9FrameHeader& operator=(const Vp9FrameHeader&);
Vp9FrameHeader& operator=(Vp9FrameHeader&&);
~Vp9FrameHeader();
bool IsKeyframe() const;
bool IsIntra() const;
bool RefreshFlag(size_t i) const {
return !!(refresh_frame_flags & (1u << i));
}
VideoColorSpace GetColorSpace() const;
uint8_t profile = 0;
bool show_existing_frame = false;
uint8_t frame_to_show_map_idx = 0;
FrameType frame_type{KEYFRAME};
bool show_frame = false;
bool error_resilient_mode = false;
uint8_t bit_depth = 0;
Vp9ColorSpace color_space{Vp9ColorSpace::UNKNOWN};
bool color_range = false;
uint8_t subsampling_x = 0;
uint8_t subsampling_y = 0;
// The range of frame_width and frame_height is 1..2^16.
uint32_t frame_width = 0;
uint32_t frame_height = 0;
uint32_t render_width = 0;
uint32_t render_height = 0;
bool intra_only = false;
uint8_t reset_frame_context = 0;
uint8_t refresh_frame_flags = 0;
uint8_t ref_frame_idx[kVp9NumRefsPerFrame] = {};
bool ref_frame_sign_bias[Vp9RefType::VP9_FRAME_MAX] = {false};
bool allow_high_precision_mv = false;
Vp9InterpolationFilter interpolation_filter{Vp9InterpolationFilter::EIGHTTAP};
bool refresh_frame_context = false;
bool frame_parallel_decoding_mode = false;
uint8_t frame_context_idx = 0;
// |frame_context_idx_to_save_probs| is to be used by save_probs() only, and
// |frame_context_idx| otherwise.
uint8_t frame_context_idx_to_save_probs = 0;
Vp9QuantizationParams quant_params = {};
uint8_t tile_cols_log2 = 0;
uint8_t tile_rows_log2 = 0;
// Frame data. It is a responsibility of the client of the Vp9Parser to
// maintain validity of this data while it is being used outside of that
// class.
// TODO(367764863) Rewrite to base::raw_span.
RAW_PTR_EXCLUSION base::span<const uint8_t> data;
// Size of compressed header in bytes.
size_t header_size_in_bytes = 0;
// Size of uncompressed header in bytes.
size_t uncompressed_header_size = 0;
Vp9CompressedHeader compressed_header = {};
// Current frame entropy context after header parsing.
Vp9FrameContext frame_context = {};
// Segmentation and loop filter params from uncompressed header
Vp9SegmentationParams segmentation = {};
Vp9LoopFilterParams loop_filter = {};
};
// A parser for VP9 bitstream.
class MEDIA_EXPORT Vp9Parser {
public:
// ParseNextFrame() return values. See documentation for ParseNextFrame().
enum Result {
kOk,
kInvalidStream,
kEOStream,
};
// The parsing context to keep track of references.
struct ReferenceSlot {
bool initialized;
uint32_t frame_width;
uint32_t frame_height;
uint8_t subsampling_x;
uint8_t subsampling_y;
uint8_t bit_depth;
// More fields for consistency checking.
uint8_t profile;
Vp9ColorSpace color_space;
};
// The parsing context that persists across frames.
class Context {
public:
void Reset();
// Return ReferenceSlot for frame at |ref_idx|.
const ReferenceSlot& GetRefSlot(size_t ref_idx) const;
// Update contents of ReferenceSlot at |ref_idx| with the contents of
// |ref_slot|.
void UpdateRefSlot(size_t ref_idx, const ReferenceSlot& ref_slot);
const Vp9SegmentationParams& segmentation() const { return segmentation_; }
const Vp9LoopFilterParams& loop_filter() const { return loop_filter_; }
private:
friend class Vp9UncompressedHeaderParser;
friend class Vp9Parser;
friend class Vp9ParserTest;
// Segmentation and loop filter state.
Vp9SegmentationParams segmentation_;
Vp9LoopFilterParams loop_filter_;
// Frame references.
ReferenceSlot ref_slots_[kVp9NumRefFrames];
};
// Stores start pointer and size of each frame within the current superframe.
struct FrameInfo {
FrameInfo();
FrameInfo(const uint8_t* ptr, off_t size);
FrameInfo(FrameInfo&& other);
FrameInfo& operator=(FrameInfo&& other);
// Move-only type. Copying would require manual duplication of
// `other.decrypt_config`.
FrameInfo(const FrameInfo& other) = delete;
FrameInfo& operator=(const FrameInfo& other) = delete;
~FrameInfo();
bool IsValid() const { return ptr != nullptr; }
void Reset() { ptr = nullptr; }
// Starting address of the frame.
raw_ptr<const uint8_t, AllowPtrArithmetic> ptr = nullptr;
// Size of the frame in bytes.
off_t size = 0;
// Necessary height and width to decode the frame.
// This is filled only if the stream is SVC.
gfx::Size allocate_size;
std::unique_ptr<DecryptConfig> decrypt_config;
};
// See homonymous member variable for information on the parameter.
explicit Vp9Parser(bool parsing_compressed_header);
Vp9Parser(const Vp9Parser&) = delete;
Vp9Parser& operator=(const Vp9Parser&) = delete;
~Vp9Parser();
// Set a new stream buffer to read from, starting at |stream| and of size
// |stream_size| in bytes. |stream| must point to the beginning of a single
// frame or a single superframe, is owned by caller and must remain valid
// until the next call to SetStream(). |spatial_layer_frame_size| may be
// filled if the parsed stream is VP9 SVC. It stands for frame sizes of
// spatial layers. SVC frame might have multiple frames without superframe
// index. The info helps Vp9Parser detecting the beginning of each frame.
void SetStream(const uint8_t* stream,
off_t stream_size,
const std::vector<uint32_t>& spatial_layer_frame_size,
std::unique_ptr<DecryptConfig> stream_config);
void SetStream(const uint8_t* stream,
off_t stream_size,
std::unique_ptr<DecryptConfig> stream_config);
// Parse the next frame in the current stream buffer, filling |fhdr| with
// the parsed frame header and updating current segmentation and loop filter
// state. The necessary frame size to decode |fhdr| fills in |allocate_size|.
// The size can be larger than frame size of |fhdr| in the case of SVC stream.
// Also fills |frame_decrypt_config| _if_ the parser was set to use a super
// frame decrypt config.
// Return kOk if a frame has successfully been parsed,
// kEOStream if there is no more data in the current stream buffer,
// kInvalidStream on error.
Result ParseNextFrame(Vp9FrameHeader* fhdr,
gfx::Size* allocate_size,
std::unique_ptr<DecryptConfig>* frame_decrypt_config);
// Perform the same superframe parsing logic, but don't attempt to parse
// the normal frame headers afterwards, and then only return the decrypt
// config, since the frame itself isn't useful for the testing.
// Returns |true| if a frame would have been sent to |ParseUncompressedHeader|
// |false| if there was an error parsing the superframe.
std::unique_ptr<DecryptConfig> NextFrameDecryptContextForTesting();
std::string IncrementIVForTesting(std::string_view iv, uint32_t by);
// Return current parsing context.
const Context& context() const { return context_; }
// Clear parser state and return to an initialized state.
void Reset();
// Determines if the passed in VP9 frame data contains a superframe or not.
static bool IsSuperframe(const uint8_t* stream,
off_t stream_size,
const DecryptConfig* decrypt_config);
// Extracts the frame information for a frame, if this is a superframe then
// the returned list will contain each of the frames in decode order. An empty
// list will be returned in the error case.
static base::circular_deque<FrameInfo> ExtractFrames(
const uint8_t* stream,
off_t stream_size,
const DecryptConfig* decrypt_config);
private:
base::circular_deque<FrameInfo> ParseSuperframe();
// Parses a frame in SVC stream with |spatial_layer_frame_size_|.
base::circular_deque<FrameInfo> ParseSVCFrame();
// Returns true and populates |result| with the parsing result if parsing of
// current frame is finished (possibly unsuccessfully). |fhdr| will only be
// populated and valid if |result| is kOk. Otherwise return false, indicating
// that the compressed header must be parsed next.
bool ParseUncompressedHeader(const FrameInfo& frame_info,
Vp9FrameHeader* fhdr,
Result* result,
Vp9Parser::Context* context);
// Returns true if parsing of current frame is finished and |result| will be
// populated with value of parsing result. Otherwise, needs to continue setup
// current frame.
bool ParseCompressedHeader(const FrameInfo& frame_info, Result* result);
int64_t GetQIndex(const Vp9QuantizationParams& quant, size_t segid) const;
// Returns true if the setup to |context_| succeeded.
bool SetupSegmentationDequant();
void SetupLoopFilter();
// Returns true if the setup to |context| succeeded.
void UpdateSlots(Vp9Parser::Context* context);
// Current address in the bitstream buffer.
raw_ptr<const uint8_t> stream_;
// Remaining bytes in stream_.
off_t bytes_left_;
// Set on ctor if the client needs VP9Parser to also parse compressed headers,
// otherwise they'll be skipped.
const bool parsing_compressed_header_;
// FrameInfo for the remaining frames in the current superframe to be parsed.
base::circular_deque<FrameInfo> frames_;
Context context_;
// Encrypted stream info.
std::unique_ptr<DecryptConfig> stream_decrypt_config_;
// The frame size of each spatial layer.
std::vector<uint32_t> spatial_layer_frame_size_;
FrameInfo curr_frame_info_;
Vp9FrameHeader curr_frame_header_;
};
} // namespace media
#endif // MEDIA_PARSERS_VP9_PARSER_H_