1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
media / gpu / svc_layers.cc [blame]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#include "media/gpu/svc_layers.h"
#include "base/logging.h"
namespace media {
namespace {
// Number of reference frame slots reserved for every spatial layer.
constexpr size_t kMaxNumUsedRefFramesEachSpatialLayer = 2;
static_assert(kMaxNumUsedRefFramesEachSpatialLayer == 2u,
              "SVCLayers uses two reference frames for each spatial layer");

// Total number of reference frame slots used across all spatial layers.
constexpr size_t kMaxNumUsedReferenceFrames =
    SVCLayers::kMaxSpatialLayers * kMaxNumUsedRefFramesEachSpatialLayer;
static_assert(kMaxNumUsedReferenceFrames == 6u,
              "SVCLayers uses six reference frames");
// Bit flags describing how a frame uses one reference buffer slot.
enum FrameFlags : uint8_t {
  // The slot is neither read nor written.
  kNone = 0,
  // The frame predicts from the slot.
  kReference = 1 << 0,
  // The frame overwrites the slot after it is encoded.
  kUpdate = 1 << 1,
  // The frame both predicts from and overwrites the slot.
  kReferenceAndUpdate = kReference | kUpdate,
};
// Describes, for one frame of a temporal layer pattern, which temporal layer
// it belongs to and how it uses the two buffers owned by its spatial layer.
struct FrameConfig {
  // |first| and |second| are the usage flags of the two per-spatial-layer
  // buffers; |layer_index| is the temporal layer index of the frame.
  constexpr FrameConfig(size_t layer_index,
                        FrameFlags first,
                        FrameFlags second,
                        bool temporal_up_switch)
      : layer_index_(layer_index),
        buffer_flags_{first, second},
        temporal_up_switch_(temporal_up_switch) {}

  // SVCLayers uses 2 reference frame slots for each spatial layer, and up to 6
  // slots in total: SL0 owns slots (0, 1), SL1 owns (2, 3) and SL2 owns
  // (4, 5). The getters below translate the per-layer buffers into those
  // global slot indices for the spatial layer |spatial_idx|.

  // Slots this frame references.
  std::vector<uint8_t> GetRefFrameIndices(size_t spatial_idx) const {
    return CollectSlots(FrameFlags::kReference, spatial_idx);
  }

  // Slots this frame refreshes.
  std::vector<uint8_t> GetRefreshIndices(size_t spatial_idx) const {
    return CollectSlots(FrameFlags::kUpdate, spatial_idx);
  }

  size_t layer_index() const { return layer_index_; }
  bool temporal_up_switch() const { return temporal_up_switch_; }

 private:
  // Returns the global slot index of every buffer whose flags contain
  // |wanted|, in ascending order.
  std::vector<uint8_t> CollectSlots(FrameFlags wanted,
                                    size_t spatial_idx) const {
    std::vector<uint8_t> slots;
    for (size_t buffer = 0; buffer < kMaxNumUsedRefFramesEachSpatialLayer;
         ++buffer) {
      if ((buffer_flags_[buffer] & wanted) == 0) {
        continue;
      }
      slots.push_back(buffer +
                      kMaxNumUsedRefFramesEachSpatialLayer * spatial_idx);
    }
    return slots;
  }

  const size_t layer_index_;
  const FrameFlags buffer_flags_[kMaxNumUsedRefFramesEachSpatialLayer];
  const bool temporal_up_switch_;
};
// Returns the buffer usage of frame |frame_num| in a stream that has
// |num_temporal_layers| temporal layers. Each layer count has a fixed cyclic
// pattern, so |frame_num| is reduced modulo the pattern length.
// Only 1, 2 and 3 temporal layers are handled; anything else is NOTREACHED().
FrameConfig GetFrameConfig(size_t num_temporal_layers, size_t frame_num) {
  switch (num_temporal_layers) {
    case 1:
      // With a single temporal layer, the number of spatial layers must be
      // greater than 1. Every frame is TL0 and both references and updates
      // the 'first' buffer.
      // [TL0]---[TL0]
      return FrameConfig(/*layer_index=*/0, kReferenceAndUpdate, kNone,
                         /*temporal_up_switch=*/true);
    case 2: {
      // TL0 references and updates the 'first' buffer.
      // TL1 only references the 'first' buffer.
      //      [TL1]
      //     /
      // [TL0]-----[TL0]
      constexpr FrameConfig kTwoLayerCycle[] = {
          FrameConfig(0, kReferenceAndUpdate, kNone, true),
          FrameConfig(1, kReference, kNone, true),
      };
      const size_t position = frame_num % std::size(kTwoLayerCycle);
      return kTwoLayerCycle[position];
    }
    case 3: {
      // TL0 references and updates the 'first' buffer.
      // TL1 references 'first' and updates 'second'.
      // TL2 references either the 'first' or the 'second' buffer.
      //         [TL2]      [TL2]
      //       _/   [TL1]--/
      //      /_______/
      // [TL0]--------------[TL0]
      constexpr FrameConfig kThreeLayerCycle[] = {
          FrameConfig(0, kReferenceAndUpdate, kNone, true),
          FrameConfig(2, kReference, kNone, true),
          FrameConfig(1, kReference, kUpdate, true),
          FrameConfig(2, kNone, kReference, false),
      };
      const size_t position = frame_num % std::size(kThreeLayerCycle);
      return kThreeLayerCycle[position];
    }
    default:
      NOTREACHED();
  }
}
// Returns true iff the set of (spatial, temporal) layers with a non-zero
// bitrate is exactly the rectangle
//   [begin_active_spatial_layer, end_active_spatial_layer) x
//   [0, num_temporal_layers).
// A layer inside the rectangle with zero bitrate, or a layer outside of it
// with a non-zero bitrate, makes the allocation invalid.
bool ValidateBitrates(const VideoBitrateAllocation& bitrate_allocation,
                      size_t begin_active_spatial_layer,
                      size_t end_active_spatial_layer,
                      size_t num_temporal_layers) {
  for (size_t sid = 0; sid < VideoBitrateAllocation::kMaxSpatialLayers; ++sid) {
    const bool spatial_in_range = sid >= begin_active_spatial_layer &&
                                  sid < end_active_spatial_layer;
    for (size_t tid = 0; tid < VideoBitrateAllocation::kMaxTemporalLayers;
         ++tid) {
      const bool has_bitrate = bitrate_allocation.GetBitrateBps(sid, tid) > 0;
      const bool should_have_bitrate =
          spatial_in_range && tid < num_temporal_layers;
      if (has_bitrate == should_have_bitrate) {
        continue;
      }
      DVLOG(1) << "Invalid bitrate, sid=" << sid << ", tid=" << tid
               << " : bitrate_allocation=" << bitrate_allocation.ToString();
      return false;
    }
  }
  return true;
}
// Derives the active layer layout from |bitrate_allocation| and validates it.
// On return:
// |begin_active_spatial_layer| - the lowest active spatial layer index.
// |end_active_spatial_layer| - the last active spatial layer index + 1.
// |num_temporal_layers| - the number of temporal layers.
//
// The active spatial layers don't have to start with the bottom one, but the
// active temporal layers must. In other words, if the spatial layer
// spatial_index is active, GetBitrateBps(spatial_index, 0) must not be zero.
// Returns false if the VideoBitrateAllocation is invalid. The output
// parameters are left untouched when the total bitrate is zero.
bool ValidateAndGetActiveLayers(
    const VideoBitrateAllocation& bitrate_allocation,
    size_t& begin_active_spatial_layer,
    size_t& end_active_spatial_layer,
    size_t& num_temporal_layers) {
  if (bitrate_allocation.GetSumBps() == 0) {
    DVLOG(1) << "No active bitrate: bitrate_allocation="
             << bitrate_allocation.ToString();
    return false;
  }

  begin_active_spatial_layer = 0;
  end_active_spatial_layer = 0;
  num_temporal_layers = 0;

  // Lowest spatial layer whose base temporal layer has a bitrate.
  for (size_t sid = 0; sid < VideoBitrateAllocation::kMaxSpatialLayers; ++sid) {
    if (bitrate_allocation.GetBitrateBps(sid, 0) == 0) {
      continue;
    }
    begin_active_spatial_layer = sid;
    break;
  }

  // Highest spatial layer (searching downwards, not below the first active
  // one) whose base temporal layer has a bitrate. |end| is one past the layer
  // being inspected, so it can be stored directly as the exclusive bound.
  for (size_t end = VideoBitrateAllocation::kMaxSpatialLayers;
       end > begin_active_spatial_layer; --end) {
    if (bitrate_allocation.GetBitrateBps(end - 1, 0) != 0) {
      end_active_spatial_layer = end;
      break;
    }
  }
  if (end_active_spatial_layer == 0) {
    DVLOG(1) << "Invalid bitrate: bitrate_allocation="
             << bitrate_allocation.ToString();
    return false;
  }

  // This assumes the number of temporal layers are the same in all the spatial
  // layers. This will not be satisfied if we support a mix of hw/sw encoders.
  // See the discussion:
  // https://chromium-review.googlesource.com/c/chromium/src/+/5040171/2/media/base/video_bitrate_allocation.cc#200
  for (size_t count = VideoBitrateAllocation::kMaxTemporalLayers; count > 0;
       --count) {
    if (bitrate_allocation.GetBitrateBps(begin_active_spatial_layer,
                                         count - 1) != 0) {
      num_temporal_layers = count;
      break;
    }
  }

  return ValidateBitrates(bitrate_allocation, begin_active_spatial_layer,
                          end_active_spatial_layer, num_temporal_layers);
}
} // namespace
// Builds a Config from the full list of spatial layer resolutions and the
// half-open range [begin_active_layer, end_active_layer) of active layers.
// |active_spatial_layer_resolutions| is initialized with the elements of
// |spatial_layer_resolutions| that fall inside that range.
// NOTE(review): the iterator arithmetic below is not bounds-checked here; it
// presumably relies on callers guaranteeing
// begin_active_layer <= end_active_layer <= spatial_layer_resolutions.size()
// - confirm.
SVCLayers::Config::Config(
const std::vector<gfx::Size>& spatial_layer_resolutions,
size_t begin_active_layer,
size_t end_active_layer,
size_t num_temporal_layers,
SVCInterLayerPredMode inter_layer_pred)
// The parameters share their names with the members; inside the initializer
// expressions the names refer to the constructor parameters.
: spatial_layer_resolutions(spatial_layer_resolutions),
begin_active_layer(begin_active_layer),
end_active_layer(end_active_layer),
num_temporal_layers(num_temporal_layers),
active_spatial_layer_resolutions(
spatial_layer_resolutions.begin() + begin_active_layer,
spatial_layer_resolutions.begin() + end_active_layer),
inter_layer_pred(inter_layer_pred) {}
// Config uses the compiler-generated destructor and copy constructor.
SVCLayers::Config::~Config() = default;
SVCLayers::Config::Config(const Config&) = default;
// PictureParam uses the compiler-generated default constructor, destructor
// and copy constructor.
SVCLayers::PictureParam::PictureParam() = default;
SVCLayers::PictureParam::~PictureParam() = default;
SVCLayers::PictureParam::PictureParam(const PictureParam&) = default;
// Initializes |config_| from |config|.
SVCLayers::SVCLayers(const Config& config) : config_(config) {}
// Inspects |bitrate_allocation| and decides whether a new SVCLayers instance
// is required.
//
// Returns:
//   {false, nullopt}   - the allocation is invalid or asks for spatial layers
//                        beyond the initial configuration.
//   {true, nullopt}    - the allocation is valid and the active layer layout
//                        matches the current configuration.
//   {true, SVCLayers}  - the allocation is valid and the active spatial range
//                        or the temporal layer count changed; the returned
//                        instance is configured for the new layout.
//
// Side effect: when the lowest active spatial layer is not 0,
// |bitrate_allocation| is rewritten in place so that the active spatial layers
// start at index 0.
std::pair<bool, std::optional<std::unique_ptr<SVCLayers>>>
SVCLayers::RecreateSVCLayersIfNeeded(
    VideoBitrateAllocation& bitrate_allocation) {
  size_t first_active = 0;
  size_t end_active = 0;
  size_t temporal_count = 0;
  const bool allocation_is_valid = ValidateAndGetActiveLayers(
      bitrate_allocation, first_active, end_active, temporal_count);
  if (!allocation_is_valid) {
    // Invalid active layer.
    // See ValidateAndGetActiveLayers() comment for detail.
    return std::make_pair(false, std::nullopt);
  }

  const auto& current = config();
  const size_t configured_layers = current.spatial_layer_resolutions.size();
  if (end_active > configured_layers ||
      end_active - first_active > configured_layers) {
    DVLOG(1) << "Requested spatial layer exceeds the initial spatial layer "
             << "configuration: " << bitrate_allocation.ToString();
    return std::make_pair(false, std::nullopt);
  }

  // Shift the bitrates down so that the active spatial layers start with 0.
  // This is necessary for the software rate controller. The guard matters:
  // with no shift, source and destination would be the same entry and the
  // bitrate would be zeroed.
  if (first_active > 0) {
    for (size_t src_sid = first_active; src_sid < end_active; ++src_sid) {
      const size_t dst_sid = src_sid - first_active;
      for (size_t tid = 0; tid < temporal_count; ++tid) {
        const uint32_t bitrate = bitrate_allocation.GetBitrateBps(src_sid, tid);
        CHECK_NE(bitrate, 0u);
        bitrate_allocation.SetBitrate(dst_sid, tid, bitrate);
        bitrate_allocation.SetBitrate(src_sid, tid, 0u);
      }
    }
  }

  const bool layout_unchanged =
      current.begin_active_layer == first_active &&
      current.end_active_layer == end_active &&
      current.num_temporal_layers == temporal_count;
  if (layout_unchanged) {
    return std::make_pair(true, std::nullopt);
  }

  // Changing only the number of temporal layers doesn't strictly require a
  // keyframe, but a fresh SVCLayers (and therefore a keyframe) is produced in
  // that case too to keep the code simple, assuming such updates are rare.
  // If this is not true, we should avoid producing keyframe in this case.
  const SVCLayers::Config new_config(current.spatial_layer_resolutions,
                                     first_active, end_active, temporal_count,
                                     current.inter_layer_pred);
  std::optional<std::unique_ptr<SVCLayers>> recreated =
      std::make_unique<SVCLayers>(new_config);
  return std::make_pair(true, std::move(recreated));
}
// Restarts the frame sequence: the frame counter and the per-slot record of
// the frame number that last refreshed each slot are set back to zero.
// Must only be called when |spatial_idx_| is 0.
void SVCLayers::Reset() {
  CHECK_EQ(spatial_idx_, 0u);
  frame_num_ref_frames_.fill(0);
  frame_num_ = 0;
}
// Updates the bookkeeping after one spatial layer has been encoded.
// Bit i of |refresh_frame_flags| being set means that reference slot i was
// refreshed, so the current frame number is recorded for that slot. The
// spatial layer index then advances; once all active spatial layers are done
// it wraps to 0 and the frame number is incremented.
void SVCLayers::PostEncode(uint8_t refresh_frame_flags) {
  for (size_t slot = 0; slot < kVp9NumRefFrames; ++slot) {
    const bool slot_refreshed = ((refresh_frame_flags >> slot) & 1) != 0;
    if (slot_refreshed) {
      frame_num_ref_frames_[slot] = frame_num_;
    }
  }

  ++spatial_idx_;
  if (spatial_idx_ != config_.active_spatial_layer_resolutions.size()) {
    return;
  }
  // Last spatial layer of this picture: move on to the next frame.
  spatial_idx_ = 0;
  ++frame_num_;
}
// Tells whether the layer about to be encoded is a keyframe.
// Only layers of the first frame (|frame_num_| == 0) can be keyframes. With
// kOnKeyPic only the bottom spatial layer is one; otherwise (single spatial
// layer, or inter-layer prediction off) every spatial layer is.
bool SVCLayers::IsKeyFrame() const {
  const bool in_first_frame = frame_num_ == 0;
  if (!in_first_frame) {
    return false;
  }

  const bool on_key_pic =
      config_.inter_layer_pred == SVCInterLayerPredMode::kOnKeyPic;
  if (on_key_pic) {
    return spatial_idx_ == 0;
  }

  CHECK(config_.active_spatial_layer_resolutions.size() == 1 ||
        config_.inter_layer_pred == SVCInterLayerPredMode::kOff);
  return true;
}
// Fills |picture_param| and the object pointed to by |metadata| for the
// spatial layer that is about to be encoded. The frame size is taken from the
// active resolution of the current spatial layer; the remaining fields are
// produced by the first-frame or non-first-frame helper depending on
// |frame_num_|.
void SVCLayers::GetPictureParamAndMetadata(
    PictureParam& picture_param,
    absl::variant<Vp9Metadata*, SVCGenericMetadata*> metadata) const {
  picture_param.frame_size =
      config_.active_spatial_layer_resolutions[spatial_idx_];

  // |SVCLayers| follows the WebRTC SVC spec. so we don't use
  // |svc_metadata.reference_flags| and |svc_metadata.refresh_flags|.
  if (auto* generic = absl::get_if<SVCGenericMetadata*>(&metadata)) {
    (*generic)->follow_svc_spec = true;
  }

  if (frame_num_ != 0) {
    picture_param.key_frame = false;
    FillMetadataForNonFirstFrame(metadata, picture_param.refresh_frame_flags,
                                 picture_param.reference_frame_indices);
  } else {
    // The helper decides whether this layer of the first frame is a keyframe.
    FillMetadataForFirstFrame(metadata, picture_param.key_frame,
                              picture_param.refresh_frame_flags,
                              picture_param.reference_frame_indices);
  }
}
// Computes the keyframe flag, refresh mask, reference list and metadata for a
// spatial layer of the very first frame (|frame_num_| must be 0).
//
// Taking L3Tx as example, the refreshed slots and |reference_frame_indices|
// are as follows.
// kOnKeyPic      | refreshed slots          | reference_frame_indices |
// L0 (keyframe)  | {0, 1, 2, 3, 4, 5, 6, 7} | {}                      |
// L1             | {2}                      | {0}                     |
// L2             | {4}                      | {2}                     |
//
// kOff
// L0 (keyframe)  | {0, 1, 2, 3, 4, 5, 6, 7} | {}                      |
// L1 (keyframe)  | {2}                      | {}                      |
// L2 (keyframe)  | {4}                      | {}                      |
void SVCLayers::FillMetadataForFirstFrame(
    absl::variant<Vp9Metadata*, SVCGenericMetadata*> metadata,
    bool& key_frame,
    uint8_t& refresh_frame_flags,
    std::vector<uint8_t>& reference_frame_indices) const {
  CHECK_EQ(frame_num_, 0u);

  const bool on_key_pic =
      config_.inter_layer_pred == SVCInterLayerPredMode::kOnKeyPic;

  reference_frame_indices.clear();
  if (spatial_idx_ == 0) {
    // The bottom layer is always a keyframe and refreshes every slot.
    key_frame = true;
    refresh_frame_flags = 0xff;
  } else {
    key_frame = config_.inter_layer_pred == SVCInterLayerPredMode::kOff;
    // Upper layers refresh only the first slot of their own pair.
    refresh_frame_flags =
        1 << (spatial_idx_ * kMaxNumUsedRefFramesEachSpatialLayer);
    if (on_key_pic) {
      // Predict from the first slot of the spatial layer right below.
      reference_frame_indices.push_back(base::checked_cast<uint8_t>(
          (spatial_idx_ - 1) * kMaxNumUsedRefFramesEachSpatialLayer));
    }
  }

  if (auto* generic = absl::get_if<SVCGenericMetadata*>(&metadata)) {
    (*generic)->temporal_idx = 0;
    (*generic)->spatial_idx = spatial_idx_;
    return;
  }

  CHECK(absl::holds_alternative<Vp9Metadata*>(metadata));
  Vp9Metadata* const vp9 = absl::get<Vp9Metadata*>(metadata);
  const bool is_top_layer =
      spatial_idx_ == config_.active_spatial_layer_resolutions.size() - 1;

  // Since this is the first frame, there is no reference frame in the same
  // spatial layer.
  vp9->inter_pic_predicted = false;
  // The first frame is TL0 and references no frame.
  vp9->temporal_up_switch = true;
  vp9->end_of_picture = is_top_layer;
  // Inter-layer references exist only in kOnKeyPic mode: every layer but the
  // top one is referenced from above, every layer but the bottom one
  // references the layer below.
  vp9->referenced_by_upper_spatial_layers = on_key_pic && !is_top_layer;
  vp9->reference_lower_spatial_layers = on_key_pic && spatial_idx_ != 0;
  vp9->temporal_idx = 0;
  vp9->spatial_idx = spatial_idx_;

  if (!key_frame) {
    return;
  }
  // Keyframes additionally carry the active spatial layer layout.
  vp9->spatial_layer_resolutions = config_.active_spatial_layer_resolutions;
  vp9->begin_active_spatial_layer_index =
      base::checked_cast<uint8_t>(config_.begin_active_layer);
  vp9->end_active_spatial_layer_index =
      base::checked_cast<uint8_t>(config_.end_active_layer);
}
// Computes the refresh mask, reference list and metadata for a spatial layer
// of any frame after the first one (|frame_num_| must not be 0). The buffer
// usage comes from the temporal layer pattern entry selected by
// |config_.num_temporal_layers| and |frame_num_|.
void SVCLayers::FillMetadataForNonFirstFrame(
    absl::variant<Vp9Metadata*, SVCGenericMetadata*> metadata,
    uint8_t& refresh_frame_flags,
    std::vector<uint8_t>& reference_frame_indices) const {
  CHECK_NE(frame_num_, 0u);

  const FrameConfig pattern =
      GetFrameConfig(config_.num_temporal_layers, frame_num_);

  // Turn the list of refreshed slots into a bit mask (bit i <=> slot i).
  uint8_t refresh_mask = 0;
  for (const uint8_t slot : pattern.GetRefreshIndices(spatial_idx_)) {
    refresh_mask |= 1 << slot;
  }
  refresh_frame_flags = refresh_mask;
  reference_frame_indices = pattern.GetRefFrameIndices(spatial_idx_);

  if (auto* generic = absl::get_if<SVCGenericMetadata*>(&metadata)) {
    (*generic)->temporal_idx = pattern.layer_index();
    (*generic)->spatial_idx = spatial_idx_;
    return;
  }

  CHECK(absl::holds_alternative<Vp9Metadata*>(metadata));
  Vp9Metadata* const vp9 = absl::get<Vp9Metadata*>(metadata);

  vp9->inter_pic_predicted = !reference_frame_indices.empty();
  vp9->temporal_up_switch = pattern.temporal_up_switch();
  // No reference between spatial layers in kOnKeyPic (frame_num!=0) and kOff.
  vp9->referenced_by_upper_spatial_layers = false;
  vp9->reference_lower_spatial_layers = false;
  vp9->end_of_picture =
      spatial_idx_ == config_.active_spatial_layer_resolutions.size() - 1;
  vp9->temporal_idx = pattern.layer_index();
  vp9->spatial_idx = spatial_idx_;

  // For each referenced slot, append the distance in frames between the
  // current frame and the frame that last refreshed that slot.
  for (const uint8_t slot : reference_frame_indices) {
    const uint8_t distance =
        base::checked_cast<uint8_t>(frame_num_ - frame_num_ref_frames_[slot]);
    vp9->p_diffs.push_back(distance);
  }
}
} // namespace media