1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191

// media/muxers/mp4_muxer_delegate.h

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_MUXERS_MP4_MUXER_DELEGATE_H_
#define MEDIA_MUXERS_MP4_MUXER_DELEGATE_H_

#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "base/sequence_checker.h"
#include "base/thread_annotations.h"
#include "base/time/time.h"
#include "media/base/audio_encoder.h"
#include "media/base/video_encoder.h"
#include "media/formats/mp4/box_definitions.h"
#include "media/formats/mp4/writable_box_definitions.h"
#include "media/muxers/mp4_muxer_context.h"
#include "media/muxers/muxer.h"

namespace media {

// Forward declarations of types that are named by the declarations below.
class AudioParameters;
class Mp4MuxerDelegateFragment;
// NOTE(review): this declares an unscoped enum without a fixed underlying
// type, which ISO C++ does not permit as a stand-alone (opaque) declaration.
// `Mp4MuxerDelegate` below holds a `std::optional<VideoCodecProfile>` data
// member, which needs the complete enum, so the definition presumably already
// arrives through one of the includes above and this line is redundant --
// TODO confirm before removing.
enum VideoCodecProfile;

// Only declared under the same build-flag condition that guards the
// `ConvertNALUData` method and the `h26x_converter_` member of
// `Mp4MuxerDelegate`.
#if BUILDFLAG(USE_PROPRIETARY_CODECS) || \
    BUILDFLAG(ENABLE_HEVC_PARSER_AND_HW_DECODER)
class H26xAnnexBToBitstreamConverter;
#endif

// Abstract interface for an MP4 muxer delegate: it accepts encoded video and
// audio frames and exposes two flush operations. All four operations are pure
// virtual; `Mp4MuxerDelegate`, declared later in this header, overrides them.
class Mp4MuxerDelegateInterface {
 public:
  // Virtual so that an implementation can be destroyed through a pointer to
  // this interface.
  virtual ~Mp4MuxerDelegateInterface() = default;

  // Hands one encoded video frame to the delegate.
  //   params:            the `Muxer::VideoParameters` accompanying the frame.
  //   encoded_data:      ref-counted `DecoderBuffer` holding the encoded data.
  //   codec_description: optional video codec description; may be absent.
  //   timestamp:         `base::TimeTicks` value supplied with the frame.
  // NOTE(review): what the timestamp denotes (capture vs. presentation time)
  // is not visible from this header -- confirm against the callers.
  virtual void AddVideoFrame(
      const Muxer::VideoParameters& params,
      scoped_refptr<DecoderBuffer> encoded_data,
      std::optional<VideoEncoder::CodecDescription> codec_description,
      base::TimeTicks timestamp) = 0;

  // Hands one encoded audio frame to the delegate. The parameters mirror
  // `AddVideoFrame`, with `AudioParameters` and an optional audio codec
  // description in place of their video counterparts.
  virtual void AddAudioFrame(
      const AudioParameters& params,
      scoped_refptr<DecoderBuffer> encoded_data,
      std::optional<AudioEncoder::CodecDescription> codec_description,
      base::TimeTicks timestamp) = 0;

  // NOTE(review): presumably returns true on success -- the meaning of the
  // return value is not visible here; confirm against the implementation.
  virtual bool Flush() = 0;

  // NOTE(review): how this differs from `Flush` (presumably it flushes only
  // the current fragment) is not visible here -- confirm against the
  // implementation.
  virtual bool FlushFragment() = 0;
};

// Mp4MuxerDelegate builds the MP4 boxes from the encoded stream.
// The box fields start to be populated from the first stream and are
// completed in the `Flush` API call. The created box data is a complete
// MP4 format, and the internal data is cleared at the end of `Flush`.
class MEDIA_EXPORT Mp4MuxerDelegate : public Mp4MuxerDelegateInterface {
 public:
  // `audio_sample_count_per_fragment` defaults to `kAudioFragmentCount`
  // (declared in the private section below).
  // NOTE(review): how the codec, profile, level and
  // `add_parameter_sets_in_bitstream` arguments are used is defined in the
  // .cc file, which is not visible from this header.
  Mp4MuxerDelegate(
      AudioCodec audio_codec,
      VideoCodec video_codec,
      std::optional<VideoCodecProfile> video_profile,
      std::optional<VideoCodecLevel> video_level,
      bool add_parameter_sets_in_bitstream,
      Muxer::WriteDataCB write_callback,
      size_t audio_sample_count_per_fragment = kAudioFragmentCount);
  ~Mp4MuxerDelegate() override;
  // Not copyable.
  Mp4MuxerDelegate(const Mp4MuxerDelegate&) = delete;
  Mp4MuxerDelegate& operator=(const Mp4MuxerDelegate&) = delete;

  // Mp4MuxerDelegateInterface overrides.
  void AddVideoFrame(
      const Muxer::VideoParameters& params,
      scoped_refptr<DecoderBuffer> encoded_data,
      std::optional<VideoEncoder::CodecDescription> codec_description,
      base::TimeTicks timestamp) override;

  void AddAudioFrame(
      const AudioParameters& params,
      scoped_refptr<DecoderBuffer> encoded_data,
      std::optional<AudioEncoder::CodecDescription> codec_description,
      base::TimeTicks timestamp) override;
  // Writes the big-endian ISO-BMFF boxes and calls `write_callback`.
  bool Flush() override;
  bool FlushFragment() override;

 private:
  // Box-building helpers.
  // NOTE(review): their bodies live in the .cc file; the names suggest that
  // each one populates the corresponding MP4 box -- confirm there.
  void BuildFileTypeBox(mp4::writable_boxes::FileType& mp4_file_type_box);
  void BuildMovieBox();
  void BuildVideoTrackFragmentRandomAccess(
      base::TimeTicks start_timestamp,
      mp4::writable_boxes::TrackFragmentRandomAccess&
          fragment_random_access_box_writer,
      size_t written_offset);

  // Per-track helpers; the video and audio variants have parallel signatures.
  void BuildMovieVideoTrack(
      const Muxer::VideoParameters& params,
      const DecoderBuffer& encoded_data,
      std::optional<VideoEncoder::CodecDescription> codec_description);
  void AddDataToVideoFragment(scoped_refptr<DecoderBuffer> encoded_data);
  void BuildMovieAudioTrack(
      const AudioParameters& params,
      const DecoderBuffer& encoded_data,
      std::optional<AudioEncoder::CodecDescription> codec_description);
  void AddDataToAudioFragment(scoped_refptr<DecoderBuffer> encoded_data);

  void AddLastSampleTimestamp(int track_index, base::TimeDelta inverse_of_rate);
  int GetNextTrackIndex();
  void CreateFragmentIfNeeded(bool audio, bool is_key_frame);
  void EnsureInitialized();
  void LogBoxInfo() const;

  // `MaybeFlushFileTypeBoxForStartup` is called to write the file type box
  // when the first frame is added, which causes the `onstart` event to be
  // fired. It returns the size of the file type box.
  size_t MaybeFlushFileTypeBoxForStartup();
  size_t MaybeFlushMoovBox();
  void MaybeFlushMoofAndMfraBoxes(size_t written_offset);
  size_t GetAudioOnlyFragmentCount() const;

  // Only available under the same build-flag condition as the
  // `h26x_converter_` member below.
#if BUILDFLAG(USE_PROPRIETARY_CODECS) || \
    BUILDFLAG(ENABLE_HEVC_PARSER_AND_HW_DECODER)
  scoped_refptr<DecoderBuffer> ConvertNALUData(
      scoped_refptr<DecoderBuffer> encoded_data);
#endif

  std::unique_ptr<Mp4MuxerContext> context_;
  // NOTE(review): this class declares two members of type
  // `Muxer::WriteDataCB` -- this one and `write_data_callback_` near the end
  // of the class. Confirm in the .cc file that both are needed.
  Muxer::WriteDataCB write_callback_;

  // An MP4 file has a single movie box and multiple fragment boxes.
  std::unique_ptr<mp4::writable_boxes::Movie> moov_;

  // Only a key video frame has `SPS` and `PPS`, and it is the signal for a
  // new fragment. On Windows, a key frame occurs every 100th frame.
  std::vector<std::unique_ptr<Mp4MuxerDelegateFragment>> fragments_;

  // The video and audio indexes are 0-based indexes of an item in the
  // container. The track id is this index value plus one.
  std::optional<size_t> video_track_index_;
  std::optional<size_t> audio_track_index_;

  int next_track_index_ = 0;

  // Start and last timestamps used for the duration time delta of the video
  // track.
  base::TimeTicks start_video_time_;
  base::TimeTicks last_video_time_;

  // Start and last timestamps used for the duration time delta of the audio
  // track.
  base::TimeTicks start_audio_time_;
  base::TimeTicks last_audio_time_;

  double video_frame_rate_ = 0;
  int audio_sample_rate_ = 0;

  // Flush for startup is only called once.
  std::optional<size_t> written_file_type_box_size_;

  std::optional<size_t> written_mov_box_size_;

  bool live_mode_ = false;

  uint32_t sequence_number_ = 1;

  AudioCodec audio_codec_ = AudioCodec::kUnknown;
  VideoCodec video_codec_ = VideoCodec::kUnknown;

  const std::optional<media::VideoCodecProfile> video_profile_;
  const std::optional<media::VideoCodecLevel> video_level_;

  const bool add_parameter_sets_in_bitstream_ = false;

  // 1000 is the number of audio samples placed in the same fragment when no
  // video frame is added. On Windows, when video frames are present, the
  // audio count per fragment is much lower than this. This constant is also
  // the default value of the constructor's `audio_sample_count_per_fragment`
  // parameter.
  static constexpr uint32_t kAudioFragmentCount = 1000u;

  const size_t audio_sample_count_per_fragment_;

#if BUILDFLAG(USE_PROPRIETARY_CODECS) || \
    BUILDFLAG(ENABLE_HEVC_PARSER_AND_HW_DECODER)
  std::unique_ptr<H26xAnnexBToBitstreamConverter> h26x_converter_;
#endif

  // Annotated as guarded by `sequence_checker_`.
  Muxer::WriteDataCB write_data_callback_ GUARDED_BY_CONTEXT(sequence_checker_);
  SEQUENCE_CHECKER(sequence_checker_);
};

}  // namespace media

#endif  // MEDIA_MUXERS_MP4_MUXER_DELEGATE_H_