Files
monibuca/plugin/mp4/pkg/track.go
2025-02-23 22:56:08 +08:00

395 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package mp4
import (
"slices"
. "m7s.live/v5/plugin/mp4/pkg/box"
)
type (
Track struct {
Cid MP4_CODEC_TYPE
TrackId uint32
SampleTable
Duration uint32
Height uint32
Width uint32
SampleRate uint32
SampleSize uint16
SampleCount uint32
ChannelCount uint8
Timescale uint32
// StartDts uint64
// EndDts uint64
// StartPts uint64
// EndPts uint64
Samplelist []Sample
ExtraData []byte
isFragment bool
fragments []Fragment
defaultSize uint32
defaultDuration uint32
// defaultSampleFlags uint32
// baseDataOffset uint64
// stbl []byte
// FragmentSequenceNumber uint32
//for subsample
// defaultIsProtected uint8
// defaultPerSampleIVSize uint8
// defaultCryptByteBlock uint8
// defaultSkipByteBlock uint8
// defaultConstantIV []byte
// defaultKID [16]byte
// lastSeig *SeigSampleGroupEntry
// lastSaiz *SaizBox
// subSamples []SencEntry
}
Fragment struct {
Offset uint64
Duration uint32
FirstTs uint64
LastTs uint64
}
)
func (track *Track) makeElstBox() *EditListBox {
delay := track.Samplelist[0].Timestamp * 1000 / uint32(track.Timescale)
entryCount := 1
version := byte(0)
boxSize := 12
entrySize := 12
if delay > 0xFFFFFFFF {
version = 1
entrySize = 20
}
// if delay > 0 {
// entryCount += 1
// }
boxSize += 4 + entrySize*entryCount
entrys := make([]ELSTEntry, entryCount)
// if entryCount > 1 {
// elst.entrys.entrys[0].segmentDuration = startCt
// elst.entrys.entrys[0].mediaTime = -1
// elst.entrys.entrys[0].mediaRateInteger = 0x0001
// elst.entrys.entrys[0].mediaRateFraction = 0
// }
//简单起见mediaTime先固定为0,即不延迟播放
entrys[entryCount-1].SegmentDuration = uint64(track.Duration)
entrys[entryCount-1].MediaTime = 0
entrys[entryCount-1].MediaRateInteger = 0x0001
entrys[entryCount-1].MediaRateFraction = 0
return CreateEditListBox(version, entrys)
}
func (track *Track) Seek(dts uint64) int {
for i, sample := range track.Samplelist {
if sample.Timestamp*1000/uint32(track.Timescale) < uint32(dts) {
continue
} else if track.Cid.IsVideo() {
if sample.KeyFrame {
return i
}
} else {
return i
}
}
return -1
}
func (track *Track) makeEdtsBox() *ContainerBox {
return CreateContainerBox(TypeEDTS, track.makeElstBox())
}
func (track *Track) AddSampleEntry(entry Sample) {
if len(track.Samplelist) < 1 {
track.Duration = 0
} else {
delta := int64(entry.Timestamp - track.Samplelist[len(track.Samplelist)-1].Timestamp)
track.Samplelist[len(track.Samplelist)-1].Duration = uint32(delta)
if delta < 0 {
track.Duration += 1
} else {
track.Duration += uint32(delta)
}
}
track.Samplelist = append(track.Samplelist, entry)
}
func (track *Track) makeTkhdBox() *TrackHeaderBox {
duration := uint64(track.Duration)
tkhd := CreateTrackHeaderBox(track.TrackId, duration, track.Width, track.Height)
if track.Cid == MP4_CODEC_AAC || track.Cid == MP4_CODEC_G711A || track.Cid == MP4_CODEC_G711U || track.Cid == MP4_CODEC_OPUS {
tkhd.Volume = 0x0100
} else {
tkhd.Width = track.Width << 16
tkhd.Height = track.Height << 16
}
return tkhd
}
func (track *Track) makeMinfBox() *ContainerBox {
var mhdbox IBox
switch track.Cid {
case MP4_CODEC_H264, MP4_CODEC_H265:
mhdbox = CreateVideoMediaHeaderBox()
case MP4_CODEC_G711A, MP4_CODEC_G711U, MP4_CODEC_AAC,
MP4_CODEC_MP2, MP4_CODEC_MP3, MP4_CODEC_OPUS:
mhdbox = CreateSoundMediaHeaderBox()
default:
panic("unsupport codec id")
}
dinfbox := CreateDataInformationBox()
stblbox := track.makeStblBox()
return CreateContainerBox(TypeMINF, mhdbox, dinfbox, stblbox)
}
func (track *Track) makeMdiaBox() *ContainerBox {
duration := uint64(track.Duration)
if track.isFragment {
duration = 0
}
mdhdbox := CreateMediaHeaderBox(track.Timescale, duration)
hdlrbox := MakeHdlrBox(GetHandlerType(track.Cid))
minfbox := track.makeMinfBox()
return CreateContainerBox(TypeMDIA, mdhdbox, hdlrbox, minfbox)
}
func (track *Track) makeStblBox() IBox {
track.STSD = track.makeStsd(GetHandlerType(track.Cid))
if !track.isFragment {
if track.Cid == MP4_CODEC_H264 || track.Cid == MP4_CODEC_H265 {
track.STSS = track.makeStssBox()
}
} else {
track.STTS = CreateSTTSBox(nil)
track.STSC = CreateSTSCBox(nil)
track.STSZ = CreateSTSZBox(0, nil)
track.STCO = CreateSTCOBox(nil)
}
return CreateContainerBox(TypeSTBL, track.STSD, track.STSS, track.STSZ, track.STSC, track.STTS, track.CTTS, track.STCO)
}
func (track *Track) makeStsd(handler_type HandlerType) *STSDBox {
var avbox IBox
if track.Cid == MP4_CODEC_H264 {
avbox = CreateDataBox(TypeAVCC, track.ExtraData)
} else if track.Cid == MP4_CODEC_H265 {
avbox = CreateDataBox(TypeHVCC, track.ExtraData)
} else if track.Cid == MP4_CODEC_AAC || track.Cid == MP4_CODEC_MP2 || track.Cid == MP4_CODEC_MP3 {
avbox = CreateESDSBox(uint16(track.TrackId), track.Cid, track.ExtraData)
} else if track.Cid == MP4_CODEC_OPUS {
avbox = CreateOpusSpecificBox(track.ExtraData)
}
var entry IBox
if handler_type == TypeVIDE {
entry = CreateVisualSampleEntry(GetCodecNameWithCodecId(track.Cid), uint16(track.Width), uint16(track.Height), avbox)
} else if handler_type == TypeSOUN {
entry = CreateAudioSampleEntry(GetCodecNameWithCodecId(track.Cid), uint16(track.ChannelCount), uint16(track.SampleSize), track.SampleRate, avbox)
}
return CreateSTSDBox(entry)
}
func (track *Track) MakeMoof(fragmentId uint32) *ContainerBox {
tfhd := track.makeTfhdBox(0)
tfdt := track.makeTfdtBox()
turnEntries := make([]TrunEntry, 0)
for _, sample := range track.Samplelist {
var sampleFlags uint32
if sample.KeyFrame {
sampleFlags = SAMPLE_FLAG_IS_LEADING | SAMPLE_FLAG_IS_DEPENDED_ON | SAMPLE_FLAG_DEPENDS_ON_NO
} else {
sampleFlags = SAMPLE_FLAG_DEPENDS_ON_YES
}
turnEntries = append(turnEntries, TrunEntry{
SampleSize: uint32(sample.Size),
SampleDuration: uint32(sample.Duration),
SampleCompositionTimeOffset: int32(sample.CTS),
SampleFlags: sampleFlags,
})
}
trun := CreateTrackRunBox(TR_FLAG_DATA_OFFSET|TR_FLAG_DATA_SAMPLE_DURATION|TR_FLAG_DATA_SAMPLE_SIZE|TR_FLAG_DATA_SAMPLE_FLAGS|TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME, turnEntries)
traf := CreateContainerBox(TypeTRAF, tfhd, tfdt, trun)
result := CreateContainerBox(TypeMOOF, CreateMovieFragmentHeaderBox(fragmentId), traf)
trun.DataOffset = int32(result.Size()) + int32(BasicBoxLen) // TODO: mdat large than 4GB
return result
}
func (track *Track) makeTfhdBox(moofOffset uint64) *TrackFragmentHeaderBox {
tfFlags := uint32(0)
// tfFlags |= TF_FLAG_DEFAULT_BASE_IS_MOOF
// tfFlags |= TF_FLAG_DEFAULT_SAMPLE_FLAGS_PRESENT
tfhd := CreateTrackFragmentHeaderBox(track.TrackId, tfFlags)
tfhd.BaseDataOffset = moofOffset
// Calculate default sample duration
if len(track.Samplelist) > 1 {
var totalDuration uint64 = 0
var count int = 0
for i := 1; i < len(track.Samplelist); i++ {
duration := track.Samplelist[i].Timestamp - track.Samplelist[i-1].Timestamp
if duration > 0 {
totalDuration += uint64(duration)
count++
}
}
if count > 0 {
tfhd.DefaultSampleDuration = uint32(totalDuration / uint64(count))
}
}
// Set default sample size
if len(track.Samplelist) > 0 {
tfhd.DefaultSampleSize = uint32(track.Samplelist[0].Size)
}
// Set default sample flags
if track.Cid.IsVideo() {
tfhd.DefaultSampleFlags = 16842752
} else {
tfhd.DefaultSampleFlags = 0
}
return tfhd
}
func (track *Track) makeTfdtBox() *TrackFragmentBaseMediaDecodeTimeBox {
return CreateTrackFragmentBaseMediaDecodeTimeBox(uint64(track.Samplelist[0].Timestamp))
}
func (track *Track) makeStssBox() *STSSBox {
var stss []uint32
for i, sample := range track.Samplelist {
if sample.KeyFrame {
stss = append(stss, uint32(i+1))
}
}
return CreateSTSSBox(stss)
}
func (track *Track) makeTfraBox() *TrackFragmentRandomAccessBox {
return CreateTrackFragmentRandomAccessBox(track.TrackId, slices.Collect(func(yield func(TFRAEntry) bool) {
for _, f := range track.fragments {
if !yield(TFRAEntry{
Time: f.FirstTs,
MoofOffset: f.Offset,
TrafNumber: uint32(1),
TrunNumber: uint32(1),
SampleNumber: uint32(1),
}) {
break
}
}
}))
}
func (track *Track) makeStblTable() {
sameSize := true
movchunks := make([]movchunk, 0)
ckn := uint32(0)
var stts []STTSEntry
var ctts []CTTSEntry
var stco []uint64
for i, sample := range track.Samplelist {
sttsEntry := STTSEntry{SampleCount: 1, SampleDelta: 1}
cttsEntry := CTTSEntry{SampleCount: 1, SampleOffset: uint32(sample.CTS)}
if i == len(track.Samplelist)-1 {
stts = append(stts, sttsEntry)
} else {
var delta uint32 = 1
if track.Samplelist[i+1].Timestamp >= sample.Timestamp {
delta = track.Samplelist[i+1].Timestamp - sample.Timestamp
}
if len(stts) > 0 && delta == uint32(stts[len(stts)-1].SampleDelta) {
stts[len(stts)-1].SampleCount++
} else {
sttsEntry.SampleDelta = uint32(delta)
stts = append(stts, sttsEntry)
}
}
if len(ctts) == 0 {
ctts = append(ctts, cttsEntry)
} else {
if ctts[len(ctts)-1].SampleOffset == cttsEntry.SampleOffset {
ctts[len(ctts)-1].SampleCount++
} else {
ctts = append(ctts, cttsEntry)
}
}
if sameSize && i < len(track.Samplelist)-1 && track.Samplelist[i+1].Size != track.Samplelist[i].Size {
sameSize = false
}
if i > 0 && sample.Offset == track.Samplelist[i-1].Offset+int64(track.Samplelist[i-1].Size) {
movchunks[ckn-1].samplenum++
} else {
ck := movchunk{chunknum: ckn, samplenum: 1, chunkoffset: uint64(sample.Offset)}
movchunks = append(movchunks, ck)
stco = append(stco, uint64(sample.Offset))
ckn++
}
}
var sampleSize uint32
var entrySizelist []uint32
if sameSize {
sampleSize = uint32(track.Samplelist[0].Size)
} else {
entrySizelist = make([]uint32, len(track.Samplelist))
for i := 0; i < len(track.Samplelist); i++ {
entrySizelist[i] = uint32(track.Samplelist[i].Size)
}
}
var stsc []STSCEntry
for i, chunk := range movchunks {
if i == 0 || chunk.samplenum != movchunks[i-1].samplenum {
stsc = append(stsc, STSCEntry{FirstChunk: chunk.chunknum + 1, SampleDescriptionIndex: 1, SamplesPerChunk: chunk.samplenum})
}
}
if track.Cid == MP4_CODEC_H264 || track.Cid == MP4_CODEC_H265 {
track.CTTS = CreateCTTSBox(ctts)
}
track.STTS = CreateSTTSBox(stts)
track.STSC = CreateSTSCBox(stsc)
track.STCO = CreateSTCOBox(stco)
track.STSZ = CreateSTSZBox(sampleSize, entrySizelist)
}
// func (track *Track) makeSidxBox(totalSidxSize uint32, refsize uint32) []byte {
// sidx := NewSegmentIndexBox()
// sidx.ReferenceID = track.TrackId
// sidx.TimeScale = track.Timescale
// sidx.EarliestPresentationTime = track.StartPts
// sidx.ReferenceCount = 1
// sidx.FirstOffset = 52 + uint64(totalSidxSize)
// entry := SidxEntry{
// ReferenceType: 0,
// ReferencedSize: refsize,
// SubsegmentDuration: 0,
// StartsWithSAP: 1,
// SAPType: 0,
// SAPDeltaTime: 0,
// }
// if len(track.Samplelist) > 0 {
// entry.SubsegmentDuration = uint32(track.Samplelist[len(track.Samplelist)-1].DTS) - uint32(track.StartDts)
// }
// sidx.Entrys = append(sidx.Entrys, entry)
// sidx.Box.Box.Size = sidx.Size()
// _, boxData := sidx.Encode()
// return boxData
// }