mirror of
https://github.com/langhuihui/monibuca.git
synced 2025-12-24 13:48:04 +08:00
507 lines
14 KiB
Go
507 lines
14 KiB
Go
package mp4
|
||
|
||
import (
|
||
"io"
|
||
|
||
. "m7s.live/v5/plugin/mp4/pkg/box"
|
||
)
|
||
|
||
type (
|
||
Track struct {
|
||
Cid MP4_CODEC_TYPE
|
||
TrackId uint32
|
||
SampleTable
|
||
Duration uint32
|
||
Height uint32
|
||
Width uint32
|
||
SampleRate uint32
|
||
SampleSize uint16
|
||
SampleCount uint32
|
||
ChannelCount uint8
|
||
Timescale uint32
|
||
StartDts uint64
|
||
EndDts uint64
|
||
StartPts uint64
|
||
EndPts uint64
|
||
Samplelist []Sample
|
||
ELST *EditListBox
|
||
ExtraData []byte
|
||
writer io.WriteSeeker
|
||
fragments []Fragment
|
||
defaultSize uint32
|
||
defaultDuration uint32
|
||
defaultSampleFlags uint32
|
||
baseDataOffset uint64
|
||
|
||
//for subsample
|
||
defaultIsProtected uint8
|
||
defaultPerSampleIVSize uint8
|
||
defaultCryptByteBlock uint8
|
||
defaultSkipByteBlock uint8
|
||
defaultConstantIV []byte
|
||
defaultKID [16]byte
|
||
lastSeig *SeigSampleGroupEntry
|
||
lastSaiz *SaizBox
|
||
subSamples []SencEntry
|
||
}
|
||
Fragment struct {
|
||
Offset uint64
|
||
Duration uint32
|
||
FirstDts uint64
|
||
FirstPts uint64
|
||
LastPts uint64
|
||
LastDts uint64
|
||
}
|
||
)
|
||
|
||
func (track *Track) makeElstBox() []byte {
|
||
delay := track.Samplelist[0].PTS * 1000 / uint64(track.Timescale)
|
||
entryCount := 1
|
||
version := byte(0)
|
||
boxSize := 12
|
||
entrySize := 12
|
||
if delay > 0xFFFFFFFF {
|
||
version = 1
|
||
entrySize = 20
|
||
}
|
||
// if delay > 0 {
|
||
// entryCount += 1
|
||
// }
|
||
boxSize += 4 + entrySize*entryCount
|
||
elst := NewEditListBox(version)
|
||
elst.Entrys = make([]ELSTEntry, entryCount)
|
||
// if entryCount > 1 {
|
||
// elst.entrys.entrys[0].segmentDuration = startCt
|
||
// elst.entrys.entrys[0].mediaTime = -1
|
||
// elst.entrys.entrys[0].mediaRateInteger = 0x0001
|
||
// elst.entrys.entrys[0].mediaRateFraction = 0
|
||
// }
|
||
|
||
//简单起见,mediaTime先固定为0,即不延迟播放
|
||
elst.Entrys[entryCount-1].SegmentDuration = uint64(track.Duration)
|
||
elst.Entrys[entryCount-1].MediaTime = 0
|
||
elst.Entrys[entryCount-1].MediaRateInteger = 0x0001
|
||
elst.Entrys[entryCount-1].MediaRateFraction = 0
|
||
|
||
_, boxdata := elst.Encode(boxSize)
|
||
return boxdata
|
||
|
||
}
|
||
|
||
func (track *Track) Seek(dts uint64) int {
|
||
for i, sample := range track.Samplelist {
|
||
if sample.DTS*1000/uint64(track.Timescale) < dts {
|
||
continue
|
||
} else if track.Cid.IsVideo() {
|
||
if sample.KeyFrame {
|
||
return i
|
||
}
|
||
} else {
|
||
return i
|
||
}
|
||
}
|
||
return -1
|
||
}
|
||
|
||
func (track *Track) makeEdtsBox() []byte {
|
||
elst := track.makeElstBox()
|
||
edts := BasicBox{Type: TypeEDTS, Size: 8 + uint64(len(elst))}
|
||
offset, edtsbox := edts.Encode()
|
||
copy(edtsbox[offset:], elst)
|
||
return edtsbox
|
||
}
|
||
|
||
func (track *Track) AddSampleEntry(entry Sample) {
|
||
if len(track.Samplelist) <= 1 {
|
||
track.Duration = 0
|
||
} else {
|
||
delta := int64(entry.DTS - track.Samplelist[len(track.Samplelist)-1].DTS)
|
||
if delta < 0 {
|
||
track.Duration += 1
|
||
} else {
|
||
track.Duration += uint32(delta)
|
||
}
|
||
}
|
||
track.Samplelist = append(track.Samplelist, entry)
|
||
}
|
||
|
||
func (track *Track) makeTkhdBox() []byte {
|
||
tkhd := NewTrackHeaderBox()
|
||
tkhd.Duration = uint64(track.Duration)
|
||
tkhd.Track_ID = track.TrackId
|
||
if track.Cid == MP4_CODEC_AAC || track.Cid == MP4_CODEC_G711A || track.Cid == MP4_CODEC_G711U || track.Cid == MP4_CODEC_OPUS {
|
||
tkhd.Volume = 0x0100
|
||
} else {
|
||
tkhd.Width = track.Width << 16
|
||
tkhd.Height = track.Height << 16
|
||
}
|
||
_, tkhdbox := tkhd.Encode()
|
||
return tkhdbox
|
||
}
|
||
|
||
func (track *Track) makeMinfBox() []byte {
|
||
var mhdbox []byte
|
||
switch track.Cid {
|
||
case MP4_CODEC_H264, MP4_CODEC_H265:
|
||
mhdbox = MakeVmhdBox()
|
||
case MP4_CODEC_G711A, MP4_CODEC_G711U, MP4_CODEC_AAC,
|
||
MP4_CODEC_MP2, MP4_CODEC_MP3, MP4_CODEC_OPUS:
|
||
mhdbox = MakeSmhdBox()
|
||
default:
|
||
panic("unsupport codec id")
|
||
}
|
||
dinfbox := MakeDefaultDinfBox()
|
||
stblbox := track.makeStblBox()
|
||
|
||
minf := BasicBox{Type: TypeMINF, Size: 8 + uint64(len(mhdbox)+len(dinfbox)+len(stblbox))}
|
||
offset, minfbox := minf.Encode()
|
||
copy(minfbox[offset:], mhdbox)
|
||
offset += len(mhdbox)
|
||
copy(minfbox[offset:], dinfbox)
|
||
offset += len(dinfbox)
|
||
copy(minfbox[offset:], stblbox)
|
||
offset += len(stblbox)
|
||
return minfbox
|
||
}
|
||
|
||
func (track *Track) makeMdiaBox() []byte {
|
||
mdhdbox := MakeMdhdBox(track.Duration)
|
||
hdlrbox := MakeHdlrBox(GetHandlerType(track.Cid))
|
||
minfbox := track.makeMinfBox()
|
||
mdia := BasicBox{Type: TypeMDIA, Size: 8 + uint64(len(mdhdbox)+len(hdlrbox)+len(minfbox))}
|
||
offset, mdiabox := mdia.Encode()
|
||
copy(mdiabox[offset:], mdhdbox)
|
||
offset += len(mdhdbox)
|
||
copy(mdiabox[offset:], hdlrbox)
|
||
offset += len(hdlrbox)
|
||
copy(mdiabox[offset:], minfbox)
|
||
offset += len(minfbox)
|
||
return mdiabox
|
||
}
|
||
|
||
func (track *Track) makeStblBox() []byte {
|
||
var stsdbox, sttsbox, cttsbox, stscbox, stszbox, stcobox, stssbox []byte
|
||
stsdbox = track.makeStsd(GetHandlerType(track.Cid))
|
||
if track.SampleTable.STTS != nil {
|
||
_, sttsbox = track.SampleTable.STTS.Encode()
|
||
}
|
||
if track.SampleTable.CTTS != nil {
|
||
_, cttsbox = track.SampleTable.CTTS.Encode()
|
||
}
|
||
if track.SampleTable.STSC != nil {
|
||
_, stscbox = track.SampleTable.STSC.Encode()
|
||
}
|
||
if track.SampleTable.STSZ != nil {
|
||
_, stszbox = track.SampleTable.STSZ.Encode()
|
||
}
|
||
if track.SampleTable.STCO != nil {
|
||
_, stcobox = track.SampleTable.STCO.Encode()
|
||
}
|
||
if track.Cid == MP4_CODEC_H264 || track.Cid == MP4_CODEC_H265 {
|
||
stssbox = track.makeStssBox()
|
||
}
|
||
|
||
stbl := BasicBox{Type: TypeSTBL, Size: uint64(8 + len(stsdbox) + len(sttsbox) + len(cttsbox) + len(stscbox) + len(stszbox) + len(stcobox) + len(stssbox))}
|
||
offset, stblbox := stbl.Encode()
|
||
copy(stblbox[offset:], stsdbox)
|
||
offset += len(stsdbox)
|
||
copy(stblbox[offset:], sttsbox)
|
||
offset += len(sttsbox)
|
||
copy(stblbox[offset:], cttsbox)
|
||
offset += len(cttsbox)
|
||
copy(stblbox[offset:], stscbox)
|
||
offset += len(stscbox)
|
||
copy(stblbox[offset:], stszbox)
|
||
offset += len(stszbox)
|
||
copy(stblbox[offset:], stcobox)
|
||
offset += len(stcobox)
|
||
copy(stblbox[offset:], stssbox)
|
||
offset += len(stssbox)
|
||
return stblbox
|
||
}
|
||
|
||
func (track *Track) makeStsd(handler_type HandlerType) []byte {
|
||
var avbox []byte
|
||
if track.Cid == MP4_CODEC_H264 {
|
||
avbox = MakeAvcCBox(track.ExtraData)
|
||
} else if track.Cid == MP4_CODEC_H265 {
|
||
avbox = MakeHvcCBox(track.ExtraData)
|
||
} else if track.Cid == MP4_CODEC_AAC || track.Cid == MP4_CODEC_MP2 || track.Cid == MP4_CODEC_MP3 {
|
||
avbox = MakeEsdsBox(track.TrackId, track.Cid, track.ExtraData)
|
||
} else if track.Cid == MP4_CODEC_OPUS {
|
||
avbox = MakeOpusSpecificBox(track.ExtraData)
|
||
}
|
||
|
||
var se []byte
|
||
var offset int
|
||
if handler_type == TypeVIDE {
|
||
entry := NewVisualSampleEntry(GetCodecNameWithCodecId(track.Cid))
|
||
entry.Width = uint16(track.Width)
|
||
entry.Height = uint16(track.Height)
|
||
offset, se = entry.Encode(entry.Size() + uint64(len(avbox)))
|
||
} else if handler_type == TypeSOUN {
|
||
entry := NewAudioSampleEntry(GetCodecNameWithCodecId(track.Cid))
|
||
entry.ChannelCount = uint16(track.ChannelCount)
|
||
entry.Samplerate = track.SampleRate
|
||
entry.SampleSize = track.SampleSize
|
||
offset, se = entry.Encode(entry.Size() + uint64(len(avbox)))
|
||
}
|
||
copy(se[offset:], avbox)
|
||
|
||
var stsd SampleDescriptionBox = 1
|
||
offset2, stsdbox := stsd.Encode(FullBoxLen + 4 + uint64(len(se)))
|
||
copy(stsdbox[offset2:], se)
|
||
return stsdbox
|
||
}
|
||
|
||
// fmp4
|
||
func (track *Track) makeTraf(moofOffset int64, moofSize int64) []byte {
|
||
tfhd := track.makeTfhdBox(uint64(moofOffset))
|
||
tfdt := track.makeTfdtBox()
|
||
trun := track.makeTrunBoxes(moofSize)
|
||
|
||
traf := BasicBox{Type: TypeTRAF, Size: 8 + uint64(len(tfhd)+len(tfdt)+len(trun))}
|
||
offset, boxData := traf.Encode()
|
||
copy(boxData[offset:], tfhd)
|
||
offset += len(tfhd)
|
||
copy(boxData[offset:], tfdt)
|
||
offset += len(tfdt)
|
||
copy(boxData[offset:], trun)
|
||
offset += len(trun)
|
||
return boxData
|
||
}
|
||
|
||
func (track *Track) makeTfhdBox(offset uint64) []byte {
|
||
tfFlags := TF_FLAG_SAMPLE_DESCRIPTION_INDEX_PRESENT
|
||
tfFlags |= TF_FLAG_DEAAULT_BASE_IS_MOOF
|
||
tfhd := NewTrackFragmentHeaderBox(track.TrackId)
|
||
tfhd.BaseDataOffset = offset
|
||
if len(track.Samplelist) > 1 {
|
||
tfhd.DefaultSampleDuration = uint32(track.Samplelist[1].DTS - track.Samplelist[0].DTS)
|
||
} else if len(track.Samplelist) == 1 && len(track.fragments) > 0 {
|
||
tfhd.DefaultSampleDuration = uint32(track.Samplelist[0].DTS - track.fragments[len(track.fragments)-1].LastDts)
|
||
} else {
|
||
tfhd.DefaultSampleDuration = 0
|
||
tfFlags |= TF_FLAG_DURATION_IS_EMPTY
|
||
}
|
||
if len(track.Samplelist) > 0 {
|
||
tfFlags |= TF_FLAG_DEAAULT_SAMPLE_FLAGS_PRESENT
|
||
tfFlags |= TF_FLAG_DEFAULT_SAMPLE_DURATION_PRESENT
|
||
tfFlags |= TF_FLAG_DEFAULT_SAMPLE_SIZE_PRESENT
|
||
tfhd.DefaultSampleSize = uint32(track.Samplelist[0].Size)
|
||
} else {
|
||
tfhd.DefaultSampleSize = 0
|
||
}
|
||
//ffmpeg movenc.c mov_write_tfhd_tag
|
||
if track.Cid.IsVideo() {
|
||
tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES | MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC
|
||
} else {
|
||
tfhd.DefaultSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO
|
||
}
|
||
track.defaultDuration = tfhd.DefaultSampleDuration
|
||
track.defaultSize = tfhd.DefaultSampleSize
|
||
track.defaultSampleFlags = tfhd.DefaultSampleFlags
|
||
_, boxData := tfhd.Encode(tfFlags)
|
||
return boxData
|
||
}
|
||
|
||
func (track *Track) makeTfdtBox() []byte {
|
||
tfdt := NewTrackFragmentBaseMediaDecodeTimeBox(uint64(track.Samplelist[0].DTS))
|
||
_, boxData := tfdt.Encode()
|
||
return boxData
|
||
}
|
||
|
||
func (track *Track) makeTrunBoxes(moofSize int64) []byte {
|
||
boxes := make([]byte, 0, 128)
|
||
start := 0
|
||
end := 0
|
||
for i := 1; i < len(track.Samplelist); i++ {
|
||
if track.Samplelist[i].Offset == track.Samplelist[i-1].Offset+int64(track.Samplelist[i-1].Size) {
|
||
continue
|
||
}
|
||
end = i
|
||
boxes = append(boxes, track.makeTrunBox(start, end, moofSize)...)
|
||
start = end
|
||
}
|
||
|
||
if start < len(track.Samplelist) {
|
||
boxes = append(boxes, track.makeTrunBox(start, len(track.Samplelist), moofSize)...)
|
||
}
|
||
return boxes
|
||
}
|
||
|
||
func (track *Track) makeStssBox() (boxdata []byte) {
|
||
var stss SyncSampleBox
|
||
for i, sample := range track.Samplelist {
|
||
if sample.KeyFrame {
|
||
stss = append(stss, uint32(i+1))
|
||
}
|
||
}
|
||
_, boxdata = stss.Encode()
|
||
return
|
||
}
|
||
|
||
func (track *Track) makeTfraBox() []byte {
|
||
tfra := NewTrackFragmentRandomAccessBox(track.TrackId)
|
||
tfra.LengthSizeOfSampleNum = 0
|
||
tfra.LengthSizeOfTrafNum = 0
|
||
tfra.LengthSizeOfTrunNum = 0
|
||
for _, f := range track.fragments {
|
||
tfra.FragEntrys = append(tfra.FragEntrys, FragEntry{
|
||
Time: f.FirstPts,
|
||
MoofOffset: f.Offset,
|
||
})
|
||
}
|
||
_, tfraData := tfra.Encode()
|
||
return tfraData
|
||
}
|
||
|
||
func (track *Track) makeTrunBox(start, end int, moofSize int64) []byte {
|
||
flag := TR_FLAG_DATA_OFFSET
|
||
if track.Cid.IsVideo() && track.Samplelist[start].KeyFrame {
|
||
flag |= TR_FLAG_DATA_FIRST_SAMPLE_FLAGS
|
||
}
|
||
|
||
for j := start; j < end; j++ {
|
||
if track.Samplelist[j].Size != int(track.defaultSize) {
|
||
flag |= TR_FLAG_DATA_SAMPLE_SIZE
|
||
}
|
||
if j+1 < end {
|
||
if track.Samplelist[j+1].DTS-track.Samplelist[j].DTS != uint64(track.defaultDuration) {
|
||
flag |= TR_FLAG_DATA_SAMPLE_DURATION
|
||
}
|
||
} else {
|
||
// if track.lastSample.DTS-track.Samplelist[j].DTS != uint64(track.defaultDuration) {
|
||
// flag |= TR_FLAG_DATA_SAMPLE_DURATION
|
||
// }
|
||
}
|
||
if track.Samplelist[j].PTS != track.Samplelist[j].DTS {
|
||
flag |= TR_FLAG_DATA_SAMPLE_COMPOSITION_TIME
|
||
}
|
||
}
|
||
|
||
trun := NewTrackRunBox()
|
||
trun.SampleCount = uint32(end - start)
|
||
|
||
trun.Dataoffset = int32(moofSize + track.Samplelist[start].Offset)
|
||
trun.FirstSampleFlags = MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO
|
||
for i := start; i < end; i++ {
|
||
sampleDuration := uint32(0)
|
||
if i == len(track.Samplelist)-1 {
|
||
sampleDuration = track.defaultDuration
|
||
} else {
|
||
sampleDuration = uint32(track.Samplelist[i+1].DTS - track.Samplelist[i].DTS)
|
||
}
|
||
|
||
entry := TrunEntry{
|
||
SampleDuration: sampleDuration,
|
||
SampleSize: uint32(track.Samplelist[i].Size),
|
||
SampleCompositionTimeOffset: uint32(track.Samplelist[i].PTS - track.Samplelist[i].DTS),
|
||
}
|
||
trun.EntryList = append(trun.EntryList, entry)
|
||
}
|
||
_, boxData := trun.Encode(flag)
|
||
return boxData
|
||
}
|
||
|
||
func (track *Track) makeStblTable() {
|
||
sameSize := true
|
||
movchunks := make([]movchunk, 0)
|
||
ckn := uint32(0)
|
||
var stts TimeToSampleBox
|
||
var ctts CompositionOffsetBox
|
||
var stco ChunkOffsetBox
|
||
for i, sample := range track.Samplelist {
|
||
sttsEntry := STTSEntry{SampleCount: 1, SampleDelta: 1}
|
||
cttsEntry := CTTSEntry{SampleCount: 1, SampleOffset: uint32(sample.PTS) - uint32(sample.DTS)}
|
||
if i == len(track.Samplelist)-1 {
|
||
stts = append(stts, sttsEntry)
|
||
} else {
|
||
var delta uint64 = 1
|
||
if track.Samplelist[i+1].PTS >= sample.PTS {
|
||
delta = track.Samplelist[i+1].PTS - sample.PTS
|
||
}
|
||
|
||
if len(stts) > 0 && delta == uint64(stts[len(stts)-1].SampleDelta) {
|
||
stts[len(stts)-1].SampleCount++
|
||
} else {
|
||
sttsEntry.SampleDelta = uint32(delta)
|
||
stts = append(stts, sttsEntry)
|
||
}
|
||
}
|
||
|
||
if len(ctts) == 0 {
|
||
ctts = append(ctts, cttsEntry)
|
||
} else {
|
||
if ctts[len(ctts)-1].SampleOffset == cttsEntry.SampleOffset {
|
||
ctts[len(ctts)-1].SampleCount++
|
||
} else {
|
||
ctts = append(ctts, cttsEntry)
|
||
}
|
||
}
|
||
if sameSize && i < len(track.Samplelist)-1 && track.Samplelist[i+1].Size != track.Samplelist[i].Size {
|
||
sameSize = false
|
||
}
|
||
if i > 0 && sample.Offset == track.Samplelist[i-1].Offset+int64(track.Samplelist[i-1].Size) {
|
||
movchunks[ckn-1].samplenum++
|
||
} else {
|
||
ck := movchunk{chunknum: ckn, samplenum: 1, chunkoffset: uint64(sample.Offset)}
|
||
movchunks = append(movchunks, ck)
|
||
stco = append(stco, uint64(sample.Offset))
|
||
ckn++
|
||
}
|
||
}
|
||
stsz := &SampleSizeBox{
|
||
SampleSize: 0,
|
||
SampleCount: uint32(len(track.Samplelist)),
|
||
}
|
||
if sameSize {
|
||
stsz.SampleSize = uint32(track.Samplelist[0].Size)
|
||
} else {
|
||
stsz.EntrySizelist = make([]uint32, stsz.SampleCount)
|
||
for i := 0; i < len(stsz.EntrySizelist); i++ {
|
||
stsz.EntrySizelist[i] = uint32(track.Samplelist[i].Size)
|
||
}
|
||
}
|
||
|
||
var stsc SampleToChunkBox
|
||
for i, chunk := range movchunks {
|
||
if i == 0 || chunk.samplenum != movchunks[i-1].samplenum {
|
||
stsc = append(stsc, STSCEntry{FirstChunk: chunk.chunknum + 1, SampleDescriptionIndex: 1, SamplesPerChunk: chunk.samplenum})
|
||
}
|
||
}
|
||
|
||
track.SampleTable.STTS = &stts
|
||
track.SampleTable.STSC = &stsc
|
||
track.SampleTable.STCO = &stco
|
||
track.SampleTable.STSZ = stsz
|
||
if track.Cid == MP4_CODEC_H264 || track.Cid == MP4_CODEC_H265 {
|
||
track.SampleTable.CTTS = &ctts
|
||
}
|
||
}
|
||
|
||
func (track *Track) makeSidxBox(totalSidxSize uint32, refsize uint32) []byte {
|
||
sidx := NewSegmentIndexBox()
|
||
sidx.ReferenceID = track.TrackId
|
||
sidx.TimeScale = track.Timescale
|
||
sidx.EarliestPresentationTime = track.StartPts
|
||
sidx.ReferenceCount = 1
|
||
sidx.FirstOffset = 52 + uint64(totalSidxSize)
|
||
entry := SidxEntry{
|
||
ReferenceType: 0,
|
||
ReferencedSize: refsize,
|
||
SubsegmentDuration: 0,
|
||
StartsWithSAP: 1,
|
||
SAPType: 0,
|
||
SAPDeltaTime: 0,
|
||
}
|
||
|
||
if len(track.Samplelist) > 0 {
|
||
entry.SubsegmentDuration = uint32(track.Samplelist[len(track.Samplelist)-1].DTS) - uint32(track.StartDts)
|
||
}
|
||
sidx.Entrys = append(sidx.Entrys, entry)
|
||
sidx.Box.Box.Size = sidx.Size()
|
||
_, boxData := sidx.Encode()
|
||
return boxData
|
||
}
|