fmp4: add muxer for fragmented MP4s.

examples: add example to mux to fragmented MP4
2025-12-24 13:57:59 +08:00 · 2021-08-03 23:19:57 +02:00
parent 638a8b2e0c
commit 8e7646130f
7 changed files with 907 additions and 279 deletions
--- a/examples/mp4_to_fmp4/main.go
+++ b/examples/mp4_to_fmp4/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"github.com/kerberos-io/joy4/av/avutil"
+	"github.com/kerberos-io/joy4/format"
+)
+
+func init() {
+	format.RegisterAll()
+}
+
+// main remuxes sintel.mp4 into the fragmented MP4 sintel_fragmentedz.fmp4.
+// Any failure aborts with a panic; this is an example, not library code.
+func main() {
+	muxer, err := avutil.Create("sintel_fragmentedz.fmp4")
+	if err != nil {
+		panic(err)
+	}
+	defer muxer.Close()
+
+	demuxer, err := avutil.Open("sintel.mp4")
+	if err != nil {
+		panic(err)
+	}
+	defer demuxer.Close()
+
+	if err := avutil.CopyFile(muxer, demuxer); err != nil {
+		panic(err)
+	}
+}
--- a/format/fmp4/handler.go
+++ b/format/fmp4/handler.go
@@ -0,0 +1,46 @@
+package fmp4
+
+import (
+	"bytes"
+	"io"
+	"log"
+
+	"github.com/kerberos-io/joy4/av"
+	"github.com/kerberos-io/joy4/av/avutil"
+	"github.com/kerberos-io/joy4/format/mp4/mp4io"
+)
+
+var CodecTypes = []av.CodecType{av.H264, av.AAC}
+
+func Handler(h *avutil.RegisterHandler) {
+	h.Ext = ".fmp4"
+
+	h.Probe = func(b []byte) bool {
+		probe_reader := bytes.NewReader(b)
+		var atoms []mp4io.Atom
+		var err error
+		if atoms, err = mp4io.ReadFileAtoms(probe_reader); err != nil {
+			log.Printf("fmp4: Probe(): errored on read file atoms when probing")
+			return false
+		}
+
+		for _, atom := range atoms {
+			if atom.Tag() == mp4io.MOOF {
+				return true
+			}
+		}
+
+		return false
+	}
+
+	/*
+		h.ReaderDemuxer = func(r io.Reader) av.Demuxer {
+			return NewDemuxer(r.(io.ReadSeeker))
+		}
+	*/
+	h.WriterMuxer = func(w io.Writer) av.Muxer {
+		return NewMuxer(w.(io.WriteSeeker))
+	}
+
+	h.CodecTypes = CodecTypes
+}
--- a/format/fmp4/muxer.go
+++ b/format/fmp4/muxer.go
@@ -0,0 +1,450 @@
+package fmp4
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"time"
+
+	"github.com/kerberos-io/joy4/av"
+	"github.com/kerberos-io/joy4/codec/aacparser"
+	"github.com/kerberos-io/joy4/codec/h264parser"
+	"github.com/kerberos-io/joy4/format/mp4/mp4io"
+	"github.com/kerberos-io/joy4/utils/bits/pio"
+)
+
+type Muxer struct {
+	w       io.WriteSeeker
+	bufw    *bufio.Writer
+	wpos    int64
+	streams []*Stream
+
+	moof_seqnum uint32
+	// Streams must start with keyframes / IDRs.
+	// A keyframe is a complete sample that contains all information to produce a single image.
+	// All other samples are deltas with respect to the last keyframe, which is why an MP4 must
+	// always start with a keyframe: any other type of frame would have no point of reference.
+	// It does mean we lose some data, but that data was useless anyway.
+	// This flag lives on the muxer and not on an individual stream to keep audio from before the first
+	// keyframe out of the MP4, which would delay the audio, possibly by a few seconds (depends on keyframe interval).
+	gotFirstKeyframe bool
+}
+
+func NewMuxer(w io.WriteSeeker) *Muxer {
+	return &Muxer{
+		w:    w,
+		bufw: bufio.NewWriterSize(w, pio.RecommendBufioSize),
+	}
+}
+
+// newStream registers a track for codec under track ID trackId; the ID goes into both
+// tkhd and Stream.Idx (used for trex/tfhd). Codecs other than H264/AAC return an error.
+func (self *Muxer) newStream(codec av.CodecData, trackId int) (err error) {
+	switch codec.Type() {
+	case av.H264, av.AAC:
+		// supported
+	default:
+		err = fmt.Errorf("mp4: codec type=%v is not supported", codec.Type())
+		return
+	}
+	stream := &Stream{
+		CodecData: codec,
+		Idx:       trackId,
+	}
+
+	stream.sample = &mp4io.SampleTable{
+		SampleDesc:    &mp4io.SampleDesc{},
+		TimeToSample:  &mp4io.TimeToSample{},
+		SampleToChunk: &mp4io.SampleToChunk{},
+		SampleSize:    &mp4io.SampleSize{},
+		ChunkOffset:   &mp4io.ChunkOffset{},
+	}
+
+	stream.trackAtom = &mp4io.Track{
+		Header: &mp4io.TrackHeader{
+			TrackId:  int32(trackId),
+			Flags:    0x0003, // Track enabled | Track in movie
+			Duration: 0,      // fill later
+			Matrix:   [9]int32{0x10000, 0, 0, 0, 0x10000, 0, 0, 0, 0x40000000},
+		},
+		Media: &mp4io.Media{
+			Header: &mp4io.MediaHeader{
+				TimeScale: 0, // fill later
+				Duration:  0, // fill later
+				Language:  21956,
+			},
+			Info: &mp4io.MediaInfo{
+				Sample: stream.sample,
+				Data: &mp4io.DataInfo{
+					Refer: &mp4io.DataRefer{
+						Url: &mp4io.DataReferUrl{
+							Flags: 0x000001, // Self reference
+						},
+					},
+				},
+			},
+		},
+	}
+
+	if codec.Type() == av.H264 {
+		stream.sample.SyncSample = &mp4io.SyncSample{}
+	}
+
+	stream.timeScale = 12288 // 90000 //
+	stream.muxer = self
+	self.streams = append(self.streams, stream)
+	return
+}
+
+// fillTrackAtom copies time scale/duration into the media header and fills the
+// codec-specific sample description and handler; unknown codec types return an error.
+func (self *Stream) fillTrackAtom() (err error) {
+	self.trackAtom.Media.Header.TimeScale = int32(self.timeScale)
+	self.trackAtom.Media.Header.Duration = int32(self.duration)
+
+	if self.Type() == av.H264 {
+		codec := self.CodecData.(h264parser.CodecData)
+		width, height := codec.Width(), codec.Height()
+		self.sample.SampleDesc.AVC1Desc = &mp4io.AVC1Desc{
+			DataRefIdx:           1,
+			HorizontalResolution: 72,
+			VorizontalResolution: 72,
+			Width:                int16(width),
+			Height:               int16(height),
+			FrameCount:           1,
+			Depth:                24,
+			ColorTableId:         -1,
+			Conf:                 &mp4io.AVC1Conf{Data: codec.AVCDecoderConfRecordBytes()},
+		}
+		self.trackAtom.Media.Handler = &mp4io.HandlerRefer{
+			SubType: [4]byte{'v', 'i', 'd', 'e'},
+			Name:    []byte("Video Media Handler"),
+		}
+		self.trackAtom.Media.Info.Video = &mp4io.VideoMediaInfo{
+			Flags: 0x000001,
+		}
+		self.trackAtom.Header.TrackWidth = float64(width)
+		self.trackAtom.Header.TrackHeight = float64(height)
+	} else if self.Type() == av.AAC {
+		codec := self.CodecData.(aacparser.CodecData)
+		audioConfig := codec.MPEG4AudioConfigBytes()
+		self.sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{
+			DataRefIdx:       1, // the track's dref holds a single (self-reference) entry
+			NumberOfChannels: int16(codec.ChannelLayout().Count()),
+			SampleSize:       int16(codec.SampleFormat().BytesPerSample()),
+			SampleRate:       float64(codec.SampleRate()),
+			Conf: &mp4io.ElemStreamDesc{
+				DecConfig: audioConfig,
+			},
+		}
+		self.trackAtom.Header.Volume = 1
+		self.trackAtom.Header.AlternateGroup = 1
+		self.trackAtom.Media.Handler = &mp4io.HandlerRefer{
+			SubType: [4]byte{'s', 'o', 'u', 'n'},
+			Name:    []byte{'S', 'o', 'u', 'n', 'd', 'H', 'a', 'n', 'd', 'l', 'e', 'r', 0},
+		}
+		self.trackAtom.Media.Info.Sound = &mp4io.SoundMediaInfo{}
+	} else {
+		err = fmt.Errorf("mp4: codec type=%d invalid", self.Type())
+	}
+
+	return
+}
+
+func (self *Muxer) WriteHeader(streams []av.CodecData) (err error) {
+	self.streams = []*Stream{}
+	for i, stream := range streams {
+		if err = self.newStream(stream, i+1); err != nil {
+			// no need to stop the recording if a codec doesnt match, still try to...
+		}
+	}
+	/*
+		https://www.w3.org/2013/12/byte-stream-format-registry/isobmff-byte-stream-format.html#h2_iso-init-segments
+		The user agent must run the end of stream algorithm with the error parameter set to "decode" if any of the following conditions are met:
+			- A File Type Box contains a major_brand or compatible_brand that the user agent does not support.
+			- A box or field in the Movie Header Box is encountered that violates the requirements mandated by the major_brand or one of the compatible_brands in the File Type Box.
+			- The tracks in the Movie Header Box contain samples (i.e. the entry_count in the stts, stsc or stco boxes are not set to zero).
+			- A Movie Extends (mvex) box is not contained in the Movie (moov) box to indicate that Movie Fragments are to be expected.
+	*/
+
+	moov := &mp4io.Movie{}
+	moov.Header = &mp4io.MovieHeader{
+		PreferredRate:     1,
+		PreferredVolume:   1,
+		Matrix:            [9]int32{0x10000, 0, 0, 0, 0x10000, 0, 0, 0, 0x40000000},
+		NextTrackId:       int32(len(self.streams)), // ffmpeg uses the last track id as the next track id, makes no sense
+		PreviewTime:       time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC),
+		PreviewDuration:   time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC),
+		PosterTime:        time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC),
+		SelectionTime:     time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC),
+		SelectionDuration: time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC),
+		CurrentTime:       time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC),
+	}
+
+	// Movie Extend MVEX is required for fragmented MP4s
+	trackExtends := make([]*mp4io.TrackExtend, 0)
+	for _, stream := range self.streams {
+		// Add an extension for every available track along with their track ids.
+		ext := &mp4io.TrackExtend{
+			TrackId:              uint32(stream.Idx),
+			DefaultSampleDescIdx: uint32(1),
+		}
+		trackExtends = append(trackExtends, ext)
+	}
+	moov.MovieExtend = &mp4io.MovieExtend{
+		Tracks: trackExtends,
+	}
+
+	// TODO(atom): write a parser of the User Data Box (udta)
+
+	maxDur := time.Duration(0)
+	timeScale := int64(10000)
+	for _, stream := range self.streams {
+		if err = stream.fillTrackAtom(); err != nil {
+			return
+		}
+		dur := stream.tsToTime(stream.duration)
+		stream.trackAtom.Header.Duration = int32(timeToTs(dur, timeScale))
+		if dur > maxDur {
+			maxDur = dur
+		}
+		moov.Tracks = append(moov.Tracks, stream.trackAtom)
+	}
+	moov.Header.TimeScale = int32(timeScale)
+	moov.Header.Duration = int32(timeToTs(maxDur, timeScale))
+
+	b := make([]byte, moov.Len())
+	moov.Marshal(b)
+	if _, err = self.w.Write(b); err != nil {
+		return
+	}
+
+	return
+}
+
+// BuildTrackFragmentWithoutOffset converts the packets queued on this stream into a
+// TrackFrag (header, decode time and run) and advances self.dts for the next fragment.
+// The run's DataOffset is only a placeholder here; flushMoof overwrites it.
+func (self *Stream) BuildTrackFragmentWithoutOffset() (trackFragment *mp4io.TrackFrag, err error) {
+	// Running decode timestamp; it is stored back into self.dts at the end.
+	newDts := self.dts
+
+	// Build one TrackFragRunEntry per queued packet. Each entry records the sample's
+	// duration and its size; flushMoof sums the sizes to compute the data offsets.
+	trackfragentries := make([]mp4io.TrackFragRunEntry, 0) // https://ffmpeg.org/pipermail/ffmpeg-devel/2014-November/164898.html
+	for _, pkt := range self.pkts {
+		// Duration written for this sample: time elapsed since the previously processed
+		// packet. When there is no previous packet yet, the duration stays 0.
+		var duration time.Duration
+		if self.lastpkt != nil {
+			duration = pkt.Time - self.lastpkt.Time
+		}
+
+		// Increment the decode timestamp for the next Track Fragment Decode Time (TFDT).
+		// Essentially it is the sum of the durations of the samples.
+		newDts += self.timeToTs(duration)
+
+		/*
+			if duration == 0 {
+				duration = 40 * time.Millisecond
+			}*/
+
+		// Audio tends to build very predictable packets due to its sampling rate.
+		// A possible optimization would be to rely on the default flags instead.
+		// This requires looping over all packets and verifying the default size & duration.
+		// It saves a few bytes for each trun entry and could be looked into.
+		// Current behavior is to explicitly write size & duration for every entry.
+		entry := mp4io.TrackFragRunEntry{
+			Duration: uint32(self.timeToTs(duration)), // The timescaled duration e.g 2999
+			Size:     uint32(len(pkt.Data)),           // The length of the sample in bytes e.g 51677
+			Flags:    uint32(33554432),
+			Cts:      uint32(self.timeToTs(pkt.CompositionTime)), // Composition timestamp is typically for B-frames, which are not used in RTSP
+		}
+		trackfragentries = append(trackfragentries, entry)
+		self.lastpkt = pkt
+	}
+
+	// Default sample flags for the header: 16842752 (0x01010000) for H264, 33554432 (0x02000000) otherwise.
+	DefaultSampleFlags := uint32(0)
+	if self.Type().String() == "H264" {
+		DefaultSampleFlags = 16842752
+	} else {
+		// audio
+		DefaultSampleFlags = 33554432
+	}
+
+	// Fixed default sample duration written to the track fragment header.
+	// TODO: demuxer bug for B-frames has the same dts
+	DefaultDuration := uint32(512)
+
+	// Run flags: per-sample size + duration; with B-frames CTS replaces duration (NOTE(review): confirm dropping duration is intended).
+	trackFragRunFlags := uint32(mp4io.TRUN_DATA_OFFSET | mp4io.TRUN_FIRST_SAMPLE_FLAGS | mp4io.TRUN_SAMPLE_SIZE | mp4io.TRUN_SAMPLE_DURATION)
+	if self.hasBFrames { // TODO: add check if video track
+		//trackFragRunFlags = trackFragRunFlags | mp4io.TRUN_SAMPLE_CTS
+		trackFragRunFlags = uint32(mp4io.TRUN_DATA_OFFSET | mp4io.TRUN_FIRST_SAMPLE_FLAGS | mp4io.TRUN_SAMPLE_SIZE | mp4io.TRUN_SAMPLE_CTS)
+		// TODO: in ffmpeg this is 33554432 for video track & none if audio
+	}
+
+	FirstSampleFlags := uint32(mp4io.TRUN_SAMPLE_SIZE | mp4io.TRUN_SAMPLE_DURATION) // mp4io.TRUN_DATA_OFFSET | mp4io.TRUN_FIRST_SAMPLE_FLAGS |
+	// If the first queued packet has a composition offset, use the CTS variant for the first-sample flags.
+	if len(self.pkts) > 0 && self.pkts[0].CompositionTime > 0 {
+		FirstSampleFlags = uint32(mp4io.TRUN_SAMPLE_SIZE | mp4io.TRUN_SAMPLE_CTS)
+	}
+
+	trackFragment = &mp4io.TrackFrag{
+		Header: &mp4io.TrackFragHeader{
+			Version:         uint8(0),
+			Flags:           uint32(mp4io.TFHD_DEFAULT_FLAGS | mp4io.TFHD_DEFAULT_DURATION | mp4io.TFHD_DEFAULT_BASE_IS_MOOF), // uint32(131128),
+			TrackId:         uint32(self.Idx),
+			DefaultDuration: DefaultDuration,
+			DefaultFlags:    DefaultSampleFlags, // TODO: fix to real flags
+		},
+		DecodeTime: &mp4io.TrackFragDecodeTime{
+			Version:    uint8(1), // Decides whether 1 = 64bit, 0 = 32bit timestamp
+			Flags:      uint32(0),
+			DecodeTime: uint64(self.dts), // Decode timestamp timescaled
+		},
+		Run: &mp4io.TrackFragRun{
+			Version:          uint8(0),
+			Flags:            trackFragRunFlags, // The flags if 0 then no DataOffset & no FirstSampleFlags
+			DataOffset:       uint32(368),       // NOTE: this is rewritten later
+			FirstSampleFlags: FirstSampleFlags,
+			Entries:          trackfragentries,
+		},
+	}
+
+	// Set the next dts, padded by 1ms (NOTE(review): this padding is added on every fragment; confirm it is intended).
+	newDts += self.timeToTs(1 * time.Millisecond)
+	self.dts = newDts
+
+	// Reset hasBFrames
+	self.hasBFrames = false
+
+	return
+}
+
+func (self *Muxer) flushMoof() (err error) {
+
+	// Build the Track Frags
+	trackFragments := make([]*mp4io.TrackFrag, 0)
+	for _, stream := range self.streams {
+		// Build the Track Frag for this stream
+		var trackFragment *mp4io.TrackFrag
+		trackFragment, err = stream.BuildTrackFragmentWithoutOffset()
+		if err != nil {
+			return
+		}
+		trackFragments = append(trackFragments, trackFragment)
+	}
+
+	// Defer the clearing of the packets, we'll need them later in this function to
+	// write the MDAT contents & calculate its size.
+	defer func() {
+		for _, stream := range self.streams {
+			stream.pkts = make([]*av.Packet, 0)
+		}
+	}()
+
+	moof := &mp4io.MovieFrag{
+		Header: &mp4io.MovieFragHeader{
+			Version: uint8(0),
+			Flags:   uint32(0),
+			Seqnum:  self.moof_seqnum,
+		},
+		Tracks: trackFragments,
+	}
+
+	// Fix the dataoffsets of the track run
+	nextDataOffset := uint32(moof.Len() + 8)
+	for _, track := range moof.Tracks {
+		track.Run.DataOffset = nextDataOffset
+		for _, entry := range track.Run.Entries {
+			nextDataOffset += entry.Size
+		}
+	}
+
+	// Write the MOOF
+	b := make([]byte, moof.Len())
+	moof.Marshal(b)
+	if _, err = self.w.Write(b); err != nil {
+		return
+	}
+	b = nil
+
+	// Write the MDAT size
+	mdatsize := uint32(8) // skip itself
+	for _, fragment := range trackFragments {
+		for _, entry := range fragment.Run.Entries {
+			mdatsize += entry.Size
+		}
+	}
+
+	taghdr := make([]byte, 4)
+	pio.PutU32BE(taghdr, mdatsize)
+	if _, err = self.w.Write(taghdr); err != nil {
+		return
+	}
+	taghdr = nil
+
+	// Write the MDAT header
+	taghdr = make([]byte, 4)
+	pio.PutU32BE(taghdr, uint32(mp4io.MDAT))
+	if _, err = self.w.Write(taghdr); err != nil {
+		return
+	}
+	taghdr = nil
+
+	// Write the MDAT contents
+	for _, stream := range self.streams {
+		for _, pkt := range stream.pkts {
+			if _, err = self.w.Write(pkt.Data); err != nil {
+				return
+			}
+		}
+	}
+
+	// Increment the SeqNum
+	self.moof_seqnum++
+
+	return
+}
+
+// WritePacket queues pkt on its stream. Packets before the first video keyframe are
+// dropped; each later keyframe first flushes the queued samples as a moof/mdat pair.
+func (self *Muxer) WritePacket(pkt av.Packet) (err error) {
+	if pkt.Idx < 0 || int(pkt.Idx) >= len(self.streams) {
+		return // not a known stream: ignore the packet
+	}
+	stream := self.streams[pkt.Idx]
+
+	// Wait until we have a video packet & it's a keyframe
+	if pkt.IsKeyFrame && !self.gotFirstKeyframe && stream.Type().IsVideo() {
+		// First keyframe found, we can start processing
+		self.gotFirstKeyframe = true
+	} else if !self.gotFirstKeyframe {
+		return
+	} else if pkt.IsKeyFrame {
+		// A later keyframe closes the previous fragment; report a failed flush.
+		if err = self.flushMoof(); err != nil {
+			return
+		}
+	}
+	return stream.writePacket(pkt)
+}
+
+// writePacket queues pkt and records a non-zero composition offset in hasBFrames.
+func (self *Stream) writePacket(pkt av.Packet /*, rawdur time.Duration*/) (err error) {
+	self.pkts = append(self.pkts, &pkt)
+	self.hasBFrames = self.hasBFrames || pkt.CompositionTime > 0
+	return
+}
+
+// WriteTrailer writes the samples queued since the last keyframe as a final
+// fragment (they would otherwise be lost) and then releases the muxer state.
+func (self *Muxer) WriteTrailer() (err error) {
+	if self.gotFirstKeyframe {
+		err = self.flushMoof()
+	}
+	self.bufw, self.streams, self.gotFirstKeyframe = nil, nil, false
+	return
+}
--- a/format/fmp4/stream.go
+++ b/format/fmp4/stream.go
@@ -0,0 +1,45 @@
+package fmp4
+
+import (
+	"time"
+
+	"github.com/kerberos-io/joy4/av"
+	"github.com/kerberos-io/joy4/format/mp4/mp4io"
+)
+
+type Stream struct {
+	av.CodecData
+
+	trackAtom *mp4io.Track
+	Idx       int
+
+	// pkts to be used in MDAT and MOOF > TRAF > TRUN
+	lastpkt    *av.Packet
+	pkts       []*av.Packet
+	hasBFrames bool
+
+	timeScale int64
+	duration  int64
+
+	muxer *Muxer
+	//demuxer *Demuxer
+
+	sample *mp4io.SampleTable
+	dts    int64
+}
+
+func timeToTs(tm time.Duration, timeScale int64) int64 {
+	return int64(tm * time.Duration(timeScale) / time.Second)
+}
+
+func tsToTime(ts int64, timeScale int64) time.Duration {
+	return time.Duration(ts) * time.Second / time.Duration(timeScale)
+}
+
+func (self *Stream) timeToTs(tm time.Duration) int64 {
+	return int64(tm * time.Duration(self.timeScale) / time.Second)
+}
+
+func (self *Stream) tsToTime(ts int64) time.Duration {
+	return time.Duration(ts) * time.Second / time.Duration(self.timeScale)
+}
--- a/format/format.go
+++ b/format/format.go
@@ -1,21 +1,22 @@
 package format

 import (
+	"github.com/kerberos-io/joy4/av/avutil"
+	"github.com/kerberos-io/joy4/format/aac"
+	"github.com/kerberos-io/joy4/format/flv"
+	"github.com/kerberos-io/joy4/format/fmp4"
 	"github.com/kerberos-io/joy4/format/mp4"
-	"github.com/kerberos-io/joy4/format/ts"
 	"github.com/kerberos-io/joy4/format/rtmp"
 	"github.com/kerberos-io/joy4/format/rtsp"
-	"github.com/kerberos-io/joy4/format/flv"
-	"github.com/kerberos-io/joy4/format/aac"
-	"github.com/kerberos-io/joy4/av/avutil"
+	"github.com/kerberos-io/joy4/format/ts"
 )

 func RegisterAll() {
 	avutil.DefaultHandlers.Add(mp4.Handler)
+	avutil.DefaultHandlers.Add(fmp4.Handler)
 	avutil.DefaultHandlers.Add(ts.Handler)
 	avutil.DefaultHandlers.Add(rtmp.Handler)
 	avutil.DefaultHandlers.Add(rtsp.Handler)
 	avutil.DefaultHandlers.Add(flv.Handler)
 	avutil.DefaultHandlers.Add(aac.Handler)
 }
-
--- a/format/mp4/mp4io/atoms.go
+++ b/format/mp4/mp4io/atoms.go
--- a/format/mp4/mp4io/mp4io.go
+++ b/format/mp4/mp4io/mp4io.go
@@ -1,20 +1,20 @@
-
 package mp4io

 import (
-	"github.com/kerberos-io/joy4/utils/bits/pio"
-	"os"
-	"io"
 	"fmt"
-	"time"
+	"io"
 	"math"
+	"os"
 	"strings"
+	"time"
+
+	"github.com/kerberos-io/joy4/utils/bits/pio"
 )

 type ParseError struct {
-	Debug string
+	Debug  string
 	Offset int
-	prev *ParseError
+	prev   *ParseError
 }

 func (self *ParseError) Error() string {
@@ -22,7 +22,7 @@ func (self *ParseError) Error() string {
 	for p := self; p != nil; p = p.prev {
 		s = append(s, fmt.Sprintf("%s:%d", p.Debug, p.Offset))
 	}
-	return "mp4io: parse error: "+strings.Join(s, ",")
+	return "mp4io: parse error: " + strings.Join(s, ",")
 }

 func parseErr(debug string, offset int, prev error) (err error) {
@@ -33,37 +33,37 @@ func parseErr(debug string, offset int, prev error) (err error) {
 func GetTime32(b []byte) (t time.Time) {
 	sec := pio.U32BE(b)
 	t = time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC)
-	t = t.Add(time.Second*time.Duration(sec))
+	t = t.Add(time.Second * time.Duration(sec))
 	return
 }

 func PutTime32(b []byte, t time.Time) {
 	dur := t.Sub(time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC))
-	sec := uint32(dur/time.Second)
+	sec := uint32(dur / time.Second)
 	pio.PutU32BE(b, sec)
 }

 func GetTime64(b []byte) (t time.Time) {
 	sec := pio.U64BE(b)
 	t = time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC)
-	t = t.Add(time.Second*time.Duration(sec))
+	t = t.Add(time.Second * time.Duration(sec))
 	return
 }

 func PutTime64(b []byte, t time.Time) {
 	dur := t.Sub(time.Date(1904, time.January, 1, 0, 0, 0, 0, time.UTC))
-	sec := uint64(dur/time.Second)
+	sec := uint64(dur / time.Second) // whole seconds since 1904-01-01 UTC, the unit GetTime64 reads back
 	pio.PutU64BE(b, sec)
 }

 func PutFixed16(b []byte, f float64) {
 	intpart, fracpart := math.Modf(f)
 	b[0] = uint8(intpart)
-	b[1] = uint8(fracpart*256.0)
+	b[1] = uint8(fracpart * 256.0)
 }

 func GetFixed16(b []byte) float64 {
-	return float64(b[0])+float64(b[1])/256.0
+	return float64(b[0]) + float64(b[1])/256.0
 }

 func PutFixed32(b []byte, f float64) {
@@ -73,7 +73,7 @@ func PutFixed32(b []byte, f float64) {
 }

 func GetFixed32(b []byte) float64 {
-	return float64(pio.U16BE(b[0:2]))+float64(pio.U16BE(b[2:4]))/65536.0
+	return float64(pio.U16BE(b[0:2])) + float64(pio.U16BE(b[2:4]))/65536.0
 }

 type Tag uint32
@@ -89,21 +89,21 @@ func (self Tag) String() string {
 	return string(b[:])
 }

-type Atom interface{
-	Pos() (int,int)
+type Atom interface {
+	Pos() (int, int)
 	Tag() Tag
 	Marshal([]byte) int
-	Unmarshal([]byte, int) (int,error)
+	Unmarshal([]byte, int) (int, error)
 	Len() int
 	Children() []Atom
 }

 type AtomPos struct {
 	Offset int
-	Size int
+	Size   int
 }

-func (self AtomPos) Pos() (int,int) {
+func (self AtomPos) Pos() (int, int) {
 	return self.Offset, self.Size
 }

@@ -186,11 +186,16 @@ const (
 	MP4ESDescrTag          = 3
 	MP4DecConfigDescrTag   = 4
 	MP4DecSpecificDescrTag = 5
+	MP4DescrTag            = 6
+)
+
+const (
+	MP4DecConfigDataSize = 2 + 3 + 4 + 4
 )

 type ElemStreamDesc struct {
 	DecConfig []byte
-	TrackId uint16
+	TrackId   uint16
 	AtomPos
 }

@@ -200,10 +205,10 @@ func (self ElemStreamDesc) Children() []Atom {

 func (self ElemStreamDesc) fillLength(b []byte, length int) (n int) {
 	for i := 3; i > 0; i-- {
-		b[n] = uint8(length>>uint(7*i))&0x7f|0x80
+		b[n] = uint8(length>>uint(7*i))&0x7f | 0x80
 		n++
 	}
-	b[n] = uint8(length&0x7f)
+	b[n] = uint8(length & 0x7f)
 	n++
 	return
 }
@@ -220,7 +225,11 @@ func (self ElemStreamDesc) fillDescHdr(b []byte, tag uint8, datalen int) (n int)
 }

 func (self ElemStreamDesc) lenESDescHdr() (n int) {
-	return self.lenDescHdr()+3
+	return self.lenDescHdr()
+}
+
+func (self ElemStreamDesc) lenESDescData() (n int) {
+	return 3
 }

 func (self ElemStreamDesc) fillESDescHdr(b []byte, datalen int) (n int) {
@@ -233,7 +242,7 @@ func (self ElemStreamDesc) fillESDescHdr(b []byte, datalen int) (n int) {
 }

 func (self ElemStreamDesc) lenDecConfigDescHdr() (n int) {
-	return self.lenDescHdr()+2+3+4+4+self.lenDescHdr()
+	return self.lenDescHdr() + MP4DecConfigDataSize + self.lenDescHdr()
 }

 func (self ElemStreamDesc) fillDecConfigDescHdr(b []byte, datalen int) (n int) {
@@ -256,7 +265,28 @@ func (self ElemStreamDesc) fillDecConfigDescHdr(b []byte, datalen int) (n int) {
 }

 func (self ElemStreamDesc) Len() (n int) {
-	return 8+4+self.lenESDescHdr()+self.lenDecConfigDescHdr()+len(self.DecConfig)+self.lenDescHdr()+1
+	n += 8
+	n += 4
+	// 0x03 MP4ESDescHeader
+	// 5
+	n += self.lenESDescHdr()
+
+	// + ESID + ESFlags
+	// + 2    + 1
+	n += self.lenESDescData()
+
+	// 0x04 MP4DecConfigDescrTag + MP4DecConfigDataSize + 0x05 MP4DecSpecificDescrHeader
+	// 5						 + 13				 	+ 5
+	n += self.lenDecConfigDescHdr()
+
+	// Variable size configuration
+	n += len(self.DecConfig)
+
+	// 0x06 MP4DescrHeader 	+ 1
+	// 5 				+ 1
+	n += self.lenDescHdr() + 1
+
+	return // 8 + 4 + self.lenESDescHdr() + self.lenDecConfigDescHdr() + len(self.DecConfig) + self.lenDescHdr() + 1
 }

 // Version(4)
@@ -282,9 +312,11 @@ func (self ElemStreamDesc) Marshal(b []byte) (n int) {
 	n += 4
 	datalen := self.Len()
 	n += self.fillESDescHdr(b[n:], datalen-n-self.lenESDescHdr())
-	n += self.fillDecConfigDescHdr(b[n:], datalen-n-self.lenDescHdr()-1)
+
+	n += self.fillDecConfigDescHdr(b[n:], datalen-n-self.lenDescHdr()-self.lenDescHdr()-1)
 	copy(b[n:], self.DecConfig)
 	n += len(self.DecConfig)
+
 	n += self.fillDescHdr(b[n:], 0x06, datalen-n-self.lenDescHdr())
 	b[n] = 0x02
 	n++
@@ -310,6 +342,8 @@ func (self *ElemStreamDesc) parseDesc(b []byte, offset int) (n int, err error) {
 	if hdrlen, tag, datalen, err = self.parseDescHdr(b, offset); err != nil {
 		return
 	}
+
+	// Skip over the header length (tag size 1 byte + lenlen)
 	n += hdrlen

 	if len(b) < n+datalen {
@@ -328,17 +362,16 @@ func (self *ElemStreamDesc) parseDesc(b []byte, offset int) (n int, err error) {
 		}

 	case MP4DecConfigDescrTag:
-		const size = 2+3+4+4
-		if len(b) < n+size {
+		if len(b) < n+MP4DecConfigDataSize {
 			err = parseErr("MP4DecSpecificDescrTag", offset+n, err)
 			return
 		}
-		if _, err = self.parseDesc(b[n+size:], offset+n+size); err != nil {
+		if _, err = self.parseDesc(b[n+MP4DecConfigDataSize:], offset+n+MP4DecConfigDataSize); err != nil {
 			return
 		}

 	case MP4DecSpecificDescrTag:
-		self.DecConfig = b[n:]
+		self.DecConfig = b[n : n+datalen]
 	}

 	n += datalen
@@ -353,7 +386,7 @@ func (self *ElemStreamDesc) parseLength(b []byte, offset int) (n int, length int
 		}
 		c := b[n]
 		n++
-		length = (length<<7)|(int(c)&0x7f)
+		length = (length << 7) | (int(c) & 0x7f)
 		if c&0x80 == 0 {
 			break
 		}
@@ -362,13 +395,13 @@ func (self *ElemStreamDesc) parseLength(b []byte, offset int) (n int, length int
 }

 func (self *ElemStreamDesc) parseDescHdr(b []byte, offset int) (n int, tag uint8, datalen int, err error) {
+	var lenlen int
 	if len(b) < n+1 {
 		err = parseErr("tag", offset+n, err)
 		return
 	}
 	tag = b[n]
 	n++
-	var lenlen int
 	if lenlen, datalen, err = self.parseLength(b[n:], offset+n); err != nil {
 		return
 	}
@@ -500,4 +533,3 @@ func (self *Track) GetElemStreamDesc() (esds *ElemStreamDesc) {
 	esds, _ = atom.(*ElemStreamDesc)
 	return
 }
-