From bcd6f1b5499909afd0550dba6f9983c0201f5f12 Mon Sep 17 00:00:00 2001 From: aler9 <46489434+aler9@users.noreply.github.com> Date: Fri, 3 Jun 2022 10:27:05 +0200 Subject: [PATCH] h264: fix DTS extractor with B-frames and remove PTS-DTS offset --- .../mpegtsencoder.go | 32 +++++++++++----- pkg/h264/dtsextractor.go | 37 +++++++++++++------ pkg/h264/dtsextractor_test.go | 8 ++-- 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/examples/client-read-h264-save-to-disk/mpegtsencoder.go b/examples/client-read-h264-save-to-disk/mpegtsencoder.go index be5f3951..015176d1 100644 --- a/examples/client-read-h264-save-to-disk/mpegtsencoder.go +++ b/examples/client-read-h264-save-to-disk/mpegtsencoder.go @@ -11,10 +11,6 @@ import ( "github.com/asticode/go-astits" ) -const ( - ptsDTSOffset = 400 * time.Millisecond -) - // mpegtsEncoder allows to encode H264 NALUs into MPEG-TS. type mpegtsEncoder struct { sps []byte @@ -25,6 +21,7 @@ type mpegtsEncoder struct { mux *astits.Muxer dtsExtractor *h264.DTSExtractor firstIDRReceived bool + startDTS time.Duration } // newMPEGTSEncoder allocates a mpegtsEncoder. @@ -92,6 +89,8 @@ func (e *mpegtsEncoder) encode(nalus [][]byte, pts time.Duration) error { filteredNALUs = append(filteredNALUs, nalu) } + var dts time.Duration + if !e.firstIDRReceived { // skip samples silently until we find one with a IDR if !idrPresent { @@ -100,14 +99,27 @@ func (e *mpegtsEncoder) encode(nalus [][]byte, pts time.Duration) error { e.firstIDRReceived = true e.dtsExtractor = h264.NewDTSExtractor() - } - dts, err := e.dtsExtractor.Extract(filteredNALUs, pts) - if err != nil { - return err - } + var err error + dts, err = e.dtsExtractor.Extract(filteredNALUs, pts) + if err != nil { + return err + } - pts += ptsDTSOffset + e.startDTS = dts + dts = 0 + pts -= e.startDTS + + } else { + var err error + dts, err = e.dtsExtractor.Extract(filteredNALUs, pts) + if err != nil { + return err + } + + dts -= e.startDTS + pts -= e.startDTS + } oh := &astits.PESOptionalHeader{ MarkerBits: 2, diff --git a/pkg/h264/dtsextractor.go b/pkg/h264/dtsextractor.go index d21d83f2..46f3d391 100644 --- a/pkg/h264/dtsextractor.go +++ b/pkg/h264/dtsextractor.go @@ -96,12 +96,13 @@ func getPOCDiff(poc1 uint32, poc2 uint32, sps *SPS) int32 { // DTSExtractor is a utility that allows to extract NALU DTS from PTS. type DTSExtractor struct { - sps []byte - spsp *SPS - prevPTS time.Duration - prevDTS time.Duration - prevPOCDiff int32 - expectedPOC uint32 + sps []byte + spsp *SPS + prevPTS time.Duration + prevDTS time.Duration + prevPOCDiff int32 + expectedPOC uint32 + ptsDTSOffset time.Duration } // NewDTSExtractor allocates a DTSExtractor. @@ -128,6 +129,14 @@ func (d *DTSExtractor) extractInner( } d.sps = append([]byte(nil), nalu...) d.spsp = &spsp + + // in case of B-frames, we have to subtract from DTS the maximum number of reordered frames + if d.spsp.VUI != nil && d.spsp.VUI.TimingInfo != nil { + d.ptsDTSOffset = time.Duration(math.Round(float64(time.Duration(d.spsp.VUI.MaxNumReorderFrames)*time.Second* + time.Duration(d.spsp.VUI.TimingInfo.NumUnitsInTick)*2) / float64(d.spsp.VUI.TimingInfo.TimeScale))) + } else { + d.ptsDTSOffset = 0 + } } // set IDR present flag @@ -142,7 +151,7 @@ func (d *DTSExtractor) extractInner( if idrPresent || d.spsp.PicOrderCntType == 2 { d.expectedPOC = 0 - return pts, 0, nil + return pts - d.ptsDTSOffset, 0, nil } // compute expectedPOC immediately in order to store it even in case of errors @@ -157,6 +166,11 @@ func (d *DTSExtractor) extractInner( pocDiff := getPOCDiff(poc, d.expectedPOC, d.spsp) if pocDiff == 0 { + return pts - d.ptsDTSOffset, 0, nil + } + + // special case to eliminate errors near 0 + if d.spsp.VUI != nil && d.spsp.VUI.TimingInfo != nil && pocDiff == -int32(d.spsp.VUI.MaxNumReorderFrames)*2 { return pts, pocDiff, nil } @@ -165,12 +179,13 @@ func (d *DTSExtractor) extractInner( return 0, 0, fmt.Errorf("invalid frame POC") } - return d.prevPTS + time.Duration(math.Round(float64(pts-d.prevPTS)/float64(pocDiff/2+1))), pocDiff, nil + return d.prevPTS - d.ptsDTSOffset + + time.Duration(math.Round(float64(pts-d.prevPTS)/float64(pocDiff/2+1))), pocDiff, nil } - // pocDiff : prevPOCDiff = (pts - dts) : (prevPTS - prevDTS) - return pts + time.Duration(math.Round(float64(d.prevDTS-d.prevPTS)*float64(pocDiff)/float64(d.prevPOCDiff))), - pocDiff, nil + // pocDiff : prevPOCDiff = (pts - dts - ptsDTSOffset) : (prevPTS - prevDTS - ptsDTSOffset) + return pts - d.ptsDTSOffset + time.Duration(math.Round(float64(d.prevDTS-d.prevPTS+d.ptsDTSOffset)* + float64(pocDiff)/float64(d.prevPOCDiff))), pocDiff, nil } // Extract extracts the DTS of a NALU group. diff --git a/pkg/h264/dtsextractor_test.go b/pkg/h264/dtsextractor_test.go index 50e38fd4..06953636 100644 --- a/pkg/h264/dtsextractor_test.go +++ b/pkg/h264/dtsextractor_test.go @@ -80,7 +80,7 @@ func TestDTSExtractor(t *testing.T) { }, }, 0, - 0, + -400 * time.Millisecond, }, { [][]byte{ @@ -97,7 +97,7 @@ func TestDTSExtractor(t *testing.T) { }, }, 800 * time.Millisecond, - 200 * time.Millisecond, + -200 * time.Millisecond, }, { [][]byte{ @@ -114,7 +114,7 @@ func TestDTSExtractor(t *testing.T) { }, }, 400 * time.Millisecond, - 400 * time.Millisecond, + 0, }, { [][]byte{ @@ -130,7 +130,7 @@ func TestDTSExtractor(t *testing.T) { }, }, 200 * time.Millisecond, - 600 * time.Millisecond, + 200 * time.Millisecond, }, }