h264: fix DTS extractor with B-frames and remove PTS-DTS offset

This commit is contained in:
aler9
2022-06-03 10:27:05 +02:00
parent 787c516d79
commit bcd6f1b549
3 changed files with 52 additions and 25 deletions

View File

@@ -11,10 +11,6 @@ import (
"github.com/asticode/go-astits" "github.com/asticode/go-astits"
) )
const (
ptsDTSOffset = 400 * time.Millisecond
)
// mpegtsEncoder encodes H264 NALUs into MPEG-TS. // mpegtsEncoder encodes H264 NALUs into MPEG-TS.
type mpegtsEncoder struct { type mpegtsEncoder struct {
sps []byte sps []byte
@@ -25,6 +21,7 @@ type mpegtsEncoder struct {
mux *astits.Muxer mux *astits.Muxer
dtsExtractor *h264.DTSExtractor dtsExtractor *h264.DTSExtractor
firstIDRReceived bool firstIDRReceived bool
startDTS time.Duration
} }
// newMPEGTSEncoder allocates a mpegtsEncoder. // newMPEGTSEncoder allocates a mpegtsEncoder.
@@ -92,6 +89,8 @@ func (e *mpegtsEncoder) encode(nalus [][]byte, pts time.Duration) error {
filteredNALUs = append(filteredNALUs, nalu) filteredNALUs = append(filteredNALUs, nalu)
} }
var dts time.Duration
if !e.firstIDRReceived { if !e.firstIDRReceived {
// skip samples silently until we find one with an IDR // skip samples silently until we find one with an IDR
if !idrPresent { if !idrPresent {
@@ -100,14 +99,27 @@ func (e *mpegtsEncoder) encode(nalus [][]byte, pts time.Duration) error {
e.firstIDRReceived = true e.firstIDRReceived = true
e.dtsExtractor = h264.NewDTSExtractor() e.dtsExtractor = h264.NewDTSExtractor()
}
dts, err := e.dtsExtractor.Extract(filteredNALUs, pts) var err error
if err != nil { dts, err = e.dtsExtractor.Extract(filteredNALUs, pts)
return err if err != nil {
} return err
}
pts += ptsDTSOffset e.startDTS = dts
dts = 0
pts -= e.startDTS
} else {
var err error
dts, err = e.dtsExtractor.Extract(filteredNALUs, pts)
if err != nil {
return err
}
dts -= e.startDTS
pts -= e.startDTS
}
oh := &astits.PESOptionalHeader{ oh := &astits.PESOptionalHeader{
MarkerBits: 2, MarkerBits: 2,

View File

@@ -96,12 +96,13 @@ func getPOCDiff(poc1 uint32, poc2 uint32, sps *SPS) int32 {
// DTSExtractor is a utility that extracts the DTS of NALUs from their PTS. // DTSExtractor is a utility that extracts the DTS of NALUs from their PTS.
type DTSExtractor struct { type DTSExtractor struct {
sps []byte sps []byte
spsp *SPS spsp *SPS
prevPTS time.Duration prevPTS time.Duration
prevDTS time.Duration prevDTS time.Duration
prevPOCDiff int32 prevPOCDiff int32
expectedPOC uint32 expectedPOC uint32
ptsDTSOffset time.Duration
} }
// NewDTSExtractor allocates a DTSExtractor. // NewDTSExtractor allocates a DTSExtractor.
@@ -128,6 +129,14 @@ func (d *DTSExtractor) extractInner(
} }
d.sps = append([]byte(nil), nalu...) d.sps = append([]byte(nil), nalu...)
d.spsp = &spsp d.spsp = &spsp
// in case of B-frames, we have to subtract from the DTS the duration of the maximum number of reordered frames
if d.spsp.VUI != nil && d.spsp.VUI.TimingInfo != nil {
d.ptsDTSOffset = time.Duration(math.Round(float64(time.Duration(d.spsp.VUI.MaxNumReorderFrames)*time.Second*
time.Duration(d.spsp.VUI.TimingInfo.NumUnitsInTick)*2) / float64(d.spsp.VUI.TimingInfo.TimeScale)))
} else {
d.ptsDTSOffset = 0
}
} }
// set IDR present flag // set IDR present flag
@@ -142,7 +151,7 @@ func (d *DTSExtractor) extractInner(
if idrPresent || d.spsp.PicOrderCntType == 2 { if idrPresent || d.spsp.PicOrderCntType == 2 {
d.expectedPOC = 0 d.expectedPOC = 0
return pts, 0, nil return pts - d.ptsDTSOffset, 0, nil
} }
// compute expectedPOC immediately in order to store it even in case of errors // compute expectedPOC immediately in order to store it even in case of errors
@@ -157,6 +166,11 @@ func (d *DTSExtractor) extractInner(
pocDiff := getPOCDiff(poc, d.expectedPOC, d.spsp) pocDiff := getPOCDiff(poc, d.expectedPOC, d.spsp)
if pocDiff == 0 { if pocDiff == 0 {
return pts - d.ptsDTSOffset, 0, nil
}
// special case to eliminate errors near 0
if d.spsp.VUI != nil && d.spsp.VUI.TimingInfo != nil && pocDiff == -int32(d.spsp.VUI.MaxNumReorderFrames)*2 {
return pts, pocDiff, nil return pts, pocDiff, nil
} }
@@ -165,12 +179,13 @@ func (d *DTSExtractor) extractInner(
return 0, 0, fmt.Errorf("invalid frame POC") return 0, 0, fmt.Errorf("invalid frame POC")
} }
return d.prevPTS + time.Duration(math.Round(float64(pts-d.prevPTS)/float64(pocDiff/2+1))), pocDiff, nil return d.prevPTS - d.ptsDTSOffset +
time.Duration(math.Round(float64(pts-d.prevPTS)/float64(pocDiff/2+1))), pocDiff, nil
} }
// pocDiff : prevPOCDiff = (pts - dts) : (prevPTS - prevDTS) // pocDiff : prevPOCDiff = (pts - dts - ptsDTSOffset) : (prevPTS - prevDTS - ptsDTSOffset)
return pts + time.Duration(math.Round(float64(d.prevDTS-d.prevPTS)*float64(pocDiff)/float64(d.prevPOCDiff))), return pts - d.ptsDTSOffset + time.Duration(math.Round(float64(d.prevDTS-d.prevPTS+d.ptsDTSOffset)*
pocDiff, nil float64(pocDiff)/float64(d.prevPOCDiff))), pocDiff, nil
} }
// Extract extracts the DTS of a NALU group. // Extract extracts the DTS of a NALU group.

View File

@@ -80,7 +80,7 @@ func TestDTSExtractor(t *testing.T) {
}, },
}, },
0, 0,
0, -400 * time.Millisecond,
}, },
{ {
[][]byte{ [][]byte{
@@ -97,7 +97,7 @@ func TestDTSExtractor(t *testing.T) {
}, },
}, },
800 * time.Millisecond, 800 * time.Millisecond,
200 * time.Millisecond, -200 * time.Millisecond,
}, },
{ {
[][]byte{ [][]byte{
@@ -114,7 +114,7 @@ func TestDTSExtractor(t *testing.T) {
}, },
}, },
400 * time.Millisecond, 400 * time.Millisecond,
400 * time.Millisecond, 0,
}, },
{ {
[][]byte{ [][]byte{
@@ -130,7 +130,7 @@ func TestDTSExtractor(t *testing.T) {
}, },
}, },
200 * time.Millisecond, 200 * time.Millisecond,
600 * time.Millisecond, 200 * time.Millisecond,
}, },
} }