Add WithDirectPTS option for precise RTP timestamp (#3287)

This commit is contained in:
Masayuki
2025-12-05 10:56:04 +09:00
committed by GitHub
parent 210cd958d5
commit c6b288f937
2 changed files with 224 additions and 6 deletions

View File

@@ -42,6 +42,8 @@ type (
videoWidth uint16
videoHeight uint16
directPTS bool
// VP8, VP9
currentFrame []byte
@@ -149,8 +151,18 @@ func (i *IVFWriter) timestampToPts(timestamp uint64) uint64 {
func (i *IVFWriter) writeFrame(frame []byte, timestamp uint64) error {
frameHeader := make([]byte, 12)
//nolint:gosec // G115
binary.LittleEndian.PutUint32(frameHeader[0:], uint32(len(frame))) // Frame length
binary.LittleEndian.PutUint64(frameHeader[4:], i.timestampToPts(timestamp)) // PTS
binary.LittleEndian.PutUint32(frameHeader[0:], uint32(len(frame))) // Frame length
var pts uint64
if i.directPTS {
// Direct PTS mode: use timestamp directly as PTS.
pts = timestamp
} else {
// Existing behavior: convert using timebase.
pts = i.timestampToPts(timestamp)
}
binary.LittleEndian.PutUint64(frameHeader[4:], pts) // PTS
i.count++
if _, err := i.ioWriter.Write(frameHeader); err != nil {
@@ -172,15 +184,23 @@ func (i *IVFWriter) WriteRTP(packet *rtp.Packet) error {
if i.count == 0 {
i.firstFrameTimestamp = packet.Timestamp
}
relativeTstampMs := 1000 * uint64(packet.Timestamp-i.firstFrameTimestamp) / i.clockRate
var timestamp uint64
if i.directPTS {
// Direct PTS mode: use RTP timestamp directly (no millisecond conversion).
timestamp = uint64(packet.Timestamp - i.firstFrameTimestamp)
} else {
// Existing behavior: convert to milliseconds first.
timestamp = 1000 * uint64(packet.Timestamp-i.firstFrameTimestamp) / i.clockRate
}
switch i.codec {
case codecVP8:
return i.writeVP8(packet, relativeTstampMs)
return i.writeVP8(packet, timestamp)
case codecVP9:
return i.writeVP9(packet, relativeTstampMs)
return i.writeVP9(packet, timestamp)
case codecAV1:
return i.writeAV1(packet, relativeTstampMs)
return i.writeAV1(packet, timestamp)
default:
return errCodecUnset
}
@@ -361,3 +381,21 @@ func WithFrameRate(numerator, denominator uint32) Option {
return nil
}
}
// WithDirectPTS enables direct use of RTP timestamps as PTS values
// without millisecond conversion.
//
// When this option is used, RTP timestamps are written directly as PTS values,
// preserving full timestamp precision. Use WithFrameRate to set the appropriate
// timebase (e.g., WithFrameRate(1, 90000) for standard 90kHz RTP clock).
//
// Example usage for standard RTP video (90kHz clock rate):
//
// NewWith(file, WithFrameRate(1, 90000), WithDirectPTS())
func WithDirectPTS() Option {
return func(i *IVFWriter) error {
i.directPTS = true
return nil
}
}

View File

@@ -416,3 +416,183 @@ func TestIVFWriter_WithFrameRate(t *testing.T) {
0x01, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x84, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}, buffer.Bytes())
}
func TestIVFWriter_WithDirectPTS(t *testing.T) {
buffer := &bytes.Buffer{}
writer, err := NewWith(buffer, WithFrameRate(1, 90000), WithDirectPTS())
assert.NoError(t, err)
assert.True(t, writer.directPTS)
assert.Equal(t, uint32(1), writer.timebaseNumerator)
assert.Equal(t, uint32(90000), writer.timebaseDenominator)
assert.NoError(t, writer.Close())
}
func TestIVFWriter_DirectPTS_VP8(t *testing.T) {
buffer := &bytes.Buffer{}
writer, err := NewWith(buffer, WithCodec(mimeTypeVP8), WithFrameRate(1, 90000), WithDirectPTS())
assert.NoError(t, err)
// Write keyframe with timestamp 0.
keyframePacket := &rtp.Packet{
Header: rtp.Header{
Marker: true,
Timestamp: 0,
},
// VP8 keyframe: S=1, P=0
Payload: []byte{0x10, 0x00, 0x00, 0x9d, 0x01, 0x2a},
}
assert.NoError(t, writer.WriteRTP(keyframePacket))
assert.Equal(t, uint64(1), writer.count)
// Write second frame with timestamp 6000 (15fps at 90kHz clock).
frame2 := &rtp.Packet{
Header: rtp.Header{
Marker: true,
Timestamp: 6000,
},
// VP8 interframe: S=1, P=1
Payload: []byte{0x10, 0x01, 0x00, 0x9d, 0x01, 0x2a},
}
assert.NoError(t, writer.WriteRTP(frame2))
assert.Equal(t, uint64(2), writer.count)
// Write third frame with timestamp 12000.
frame3 := &rtp.Packet{
Header: rtp.Header{
Marker: true,
Timestamp: 12000,
},
Payload: []byte{0x10, 0x01, 0x00, 0x9d, 0x01, 0x2a},
}
assert.NoError(t, writer.WriteRTP(frame3))
assert.Equal(t, uint64(3), writer.count)
assert.NoError(t, writer.Close())
// Verify IVF structure.
data := buffer.Bytes()
assert.True(t, len(data) > 32, "Buffer should contain header + frames")
// Check IVF header timebase (offset 16-20: denominator, offset 20-24: numerator).
timebaseDenom := uint32(data[16]) | uint32(data[17])<<8 | uint32(data[18])<<16 | uint32(data[19])<<24
timebaseNum := uint32(data[20]) | uint32(data[21])<<8 | uint32(data[22])<<16 | uint32(data[23])<<24
assert.Equal(t, uint32(90000), timebaseDenom)
assert.Equal(t, uint32(1), timebaseNum)
// Verify PTS values in frame headers.
// Frame 1: PTS should be 0.
pts1 := uint64(data[36]) | uint64(data[37])<<8 | uint64(data[38])<<16 | uint64(data[39])<<24 |
uint64(data[40])<<32 | uint64(data[41])<<40 | uint64(data[42])<<48 | uint64(data[43])<<56
assert.Equal(t, uint64(0), pts1, "First frame PTS should be 0")
// Frame 2: PTS should be 6000 (RTP timestamp directly).
frameSize1 := uint32(data[32]) | uint32(data[33])<<8 | uint32(data[34])<<16 | uint32(data[35])<<24
frame2Offset := 32 + 12 + int(frameSize1)
pts2 := uint64(data[frame2Offset+4]) | uint64(data[frame2Offset+5])<<8 |
uint64(data[frame2Offset+6])<<16 | uint64(data[frame2Offset+7])<<24 |
uint64(data[frame2Offset+8])<<32 | uint64(data[frame2Offset+9])<<40 |
uint64(data[frame2Offset+10])<<48 | uint64(data[frame2Offset+11])<<56
assert.Equal(t, uint64(6000), pts2, "Second frame PTS should be 6000")
}
func TestIVFWriter_DirectPTS_Precision(t *testing.T) {
buffer := &bytes.Buffer{}
writer, err := NewWith(buffer, WithCodec(mimeTypeVP8), WithFrameRate(1, 90000), WithDirectPTS())
assert.NoError(t, err)
// Simulate 15fps video (6000 RTP ticks per frame at 90kHz).
// 225 frames = 15 seconds.
timestamps := make([]uint32, 225)
for idx := range timestamps {
timestamps[idx] = uint32(idx) * 6000 //nolint:gosec // Test code with known safe values.
}
for idx, ts := range timestamps {
packet := &rtp.Packet{
Header: rtp.Header{
Marker: true,
Timestamp: ts,
},
// VP8 keyframe for first, interframe for rest.
Payload: []byte{0x10, 0x00, 0x00, 0x9d, 0x01, 0x2a},
}
if idx > 0 {
packet.Payload[1] = 0x01 // Set non-keyframe flag.
}
assert.NoError(t, writer.WriteRTP(packet))
}
assert.NoError(t, writer.Close())
// Verify frame count.
assert.Equal(t, uint64(225), writer.count)
// Verify last frame PTS is exactly 224 * 6000 = 1344000.
data := buffer.Bytes()
offset := 32 // Start after IVF header.
var lastPTS uint64
for idx := 0; idx < 225; idx++ {
frameSize := uint32(data[offset]) | uint32(data[offset+1])<<8 |
uint32(data[offset+2])<<16 | uint32(data[offset+3])<<24
lastPTS = uint64(data[offset+4]) | uint64(data[offset+5])<<8 |
uint64(data[offset+6])<<16 | uint64(data[offset+7])<<24 |
uint64(data[offset+8])<<32 | uint64(data[offset+9])<<40 |
uint64(data[offset+10])<<48 | uint64(data[offset+11])<<56
offset += 12 + int(frameSize)
}
// Last frame should have PTS = 224 * 6000 = 1344000.
assert.Equal(t, uint64(224*6000), lastPTS, "Last frame PTS should be exactly 1344000")
}
func TestIVFWriter_BackwardCompatibility(t *testing.T) {
// Test that default behavior (without WithDirectPTS) remains unchanged.
buffer := &bytes.Buffer{}
writer, err := NewWith(buffer, WithCodec(mimeTypeVP8))
assert.NoError(t, err)
assert.False(t, writer.directPTS, "Default should not use direct PTS mode")
// Write keyframe.
keyframePacket := &rtp.Packet{
Header: rtp.Header{
Marker: true,
Timestamp: 90000, // 1 second at 90kHz.
},
Payload: []byte{0x10, 0x00, 0x00, 0x9d, 0x01, 0x2a},
}
assert.NoError(t, writer.WriteRTP(keyframePacket))
// Write second frame at 2 seconds.
frame2 := &rtp.Packet{
Header: rtp.Header{
Marker: true,
Timestamp: 180000, // 2 seconds at 90kHz.
},
Payload: []byte{0x10, 0x01, 0x00, 0x9d, 0x01, 0x2a},
}
assert.NoError(t, writer.WriteRTP(frame2))
assert.NoError(t, writer.Close())
// Verify PTS uses millisecond conversion (legacy behavior).
data := buffer.Bytes()
// First frame PTS should be 0.
pts1 := uint64(data[36]) | uint64(data[37])<<8 | uint64(data[38])<<16 | uint64(data[39])<<24 |
uint64(data[40])<<32 | uint64(data[41])<<40 | uint64(data[42])<<48 | uint64(data[43])<<56
assert.Equal(t, uint64(0), pts1)
// Second frame: (180000-90000)/90000 * 1000 = 1000ms, then 1000 * 1 / 30 = 33 PTS.
frameSize1 := uint32(data[32]) | uint32(data[33])<<8 | uint32(data[34])<<16 | uint32(data[35])<<24
frame2Offset := 32 + 12 + int(frameSize1)
pts2 := uint64(data[frame2Offset+4]) | uint64(data[frame2Offset+5])<<8 |
uint64(data[frame2Offset+6])<<16 | uint64(data[frame2Offset+7])<<24 |
uint64(data[frame2Offset+8])<<32 | uint64(data[frame2Offset+9])<<40 |
uint64(data[frame2Offset+10])<<48 | uint64(data[frame2Offset+11])<<56
assert.Equal(t, uint64(33), pts2, "Legacy mode: PTS should be 33 (1000ms * 1/30)")
}