diff --git a/pkg/media/ivfwriter/ivfwriter.go b/pkg/media/ivfwriter/ivfwriter.go index a088027b..1e9809ac 100644 --- a/pkg/media/ivfwriter/ivfwriter.go +++ b/pkg/media/ivfwriter/ivfwriter.go @@ -42,6 +42,8 @@ type ( videoWidth uint16 videoHeight uint16 + directPTS bool + // VP8, VP9 currentFrame []byte @@ -149,8 +151,18 @@ func (i *IVFWriter) timestampToPts(timestamp uint64) uint64 { func (i *IVFWriter) writeFrame(frame []byte, timestamp uint64) error { frameHeader := make([]byte, 12) //nolint:gosec // G115 - binary.LittleEndian.PutUint32(frameHeader[0:], uint32(len(frame))) // Frame length - binary.LittleEndian.PutUint64(frameHeader[4:], i.timestampToPts(timestamp)) // PTS + binary.LittleEndian.PutUint32(frameHeader[0:], uint32(len(frame))) // Frame length + + var pts uint64 + if i.directPTS { + // Direct PTS mode: use timestamp directly as PTS. + pts = timestamp + } else { + // Existing behavior: convert using timebase. + pts = i.timestampToPts(timestamp) + } + binary.LittleEndian.PutUint64(frameHeader[4:], pts) // PTS + i.count++ if _, err := i.ioWriter.Write(frameHeader); err != nil { @@ -172,15 +184,23 @@ func (i *IVFWriter) WriteRTP(packet *rtp.Packet) error { if i.count == 0 { i.firstFrameTimestamp = packet.Timestamp } - relativeTstampMs := 1000 * uint64(packet.Timestamp-i.firstFrameTimestamp) / i.clockRate + + var timestamp uint64 + if i.directPTS { + // Direct PTS mode: use RTP timestamp directly (no millisecond conversion). + timestamp = uint64(packet.Timestamp - i.firstFrameTimestamp) + } else { + // Existing behavior: convert to milliseconds first. + timestamp = 1000 * uint64(packet.Timestamp-i.firstFrameTimestamp) / i.clockRate + } switch i.codec { case codecVP8: - return i.writeVP8(packet, relativeTstampMs) + return i.writeVP8(packet, timestamp) case codecVP9: - return i.writeVP9(packet, relativeTstampMs) + return i.writeVP9(packet, timestamp) case codecAV1: - return i.writeAV1(packet, relativeTstampMs) + return i.writeAV1(packet, timestamp) default: return errCodecUnset } @@ -361,3 +381,21 @@ func WithFrameRate(numerator, denominator uint32) Option { return nil } } + +// WithDirectPTS enables direct use of RTP timestamps as PTS values +// without millisecond conversion. +// +// When this option is used, RTP timestamps are written directly as PTS values, +// preserving full timestamp precision. Use WithFrameRate to set the appropriate +// timebase (e.g., WithFrameRate(1, 90000) for standard 90kHz RTP clock). +// +// Example usage for standard RTP video (90kHz clock rate): +// +// NewWith(file, WithFrameRate(1, 90000), WithDirectPTS()) +func WithDirectPTS() Option { + return func(i *IVFWriter) error { + i.directPTS = true + + return nil + } +} diff --git a/pkg/media/ivfwriter/ivfwriter_test.go b/pkg/media/ivfwriter/ivfwriter_test.go index d23137a0..6b02b28f 100644 --- a/pkg/media/ivfwriter/ivfwriter_test.go +++ b/pkg/media/ivfwriter/ivfwriter_test.go @@ -416,3 +416,183 @@ func TestIVFWriter_WithFrameRate(t *testing.T) { 0x01, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x84, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, buffer.Bytes()) } + +func TestIVFWriter_WithDirectPTS(t *testing.T) { + buffer := &bytes.Buffer{} + + writer, err := NewWith(buffer, WithFrameRate(1, 90000), WithDirectPTS()) + assert.NoError(t, err) + assert.True(t, writer.directPTS) + assert.Equal(t, uint32(1), writer.timebaseNumerator) + assert.Equal(t, uint32(90000), writer.timebaseDenominator) + + assert.NoError(t, writer.Close()) +} + +func TestIVFWriter_DirectPTS_VP8(t *testing.T) { + buffer := &bytes.Buffer{} + + writer, err := NewWith(buffer, WithCodec(mimeTypeVP8), WithFrameRate(1, 90000), WithDirectPTS()) + assert.NoError(t, err) + + // Write keyframe with timestamp 0. + keyframePacket := &rtp.Packet{ + Header: rtp.Header{ + Marker: true, + Timestamp: 0, + }, + // VP8 keyframe: S=1, P=0 + Payload: []byte{0x10, 0x00, 0x00, 0x9d, 0x01, 0x2a}, + } + assert.NoError(t, writer.WriteRTP(keyframePacket)) + assert.Equal(t, uint64(1), writer.count) + + // Write second frame with timestamp 6000 (15fps at 90kHz clock). + frame2 := &rtp.Packet{ + Header: rtp.Header{ + Marker: true, + Timestamp: 6000, + }, + // VP8 interframe: S=1, P=1 + Payload: []byte{0x10, 0x01, 0x00, 0x9d, 0x01, 0x2a}, + } + assert.NoError(t, writer.WriteRTP(frame2)) + assert.Equal(t, uint64(2), writer.count) + + // Write third frame with timestamp 12000. + frame3 := &rtp.Packet{ + Header: rtp.Header{ + Marker: true, + Timestamp: 12000, + }, + Payload: []byte{0x10, 0x01, 0x00, 0x9d, 0x01, 0x2a}, + } + assert.NoError(t, writer.WriteRTP(frame3)) + assert.Equal(t, uint64(3), writer.count) + + assert.NoError(t, writer.Close()) + + // Verify IVF structure. + data := buffer.Bytes() + assert.True(t, len(data) > 32, "Buffer should contain header + frames") + + // Check IVF header timebase (offset 16-20: denominator, offset 20-24: numerator). + timebaseDenom := uint32(data[16]) | uint32(data[17])<<8 | uint32(data[18])<<16 | uint32(data[19])<<24 + timebaseNum := uint32(data[20]) | uint32(data[21])<<8 | uint32(data[22])<<16 | uint32(data[23])<<24 + assert.Equal(t, uint32(90000), timebaseDenom) + assert.Equal(t, uint32(1), timebaseNum) + + // Verify PTS values in frame headers. + // Frame 1: PTS should be 0. + pts1 := uint64(data[36]) | uint64(data[37])<<8 | uint64(data[38])<<16 | uint64(data[39])<<24 | + uint64(data[40])<<32 | uint64(data[41])<<40 | uint64(data[42])<<48 | uint64(data[43])<<56 + assert.Equal(t, uint64(0), pts1, "First frame PTS should be 0") + + // Frame 2: PTS should be 6000 (RTP timestamp directly). + frameSize1 := uint32(data[32]) | uint32(data[33])<<8 | uint32(data[34])<<16 | uint32(data[35])<<24 + frame2Offset := 32 + 12 + int(frameSize1) + pts2 := uint64(data[frame2Offset+4]) | uint64(data[frame2Offset+5])<<8 | + uint64(data[frame2Offset+6])<<16 | uint64(data[frame2Offset+7])<<24 | + uint64(data[frame2Offset+8])<<32 | uint64(data[frame2Offset+9])<<40 | + uint64(data[frame2Offset+10])<<48 | uint64(data[frame2Offset+11])<<56 + assert.Equal(t, uint64(6000), pts2, "Second frame PTS should be 6000") +} + +func TestIVFWriter_DirectPTS_Precision(t *testing.T) { + buffer := &bytes.Buffer{} + + writer, err := NewWith(buffer, WithCodec(mimeTypeVP8), WithFrameRate(1, 90000), WithDirectPTS()) + assert.NoError(t, err) + + // Simulate 15fps video (6000 RTP ticks per frame at 90kHz). + // 225 frames = 15 seconds. + timestamps := make([]uint32, 225) + for idx := range timestamps { + timestamps[idx] = uint32(idx) * 6000 //nolint:gosec // Test code with known safe values. + } + + for idx, ts := range timestamps { + packet := &rtp.Packet{ + Header: rtp.Header{ + Marker: true, + Timestamp: ts, + }, + // VP8 keyframe for first, interframe for rest. + Payload: []byte{0x10, 0x00, 0x00, 0x9d, 0x01, 0x2a}, + } + if idx > 0 { + packet.Payload[1] = 0x01 // Set non-keyframe flag. + } + assert.NoError(t, writer.WriteRTP(packet)) + } + + assert.NoError(t, writer.Close()) + + // Verify frame count. + assert.Equal(t, uint64(225), writer.count) + + // Verify last frame PTS is exactly 224 * 6000 = 1344000. + data := buffer.Bytes() + offset := 32 // Start after IVF header. + + var lastPTS uint64 + for idx := 0; idx < 225; idx++ { + frameSize := uint32(data[offset]) | uint32(data[offset+1])<<8 | + uint32(data[offset+2])<<16 | uint32(data[offset+3])<<24 + lastPTS = uint64(data[offset+4]) | uint64(data[offset+5])<<8 | + uint64(data[offset+6])<<16 | uint64(data[offset+7])<<24 | + uint64(data[offset+8])<<32 | uint64(data[offset+9])<<40 | + uint64(data[offset+10])<<48 | uint64(data[offset+11])<<56 + offset += 12 + int(frameSize) + } + + // Last frame should have PTS = 224 * 6000 = 1344000. + assert.Equal(t, uint64(224*6000), lastPTS, "Last frame PTS should be exactly 1344000") +} + +func TestIVFWriter_BackwardCompatibility(t *testing.T) { + // Test that default behavior (without WithDirectPTS) remains unchanged. + buffer := &bytes.Buffer{} + + writer, err := NewWith(buffer, WithCodec(mimeTypeVP8)) + assert.NoError(t, err) + assert.False(t, writer.directPTS, "Default should not use direct PTS mode") + + // Write keyframe. + keyframePacket := &rtp.Packet{ + Header: rtp.Header{ + Marker: true, + Timestamp: 90000, // 1 second at 90kHz. + }, + Payload: []byte{0x10, 0x00, 0x00, 0x9d, 0x01, 0x2a}, + } + assert.NoError(t, writer.WriteRTP(keyframePacket)) + + // Write second frame at 2 seconds. + frame2 := &rtp.Packet{ + Header: rtp.Header{ + Marker: true, + Timestamp: 180000, // 2 seconds at 90kHz. + }, + Payload: []byte{0x10, 0x01, 0x00, 0x9d, 0x01, 0x2a}, + } + assert.NoError(t, writer.WriteRTP(frame2)) + assert.NoError(t, writer.Close()) + + // Verify PTS uses millisecond conversion (legacy behavior). + data := buffer.Bytes() + + // First frame PTS should be 0. + pts1 := uint64(data[36]) | uint64(data[37])<<8 | uint64(data[38])<<16 | uint64(data[39])<<24 | + uint64(data[40])<<32 | uint64(data[41])<<40 | uint64(data[42])<<48 | uint64(data[43])<<56 + assert.Equal(t, uint64(0), pts1) + + // Second frame: (180000-90000)/90000 * 1000 = 1000ms, then 1000 * 1 / 30 = 33 PTS. + frameSize1 := uint32(data[32]) | uint32(data[33])<<8 | uint32(data[34])<<16 | uint32(data[35])<<24 + frame2Offset := 32 + 12 + int(frameSize1) + pts2 := uint64(data[frame2Offset+4]) | uint64(data[frame2Offset+5])<<8 | + uint64(data[frame2Offset+6])<<16 | uint64(data[frame2Offset+7])<<24 | + uint64(data[frame2Offset+8])<<32 | uint64(data[frame2Offset+9])<<40 | + uint64(data[frame2Offset+10])<<48 | uint64(data[frame2Offset+11])<<56 + assert.Equal(t, uint64(33), pts2, "Legacy mode: PTS should be 33 (1000ms * 1/30)") +}