support publishing, reading and proxying MPEG-2 audio (MP3) tracks with RTMP (#1102) (#1736)

Alessandro Ros
2023-04-25 18:13:51 +02:00
committed by GitHub
parent c314d77596
commit 2d17dff3b5
27 changed files with 727 additions and 487 deletions
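The new read path in findAudioFormat (below) parses each MPEG-2 Audio frame header to reject anything that is not MPEG-1 Layer 3, pick mono or stereo for the FLV audio tag, and advance the outgoing DTS by one frame duration. What follows is a minimal standalone sketch of that per-frame bookkeeping, not part of the diff, assuming the mediacommon mpeg2audio API exactly as it is used in this commit (FrameHeader.Unmarshal, MPEG2, Layer, ChannelMode, SampleCount, SampleRate); the header bytes are hypothetical, and feeding Unmarshal only the 4 fixed-header bytes (rather than a whole frame, as the commit does) is an assumption.

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/bluenviron/mediacommon/pkg/codecs/mpeg2audio"
)

// frameDuration is how long one decoded MP3 frame lasts; the read path
// advances the outgoing RTMP DTS by this amount after writing each frame.
func frameDuration(h *mpeg2audio.FrameHeader) time.Duration {
	return time.Duration(h.SampleCount()) * time.Second / time.Duration(h.SampleRate)
}

func main() {
	// hypothetical 4-byte MPEG-1 Layer III header: 128 kbps, 44.1 kHz, joint stereo.
	// In mediamtx the full frame comes from formatprocessor.UnitMPEG2Audio.Frames.
	frame := []byte{0xff, 0xfb, 0x90, 0x64}

	var h mpeg2audio.FrameHeader
	if err := h.Unmarshal(frame); err != nil {
		log.Fatal(err)
	}

	// the FLV MP3 audio tag can only carry MPEG-1 Layer 3,
	// so everything else is rejected, as in findAudioFormat.
	if h.MPEG2 || h.Layer != 3 {
		log.Fatal("RTMP only supports MPEG-1 audio layer 3")
	}

	// mono vs. stereo is the only channel information the FLV audio tag carries.
	stereo := h.ChannelMode != mpeg2audio.ChannelModeMono

	fmt.Println("stereo:", stereo)
	fmt.Println("DTS advance per frame:", frameDuration(&h)) // 1152 samples / 44100 Hz ≈ 26.1 ms
}

Unlike the AAC path, there is no sequence-header message for MP3, so the channel layout has to be re-derived from each frame header before the RTMP audio message is built.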


@@ -14,6 +14,7 @@ import (
"github.com/bluenviron/gortsplib/v3/pkg/media"
"github.com/bluenviron/gortsplib/v3/pkg/ringbuffer"
"github.com/bluenviron/mediacommon/pkg/codecs/h264"
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg2audio"
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio"
"github.com/google/uuid"
"github.com/notedit/rtmp/format/flv/flvio"
@@ -39,6 +40,96 @@ func pathNameAndQuery(inURL *url.URL) (string, url.Values, string) {
return pathName, ur.Query(), ur.RawQuery
}
type rtmpWriteFunc func(msg interface{}) error
func getRTMPWriteFunc(medi *media.Media, format formats.Format, stream *stream) rtmpWriteFunc {
switch format.(type) {
case *formats.H264:
return func(msg interface{}) error {
tmsg := msg.(*message.MsgVideo)
if tmsg.H264Type == flvio.AVC_SEQHDR {
var conf h264conf.Conf
err := conf.Unmarshal(tmsg.Payload)
if err != nil {
return fmt.Errorf("unable to parse H264 config: %v", err)
}
au := [][]byte{
conf.SPS,
conf.PPS,
}
return stream.writeUnit(medi, format, &formatprocessor.UnitH264{
PTS: tmsg.DTS + tmsg.PTSDelta,
AU: au,
NTP: time.Now(),
})
}
if tmsg.H264Type == flvio.AVC_NALU {
au, err := h264.AVCCUnmarshal(tmsg.Payload)
if err != nil {
return fmt.Errorf("unable to decode AVCC: %v", err)
}
return stream.writeUnit(medi, format, &formatprocessor.UnitH264{
PTS: tmsg.DTS + tmsg.PTSDelta,
AU: au,
NTP: time.Now(),
})
}
return nil
}
case *formats.H265:
return func(msg interface{}) error {
tmsg := msg.(*message.MsgVideo)
au, err := h264.AVCCUnmarshal(tmsg.Payload)
if err != nil {
return fmt.Errorf("unable to decode AVCC: %v", err)
}
return stream.writeUnit(medi, format, &formatprocessor.UnitH265{
PTS: tmsg.DTS + tmsg.PTSDelta,
AU: au,
NTP: time.Now(),
})
}
case *formats.MPEG2Audio:
return func(msg interface{}) error {
tmsg := msg.(*message.MsgAudio)
return stream.writeUnit(medi, format, &formatprocessor.UnitMPEG2Audio{
PTS: tmsg.DTS,
Frames: [][]byte{tmsg.Payload},
NTP: time.Now(),
})
}
case *formats.MPEG4Audio:
return func(msg interface{}) error {
tmsg := msg.(*message.MsgAudio)
if tmsg.AACType != flvio.AAC_RAW {
return nil
}
return stream.writeUnit(medi, format, &formatprocessor.UnitMPEG4Audio{
PTS: tmsg.DTS,
AUs: [][]byte{tmsg.Payload},
NTP: time.Now(),
})
}
default:
return nil
}
}
type rtmpConnState int
const (
@@ -73,11 +164,10 @@ type rtmpConn struct {
pathManager rtmpConnPathManager
parent rtmpConnParent
ctx context.Context
ctxCancel func()
uuid uuid.UUID
created time.Time
// path *path
ctx context.Context
ctxCancel func()
uuid uuid.UUID
created time.Time
state rtmpConnState
stateMutex sync.Mutex
}
@@ -252,18 +342,6 @@ func (c *rtmpConn) runRead(ctx context.Context, u *url.URL) error {
c.state = rtmpConnStateRead
c.stateMutex.Unlock()
var videoFormat *formats.H264
videoMedia := res.stream.medias().FindFormat(&videoFormat)
videoFirstIDRFound := false
var videoStartDTS time.Duration
var audioFormat *formats.MPEG4Audio
audioMedia := res.stream.medias().FindFormat(&audioFormat)
if videoFormat == nil && audioFormat == nil {
return fmt.Errorf("the stream doesn't contain an H264 track or an AAC track")
}
ringBuffer, _ := ringbuffer.New(uint64(c.readBufferCount))
go func() {
<-ctx.Done()
@@ -271,152 +349,24 @@ func (c *rtmpConn) runRead(ctx context.Context, u *url.URL) error {
}()
var medias media.Medias
videoFirstIDRFound := false
var videoStartDTS time.Duration
videoMedia, videoFormat := c.findVideoFormat(res.stream, ringBuffer,
&videoFirstIDRFound, &videoStartDTS)
if videoMedia != nil {
medias = append(medias, videoMedia)
videoStartPTSFilled := false
var videoStartPTS time.Duration
var videoDTSExtractor *h264.DTSExtractor
res.stream.readerAdd(c, videoMedia, videoFormat, func(unit formatprocessor.Unit) {
ringBuffer.Push(func() error {
tunit := unit.(*formatprocessor.UnitH264)
if tunit.AU == nil {
return nil
}
if !videoStartPTSFilled {
videoStartPTSFilled = true
videoStartPTS = tunit.PTS
}
pts := tunit.PTS - videoStartPTS
idrPresent := false
nonIDRPresent := false
for _, nalu := range tunit.AU {
typ := h264.NALUType(nalu[0] & 0x1F)
switch typ {
case h264.NALUTypeIDR:
idrPresent = true
case h264.NALUTypeNonIDR:
nonIDRPresent = true
}
}
var dts time.Duration
// wait until we receive an IDR
if !videoFirstIDRFound {
if !idrPresent {
return nil
}
videoFirstIDRFound = true
videoDTSExtractor = h264.NewDTSExtractor()
var err error
dts, err = videoDTSExtractor.Extract(tunit.AU, pts)
if err != nil {
return err
}
videoStartDTS = dts
dts = 0
pts -= videoStartDTS
} else {
if !idrPresent && !nonIDRPresent {
return nil
}
var err error
dts, err = videoDTSExtractor.Extract(tunit.AU, pts)
if err != nil {
return err
}
dts -= videoStartDTS
pts -= videoStartDTS
}
avcc, err := h264.AVCCMarshal(tunit.AU)
if err != nil {
return err
}
c.nconn.SetWriteDeadline(time.Now().Add(time.Duration(c.writeTimeout)))
err = c.conn.WriteMessage(&message.MsgVideo{
ChunkStreamID: message.MsgVideoChunkStreamID,
MessageStreamID: 0x1000000,
IsKeyFrame: idrPresent,
H264Type: flvio.AVC_NALU,
Payload: avcc,
DTS: dts,
PTSDelta: pts - dts,
})
if err != nil {
return err
}
return nil
})
})
}
if audioMedia != nil {
audioMedia, audioFormat := c.findAudioFormat(res.stream, ringBuffer,
videoFormat, &videoFirstIDRFound, &videoStartDTS)
if audioFormat != nil {
medias = append(medias, audioMedia)
}
audioStartPTSFilled := false
var audioStartPTS time.Duration
res.stream.readerAdd(c, audioMedia, audioFormat, func(unit formatprocessor.Unit) {
ringBuffer.Push(func() error {
tunit := unit.(*formatprocessor.UnitMPEG4Audio)
if tunit.AUs == nil {
return nil
}
if !audioStartPTSFilled {
audioStartPTSFilled = true
audioStartPTS = tunit.PTS
}
pts := tunit.PTS - audioStartPTS
if videoFormat != nil {
if !videoFirstIDRFound {
return nil
}
pts -= videoStartDTS
if pts < 0 {
return nil
}
}
for i, au := range tunit.AUs {
c.nconn.SetWriteDeadline(time.Now().Add(time.Duration(c.writeTimeout)))
err := c.conn.WriteMessage(&message.MsgAudio{
ChunkStreamID: message.MsgAudioChunkStreamID,
MessageStreamID: 0x1000000,
Rate: flvio.SOUND_44Khz,
Depth: flvio.SOUND_16BIT,
Channels: flvio.SOUND_STEREO,
AACType: flvio.AAC_RAW,
Payload: au,
DTS: pts + time.Duration(i)*mpeg4audio.SamplesPerAccessUnit*
time.Second/time.Duration(audioFormat.ClockRate()),
})
if err != nil {
return err
}
}
return nil
})
})
if videoFormat == nil && audioFormat == nil {
return fmt.Errorf(
"the stream doesn't contain any supported codec, which are currently H264, MPEG2-Audio, MPEG4-Audio")
}
defer res.stream.readerRemove(c)
@@ -463,6 +413,245 @@ func (c *rtmpConn) runRead(ctx context.Context, u *url.URL) error {
}
}
func (c *rtmpConn) findVideoFormat(stream *stream, ringBuffer *ringbuffer.RingBuffer,
videoFirstIDRFound *bool, videoStartDTS *time.Duration,
) (*media.Media, formats.Format) {
var videoFormatH264 *formats.H264
videoMedia := stream.medias().FindFormat(&videoFormatH264)
if videoFormatH264 != nil {
videoStartPTSFilled := false
var videoStartPTS time.Duration
var videoDTSExtractor *h264.DTSExtractor
stream.readerAdd(c, videoMedia, videoFormatH264, func(unit formatprocessor.Unit) {
ringBuffer.Push(func() error {
tunit := unit.(*formatprocessor.UnitH264)
if tunit.AU == nil {
return nil
}
if !videoStartPTSFilled {
videoStartPTSFilled = true
videoStartPTS = tunit.PTS
}
pts := tunit.PTS - videoStartPTS
idrPresent := false
nonIDRPresent := false
for _, nalu := range tunit.AU {
typ := h264.NALUType(nalu[0] & 0x1F)
switch typ {
case h264.NALUTypeIDR:
idrPresent = true
case h264.NALUTypeNonIDR:
nonIDRPresent = true
}
}
var dts time.Duration
// wait until we receive an IDR
if !*videoFirstIDRFound {
if !idrPresent {
return nil
}
*videoFirstIDRFound = true
videoDTSExtractor = h264.NewDTSExtractor()
var err error
dts, err = videoDTSExtractor.Extract(tunit.AU, pts)
if err != nil {
return err
}
*videoStartDTS = dts
dts = 0
pts -= *videoStartDTS
} else {
if !idrPresent && !nonIDRPresent {
return nil
}
var err error
dts, err = videoDTSExtractor.Extract(tunit.AU, pts)
if err != nil {
return err
}
dts -= *videoStartDTS
pts -= *videoStartDTS
}
avcc, err := h264.AVCCMarshal(tunit.AU)
if err != nil {
return err
}
c.nconn.SetWriteDeadline(time.Now().Add(time.Duration(c.writeTimeout)))
err = c.conn.WriteMessage(&message.MsgVideo{
ChunkStreamID: message.MsgVideoChunkStreamID,
MessageStreamID: 0x1000000,
IsKeyFrame: idrPresent,
H264Type: flvio.AVC_NALU,
Payload: avcc,
DTS: dts,
PTSDelta: pts - dts,
})
if err != nil {
return err
}
return nil
})
})
return videoMedia, videoFormatH264
}
return nil, nil
}
func (c *rtmpConn) findAudioFormat(stream *stream, ringBuffer *ringbuffer.RingBuffer,
videoFormat formats.Format, videoFirstIDRFound *bool, videoStartDTS *time.Duration,
) (*media.Media, formats.Format) {
var audioFormatMPEG4 *formats.MPEG4Audio
audioMedia := stream.medias().FindFormat(&audioFormatMPEG4)
if audioMedia != nil {
audioStartPTSFilled := false
var audioStartPTS time.Duration
stream.readerAdd(c, audioMedia, audioFormatMPEG4, func(unit formatprocessor.Unit) {
ringBuffer.Push(func() error {
tunit := unit.(*formatprocessor.UnitMPEG4Audio)
if tunit.AUs == nil {
return nil
}
if !audioStartPTSFilled {
audioStartPTSFilled = true
audioStartPTS = tunit.PTS
}
pts := tunit.PTS - audioStartPTS
if videoFormat != nil {
if !*videoFirstIDRFound {
return nil
}
pts -= *videoStartDTS
if pts < 0 {
return nil
}
}
for i, au := range tunit.AUs {
c.nconn.SetWriteDeadline(time.Now().Add(time.Duration(c.writeTimeout)))
err := c.conn.WriteMessage(&message.MsgAudio{
ChunkStreamID: message.MsgAudioChunkStreamID,
MessageStreamID: 0x1000000,
Codec: message.CodecMPEG4Audio,
Rate: flvio.SOUND_44Khz,
Depth: flvio.SOUND_16BIT,
Channels: flvio.SOUND_STEREO,
AACType: flvio.AAC_RAW,
Payload: au,
DTS: pts + time.Duration(i)*mpeg4audio.SamplesPerAccessUnit*
time.Second/time.Duration(audioFormatMPEG4.ClockRate()),
})
if err != nil {
return err
}
}
return nil
})
})
return audioMedia, audioFormatMPEG4
}
var audioFormatMPEG2 *formats.MPEG2Audio
audioMedia = stream.medias().FindFormat(&audioFormatMPEG2)
if audioMedia != nil {
audioStartPTSFilled := false
var audioStartPTS time.Duration
stream.readerAdd(c, audioMedia, audioFormatMPEG2, func(unit formatprocessor.Unit) {
ringBuffer.Push(func() error {
tunit := unit.(*formatprocessor.UnitMPEG2Audio)
if !audioStartPTSFilled {
audioStartPTSFilled = true
audioStartPTS = tunit.PTS
}
pts := tunit.PTS - audioStartPTS
if videoFormat != nil {
if !*videoFirstIDRFound {
return nil
}
pts -= *videoStartDTS
if pts < 0 {
return nil
}
}
for _, frame := range tunit.Frames {
var h mpeg2audio.FrameHeader
err := h.Unmarshal(frame)
if err != nil {
return err
}
if h.MPEG2 || h.Layer != 3 {
return fmt.Errorf("RTMP only supports MPEG-1 audio layer 3")
}
channels := uint8(flvio.SOUND_STEREO)
if h.ChannelMode == mpeg2audio.ChannelModeMono {
channels = flvio.SOUND_MONO
}
msg := &message.MsgAudio{
ChunkStreamID: message.MsgAudioChunkStreamID,
MessageStreamID: 0x1000000,
Codec: message.CodecMPEG2Audio,
Rate: flvio.SOUND_44Khz,
Depth: flvio.SOUND_16BIT,
Channels: channels,
Payload: frame,
DTS: pts,
}
c.nconn.SetWriteDeadline(time.Now().Add(time.Duration(c.writeTimeout)))
err = c.conn.WriteMessage(msg)
if err != nil {
return err
}
pts += time.Duration(h.SampleCount()) *
time.Second / time.Duration(h.SampleRate)
}
return nil
})
})
return audioMedia, audioFormatMPEG2
}
return nil, nil
}
func (c *rtmpConn) runPublish(ctx context.Context, u *url.URL) error {
pathName, query, rawQuery := pathNameAndQuery(u)
@@ -538,31 +727,8 @@ func (c *rtmpConn) runPublish(ctx context.Context, u *url.URL) error {
// disable write deadline to allow outgoing acknowledgements
c.nconn.SetWriteDeadline(time.Time{})
var onVideoData func(time.Duration, [][]byte)
if _, ok := videoFormat.(*formats.H264); ok {
onVideoData = func(pts time.Duration, au [][]byte) {
err = rres.stream.writeData(videoMedia, videoFormat, &formatprocessor.UnitH264{
PTS: pts,
AU: au,
NTP: time.Now(),
})
if err != nil {
c.log(logger.Warn, "%v", err)
}
}
} else {
onVideoData = func(pts time.Duration, au [][]byte) {
err = rres.stream.writeData(videoMedia, videoFormat, &formatprocessor.UnitH265{
PTS: pts,
AU: au,
NTP: time.Now(),
})
if err != nil {
c.log(logger.Warn, "%v", err)
}
}
}
videoWriteFunc := getRTMPWriteFunc(videoMedia, videoFormat, rres.stream)
audioWriteFunc := getRTMPWriteFunc(audioMedia, audioFormat, rres.stream)
for {
c.nconn.SetReadDeadline(time.Now().Add(time.Duration(c.readTimeout)))
@@ -577,34 +743,9 @@ func (c *rtmpConn) runPublish(ctx context.Context, u *url.URL) error {
return fmt.Errorf("received a video packet, but track is not set up")
}
if tmsg.H264Type == flvio.AVC_SEQHDR {
var conf h264conf.Conf
err = conf.Unmarshal(tmsg.Payload)
if err != nil {
return fmt.Errorf("unable to parse H264 config: %v", err)
}
au := [][]byte{
conf.SPS,
conf.PPS,
}
err := rres.stream.writeData(videoMedia, videoFormat, &formatprocessor.UnitH264{
PTS: tmsg.DTS + tmsg.PTSDelta,
AU: au,
NTP: time.Now(),
})
if err != nil {
c.log(logger.Warn, "%v", err)
}
} else if tmsg.H264Type == flvio.AVC_NALU {
au, err := h264.AVCCUnmarshal(tmsg.Payload)
if err != nil {
c.log(logger.Warn, "unable to decode AVCC: %v", err)
continue
}
onVideoData(tmsg.DTS+tmsg.PTSDelta, au)
err := videoWriteFunc(tmsg)
if err != nil {
c.log(logger.Warn, "%v", err)
}
case *message.MsgAudio:
@@ -612,15 +753,9 @@ func (c *rtmpConn) runPublish(ctx context.Context, u *url.URL) error {
return fmt.Errorf("received an audio packet, but track is not set up")
}
if tmsg.AACType == flvio.AAC_RAW {
err := rres.stream.writeData(audioMedia, audioFormat, &formatprocessor.UnitMPEG4Audio{
PTS: tmsg.DTS,
AUs: [][]byte{tmsg.Payload},
NTP: time.Now(),
})
if err != nil {
c.log(logger.Warn, "%v", err)
}
err := audioWriteFunc(tmsg)
if err != nil {
c.log(logger.Warn, "%v", err)
}
}
}