diff --git a/pkg/formats/format.go b/pkg/formats/format.go index e55e261e..5691bbc1 100644 --- a/pkg/formats/format.go +++ b/pkg/formats/format.go @@ -142,7 +142,10 @@ func Unmarshal(md *psdp.MediaDescription, payloadTypeStr string) (Format, error) return &LPCM{} case codec == "mpeg4-generic": - return &MPEG4Audio{} + return &MPEG4AudioGeneric{} + + case codec == "mp4a-latm": + return &MPEG4AudioLATM{} case codec == "vorbis": return &Vorbis{} diff --git a/pkg/formats/format_test.go b/pkg/formats/format_test.go index dc501106..379b8fa9 100644 --- a/pkg/formats/format_test.go +++ b/pkg/formats/format_test.go @@ -9,7 +9,15 @@ import ( "github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio" ) -func TestNewFromMediaDescription(t *testing.T) { +func intPtr(v int) *int { + return &v +} + +func boolPtr(v bool) *bool { + return &v +} + +func TestUnmarshal(t *testing.T) { for _, ca := range []struct { name string md *psdp.MediaDescription @@ -250,6 +258,68 @@ func TestNewFromMediaDescription(t *testing.T) { IndexDeltaLength: 0, }, }, + { + "audio aac lc latm", + &psdp.MediaDescription{ + MediaName: psdp.MediaName{ + Media: "audio", + Protos: []string{"RTP", "AVP"}, + Formats: []string{"96"}, + }, + Attributes: []psdp.Attribute{ + { + Key: "rtpmap", + Value: "96 MP4A-LATM/24000/2", + }, + { + Key: "fmtp", + Value: "96 110 profile-level-id=1; bitrate=64000; cpresent=0; " + + "object=2; config=400026203fc0", + }, + }, + }, + &MPEG4AudioLATM{ + PayloadTyp: 96, + SampleRate: 24000, + Channels: 2, + ProfileLevelID: 30, + Bitrate: intPtr(64000), + CPresent: boolPtr(false), + Object: 2, + Config: []byte{0x40, 0x00, 0x26, 0x20, 0x3f, 0xc0}, + }, + }, + { + "audio aac v2 latm", + &psdp.MediaDescription{ + MediaName: psdp.MediaName{ + Media: "audio", + Protos: []string{"RTP", "AVP"}, + Formats: []string{"110"}, + }, + Attributes: []psdp.Attribute{ + { + Key: "rtpmap", + Value: "110 MP4A-LATM/24000/1", + }, + { + Key: "fmtp", + Value: "110 profile-level-id=15; object=2; cpresent=0; " 
+ + "config=400026103fc0; SBR-enabled=1", + }, + }, + }, + &MPEG4AudioLATM{ + PayloadTyp: 110, + SampleRate: 24000, + Channels: 1, + ProfileLevelID: 15, + CPresent: boolPtr(false), + Object: 2, + SBREnabled: boolPtr(true), + Config: []byte{0x40, 0x00, 0x26, 0x10, 0x3f, 0xc0}, + }, + }, { "audio vorbis", &psdp.MediaDescription{ @@ -562,14 +632,8 @@ func TestNewFromMediaDescription(t *testing.T) { }, &VP8{ PayloadTyp: 96, - MaxFR: func() *int { - v := 123 - return &v - }(), - MaxFS: func() *int { - v := 456 - return &v - }(), + MaxFR: intPtr(123), + MaxFS: intPtr(456), }, }, { @@ -593,18 +657,9 @@ func TestNewFromMediaDescription(t *testing.T) { }, &VP9{ PayloadTyp: 96, - MaxFR: func() *int { - v := 123 - return &v - }(), - MaxFS: func() *int { - v := 456 - return &v - }(), - ProfileID: func() *int { - v := 789 - return &v - }(), + MaxFR: intPtr(123), + MaxFS: intPtr(456), + ProfileID: intPtr(789), }, }, { @@ -714,7 +769,7 @@ func TestNewFromMediaDescription(t *testing.T) { } } -func TestNewFromMediaDescriptionErrors(t *testing.T) { +func TestUnmarshalErrors(t *testing.T) { for _, ca := range []struct { name string md *psdp.MediaDescription @@ -794,7 +849,7 @@ func TestNewFromMediaDescriptionErrors(t *testing.T) { }, }, }, - "invalid AAC config (zz)", + "invalid AAC config: zz", }, { "audio aac invalid config 2", @@ -815,7 +870,7 @@ func TestNewFromMediaDescriptionErrors(t *testing.T) { }, }, }, - "invalid AAC config (aa)", + "invalid AAC config: aa", }, { "audio aac missing sizelength", @@ -857,7 +912,7 @@ func TestNewFromMediaDescriptionErrors(t *testing.T) { }, }, }, - "invalid AAC SizeLength (aaa)", + "invalid AAC SizeLength: aaa", }, { "audio aac invalid indexlength", @@ -878,7 +933,7 @@ func TestNewFromMediaDescriptionErrors(t *testing.T) { }, }, }, - "invalid AAC IndexLength (aaa)", + "invalid AAC IndexLength: aaa", }, { "audio aac invalid indexdeltalength", @@ -899,7 +954,7 @@ func TestNewFromMediaDescriptionErrors(t *testing.T) { }, }, }, - "invalid AAC 
IndexDeltaLength (aaa)", + "invalid AAC IndexDeltaLength: aaa", }, { "audio vorbis missing configuration", diff --git a/pkg/formats/mpeg4_audio.go b/pkg/formats/mpeg4_audio_generic.go similarity index 72% rename from pkg/formats/mpeg4_audio.go rename to pkg/formats/mpeg4_audio_generic.go index aba13390..ad5b6f72 100644 --- a/pkg/formats/mpeg4_audio.go +++ b/pkg/formats/mpeg4_audio_generic.go @@ -11,9 +11,12 @@ import ( "github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio" ) -// MPEG4Audio is a RTP format that uses a MPEG-4 audio codec. +// MPEG4Audio is an alias for MPEG4AudioGeneric. +type MPEG4Audio = MPEG4AudioGeneric + +// MPEG4AudioGeneric is a RTP format that uses a MPEG-4 audio codec. // Specification: https://datatracker.ietf.org/doc/html/rfc3640 -type MPEG4Audio struct { +type MPEG4AudioGeneric struct { PayloadTyp uint8 Config *mpeg4audio.Config SizeLength int @@ -22,21 +25,21 @@ type MPEG4Audio struct { } // String implements Format. -func (f *MPEG4Audio) String() string { - return "MPEG4-audio" +func (f *MPEG4AudioGeneric) String() string { + return "MPEG4-audio-gen" } // ClockRate implements Format. -func (f *MPEG4Audio) ClockRate() int { +func (f *MPEG4AudioGeneric) ClockRate() int { return f.Config.SampleRate } // PayloadType implements Format. 
-func (f *MPEG4Audio) PayloadType() uint8 { +func (f *MPEG4AudioGeneric) PayloadType() uint8 { return f.PayloadTyp } -func (f *MPEG4Audio) unmarshal( +func (f *MPEG4AudioGeneric) unmarshal( payloadType uint8, clock string, codec string, rtpmap string, fmtp map[string]string, ) error { @@ -47,33 +50,33 @@ func (f *MPEG4Audio) unmarshal( case "config": enc, err := hex.DecodeString(val) if err != nil { - return fmt.Errorf("invalid AAC config (%v)", val) + return fmt.Errorf("invalid AAC config: %v", val) } f.Config = &mpeg4audio.Config{} err = f.Config.Unmarshal(enc) if err != nil { - return fmt.Errorf("invalid AAC config (%v)", val) + return fmt.Errorf("invalid AAC config: %v", val) } case "sizelength": n, err := strconv.ParseUint(val, 10, 64) if err != nil { - return fmt.Errorf("invalid AAC SizeLength (%v)", val) + return fmt.Errorf("invalid AAC SizeLength: %v", val) } f.SizeLength = int(n) case "indexlength": n, err := strconv.ParseUint(val, 10, 64) if err != nil { - return fmt.Errorf("invalid AAC IndexLength (%v)", val) + return fmt.Errorf("invalid AAC IndexLength: %v", val) } f.IndexLength = int(n) case "indexdeltalength": n, err := strconv.ParseUint(val, 10, 64) if err != nil { - return fmt.Errorf("invalid AAC IndexDeltaLength (%v)", val) + return fmt.Errorf("invalid AAC IndexDeltaLength: %v", val) } f.IndexDeltaLength = int(n) } @@ -91,7 +94,7 @@ func (f *MPEG4Audio) unmarshal( } // Marshal implements Format. 
-func (f *MPEG4Audio) Marshal() (string, map[string]string) { +func (f *MPEG4AudioGeneric) Marshal() (string, map[string]string) { enc, err := f.Config.Marshal() if err != nil { return "", nil @@ -102,19 +105,23 @@ func (f *MPEG4Audio) Marshal() (string, map[string]string) { sampleRate = f.Config.ExtensionSampleRate } - fmtp := make(map[string]string) + fmtp := map[string]string{ + "profile-level-id": "1", + "mode": "AAC-hbr", + } - fmtp["profile-level-id"] = "1" - fmtp["mode"] = "AAC-hbr" if f.SizeLength > 0 { fmtp["sizelength"] = strconv.FormatInt(int64(f.SizeLength), 10) } + if f.IndexLength > 0 { fmtp["indexlength"] = strconv.FormatInt(int64(f.IndexLength), 10) } + if f.IndexDeltaLength > 0 { fmtp["indexdeltalength"] = strconv.FormatInt(int64(f.IndexDeltaLength), 10) } + fmtp["config"] = hex.EncodeToString(enc) return "mpeg4-generic/" + strconv.FormatInt(int64(sampleRate), 10) + @@ -122,12 +129,12 @@ func (f *MPEG4Audio) Marshal() (string, map[string]string) { } // PTSEqualsDTS implements Format. -func (f *MPEG4Audio) PTSEqualsDTS(*rtp.Packet) bool { +func (f *MPEG4AudioGeneric) PTSEqualsDTS(*rtp.Packet) bool { return true } // CreateDecoder creates a decoder able to decode the content of the format. -func (f *MPEG4Audio) CreateDecoder() *rtpmpeg4audio.Decoder { +func (f *MPEG4AudioGeneric) CreateDecoder() *rtpmpeg4audio.Decoder { d := &rtpmpeg4audio.Decoder{ SampleRate: f.Config.SampleRate, SizeLength: f.SizeLength, @@ -139,7 +146,7 @@ func (f *MPEG4Audio) CreateDecoder() *rtpmpeg4audio.Decoder { } // CreateEncoder creates an encoder able to encode the content of the format. 
-func (f *MPEG4Audio) CreateEncoder() *rtpmpeg4audio.Encoder { +func (f *MPEG4AudioGeneric) CreateEncoder() *rtpmpeg4audio.Encoder { e := &rtpmpeg4audio.Encoder{ PayloadType: f.PayloadTyp, SampleRate: f.Config.SampleRate, diff --git a/pkg/formats/mpeg4_audio_test.go b/pkg/formats/mpeg4_audio_generic_test.go similarity index 83% rename from pkg/formats/mpeg4_audio_test.go rename to pkg/formats/mpeg4_audio_generic_test.go index 47c6f6e7..0feabb6d 100644 --- a/pkg/formats/mpeg4_audio_test.go +++ b/pkg/formats/mpeg4_audio_generic_test.go @@ -9,8 +9,8 @@ import ( "github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio" ) -func TestMPEG4AudioAttributes(t *testing.T) { - format := &MPEG4Audio{ +func TestMPEG4AudioGenericAttributes(t *testing.T) { + format := &MPEG4AudioGeneric{ PayloadTyp: 96, Config: &mpeg4audio.Config{ Type: mpeg4audio.ObjectTypeAACLC, @@ -21,14 +21,14 @@ func TestMPEG4AudioAttributes(t *testing.T) { IndexLength: 3, IndexDeltaLength: 3, } - require.Equal(t, "MPEG4-audio", format.String()) + require.Equal(t, "MPEG4-audio-gen", format.String()) require.Equal(t, 48000, format.ClockRate()) require.Equal(t, uint8(96), format.PayloadType()) require.Equal(t, true, format.PTSEqualsDTS(&rtp.Packet{})) } -func TestMPEG4AudioMediaDescription(t *testing.T) { - format := &MPEG4Audio{ +func TestMPEG4AudioGenericMediaDescription(t *testing.T) { + format := &MPEG4AudioGeneric{ PayloadTyp: 96, Config: &mpeg4audio.Config{ Type: mpeg4audio.ObjectTypeAACLC, @@ -52,8 +52,8 @@ func TestMPEG4AudioMediaDescription(t *testing.T) { }, fmtp) } -func TestMPEG4AudioDecEncoder(t *testing.T) { - format := &MPEG4Audio{ +func TestMPEG4AudioGenericDecEncoder(t *testing.T) { + format := &MPEG4AudioGeneric{ PayloadTyp: 96, Config: &mpeg4audio.Config{ Type: mpeg4audio.ObjectTypeAACLC, diff --git a/pkg/formats/mpeg4_audio_latm.go b/pkg/formats/mpeg4_audio_latm.go new file mode 100644 index 00000000..1e118320 --- /dev/null +++ b/pkg/formats/mpeg4_audio_latm.go @@ -0,0 +1,164 @@ +package 
formats + +import ( + "encoding/hex" + "fmt" + "strconv" + "strings" + + "github.com/pion/rtp" +) + +// MPEG4AudioLATM is a RTP format that uses a MPEG-4 audio codec. +// Specification: https://datatracker.ietf.org/doc/html/rfc6416#section-7.3 +type MPEG4AudioLATM struct { + PayloadTyp uint8 + SampleRate int + Channels int + ProfileLevelID int + Bitrate *int + Object int + CPresent *bool + Config []byte + SBREnabled *bool +} + +// String implements Format. +func (f *MPEG4AudioLATM) String() string { + return "MPEG4-audio-latm" +} + +// ClockRate implements Format. +func (f *MPEG4AudioLATM) ClockRate() int { + return f.SampleRate +} + +// PayloadType implements Format. +func (f *MPEG4AudioLATM) PayloadType() uint8 { + return f.PayloadTyp +} + +func (f *MPEG4AudioLATM) unmarshal( + payloadType uint8, clock string, codec string, + rtpmap string, fmtp map[string]string, +) error { + tmp := strings.SplitN(clock, "/", 2) + if len(tmp) != 2 { + return fmt.Errorf("invalid clock: %v", clock) + } + + tmp2, err := strconv.ParseInt(tmp[0], 10, 64) + if err != nil { + return err + } + f.SampleRate = int(tmp2) + + tmp2, err = strconv.ParseInt(tmp[1], 10, 64) + if err != nil { + return err + } + f.Channels = int(tmp2) + + f.PayloadTyp = payloadType + f.ProfileLevelID = 30 // default value defined by specification + + for key, val := range fmtp { + switch key { + case "profile-level-id": + tmp, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid profile-level-id: %v", val) + } + + f.ProfileLevelID = int(tmp) + + case "bitrate": + tmp, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid bitrate: %v", val) + } + + v := int(tmp) + f.Bitrate = &v + + case "object": + tmp, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid object: %v", val) + } + + f.Object = int(tmp) + + case "cpresent": + tmp, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid cpresent: 
%v", val) + } + + v := (tmp == 1) + f.CPresent = &v + + case "config": + var err error + f.Config, err = hex.DecodeString(val) + if err != nil { + return fmt.Errorf("invalid AAC config: %v", val) + } + + case "sbr-enabled": + tmp, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid SBR-enabled: %v", val) + } + + v := (tmp == 1) + f.SBREnabled = &v + } + } + + if f.Object == 0 { + return fmt.Errorf("object is missing") + } + if f.Config == nil { + return fmt.Errorf("config is missing") + } + + return nil +} + +// Marshal implements Format. Optional parameters (bitrate, cpresent, SBR-enabled) are emitted only when set. +func (f *MPEG4AudioLATM) Marshal() (string, map[string]string) { + fmtp := map[string]string{ + "profile-level-id": strconv.FormatInt(int64(f.ProfileLevelID), 10), + "config": hex.EncodeToString(f.Config), + "object": strconv.FormatInt(int64(f.Object), 10), + } + + if f.Bitrate != nil { + fmtp["bitrate"] = strconv.FormatInt(int64(*f.Bitrate), 10) + } + + if f.CPresent != nil { + if *f.CPresent { + fmtp["cpresent"] = "1" + } else { + fmtp["cpresent"] = "0" + } + } + + if f.SBREnabled != nil { + if *f.SBREnabled { + fmtp["SBR-enabled"] = "1" + } else { + fmtp["SBR-enabled"] = "0" + } + } + + return "MP4A-LATM/" + strconv.FormatInt(int64(f.SampleRate), 10) + + "/" + strconv.FormatInt(int64(f.Channels), 10), fmtp +} + +// PTSEqualsDTS implements Format. 
+func (f *MPEG4AudioLATM) PTSEqualsDTS(*rtp.Packet) bool { + return true +} diff --git a/pkg/formats/mpeg4_audio_latm_test.go b/pkg/formats/mpeg4_audio_latm_test.go new file mode 100644 index 00000000..d59ba352 --- /dev/null +++ b/pkg/formats/mpeg4_audio_latm_test.go @@ -0,0 +1,42 @@ +package formats + +import ( + "testing" + + "github.com/pion/rtp" + "github.com/stretchr/testify/require" +) + +func TestMPEG4AudioLATMAttributes(t *testing.T) { + format := &MPEG4AudioLATM{ + PayloadTyp: 96, + SampleRate: 48000, + Channels: 2, + Object: 2, + ProfileLevelID: 1, + Config: []byte{0x01, 0x02, 0x03}, + } + require.Equal(t, "MPEG4-audio-latm", format.String()) + require.Equal(t, 48000, format.ClockRate()) + require.Equal(t, uint8(96), format.PayloadType()) + require.Equal(t, true, format.PTSEqualsDTS(&rtp.Packet{})) +} + +func TestMPEG4AudioLATMMediaDescription(t *testing.T) { + format := &MPEG4AudioLATM{ + PayloadTyp: 96, + SampleRate: 48000, + Channels: 2, + Object: 2, + ProfileLevelID: 1, + Config: []byte{0x01, 0x02, 0x03}, + } + + rtpmap, fmtp := format.Marshal() + require.Equal(t, "MP4A-LATM/48000/2", rtpmap) + require.Equal(t, map[string]string{ + "profile-level-id": "1", + "object": "2", + "config": "010203", + }, fmtp) +} diff --git a/pkg/formats/mpeg4_video.go b/pkg/formats/mpeg4_video_es.go similarity index 68% rename from pkg/formats/mpeg4_video.go rename to pkg/formats/mpeg4_video_es.go index be3b93b6..4b1b7af0 100644 --- a/pkg/formats/mpeg4_video.go +++ b/pkg/formats/mpeg4_video_es.go @@ -9,39 +9,38 @@ import ( "github.com/pion/rtp" ) -// MPEG4Video is a RTP format that uses the video codec defined in MPEG-4 part 2. +// MPEG4Video is an alias for MPEG4VideoES. +type MPEG4Video = MPEG4VideoES + +// MPEG4VideoES is a RTP format that uses the video codec defined in MPEG-4 part 2. 
// Specification: https://datatracker.ietf.org/doc/html/rfc6416#section-7.1 -type MPEG4Video struct { +type MPEG4VideoES struct { PayloadTyp uint8 ProfileLevelID int Config []byte } // String implements Format. -func (f *MPEG4Video) String() string { - return "MPEG4-video" +func (f *MPEG4VideoES) String() string { + return "MPEG4-video-es" } // ClockRate implements Format. -func (f *MPEG4Video) ClockRate() int { +func (f *MPEG4VideoES) ClockRate() int { return 90000 } // PayloadType implements Format. -func (f *MPEG4Video) PayloadType() uint8 { +func (f *MPEG4VideoES) PayloadType() uint8 { return f.PayloadTyp } -func (f *MPEG4Video) unmarshal( +func (f *MPEG4VideoES) unmarshal( payloadType uint8, clock string, codec string, rtpmap string, fmtp map[string]string, ) error { f.PayloadTyp = payloadType - - // If this parameter is not specified by - // the procedure, its default value of 1 (Simple Profile/Level 1) is - // used. - f.ProfileLevelID = 1 + f.ProfileLevelID = 1 // default value defined by specification for key, val := range fmtp { switch key { @@ -66,7 +65,7 @@ func (f *MPEG4Video) unmarshal( } // Marshal implements Format. -func (f *MPEG4Video) Marshal() (string, map[string]string) { +func (f *MPEG4VideoES) Marshal() (string, map[string]string) { fmtp := map[string]string{ "profile-level-id": strconv.FormatInt(int64(f.ProfileLevelID), 10), "config": strings.ToUpper(hex.EncodeToString(f.Config)), @@ -76,6 +75,6 @@ func (f *MPEG4Video) Marshal() (string, map[string]string) { } // PTSEqualsDTS implements Format. 
-func (f *MPEG4Video) PTSEqualsDTS(*rtp.Packet) bool { +func (f *MPEG4VideoES) PTSEqualsDTS(*rtp.Packet) bool { return true } diff --git a/pkg/formats/mpeg4_video_test.go b/pkg/formats/mpeg4_video_es_test.go similarity index 75% rename from pkg/formats/mpeg4_video_test.go rename to pkg/formats/mpeg4_video_es_test.go index 226c646f..9899e7c3 100644 --- a/pkg/formats/mpeg4_video_test.go +++ b/pkg/formats/mpeg4_video_es_test.go @@ -7,20 +7,20 @@ import ( "github.com/stretchr/testify/require" ) -func TestMPEG4VideoAttributes(t *testing.T) { - format := &MPEG4Video{ +func TestMPEG4VideoESAttributes(t *testing.T) { + format := &MPEG4VideoES{ PayloadTyp: 96, ProfileLevelID: 1, Config: []byte{0x01, 0x02, 0x03}, } - require.Equal(t, "MPEG4-video", format.String()) + require.Equal(t, "MPEG4-video-es", format.String()) require.Equal(t, 90000, format.ClockRate()) require.Equal(t, uint8(96), format.PayloadType()) require.Equal(t, true, format.PTSEqualsDTS(&rtp.Packet{})) } -func TestMPEG4VideoMediaDescription(t *testing.T) { - format := &MPEG4Video{ +func TestMPEG4VideoESMediaDescription(t *testing.T) { + format := &MPEG4VideoES{ PayloadTyp: 96, ProfileLevelID: 1, Config: []byte{0x0a, 0x0b, 0x03}, diff --git a/pkg/formats/mpeg4_video_generic.go b/pkg/formats/mpeg4_video_generic.go new file mode 100644 index 00000000..124b18d4 --- /dev/null +++ b/pkg/formats/mpeg4_video_generic.go @@ -0,0 +1,3 @@ +package formats + +// TODO diff --git a/pkg/formats/mpeg4_video_generic_test.go b/pkg/formats/mpeg4_video_generic_test.go new file mode 100644 index 00000000..124b18d4 --- /dev/null +++ b/pkg/formats/mpeg4_video_generic_test.go @@ -0,0 +1,3 @@ +package formats + +// TODO diff --git a/pkg/media/medias_test.go b/pkg/media/medias_test.go index f5f421e0..ff61c0ec 100644 --- a/pkg/media/medias_test.go +++ b/pkg/media/medias_test.go @@ -553,7 +553,7 @@ func TestMediasUnmarshalErrors(t *testing.T) { "a=rtpmap:97 mpeg4-generic/44100/2\r\n" + "a=fmtp:97 
profile-level-id=1;mode=AAC-hbr;sizelength=13;indexlength=3;indexdeltalength=3;config=zzz1210\r\n" + "a=control:streamid=1\r\n", - "media 2 is invalid: invalid AAC config (zzz1210)", + "media 2 is invalid: invalid AAC config: zzz1210", }, } { t.Run(ca.name, func(t *testing.T) {