diff --git a/go.mod b/go.mod index 55d8da3d..61659e18 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.18 require ( github.com/asticode/go-astits v1.11.0 - github.com/bluenviron/mediacommon v0.2.1 + github.com/bluenviron/mediacommon v0.3.1 github.com/google/uuid v1.3.0 github.com/pion/rtcp v1.2.10 github.com/pion/rtp v0.0.0-20230107162714-c3ea6851e25b diff --git a/go.sum b/go.sum index 3b58c9c2..9384f10c 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,8 @@ github.com/asticode/go-astikit v0.30.0 h1:DkBkRQRIxYcknlaU7W7ksNfn4gMFsB0tqMJflx github.com/asticode/go-astikit v0.30.0/go.mod h1:h4ly7idim1tNhaVkdVBeXQZEE3L0xblP7fCWbgwipF0= github.com/asticode/go-astits v1.11.0 h1:GTHUXht0ZXAJXsVbsLIcyfHr1Bchi4QQwMARw2ZWAng= github.com/asticode/go-astits v1.11.0/go.mod h1:QSHmknZ51pf6KJdHKZHJTLlMegIrhega3LPWz3ND/iI= -github.com/bluenviron/mediacommon v0.2.1 h1:NkbbXwjMqspatQwSyGDYqxg14ftxJSjNsnPNocUxScw= -github.com/bluenviron/mediacommon v0.2.1/go.mod h1:t0dqPsWUTchyvib0MhixIwXEgvDX4V9G+I0GzWLQRb8= +github.com/bluenviron/mediacommon v0.3.1 h1:C4okNqyN1Mg5CVGcGKk2tEk9Uj2hHZusHV7nqdjn1Lk= +github.com/bluenviron/mediacommon v0.3.1/go.mod h1:t0dqPsWUTchyvib0MhixIwXEgvDX4V9G+I0GzWLQRb8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/pkg/formats/format_test.go b/pkg/formats/format_test.go index 3852cb7b..d2febb72 100644 --- a/pkg/formats/format_test.go +++ b/pkg/formats/format_test.go @@ -233,7 +233,40 @@ var casesFormat = []struct { }, }, { - "audio aac lc latm", + "audio aac he-aac v2 ps", + "audio", + 96, + "mpeg4-generic/48000/2", + map[string]string{ + "streamtype": "5", + "profile-level-id": "48", + "mode": "AAC-hbr", + "config": "eb098800", + "sizelength": "13", + }, + &MPEG4Audio{ + PayloadTyp: 96, + ProfileLevelID: 48, + Config: &mpeg4audio.Config{ + Type: 2, + ExtensionType: 29, + ExtensionSampleRate: 48000, + SampleRate: 24000, + ChannelCount: 1, + }, + SizeLength: 13, + }, + "mpeg4-generic/48000/2", + map[string]string{ + "streamtype": "5", + "profile-level-id": "48", + "mode": "AAC-hbr", + "config": "eb098800", + "sizelength": "13", + }, + }, + { + "audio aac latm lc", "audio", 96, "MP4A-LATM/24000/2", @@ -246,13 +279,21 @@ var casesFormat = []struct { }, &MPEG4AudioLATM{ PayloadTyp: 96, - SampleRate: 24000, - Channels: 2, ProfileLevelID: 1, Bitrate: intPtr(64000), CPresent: boolPtr(false), - Object: 2, - Config: []byte{0x40, 0x00, 0x26, 0x20, 0x3f, 0xc0}, + Config: &mpeg4audio.StreamMuxConfig{ + Programs: []*mpeg4audio.StreamMuxConfigProgram{{ + Layers: []*mpeg4audio.StreamMuxConfigLayer{{ + AudioSpecificConfig: &mpeg4audio.Config{ + Type: 2, + SampleRate: 24000, + ChannelCount: 2, + }, + LatmBufferFullness: 255, + }}, + }}, + }, }, "MP4A-LATM/24000/2", map[string]string{ @@ -264,7 +305,7 @@ var casesFormat = []struct { }, }, { - "audio aac v2 latm", + "audio aac latm he-aac v2", "audio", 110, "MP4A-LATM/24000/1", @@ -277,13 +318,21 @@ var casesFormat = []struct { }, &MPEG4AudioLATM{ PayloadTyp: 110, - SampleRate: 24000, - Channels: 1, ProfileLevelID: 15, CPresent: boolPtr(false), - Object: 2, SBREnabled: boolPtr(true), - Config: []byte{0x40, 0x00, 0x26, 0x10, 0x3f, 0xc0}, + Config: &mpeg4audio.StreamMuxConfig{ + Programs: []*mpeg4audio.StreamMuxConfigProgram{{ + Layers: []*mpeg4audio.StreamMuxConfigLayer{{ + AudioSpecificConfig: &mpeg4audio.Config{ + Type: 2, + SampleRate: 24000, + ChannelCount: 1, + }, + LatmBufferFullness: 255, + }}, + }}, + }, }, "MP4A-LATM/24000/1", map[string]string{ @@ -294,6 +343,89 @@ var casesFormat = []struct { "SBR-enabled": "1", }, }, + { + "audio aac latm hierarchical sbr", + "audio", + 110, + "MP4A-LATM/48000/2", + map[string]string{ + "profile-level-id": "44", + "bitrate": "64000", + "cpresent": "0", + "config": "40005623101fe0", + "sbr-enabled": "1", + }, + &MPEG4AudioLATM{ + PayloadTyp: 110, + ProfileLevelID: 44, + CPresent: boolPtr(false), + SBREnabled: boolPtr(true), + Bitrate: intPtr(64000), + Config: &mpeg4audio.StreamMuxConfig{ + Programs: []*mpeg4audio.StreamMuxConfigProgram{{ + Layers: []*mpeg4audio.StreamMuxConfigLayer{{ + AudioSpecificConfig: &mpeg4audio.Config{ + Type: 2, + ExtensionType: 5, + ExtensionSampleRate: 48000, + SampleRate: 24000, + ChannelCount: 2, + }, + LatmBufferFullness: 255, + }}, + }}, + }, + }, + "MP4A-LATM/48000/2", + map[string]string{ + "profile-level-id": "44", + "object": "2", + "cpresent": "0", + "config": "40005623101fe0", + "SBR-enabled": "1", + "bitrate": "64000", + }, + }, + { + "audio aac latm hierarchical ps", + "audio", + 110, + "MP4A-LATM/48000/2", + map[string]string{ + "profile-level-id": "48", + "bitrate": "64000", + "cpresent": "0", + "config": "4001d613101fe0", + }, + &MPEG4AudioLATM{ + PayloadTyp: 110, + ProfileLevelID: 48, + Bitrate: intPtr(64000), + CPresent: boolPtr(false), + Config: &mpeg4audio.StreamMuxConfig{ + Programs: []*mpeg4audio.StreamMuxConfigProgram{{ + Layers: []*mpeg4audio.StreamMuxConfigLayer{{ + AudioSpecificConfig: &mpeg4audio.Config{ + Type: 2, + ExtensionType: 29, + ExtensionSampleRate: 48000, + SampleRate: 24000, + ChannelCount: 1, + }, + LatmBufferFullness: 255, + }}, + }}, + }, + }, + "MP4A-LATM/48000/2", + map[string]string{ + "profile-level-id": "48", + "object": "2", + "cpresent": "0", + "config": "4001d613101fe0", + "bitrate": "64000", + }, + }, { "audio aac latm no channels", "audio", @@ -305,19 +437,27 @@ var casesFormat = []struct { }, &MPEG4AudioLATM{ PayloadTyp: 110, - SampleRate: 48000, - Channels: 1, ProfileLevelID: 30, CPresent: boolPtr(false), - Object: 2, - Config: []byte{0x40, 0x00, 0x23, 0x10}, + Config: &mpeg4audio.StreamMuxConfig{ + Programs: []*mpeg4audio.StreamMuxConfigProgram{{ + Layers: []*mpeg4audio.StreamMuxConfigLayer{{ + AudioSpecificConfig: &mpeg4audio.Config{ + Type: 2, + SampleRate: 48000, + ChannelCount: 1, + }, + LatmBufferFullness: 255, + }}, + }}, + }, }, "MP4A-LATM/48000/1", map[string]string{ "profile-level-id": "30", "object": "2", "cpresent": "0", - "config": "40002310", + "config": "400023103fc0", }, }, { diff --git a/pkg/formats/mpeg4_audio_generic.go b/pkg/formats/mpeg4_audio_generic.go index f196422c..9da7ff63 100644 --- a/pkg/formats/mpeg4_audio_generic.go +++ b/pkg/formats/mpeg4_audio_generic.go @@ -120,8 +120,13 @@ func (f *MPEG4AudioGeneric) RTPMap() string { sampleRate = f.Config.ExtensionSampleRate } + channelCount := f.Config.ChannelCount + if f.Config.ExtensionType == mpeg4audio.ObjectTypePS { + channelCount = 2 + } + return "mpeg4-generic/" + strconv.FormatInt(int64(sampleRate), 10) + - "/" + strconv.FormatInt(int64(f.Config.ChannelCount), 10) + "/" + strconv.FormatInt(int64(channelCount), 10) } // FMTP implements Format. diff --git a/pkg/formats/mpeg4_audio_latm.go b/pkg/formats/mpeg4_audio_latm.go index 88237f10..4e80fd34 100644 --- a/pkg/formats/mpeg4_audio_latm.go +++ b/pkg/formats/mpeg4_audio_latm.go @@ -4,7 +4,6 @@ import ( "encoding/hex" "fmt" "strconv" - "strings" "github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio" "github.com/pion/rtp" @@ -14,13 +13,10 @@ import ( // Specification: https://datatracker.ietf.org/doc/html/rfc6416#section-7.3 type MPEG4AudioLATM struct { PayloadTyp uint8 - SampleRate int - Channels int ProfileLevelID int Bitrate *int - Object mpeg4audio.ObjectType CPresent *bool - Config []byte + Config *mpeg4audio.StreamMuxConfig SBREnabled *bool } @@ -28,24 +24,6 @@ func (f *MPEG4AudioLATM) unmarshal( payloadType uint8, clock string, codec string, rtpmap string, fmtp map[string]string, ) error { - tmp := strings.SplitN(clock, "/", 2) - - tmp1, err := strconv.ParseInt(tmp[0], 10, 64) - if err != nil { - return err - } - f.SampleRate = int(tmp1) - - if len(tmp) >= 2 { - tmp2, err := strconv.ParseInt(tmp[1], 10, 64) - if err != nil { - return err - } - f.Channels = int(tmp2) - } else { - f.Channels = 1 - } - f.PayloadTyp = payloadType f.ProfileLevelID = 30 // default value defined by specification @@ -68,22 +46,6 @@ func (f *MPEG4AudioLATM) unmarshal( v := int(tmp) f.Bitrate = &v - case "object": - tmp, err := strconv.ParseInt(val, 10, 64) - if err != nil { - return fmt.Errorf("invalid object type: %v", val) - } - - f.Object = mpeg4audio.ObjectType(int(tmp)) - - switch f.Object { - case mpeg4audio.ObjectTypeAACLC, - mpeg4audio.ObjectTypeSBR, - mpeg4audio.ObjectTypePS: - default: - return fmt.Errorf("unsupported object type: %d", f.Object) - } - case "cpresent": tmp, err := strconv.ParseInt(val, 10, 64) if err != nil { @@ -94,12 +56,17 @@ func (f *MPEG4AudioLATM) unmarshal( f.CPresent = &v case "config": - var err error - f.Config, err = hex.DecodeString(val) + enc, err := hex.DecodeString(val) if err != nil { return fmt.Errorf("invalid AAC config: %v", val) } + f.Config = &mpeg4audio.StreamMuxConfig{} + err = f.Config.Unmarshal(enc) + if err != nil { + return fmt.Errorf("invalid AAC config: %v", err) + } + case "sbr-enabled": tmp, err := strconv.ParseInt(val, 10, 64) if err != nil { @@ -111,10 +78,6 @@ func (f *MPEG4AudioLATM) unmarshal( } } - if f.Object == 0 { - f.Object = 2 - } - if f.Config == nil { return fmt.Errorf("config is missing") } @@ -129,7 +92,7 @@ func (f *MPEG4AudioLATM) String() string { // ClockRate implements Format. func (f *MPEG4AudioLATM) ClockRate() int { - return f.SampleRate + return f.Config.Programs[0].Layers[0].AudioSpecificConfig.SampleRate } // PayloadType implements Format. @@ -139,16 +102,33 @@ func (f *MPEG4AudioLATM) PayloadType() uint8 { // RTPMap implements Format. func (f *MPEG4AudioLATM) RTPMap() string { - return "MP4A-LATM/" + strconv.FormatInt(int64(f.SampleRate), 10) + - "/" + strconv.FormatInt(int64(f.Channels), 10) + aoc := f.Config.Programs[0].Layers[0].AudioSpecificConfig + + sampleRate := aoc.SampleRate + if aoc.ExtensionSampleRate != 0 { + sampleRate = aoc.ExtensionSampleRate + } + + channelCount := aoc.ChannelCount + if aoc.ExtensionType == mpeg4audio.ObjectTypePS { + channelCount = 2 + } + + return "MP4A-LATM/" + strconv.FormatInt(int64(sampleRate), 10) + + "/" + strconv.FormatInt(int64(channelCount), 10) } // FMTP implements Format. func (f *MPEG4AudioLATM) FMTP() map[string]string { + enc, err := f.Config.Marshal() + if err != nil { + return nil + } + fmtp := map[string]string{ "profile-level-id": strconv.FormatInt(int64(f.ProfileLevelID), 10), - "config": hex.EncodeToString(f.Config), - "object": strconv.FormatInt(int64(f.Object), 10), + "config": hex.EncodeToString(enc), + "object": strconv.FormatInt(int64(f.Config.Programs[0].Layers[0].AudioSpecificConfig.Type), 10), } if f.Bitrate != nil { diff --git a/pkg/formats/mpeg4_audio_latm_test.go b/pkg/formats/mpeg4_audio_latm_test.go index 386b9ab9..de9f1820 100644 --- a/pkg/formats/mpeg4_audio_latm_test.go +++ b/pkg/formats/mpeg4_audio_latm_test.go @@ -3,6 +3,7 @@ package formats import ( "testing" + "github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio" "github.com/pion/rtp" "github.com/stretchr/testify/require" ) @@ -10,13 +11,21 @@ import ( func TestMPEG4AudioLATMAttributes(t *testing.T) { format := &MPEG4AudioLATM{ PayloadTyp: 96, - SampleRate: 48000, - Channels: 2, - Object: 2, ProfileLevelID: 1, - Config: []byte{0x01, 0x02, 0x03}, + Config: &mpeg4audio.StreamMuxConfig{ + Programs: []*mpeg4audio.StreamMuxConfigProgram{{ + Layers: []*mpeg4audio.StreamMuxConfigLayer{{ + AudioSpecificConfig: &mpeg4audio.Config{ + Type: 2, + SampleRate: 44100, + ChannelCount: 2, + }, + LatmBufferFullness: 255, + }}, + }}, + }, } require.Equal(t, "MPEG4-audio-latm", format.String()) - require.Equal(t, 48000, format.ClockRate()) + require.Equal(t, 44100, format.ClockRate()) require.Equal(t, true, format.PTSEqualsDTS(&rtp.Packet{})) }