From 592d62e859d99f9ff36e323ab0f256539c71d7a3 Mon Sep 17 00:00:00 2001 From: aler9 <46489434+aler9@users.noreply.github.com> Date: Fri, 5 Aug 2022 23:33:44 +0200 Subject: [PATCH] support HE-AAC v1 and HE-AAC v2 tracks (#1068) --- pkg/mpeg4audio/config.go | 116 +++++++++++++++++++++++----------- pkg/mpeg4audio/config_test.go | 10 +++ pkg/mpeg4audio/objecttype.go | 1 + track_aac.go | 7 +- 4 files changed, 97 insertions(+), 37 deletions(-) diff --git a/pkg/mpeg4audio/config.go b/pkg/mpeg4audio/config.go index 8d19cd45..6670115e 100644 --- a/pkg/mpeg4audio/config.go +++ b/pkg/mpeg4audio/config.go @@ -16,6 +16,9 @@ type Config struct { FrameLengthFlag bool DependsOnCoreCoder bool CoreCoderDelay uint16 + + // SBR specific + ExtensionSampleRate int } // Unmarshal decodes a Config. @@ -32,8 +35,9 @@ func (c *Config) Unmarshal(buf []byte) error { switch c.Type { case ObjectTypeAACLC: + case ObjectTypeSBR: default: - return fmt.Errorf("unsupported type: %d", c.Type) + return fmt.Errorf("unsupported object type: %d", c.Type) } sampleRateIndex, err := bits.ReadBits(buf, &pos, 4) @@ -45,7 +49,7 @@ func (c *Config) Unmarshal(buf []byte) error { case sampleRateIndex <= 12: c.SampleRate = sampleRates[sampleRateIndex] - case sampleRateIndex == 15: + case sampleRateIndex == 0x0F: tmp, err := bits.ReadBits(buf, &pos, 24) if err != nil { return err @@ -75,31 +79,53 @@ func (c *Config) Unmarshal(buf []byte) error { return fmt.Errorf("invalid channel configuration (%d)", channelConfig) } - c.FrameLengthFlag, err = bits.ReadFlag(buf, &pos) - if err != nil { - return err - } - - c.DependsOnCoreCoder, err = bits.ReadFlag(buf, &pos) - if err != nil { - return err - } - - if c.DependsOnCoreCoder { - tmp, err := bits.ReadBits(buf, &pos, 14) + if c.Type == ObjectTypeSBR { + extensionSamplingFrequencyIndex, err := bits.ReadBits(buf, &pos, 4) if err != nil { return err } - c.CoreCoderDelay = uint16(tmp) - } - extensionFlag, err := bits.ReadFlag(buf, &pos) - if err != nil { - return err - } + switch { + case extensionSamplingFrequencyIndex <= 12: + c.ExtensionSampleRate = sampleRates[extensionSamplingFrequencyIndex] - if extensionFlag { - return fmt.Errorf("unsupported") + case extensionSamplingFrequencyIndex == 0x0F: + tmp, err := bits.ReadBits(buf, &pos, 24) + if err != nil { + return err + } + c.ExtensionSampleRate = int(tmp) + + default: + return fmt.Errorf("invalid extension sample rate index (%d)", extensionSamplingFrequencyIndex) + } + } else { + c.FrameLengthFlag, err = bits.ReadFlag(buf, &pos) + if err != nil { + return err + } + + c.DependsOnCoreCoder, err = bits.ReadFlag(buf, &pos) + if err != nil { + return err + } + + if c.DependsOnCoreCoder { + tmp, err := bits.ReadBits(buf, &pos, 14) + if err != nil { + return err + } + c.CoreCoderDelay = uint16(tmp) + } + + extensionFlag, err := bits.ReadFlag(buf, &pos) + if err != nil { + return err + } + + if extensionFlag { + return fmt.Errorf("unsupported") + } } return nil @@ -115,8 +141,17 @@ func (c Config) marshalSize() int { n += 4 } - if c.DependsOnCoreCoder { - n += 14 + if c.Type == ObjectTypeSBR { + _, ok := reverseSampleRates[c.ExtensionSampleRate] + if !ok { + n += 28 + } else { + n += 4 + } + } else { + if c.DependsOnCoreCoder { + n += 14 + } } ret := n / 8 @@ -153,23 +188,32 @@ func (c Config) Marshal() ([]byte, error) { default: return nil, fmt.Errorf("invalid channel count (%d)", c.ChannelCount) } - bits.WriteBits(buf, &pos, uint64(channelConfig), 4) - if c.FrameLengthFlag { - bits.WriteBits(buf, &pos, 1, 1) + if c.Type == ObjectTypeSBR { + sampleRateIndex, ok := reverseSampleRates[c.ExtensionSampleRate] + if !ok { + bits.WriteBits(buf, &pos, uint64(0x0F), 4) + bits.WriteBits(buf, &pos, uint64(c.ExtensionSampleRate), 24) + } else { + bits.WriteBits(buf, &pos, uint64(sampleRateIndex), 4) + } } else { - bits.WriteBits(buf, &pos, 0, 1) - } + if c.FrameLengthFlag { + bits.WriteBits(buf, &pos, 1, 1) + } else { + bits.WriteBits(buf, &pos, 0, 1) + } - if c.DependsOnCoreCoder { - bits.WriteBits(buf, &pos, 1, 1) - } else { - bits.WriteBits(buf, &pos, 0, 1) - } + if c.DependsOnCoreCoder { + bits.WriteBits(buf, &pos, 1, 1) + } else { + bits.WriteBits(buf, &pos, 0, 1) + } - if c.DependsOnCoreCoder { - bits.WriteBits(buf, &pos, uint64(c.CoreCoderDelay), 14) + if c.DependsOnCoreCoder { + bits.WriteBits(buf, &pos, uint64(c.CoreCoderDelay), 14) + } } return buf, nil diff --git a/pkg/mpeg4audio/config_test.go b/pkg/mpeg4audio/config_test.go index 40ee5af8..30e88ea2 100644 --- a/pkg/mpeg4audio/config_test.go +++ b/pkg/mpeg4audio/config_test.go @@ -67,6 +67,16 @@ var configCases = []struct { CoreCoderDelay: 385, }, }, + { + "sbr (he-aac v1) 44.1khz stereo", + []byte{0x2b, 0x8a, 0x00}, + Config{ + Type: ObjectTypeSBR, + SampleRate: 22050, + ChannelCount: 1, + ExtensionSampleRate: 44100, + }, + }, } func TestConfigUnmarshal(t *testing.T) { diff --git a/pkg/mpeg4audio/objecttype.go b/pkg/mpeg4audio/objecttype.go index 0d84b0b3..27b87893 100644 --- a/pkg/mpeg4audio/objecttype.go +++ b/pkg/mpeg4audio/objecttype.go @@ -6,4 +6,5 @@ type ObjectType int // supported types. const ( ObjectTypeAACLC ObjectType = 2 + ObjectTypeSBR ObjectType = 5 ) diff --git a/track_aac.go b/track_aac.go index 651cf2f7..b8559a97 100644 --- a/track_aac.go +++ b/track_aac.go @@ -128,6 +128,11 @@ func (t *TrackAAC) MediaDescription() *psdp.MediaDescription { typ := strconv.FormatInt(int64(t.PayloadType), 10) + sampleRate := t.Config.SampleRate + if t.Config.ExtensionSampleRate != 0 { + sampleRate = t.Config.ExtensionSampleRate + } + return &psdp.MediaDescription{ MediaName: psdp.MediaName{ Media: "audio", @@ -137,7 +142,7 @@ func (t *TrackAAC) MediaDescription() *psdp.MediaDescription { Attributes: []psdp.Attribute{ { Key: "rtpmap", - Value: typ + " mpeg4-generic/" + strconv.FormatInt(int64(t.Config.SampleRate), 10) + + Value: typ + " mpeg4-generic/" + strconv.FormatInt(int64(sampleRate), 10) + "/" + strconv.FormatInt(int64(t.Config.ChannelCount), 10), }, {