update aac

2025-09-27 03:45:54 +08:00 · 2021-01-09 07:51:19 +08:00
parent db8f2790f8
commit 7bbbb37d80
12 changed files with 506 additions and 213 deletions
--- a/av/codec/aac/adtsheader.go
+++ b/av/codec/aac/adtsheader.go
@@ -0,0 +1,91 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package aac
+
+// ADTSHeader is a 7-byte ADTS header holding both the fixed and the
+// variable header fields.
+type ADTSHeader [7]byte
+
+// NewADTSHeader builds the 7-byte ADTS header for one AAC frame.
+//
+// profile fills the 2-bit profile field, sampleRateIdx the 4-bit
+// sampling_frequency_index and channelConfig the 3-bit channel_configuration;
+// payloadSize is the number of raw AAC bytes that follow the header. Values
+// wider than their field are masked to the field width.
+//
+// Layout (6.2 Audio Data Transport Stream, ADTS, aac-iso-13818-7.pdf, page 26):
+//
+//	adts_fixed_header
+//	  syncword                            12 bits, '1111 1111 1111'
+//	  ID                                   1 bit,  '0'
+//	  layer                                2 bits, '00'
+//	  protection_absent                    1 bit,  '1'
+//	  profile                              2 bits
+//	  sampling_frequency_index             4 bits
+//	  private_bit                          1 bit,  '0'
+//	  channel_configuration                3 bits
+//	  original_or_copy                     1 bit,  '0'
+//	  home                                 1 bit,  '0'
+//	adts_variable_header
+//	  copyright_identification_bit         1 bit,  '0'
+//	  copyright_identification_start       1 bit,  '0'
+//	  frame_length                        13 bits
+//	  adts_buffer_fullness                11 bits, 0x7FF (variable rate)
+//	  number_of_raw_data_blocks_in_frame   2 bits, 0 (one raw_data_block)
+func NewADTSHeader(profile, sampleRateIdx, channelConfig byte, payloadSize int) ADTSHeader {
+	const headerSize = 7
+	// frame_length counts the header itself plus the raw AAC data.
+	total := payloadSize + headerSize
+
+	var hdr ADTSHeader
+	// syncword, ID, layer, protection_absent
+	hdr[0] = 0xff
+	hdr[1] = 0xf1
+	// profile | sampling_frequency_index | top bit of channel_configuration
+	hdr[2] = (profile<<6)&0xc0 | (sampleRateIdx<<2)&0x3c | (channelConfig>>2)&0x01
+	// low two bits of channel_configuration | top two bits of frame_length
+	hdr[3] = (channelConfig<<6)&0xc0 | uint8((total>>11)&0x03)
+	// middle eight bits of frame_length
+	hdr[4] = uint8((total >> 3) & 0xff)
+	// low three bits of frame_length | top five bits of adts_buffer_fullness
+	hdr[5] = uint8((total<<5)&0xe0) | 0x1f
+	// rest of adts_buffer_fullness | number_of_raw_data_blocks_in_frame
+	hdr[6] = 0xfc
+
+	return hdr
+}
+
+// Profile returns the 2-bit profile field stored in the top bits of byte 2.
+func (h ADTSHeader) Profile() uint8 {
+	const profileShift = 6
+	return h[2] >> profileShift
+}
+
+// SamplingIndex returns the 4-bit sampling_frequency_index field of byte 2.
+func (h ADTSHeader) SamplingIndex() uint8 {
+	return (h[2] & 0x3c) >> 2
+}
+
+// SampleRate looks up the header's sampling index in SampleRates.
+func (h ADTSHeader) SampleRate() int {
+	idx := int(h.SamplingIndex())
+	return SampleRates[idx]
+}
+
+// ChannelConfig returns the 3-bit channel_configuration field, which is
+// split across the last bit of byte 2 and the top two bits of byte 3.
+func (h ADTSHeader) ChannelConfig() uint8 {
+	high := (h[2] & 0x1) << 2
+	low := h[3] >> 6
+	return high | low
+}
+
+// Channels maps the header's channel configuration to a channel count via
+// aacAudioChannels.
+func (h ADTSHeader) Channels() uint8 {
+	cfg := int(h.ChannelConfig())
+	return aacAudioChannels[cfg]
+}
+
+// FrameLength returns the 13-bit frame_length field, assembled from the low
+// two bits of byte 3, all of byte 4 and the top three bits of byte 5.
+func (h ADTSHeader) FrameLength() int {
+	hi := int(h[3]&0x3) << 11
+	mid := int(h[4]) << 3
+	lo := int(h[5] >> 5)
+	return hi | mid | lo
+}
+
+// PayloadSize returns the frame length minus the size of the header itself.
+func (h ADTSHeader) PayloadSize() int {
+	headerLen := len(h)
+	return h.FrameLength() - headerLen
+}
+
+// ToAsc encodes the header's profile, sampling index and channel
+// configuration as a 2-byte AudioSpecificConfig. The object type written is
+// the ADTS profile plus one.
+func (h ADTSHeader) ToAsc() []byte {
+	objType := h.Profile() + 1
+	return Encode2BytesASC(objType, h.SamplingIndex(), h.ChannelConfig())
+}
--- a/av/codec/aac/adtsheader_test.go
+++ b/av/codec/aac/adtsheader_test.go
@@ -0,0 +1,58 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package aac
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestNewADTSHeader checks that every field passed to NewADTSHeader can be
+// read back through the ADTSHeader accessors.
+func TestNewADTSHeader(t *testing.T) {
+	tests := []struct {
+		name          string
+		profile       byte
+		sampleRateIdx byte
+		channelConfig byte
+		payloadSize   int
+	}{
+		// Subtest names must be unique so a failure points at one row.
+		{"case1", 1, 4, 2, 200},
+		{"case2", 2, 3, 4, 5345},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := NewADTSHeader(tt.profile, tt.sampleRateIdx, tt.channelConfig, tt.payloadSize)
+			assert.Equal(t, tt.profile, got.Profile())
+			assert.Equal(t, tt.sampleRateIdx, got.SamplingIndex())
+			assert.Equal(t, tt.channelConfig, got.ChannelConfig())
+			assert.Equal(t, tt.payloadSize, got.PayloadSize())
+		})
+	}
+}
+
+// TestADTSHeader_ToAsc checks that the AudioSpecificConfig produced by
+// ADTSHeader.ToAsc decodes back to the values the header was built from.
+func TestADTSHeader_ToAsc(t *testing.T) {
+	tests := []struct {
+		name          string
+		profile       byte
+		sampleRateIdx byte
+		channelConfig byte
+		payloadSize   int
+	}{
+		// Subtest names must be unique so a failure points at one row.
+		{"case1", 1, 4, 2, 200},
+		{"case2", 2, 3, 4, 5345},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := NewADTSHeader(tt.profile, tt.sampleRateIdx, tt.channelConfig, tt.payloadSize)
+			config := got.ToAsc()
+			var asc AudioSpecificConfig
+			// A decode failure would leave asc partly filled; stop here
+			// instead of reporting misleading field mismatches.
+			if err := asc.Decode(config); err != nil {
+				t.Fatalf("AudioSpecificConfig.Decode() error = %v", err)
+			}
+
+			assert.Equal(t, tt.profile, asc.ObjectType-1)
+			assert.Equal(t, tt.sampleRateIdx, asc.SamplingIndex)
+			assert.Equal(t, tt.channelConfig, asc.ChannelConfig)
+		})
+	}
+}
--- a/av/codec/aac/asc.go
+++ b/av/codec/aac/asc.go
@@ -0,0 +1,191 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+//
+// Translate from FFmpeg mpeg4audio.h mpeg4audio.c
+//
+package aac
+
+import (
+	"encoding/binary"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"runtime/debug"
+
+	"github.com/cnotch/ipchub/utils/bits"
+)
+
+// RawSPS is an alias of AudioSpecificConfig.
+type RawSPS = AudioSpecificConfig
+
+// AudioSpecificConfig holds the values that Decode extracts from an MPEG-4
+// AudioSpecificConfig byte sequence.
+//
+// ObjectType is an AOT_* value. SamplingIndex and SampleRate are the index
+// read from the config and the rate resolved from it. ChannelConfig is the
+// 4-bit channel configuration and Channels the channel count derived from it.
+// The Ext* fields describe the extension (SBR) signalling when present;
+// ExtObjectType is AOT_NULL and ExtSampleRate is 0 when it is absent.
+// FrameLengthShort is not written by the decoding code in this file.
+type AudioSpecificConfig struct {
+	ObjectType       uint8
+	SamplingIndex    uint8
+	SampleRate       int
+	ChannelConfig    uint8
+	Sbr              int ///< -1 implicit, 1 presence
+	ExtObjectType    uint8
+	ExtSamplingIndex uint8
+	ExtSampleRate    int
+	ExtChannelConfig uint8
+	Channels         uint8
+	Ps               int ///< -1 implicit, 1 presence
+	FrameLengthShort int
+}
+
+// DecodeString decodes the config from its hex string form. It returns the
+// hex decoding error if the string is not valid hex, otherwise the result of
+// Decode on the decoded bytes.
+func (asc *AudioSpecificConfig) DecodeString(config string) error {
+	raw, hexErr := hex.DecodeString(config)
+	if hexErr == nil {
+		return asc.Decode(raw)
+	}
+	return hexErr
+}
+
+// Decode parses an AudioSpecificConfig from its raw byte sequence and stores
+// the result in asc. The parsing steps follow FFmpeg's mpeg4audio.c.
+//
+// Any panic raised while parsing (presumably from the bit reader on
+// truncated input) is recovered and returned as an error carrying the stack
+// trace. An invalid ALS specific config yields errInvalidData.
+func (asc *AudioSpecificConfig) Decode(config []byte) (err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			err = fmt.Errorf("AudioSpecificConfig decode panic；r = %v \n %s", r, debug.Stack())
+		}
+	}()
+
+	r := bits.NewReader(config)
+
+	asc.ObjectType = getObjectType(r)
+	asc.SamplingIndex, asc.SampleRate = getSampleRate(r)
+	asc.ChannelConfig = r.ReadUint8(4)
+	if int(asc.ChannelConfig) < len(aacAudioChannels) {
+		asc.Channels = aacAudioChannels[asc.ChannelConfig]
+	}
+	asc.Sbr = -1
+	asc.Ps = -1
+
+	// AOT_SBR always signals explicit SBR. AOT_PS signals SBR+PS unless the
+	// data matches the W6132 Annex YYYY draft MP3onMP4 layout, which FFmpeg
+	// detects as (next 3 bits)&0x03 != 0 with (next 9 bits)&0x3F == 0.
+	// The peeks stay inside the short-circuit so they only run for AOT_PS.
+	if asc.ObjectType == AOT_SBR || (asc.ObjectType == AOT_PS &&
+		!(r.Peek(3)&0x03 != 0 && r.Peek(9)&0x3F == 0)) {
+		if asc.ObjectType == AOT_PS {
+			asc.Ps = 1
+		}
+		asc.ExtObjectType = AOT_SBR
+		asc.Sbr = 1
+		asc.ExtSamplingIndex, asc.ExtSampleRate = getSampleRate(r)
+		// The underlying (core) object type follows the extension rate.
+		asc.ObjectType = getObjectType(r)
+		if asc.ObjectType == AOT_ER_BSAC {
+			asc.ExtChannelConfig = r.ReadUint8(4)
+		}
+	} else {
+		asc.ExtObjectType = AOT_NULL
+		asc.ExtSampleRate = 0
+	}
+
+	if asc.ObjectType == AOT_ALS {
+		r.Skip(5)
+		if r.Peek(24) != binary.BigEndian.Uint32([]byte{0, 'A', 'L', 'S'}) {
+			r.Skip(24)
+		}
+
+		if err = asc.parseConfigALS(r); err != nil {
+			return
+		}
+	}
+
+	// Without explicit signalling, scan bit by bit for the 0x2b7 sync
+	// extension that carries backward-compatible SBR/PS signalling.
+	if asc.ExtObjectType != AOT_SBR {
+		for r.BitsLeft() > 15 {
+			if r.Peek(11) == 0x2b7 { // sync extension
+				r.Skip(11)
+				asc.ExtObjectType = getObjectType(r)
+				if asc.ExtObjectType == AOT_SBR {
+					asc.Sbr = int(r.ReadBit())
+					if asc.Sbr == 1 {
+						asc.ExtSamplingIndex, asc.ExtSampleRate = getSampleRate(r)
+						if asc.ExtSampleRate == asc.SampleRate {
+							asc.Sbr = -1
+						}
+					}
+
+				}
+				if r.BitsLeft() > 11 && r.Read(11) == 0x548 {
+					asc.Ps = int(r.ReadBit())
+				}
+
+				break
+			} else {
+				r.Skip(1) // skip 1 bit
+			}
+		}
+	}
+
+	// PS requires SBR
+	if asc.Sbr == 0 {
+		asc.Ps = 0
+	}
+	// Limit implicit PS to the HE-AACv2 Profile
+	if (asc.Ps == -1 && asc.ObjectType != AOT_AAC_LC) || (asc.Channels&^0x01) != 0 {
+		asc.Ps = 0
+	}
+	return
+}
+
+// ToAdtsHeader builds the ADTS header for a payload of payloadSize bytes.
+// The ADTS profile is ObjectType-1. The extension sampling index is used
+// when an extension sample rate is set, otherwise the base sampling index.
+func (asc *AudioSpecificConfig) ToAdtsHeader(payloadSize int) ADTSHeader {
+	idx := asc.ExtSamplingIndex
+	if asc.ExtSampleRate <= 0 {
+		idx = asc.SamplingIndex
+	}
+
+	profile := asc.ObjectType - 1
+	return NewADTSHeader(profile, idx, asc.ChannelConfig, payloadSize)
+}
+
+// Encode2BytesASC packs an object type (5 bits), a sampling index (4 bits)
+// and a channel configuration (4 bits) into a 2-byte AudioSpecificConfig.
+// The remaining three low bits of the second byte are left as zero.
+func Encode2BytesASC(objType, samplingIdx, channelConfig byte) []byte {
+	// byte 0: object type, then the top three bits of the sampling index.
+	first := objType<<3 | (samplingIdx>>1)&0x07
+	// byte 1: last bit of the sampling index, then the channel configuration.
+	second := samplingIdx<<7 | (channelConfig&0x0f)<<3
+	return []byte{first, second}
+}
+
+// errInvalidData is returned by parseConfigALS when the ALS specific config
+// is too short, lacks the "ALS\0" tag or carries a non-positive sample rate.
+var errInvalidData = errors.New("Invalid data found when processing input")
+
+// parseConfigALS reads the ALS specific config from r and overrides the
+// sample rate and channel information in asc, which are buggy in old ALS
+// conformance files. It returns errInvalidData when fewer than 112 bits
+// remain, the "ALS\0" tag is missing or the sample rate is not positive.
+func (asc *AudioSpecificConfig) parseConfigALS(r *bits.Reader) (err error) {
+	const minBits = 112
+	if r.BitsLeft() < minBits {
+		return errInvalidData
+	}
+
+	alsTag := binary.BigEndian.Uint32([]byte{'A', 'L', 'S', 0})
+	if r.Read(32) != alsTag {
+		return errInvalidData
+	}
+
+	// The rate is stored before it is validated, as in the FFmpeg original.
+	rate := r.ReadInt(32)
+	asc.SampleRate = rate
+	if rate <= 0 {
+		return errInvalidData
+	}
+
+	// skip number of samples
+	r.Skip(32)
+
+	// The 16-bit field stores the channel count minus one.
+	asc.ChannelConfig = 0
+	asc.Channels = uint8(r.ReadInt(16) + 1)
+	return nil
+}
+
+// getObjectType reads a 5-bit audio object type from r. When that value is
+// AOT_ESCAPE, the type is instead 32 plus the following 6 bits.
+func getObjectType(r *bits.Reader) (objType uint8) {
+	if objType = r.ReadUint8(5); objType != AOT_ESCAPE {
+		return objType
+	}
+	return r.ReadUint8(6) + 32
+}
+
+// getSampleRate reads a 4-bit sampling index from r and resolves the sample
+// rate. Index 0xf means the rate follows explicitly as a 24-bit value; any
+// other index is resolved through SampleRate.
+func getSampleRate(r *bits.Reader) (sampleRateIdx uint8, sampleRate int) {
+	const explicitRateIdx = 0xf
+
+	sampleRateIdx = r.ReadUint8(4)
+	if sampleRateIdx != explicitRateIdx {
+		return sampleRateIdx, SampleRate(int(sampleRateIdx))
+	}
+	return sampleRateIdx, r.ReadInt(24)
+}
--- a/av/codec/aac/asc_test.go
+++ b/av/codec/aac/asc_test.go
@@ -0,0 +1,36 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package aac
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestAudioSpecificConfig_DecodeString decodes hex encoded configs and
+// checks the resulting object type, sample rate and channel count.
+func TestAudioSpecificConfig_DecodeString(t *testing.T) {
+	tests := []struct {
+		name       string
+		config     string
+		wantErr    bool
+		objectType uint8
+		sampleRate int
+		channels   uint8
+	}{
+		{"case1", "121056E500", false, 2, 44100, 2},
+		{"case2", "1190", false, 2, 48000, 2},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var asc AudioSpecificConfig
+			if err := asc.DecodeString(tt.config); (err != nil) != tt.wantErr {
+				t.Errorf("AudioSpecificConfig.DecodeString() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			// assert.Equal takes (expected, actual); keeping that order makes
+			// failure output label the two values correctly.
+			assert.Equal(t, tt.objectType, asc.ObjectType)
+			assert.Equal(t, tt.sampleRate, asc.SampleRate)
+			assert.Equal(t, tt.channels, asc.Channels)
+		})
+	}
+}
--- a/av/codec/aac/const.go
+++ b/av/codec/aac/const.go
@@ -11,12 +11,64 @@ const (
 	SamplesPerFrame = 1024
 )

+// Audio Object Type (AOT) identifiers. The iota offsets skip codes that have
+// no constant here, so each constant equals its object type code (for
+// example AOT_TTSI = 12 and AOT_ER_AAC_LTP = 19).
+const (
+	AOT_NULL            = iota     ///< Support?                Name
+	AOT_AAC_MAIN                   ///< Y                       Main
+	AOT_AAC_LC                     ///< Y                       Low Complexity
+	AOT_AAC_SSR                    ///< N (code in SoC repo)    Scalable Sample Rate
+	AOT_AAC_LTP                    ///< Y                       Long Term Prediction
+	AOT_SBR                        ///< Y                       Spectral Band Replication HE-AAC
+	AOT_AAC_SCALABLE               ///< N                       Scalable
+	AOT_TWINVQ                     ///< N                       Twin Vector Quantizer
+	AOT_CELP                       ///< N                       Code Excited Linear Prediction
+	AOT_HVXC                       ///< N                       Harmonic Vector eXcitation Coding
+	AOT_TTSI            = 2 + iota ///< N(code = 12)            Text-To-Speech Interface
+	AOT_MAINSYNTH                  ///< N                       Main Synthesis
+	AOT_WAVESYNTH                  ///< N                       Wavetable Synthesis
+	AOT_MIDI                       ///< N                       General MIDI
+	AOT_SAFX                       ///< N                       Algorithmic Synthesis and Audio Effects
+	AOT_ER_AAC_LC                  ///< N                       Error Resilient Low Complexity
+	AOT_ER_AAC_LTP      = 3 + iota ///< N(code = 19)            Error Resilient Long Term Prediction
+	AOT_ER_AAC_SCALABLE            ///< N                       Error Resilient Scalable
+	AOT_ER_TWINVQ                  ///< N                       Error Resilient Twin Vector Quantizer
+	AOT_ER_BSAC                    ///< N                       Error Resilient Bit-Sliced Arithmetic Coding
+	AOT_ER_AAC_LD                  ///< N                       Error Resilient Low Delay
+	AOT_ER_CELP                    ///< N                       Error Resilient Code Excited Linear Prediction
+	AOT_ER_HVXC                    ///< N                       Error Resilient Harmonic Vector eXcitation Coding
+	AOT_ER_HILN                    ///< N                       Error Resilient Harmonic and Individual Lines plus Noise
+	AOT_ER_PARAM                   ///< N                       Error Resilient Parametric
+	AOT_SSC                        ///< N                       SinuSoidal Coding
+	AOT_PS                         ///< N                       Parametric Stereo
+	AOT_SURROUND                   ///< N                       MPEG Surround
+	AOT_ESCAPE                     ///< Y                       Escape Value
+	AOT_L1                         ///< Y                       Layer 1
+	AOT_L2                         ///< Y                       Layer 2
+	AOT_L3                         ///< Y                       Layer 3
+	AOT_DST                        ///< N                       Direct Stream Transfer
+	AOT_ALS                        ///< Y                       Audio LosslesS
+	AOT_SLS                        ///< N                       Scalable LosslesS
+	AOT_SLS_NON_CORE               ///< N                       Scalable LosslesS (non core)
+	AOT_ER_AAC_ELD                 ///< N                       Error Resilient Enhanced Low Delay
+	AOT_SMR_SIMPLE                 ///< N                       Symbolic Music Representation Simple
+	AOT_SMR_MAIN                   ///< N                       Symbolic Music Representation Main
+	AOT_USAC_NOSBR                 ///< N                       Unified Speech and Audio Coding (no SBR)
+	AOT_SAOC                       ///< N                       Spatial Audio Object Coding
+	AOT_LD_SURROUND                ///< N                       Low Delay MPEG Surround
+	AOT_USAC                       ///< N                       Unified Speech and Audio Coding
+)
+
 // AAC Profile 表示使用哪个级别的 AAC。
 // 如 01 Low Complexity(LC) – AAC LC
 const (
-	ProfileMain = iota // 0 Main profile
-	ProfileLC          // 1 Low Complexity profile (LC)
-	ProfileSSR         // 2 Scalable Sampling Rate profile (SSR)
+	ProfileMain = AOT_AAC_MAIN - 1
+	ProfileLow  = AOT_AAC_LC - 1
+	ProfileSSR  = AOT_AAC_SSR - 1
+	ProfileLTP  = AOT_AAC_LTP - 1
+	ProfileHE   = AOT_SBR - 1
+	ProfileLD   = AOT_ER_AAC_LD - 1
+	ProfileHE2  = AOT_PS - 1
+	ProfileELD  = AOT_ER_AAC_ELD - 1
 )

 // AAC 采样频率
@@ -41,8 +93,8 @@ func SampleRate(index int) int {
 	return SampleRates[index]
 }

-// SampleRateIndex .
-func SampleRateIndex(rate int) int {
+// SamplingIndex .
+func SamplingIndex(rate int) int {
 	i := sort.Search(len(SampleRates), func(i int) bool { return SampleRates[i] <= rate })
 	if i < len(SampleRates) && SampleRates[i] == rate {
 		return i
@@ -51,7 +103,7 @@ func SampleRateIndex(rate int) int {
 }

 // SampleRates 采用频率集合
-var SampleRates = []int{
+var SampleRates = [16]int{
 	96000, 88200, 64000, 48000,
 	44100, 32000, 24000, 22050,
 	16000, 12000, 11025, 8000,
@@ -78,3 +130,13 @@ const (
 	ChannelSevenPlusOne        // 7
 	ChannelReserved            // 8
 )
+
+// aacAudioChannels maps a channel configuration value (the array index) to
+// the number of audio channels; configuration 7 yields 8 channels.
+var aacAudioChannels = [8]uint8{
+	0, 1, 2, 3,
+	4, 5, 6, 8,
+}
+
+// Parameter set indexes.
+const (
+	ParameterSetConfig = 0
+)
--- a/av/codec/aac/shortcut.go
+++ b/av/codec/aac/shortcut.go
@@ -0,0 +1,29 @@
+// Copyright (c) 2019,CAOHONGJU All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package aac
+
+import "github.com/cnotch/ipchub/av/codec"
+
+// MetadataIsReady reports whether am carries usable AAC metadata.
+//
+// It returns false when am.Sps is empty. When am.SampleRate is still zero it
+// decodes am.Sps as an AudioSpecificConfig and fills in Channels, SampleRate
+// (the extension rate when one is present) and SampleSize (16). A decode
+// failure yields false.
+func MetadataIsReady(am *codec.AudioMeta) bool {
+	config := am.Sps
+	if len(config) == 0 {
+		return false
+	}
+	if am.SampleRate != 0 {
+		// Already populated; nothing to decode.
+		return true
+	}
+
+	var asc AudioSpecificConfig
+	if asc.Decode(config) != nil {
+		return false
+	}
+
+	am.Channels = int(asc.Channels)
+	if asc.ExtSampleRate > 0 {
+		am.SampleRate = asc.ExtSampleRate
+	} else {
+		am.SampleRate = asc.SampleRate
+	}
+	am.SampleSize = 16
+	return true
+}
--- a/av/codec/aac/sps.go
+++ b/av/codec/aac/sps.go
@@ -1,69 +0,0 @@
-// Copyright (c) 2019,CAOHONGJU All rights reserved.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-
-package aac
-
-import (
-	"encoding/hex"
-	"errors"
-)
-
-// RawSPS .
-// RawSPS == flv.AudioSpecificConfig
-type RawSPS struct {
-	Profile            byte   // 5 bits
-	SampleRate         byte   // 4 bits
-	ChannelConfig      byte   // 4 bits
-	FrameLengthFlag    byte   // 1 bits
-	DependsOnCoreCoder byte   // 1 bits
-	ExtensionFlag      byte   // 1 bits
-	SyncExtensionType  uint16 // 11 bits
-	Profile2           byte   // 5 bits
-	SbrPresentFlag     byte   // 1 bits
-}
-
-// DecodeString 从 hex 字串解码 sps
-func (sps *RawSPS) DecodeString(config string) error {
-	data, err := hex.DecodeString(config)
-	if err != nil {
-		return err
-	}
-	return sps.Decode(data)
-}
-
-// Decode 从字节序列中解码 sps
-func (sps *RawSPS) Decode(config []byte) error {
-	if len(config) < 2 {
-		return errors.New("config miss data")
-	}
-
-	// 5 bits
-	sps.Profile = config[0] >> 3
-	// 4 bits
-	sps.SampleRate = (config[0]&0x07)<<1 | (config[1] >> 7)
-	// 4 bits
-	sps.ChannelConfig = (config[1] >> 3) & 0x0f
-	sps.FrameLengthFlag = (config[1] >> 2) & 0x01
-	sps.DependsOnCoreCoder = (config[1] >> 1) & 0x01
-	sps.ExtensionFlag = config[1] & 0x01
-
-	if len(config) > 3 {
-		sps.SyncExtensionType = ((uint16(config[2]) << 8) | uint16(config[3])) >> 5
-		sps.Profile2 = config[3] & 0x1f
-	}
-	if len(config) > 4 {
-		sps.SbrPresentFlag = config[4] & 0x01
-	}
-
-	return nil
-}
-
-// Encode2Bytes .
-func (sps *RawSPS) Encode2Bytes() (buff [2]byte) {
-	buff[0] = sps.Profile<<3 | (sps.SampleRate>>1)&0x07
-	buff[1] = sps.SampleRate<<7 | (sps.ChannelConfig&0x0f)<<3 |
-		(sps.FrameLengthFlag&0x01)<<2 | (sps.DependsOnCoreCoder&0x01)<<1 |
-		sps.ExtensionFlag&0x01
-	return
-}
--- a/av/codec/aac/sps_test.go
+++ b/av/codec/aac/sps_test.go
@@ -1,51 +0,0 @@
-// Copyright (c) 2019,CAOHONGJU All rights reserved.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-
-package aac
-
-import "testing"
-
-func TestRawSPS_DecodeString(t *testing.T) {
-	tests := []struct {
-		name    string
-		config  string
-		wantErr bool
-	}{
-		{"case1", "121056E500", false},
-		{"case2", "1190", false},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			var sps RawSPS
-			if err := sps.DecodeString(tt.config); (err != nil) != tt.wantErr {
-				t.Errorf("RawSPS.DecodeString() error = %v, wantErr %v", err, tt.wantErr)
-			}
-			profile := sps.Profile
-			_ = profile
-		})
-	}
-}
-
-func TestRawSPS_Encode2Bytes(t *testing.T) {
-	tests := []struct {
-		name        string
-		config      string
-		configBytes [2]byte
-	}{
-		{"case1", "121056E500", [2]byte{0x12, 0x10}},
-		{"case2", "1190", [2]byte{0x11, 0x90}},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			var sps RawSPS
-			sps.DecodeString(tt.config)
-			config :=sps.Encode2Bytes()
-			if config != tt.configBytes {
-				t.Errorf("RawSPS.Encode2Bytes() return = %v, want = %v", config, tt.configBytes)
-			}
-			profile := sps.Profile
-			_ = profile
-		})
-	}
-}
--- a/av/format/mpegts/frame.go
+++ b/av/format/mpegts/frame.go
@@ -119,54 +119,8 @@ func (frame *Frame) prepareAvcHeader(sps, pps []byte) {
 }

 func (frame *Frame) prepareAacHeader(sps *aac.RawSPS) {
-	// AAC-ADTS
-	// 6.2 Audio Data Transport Stream, ADTS
-	// in aac-iso-13818-7.pdf, page 26.
-	// fixed 7bytes header
-	adtsHeader := [7]uint8{0xff, 0xf1, 0x00, 0x00, 0x00, 0x0f, 0xfc}
-	size := len(frame.Payload)
-	// the frame length is the AAC raw data plus the adts header size.
-	frameLen := size + 7
-
-	// adts_fixed_header
-	// 2B, 16bits
-	// int16_t syncword; //12bits, '1111 1111 1111'
-	// int8_t ID; //1bit, '0'
-	// int8_t layer; //2bits, '00'
-	// int8_t protection_absent; //1bit, can be '1'
-
-	// 12bits
-	// int8_t profile; //2bit, 7.1 Profiles, page 40
-	// TSAacSampleFrequency sampling_frequency_index; //4bits, Table 35, page 46
-	// int8_t private_bit; //1bit, can be '0'
-	// int8_t channel_configuration; //3bits, Table 8
-	// int8_t original_or_copy; //1bit, can be '0'
-	// int8_t home; //1bit, can be '0'
-
-	// adts_variable_header
-	// 28bits
-	// int8_t copyright_identification_bit; //1bit, can be '0'
-	// int8_t copyright_identification_start; //1bit, can be '0'
-	// int16_t frame_length; //13bits
-	// int16_t adts_buffer_fullness; //11bits, 7FF signals that the bitstream is a variable rate bitstream.
-	// int8_t number_of_raw_data_blocks_in_frame; //2bits, 0 indicating 1 raw_data_block()
-
-	// profile, 2bits
-	adtsHeader[2] = (sps.Profile << 6) & 0xc0
-	// sampling_frequency_index 4bits
-	adtsHeader[2] |= (sps.SampleRate << 2) & 0x3c
-	// channel_configuration 3bits
-	adtsHeader[2] |= (sps.ChannelConfig >> 2) & 0x01
-	adtsHeader[3] = (sps.ChannelConfig << 6) & 0xc0
-	// frame_length 13bits
-	adtsHeader[3] |= uint8((frameLen >> 11) & 0x03)
-	adtsHeader[4] = uint8((frameLen >> 3) & 0xff)
-	adtsHeader[5] = uint8((frameLen << 5) & 0xe0)
-	// adts_buffer_fullness; //11bits
-	adtsHeader[5] |= 0x1f
-
+	adtsHeader := sps.ToAdtsHeader(len(frame.Payload))
 	frame.Header = adtsHeader[:]
-
 	return
 }

--- a/av/format/mpegts/muxeravcaac.go
+++ b/av/format/mpegts/muxeravcaac.go
@@ -69,17 +69,10 @@ func (muxer *MuxerAvcAac) prepareAacSps() (err error) {
 		return
 	}

-	if 0 == muxer.audioSps.Profile || 0x1f == muxer.audioSps.Profile {
-		err = fmt.Errorf("hls decdoe audio aac sequence header failed, aac profile=%d", muxer.audioSps.Profile)
+	if muxer.audioSps.ObjectType == aac.AOT_NULL || muxer.audioSps.ObjectType == aac.AOT_ESCAPE {
+		err = fmt.Errorf("tsmuxer decdoe audio aac sequence header failed, aac object type=%d", muxer.audioSps.ObjectType)
 		return
 	}
-
-	// the profile = object_id + 1
-	// @see aac-mp4a-format-ISO_IEC_14496-3+2001.pdf, page 78,
-	//      Table 1. A.9 MPEG-2 Audio profiles and MPEG-4 Audio object types
-	// so the aac_profile should plus 1, not minus 1, and nginx-rtmp used it to
-	// downcast aac SSR to LC.
-	muxer.audioSps.Profile--
 	return
 }

--- a/av/format/sdp/parsemeta.go
+++ b/av/format/sdp/parsemeta.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

-package media
+package sdp

 import (
 	"encoding/base64"
@@ -16,10 +16,10 @@ import (
 	"github.com/pixelbender/go-sdp/sdp"
 )

-func parseMeta(rawsdp string, video *codec.VideoMeta, audio *codec.AudioMeta) {
+func ParseMetadata(rawsdp string, video *codec.VideoMeta, audio *codec.AudioMeta) error {
 	sdp, err := sdp.ParseString(rawsdp)
 	if err != nil {
-		return
+		return err
 	}

 	for _, media := range sdp.Media {
@@ -51,12 +51,13 @@ func parseMeta(rawsdp string, video *codec.VideoMeta, audio *codec.AudioMeta) {
 			}
 		}
 	}
+	return nil
 }

 func parseAudioMeta(m *sdp.Format, audio *codec.AudioMeta) {
-	audio.SampleRate = 44100
-	audio.Channels = 2
 	audio.SampleSize = 16
+	audio.Channels = 2
+	audio.SampleRate = 44100
 	if m.ClockRate > 0 {
 		audio.SampleRate = m.ClockRate
 	}
@@ -69,7 +70,6 @@ func parseAudioMeta(m *sdp.Format, audio *codec.AudioMeta) {
 		return
 	}
 	if audio.Codec == "AAC" {
-		audio.Sps = []byte{0x11, 0x90, 0x56, 0xe5, 0x00}
 		for _, p := range m.Params {
 			i := strings.Index(p, "config=")
 			if i < 0 {
@@ -82,22 +82,28 @@ func parseAudioMeta(m *sdp.Format, audio *codec.AudioMeta) {
 				p = p[:endi]
 			}

-			if sps, err := hex.DecodeString(p); err == nil {
-				copy(audio.Sps, sps)
-			} else {
-				var rawSps aac.RawSPS
-				rawSps.Profile = 2
-				rawSps.SampleRate = byte(aac.SampleRateIndex(audio.SampleRate))
-				rawSps.ChannelConfig = byte(audio.Channels)
-				config := rawSps.Encode2Bytes()
-				copy(audio.Sps, config[:])
+			var config []byte
+			var err error
+			if config, err = hex.DecodeString(p); err != nil {
+				config = aac.Encode2BytesASC(2,
+					byte(aac.SamplingIndex(audio.SampleRate)),
+					byte(audio.Channels))
 			}
+
+			// audio.SetParameterSet(aac.ParameterSetConfig, config)
+			audio.Sps = config
+			_ = aac.MetadataIsReady(audio)
 			break
 		}
 	}
+
 }

 func parseVideoMeta(m *sdp.Format, video *codec.VideoMeta) {
+	if m.ClockRate > 0 {
+		video.ClockRate = m.ClockRate
+	}
+
 	if len(m.Params) == 0 {
 		return
 	}
@@ -131,24 +137,16 @@ func parseH264SpsPps(s string, video *codec.VideoMeta) {
 	}

 	sps, err := base64.StdEncoding.DecodeString(spsStr)
-	if err != nil {
-		return
+	if err == nil {
+		// video.SetParameterSet(h264.ParameterSetSps, sps)
+		video.Sps = sps
 	}

 	pps, err := base64.StdEncoding.DecodeString(ppsStr)
-	if err != nil {
-		return
-	}
-
-	var rawSps h264.RawSPS
-	err = rawSps.Decode(sps)
-	if err != nil {
-		return
-	}
-
-	video.Width = rawSps.Width()
-	video.Height = rawSps.Height()
-	video.FrameRate = rawSps.FrameRate()
-	video.Sps = sps
+	if err == nil {
+		// video.SetParameterSet(h264.ParameterSetPps, pps)
 		video.Pps = pps
 	}
+
+	_ = h264.MetadataIsReady(video)
+}
--- a/media/stream.go
+++ b/media/stream.go
@@ -15,6 +15,7 @@ import (
 	"github.com/cnotch/ipchub/av/format/hls"
 	"github.com/cnotch/ipchub/av/format/mpegts"
 	"github.com/cnotch/ipchub/av/format/rtp"
+	"github.com/cnotch/ipchub/av/format/sdp"
 	"github.com/cnotch/ipchub/config"
 	"github.com/cnotch/ipchub/media/cache"
 	"github.com/cnotch/ipchub/stats"
@@ -77,7 +78,7 @@ func NewStream(path string, rawsdp string, options ...Option) *Stream {
 	}

 	// parseMeta
-	parseMeta(rawsdp, &s.Video, &s.Audio)
+	sdp.ParseMetadata(rawsdp, &s.Video, &s.Audio)

 	// init Cache
 	switch s.Video.Codec {