From b2489b2305ebdf9cadf179ef54fcee7ebfa8304a Mon Sep 17 00:00:00 2001 From: dexter <178529795@qq.com> Date: Wed, 2 Feb 2022 10:39:09 +0800 Subject: [PATCH] =?UTF-8?q?4.0=E5=88=9D=E6=AD=A5=E6=94=B9=E9=80=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- audio_track.go | 188 --- b.go | 34 - base_track.go | 196 --- codec/codec.go | 220 +++ codec/flv.go | 110 ++ codec/h264.go | 293 ++++ codec/h265.go | 531 +++++++ codec/mp4.go | 2419 ++++++++++++++++++++++++++++++++ codec/sps.go | 227 +++ common/frame.go | 126 ++ common/index.go | 38 + common/ring.go | 36 + common/ring_av.go | 59 + ring.go => common/ring_lock.go | 110 +- common/stream.go | 9 + data_track.go | 101 +- events.go | 20 + go.mod | 18 +- go.sum | 20 +- hook.go | 98 -- hook_test.go | 28 - main.go | 61 +- publisher.go | 5 + ring_av.go | 91 -- rtp.go | 106 -- rtp_audio.go | 46 - rtp_video.go | 343 ----- stream.go | 435 +++--- subscriber.go | 105 +- track/aac.go | 38 + track/audio.go | 89 ++ track/base.go | 56 + track/g711.go | 30 + track/h264.go | 78 + track/h265.go | 75 + track/video.go | 137 ++ tracks.go | 83 ++ util/big_endian.go | 17 + util/bits/bits.go | 118 ++ util/bits/bits_test.go | 61 + util/bits/bufio/bufio.go | 22 + util/bits/golomb_reader.go | 65 + util/bits/pio/pio.go | 3 + util/bits/pio/reader.go | 121 ++ util/bits/pio/vec.go | 68 + util/bits/pio/vec_test.go | 22 + util/bits/pio/writer.go | 87 ++ util/buffer.go | 30 + util/buffer_test.go | 18 + util/bytes_pool.go | 17 + util/index.go | 5 + util/logger.go | 87 ++ util/map.go | 70 + util/ring.go | 136 ++ util/slice.go | 21 + util/socket.go | 92 ++ util/sse.go | 70 + video_track.go | 444 ------ video_track_test.go | 20 - 59 files changed, 6192 insertions(+), 2061 deletions(-) delete mode 100644 audio_track.go delete mode 100644 b.go delete mode 100644 base_track.go create mode 100644 codec/codec.go create mode 100644 codec/flv.go create mode 100644 codec/h264.go create mode 100644 codec/h265.go create 
mode 100644 codec/mp4.go create mode 100644 codec/sps.go create mode 100644 common/frame.go create mode 100644 common/index.go create mode 100644 common/ring.go create mode 100644 common/ring_av.go rename ring.go => common/ring_lock.go (51%) create mode 100644 common/stream.go create mode 100644 events.go delete mode 100644 hook.go delete mode 100644 hook_test.go create mode 100644 publisher.go delete mode 100644 ring_av.go delete mode 100644 rtp.go delete mode 100644 rtp_audio.go delete mode 100644 rtp_video.go create mode 100644 track/aac.go create mode 100644 track/audio.go create mode 100644 track/base.go create mode 100644 track/g711.go create mode 100644 track/h264.go create mode 100644 track/h265.go create mode 100644 track/video.go create mode 100644 tracks.go create mode 100644 util/big_endian.go create mode 100644 util/bits/bits.go create mode 100644 util/bits/bits_test.go create mode 100644 util/bits/bufio/bufio.go create mode 100644 util/bits/golomb_reader.go create mode 100644 util/bits/pio/pio.go create mode 100644 util/bits/pio/reader.go create mode 100644 util/bits/pio/vec.go create mode 100644 util/bits/pio/vec_test.go create mode 100644 util/bits/pio/writer.go create mode 100644 util/buffer.go create mode 100644 util/buffer_test.go create mode 100644 util/bytes_pool.go create mode 100644 util/index.go create mode 100644 util/logger.go create mode 100644 util/map.go create mode 100644 util/ring.go create mode 100644 util/slice.go create mode 100644 util/socket.go create mode 100644 util/sse.go delete mode 100644 video_track.go delete mode 100644 video_track_test.go diff --git a/audio_track.go b/audio_track.go deleted file mode 100644 index f94c5ba..0000000 --- a/audio_track.go +++ /dev/null @@ -1,188 +0,0 @@ -package engine - -import ( - "time" - - "github.com/Monibuca/utils/v3/codec" -) - -type AudioPack struct { - AVPack - Raw []byte -} -type AudioTrack struct { - AVTrack - SoundRate int //2bit - SoundSize byte //1bit - Channels byte //1bit - 
ExtraData []byte `json:"-"` //rtmp协议需要先发这个帧 - PushByteStream func(ts uint32, payload []byte) `json:"-"` - PushRaw func(ts uint32, payload []byte) `json:"-"` - writeByteStream func() //使用函数写入,避免申请内存 - *AudioPack `json:"-"` // 当前正在写入的音频对象 - -} - -func (at *AudioTrack) pushByteStream(ts uint32, payload []byte) { - if len(payload) == 0 { - return - } - switch at.CodecID = payload[0] >> 4; at.CodecID { - case codec.CodecID_AAC: - if len(payload) < 4 || payload[1] != 0 { - return - } else { - config1, config2 := payload[2], payload[3] - //audioObjectType = (config1 & 0xF8) >> 3 - // 1 AAC MAIN ISO/IEC 14496-3 subpart 4 - // 2 AAC LC ISO/IEC 14496-3 subpart 4 - // 3 AAC SSR ISO/IEC 14496-3 subpart 4 - // 4 AAC LTP ISO/IEC 14496-3 subpart 4 - at.SoundRate = codec.SamplingFrequencies[((config1&0x7)<<1)|(config2>>7)] - at.Channels = ((config2 >> 3) & 0x0F) //声道 - //frameLengthFlag = (config2 >> 2) & 0x01 - //dependsOnCoreCoder = (config2 >> 1) & 0x01 - //extensionFlag = config2 & 0x01 - at.ExtraData = payload - at.timebase = time.Duration(at.SoundRate) - at.PushByteStream = func(ts uint32, payload []byte) { - if len(payload) < 3 { - return - } - at.setTS(ts) - at.Raw = payload[2:] - at.Payload = payload - at.push() - } - at.Stream.AudioTracks.AddTrack("aac", at) - } - default: - at.SoundRate = codec.SoundRate[(payload[0]&0x0c)>>2] // 采样率 0 = 5.5 kHz or 1 = 11 kHz or 2 = 22 kHz or 3 = 44 kHz - at.SoundSize = (payload[0] & 0x02) >> 1 // 采样精度 0 = 8-bit samples or 1 = 16-bit samples - at.Channels = payload[0]&0x01 + 1 - at.ExtraData = payload[:1] - at.timebase = time.Duration(at.SoundRate) - at.PushByteStream = func(ts uint32, payload []byte) { - if len(payload) < 2 { - return - } - at.setTS(ts) - at.Raw = payload[1:] - at.Payload = payload - at.push() - } - switch at.CodecID { - case codec.CodecID_PCMA: - at.Stream.AudioTracks.AddTrack("pcma", at) - case codec.CodecID_PCMU: - at.Stream.AudioTracks.AddTrack("pcmu", at) - } - at.PushByteStream(ts, payload) - } - -} - -func (at 
*AudioTrack) setCurrent() { - at.AVTrack.setCurrent() - at.AudioPack = at.Value.(*AudioPack) -} - -func (at *AudioTrack) pushRaw(ts uint32, payload []byte) { - switch at.CodecID { - case 10: - at.writeByteStream = func() { - at.Reset() - at.Buffer.Write([]byte{at.ExtraData[0], 1}) - at.Buffer.Write(at.Raw) - at.Bytes2Payload() - } - default: - at.writeByteStream = func() { - at.Reset() - at.WriteByte(at.ExtraData[0]) - at.Buffer.Write(at.Raw) - at.Bytes2Payload() - } - } - at.PushRaw = func(ts uint32, payload []byte) { - at.setTS(ts) - at.Raw = payload - at.push() - } - at.PushRaw(ts, payload) -} - -// Push 来自发布者推送的音频 -func (at *AudioTrack) push() { - if at.Stream != nil { - at.Stream.Update() - } - if at.writeByteStream != nil { - at.writeByteStream() - } - at.addBytes(len(at.Raw)) - at.GetBPS() - if at.Timestamp.Sub(at.ts) > time.Second { - at.resetBPS() - } - at.Step() - at.setCurrent() -} - -func (s *Stream) NewAudioTrack(codec byte) (at *AudioTrack) { - at = &AudioTrack{} - at.timebase = 8000 - at.CodecID = codec - at.PushByteStream = at.pushByteStream - at.PushRaw = at.pushRaw - at.Stream = s - at.Init(s.Context, 256) - at.poll = time.Millisecond * 10 - at.Do(func(v interface{}) { - v.(*AVItem).Value = new(AudioPack) - }) - at.setCurrent() - switch codec { - case 10: - s.AudioTracks.AddTrack("aac", at) - case 7: - s.AudioTracks.AddTrack("pcma", at) - case 8: - s.AudioTracks.AddTrack("pcmu", at) - } - return -} -func (at *AudioTrack) SetASC(asc []byte) { - at.ExtraData = append([]byte{0xAF, 0}, asc...) 
- config1 := asc[0] - config2 := asc[1] - at.CodecID = 10 - //audioObjectType = (config1 & 0xF8) >> 3 - // 1 AAC MAIN ISO/IEC 14496-3 subpart 4 - // 2 AAC LC ISO/IEC 14496-3 subpart 4 - // 3 AAC SSR ISO/IEC 14496-3 subpart 4 - // 4 AAC LTP ISO/IEC 14496-3 subpart 4 - at.SoundRate = codec.SamplingFrequencies[((config1&0x7)<<1)|(config2>>7)] - at.Channels = (config2 >> 3) & 0x0F //声道 - //frameLengthFlag = (config2 >> 2) & 0x01 - //dependsOnCoreCoder = (config2 >> 1) & 0x01 - //extensionFlag = config2 & 0x01 - at.timebase = time.Duration(at.SoundRate) - at.Stream.AudioTracks.AddTrack("aac", at) -} - -func (at *AudioTrack) Play(onAudio func(uint32, *AudioPack), exit1, exit2 <-chan struct{}) { - ar := at.Clone() - item, ap := ar.Read() - for startTimestamp := item.Timestamp; ; item, ap = ar.Read() { - select { - case <-exit1: - return - case <-exit2: - return - default: - onAudio(uint32(item.Timestamp.Sub(startTimestamp).Milliseconds()), ap.(*AudioPack)) - ar.MoveNext() - } - } -} diff --git a/b.go b/b.go deleted file mode 100644 index 1c4a656..0000000 --- a/b.go +++ /dev/null @@ -1,34 +0,0 @@ -package engine - -type TSSlice []uint32 - -func (s TSSlice) Len() int { return len(s) } -func (s TSSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s TSSlice) Less(i, j int) bool { return s[i] < s[j] } - -type B struct { - TSSlice - data []*RTPNalu - MaxTS uint32 -} - -func (b *B) Push(x interface{}) { - p := x.(*RTPNalu) - if p.PTS > b.MaxTS { - b.MaxTS = p.PTS - } - b.TSSlice = append(b.TSSlice, p.PTS) - b.data = append(b.data, p) -} - -func (b *B) Pop() interface{} { - l := b.Len()-1 - defer func() { - b.TSSlice = b.TSSlice[:l] - b.data = b.data[:l] - }() - return struct { - DTS uint32 - *RTPNalu - }{DTS: b.TSSlice[l], RTPNalu: b.data[l]} -} diff --git a/base_track.go b/base_track.go deleted file mode 100644 index f2dd546..0000000 --- a/base_track.go +++ /dev/null @@ -1,196 +0,0 @@ -package engine - -import ( - "bytes" - "container/ring" - "context" - "encoding/json" - 
"sync" - "time" - - "github.com/Monibuca/utils/v3" -) - -type Track interface { - GetBPS() -} -type BaseTrack struct { - Stream *Stream `json:"-"` - PacketCount int - BPS int - bytes int - ts time.Time -} - -func (t *BaseTrack) addBytes(size int) { - t.bytes += size -} - -type AVPack struct { - bytes.Buffer - Payload []byte // 字节流格式的媒体数据,如果是需要拼接而成的,则等同于Buffer里面的值 -} - -func (pack *AVPack) Bytes2Payload() { - pack.Payload = pack.Bytes() -} - -type AVTrack struct { - AVRing `json:"-"` - CodecID byte - BaseTrack - *AVItem `json:"-"` //当前正在写入的数据对象 - lastTs uint32 - lastTime time.Time - timebase time.Duration -} - -func (t *DataTrack) resetBPS() { - t.bytes = 0 - t.ts = t.Current().Timestamp -} - -func (t *DataTrack) GetBPS() { - t.PacketCount++ - t.Sequence = t.PacketCount - if delta := time.Since(t.ts); delta != 0 { - t.BPS = t.bytes * 1000 / int(delta) - } -} - -func (t *AVTrack) setCurrent() { - t.AVItem = t.Current() -} - -func (t *AVTrack) resetBPS() { - t.bytes = 0 - t.ts = t.Current().Timestamp -} - -func (t *AVTrack) GetBPS() { - t.PacketCount++ - t.Sequence = t.PacketCount - if delta := int(t.Timestamp.Sub(t.ts).Milliseconds()); delta != 0 { - t.BPS = t.bytes * 1000 / delta - } -} - -func (t *AVTrack) setTS(ts uint32) { - if t.lastTs == 0 { - t.Timestamp = time.Now() - } else { - if t.lastTs > ts || ts-t.lastTs > 10000 { - utils.Printf("timestamp wrong %s lastTs:%d currentTs:%d", t.Stream.StreamPath, t.lastTs, ts) - //按照频率估算时间戳增量 - t.Timestamp = t.lastTime.Add(time.Second / t.timebase) - } else { - t.Timestamp = t.lastTime.Add(time.Duration(ts-t.lastTs) * time.Millisecond) - } - } - t.lastTs = ts - t.lastTime = t.Timestamp -} - -// func (t *Track_Base) Dispose() { -// t.RingDisposable.Dispose() -// } - -type Tracks struct { - RingBuffer - m map[string]Track - context.Context - sync.RWMutex - head *ring.Ring -} - -func (ts *Tracks) MarshalJSON() ([]byte, error) { - ts.RLock() - defer ts.RUnlock() - return json.Marshal(ts.m) -} - -func (ts *Tracks) Init(ctx 
context.Context) { - ts.RingBuffer.Init(ctx, 8) - ts.head = ts.Ring - ts.m = make(map[string]Track) - ts.Context, _ = context.WithTimeout(context.Background(), time.Second*5) -} - -func (ts *Tracks) AddTrack(name string, t Track) { - ts.Lock() - defer ts.Unlock() - if _, ok := ts.m[name]; !ok { - ts.m[name] = t - ts.Write(name) - } -} -func (ts *Tracks) GetTrack(name string) Track { - ts.RLock() - defer ts.RUnlock() - return ts.m[name] -} - -func (ts *Tracks) OnTrack(callback func(string, Track)) { - ts.SubRing(ts.head).ReadLoop(func(name string) { - callback(name, ts.GetTrack(name)) - }, false) -} - -func (ts *Tracks) WaitTrack(names ...string) Track { - ring := ts.SubRing(ts.head) - if ts.Context.Err() == nil { //在等待时间范围内 - if wait := make(chan string); len(names) == 0 { //任意编码需求,只取第一个 - go func() { - if rt, ok := ring.Read().(string); ok { - wait <- rt - } - }() - select { - case t := <-wait: - return ts.GetTrack(t) - case <-ts.Context.Done(): - return nil - } - } else { - go ring.ReadLoop(wait, false) - // go func() { - // for { - // if rt, ok := ring.Read().(string); ok { - // wait <- rt - // ring.MoveNext() - // } else { - // break - // } - // } - // }() - for { - select { - case t := <-wait: - for _, name := range names { - if t == name { - return ts.GetTrack(t) - } - } - case <-ts.Context.Done(): - return nil - } - } - } - } else { //进入不等待状态 - ts.RLock() - defer ts.RUnlock() - if len(names) == 0 { - if len(ts.m) == 0 { - return nil - } - return ts.m[ring.Read().(string)] - } else { - for _, name := range names { - if t, ok := ts.m[name]; ok { - return t - } - } - return nil - } - } -} diff --git a/codec/codec.go b/codec/codec.go new file mode 100644 index 0000000..63fb235 --- /dev/null +++ b/codec/codec.go @@ -0,0 +1,220 @@ +package codec + +import ( + "errors" +) + +const ( + ADTS_HEADER_SIZE = 7 + CodecID_AAC = 0xA + CodecID_PCMA = 7 + CodecID_PCMU = 8 + CodecID_H264 = 7 + CodecID_H265 = 0xC +) + +// ISO/IEC 14496-3 38(52)/page +// +// Audio +// + +type 
AudioSpecificConfig struct { + AudioObjectType byte // 5 bits + SamplingFrequencyIndex byte // 4 bits + ChannelConfiguration byte // 4 bits + GASpecificConfig +} + +type GASpecificConfig struct { + FrameLengthFlag byte // 1 bit + DependsOnCoreCoder byte // 1 bit + ExtensionFlag byte // 1 bit +} + +// +// AudioObjectTypes -> ISO/IEC 14496-3 43(57)/page +// +// 1 AAC MAIN ISO/IEC 14496-3 subpart 4 +// 2 AAC LC ISO/IEC 14496-3 subpart 4 +// 3 AAC SSR ISO/IEC 14496-3 subpart 4 +// 4 AAC LTP ISO/IEC 14496-3 subpart 4 +// +// + +// ISO/IEC 13838-7 20(25)/page +// +// Advanced Audio Coding +// +// AudioDataTransportStream +type ADTS struct { + ADTSFixedHeader + ADTSVariableHeader +} + +// 28 bits +type ADTSFixedHeader struct { + SyncWord uint16 // 12 bits The bit string ‘1111 1111 1111’. See ISO/IEC 11172-3,subclause 2.4.2.3 (Table 8) + ID byte // 1 bit MPEG identifier, set to ‘1’. See ISO/IEC 11172-3,subclause 2.4.2.3 (Table 8) + Layer byte // 2 bits Indicates which layer is used. Set to ‘00’. See ISO/IEC 11172-3,subclause 2.4.2.3 (Table 8) + ProtectionAbsent byte // 1 bit Indicates whether error_check() data is present or not. Same assyntax element ‘protection_bit’ in ISO/IEC 11172-3,subclause 2.4.1 and 2.4.2 (Table 8) + Profile byte // 2 bits profile used. See clause 2 (Table 8) + SamplingFrequencyIndex byte // 4 bits indicates the sampling frequency used according to the followingtable (Table 8) + PrivateBit byte // 1 bit see ISO/IEC 11172-3, subclause 2.4.2.3 (Table 8) + ChannelConfiguration byte // 3 bits indicates the channel configuration used. Ifchannel_configuration is greater than 0, the channelconfiguration is given in Table 42, see subclause 8.5.3.1. 
Ifchannel_configuration equals 0, the channel configuration is notspecified in the header and must be given by aprogram_config_element() following as first syntactic element inthe first raw_data_block() after the header (seesubclause 8.5.3.2), or by the implicit configuration (seesubclause 8.5.3.3) or must be known in the application (Table 8) + OriginalCopy byte // 1 bit see ISO/IEC 11172-3, definition of data element copyright + Home byte // 1 bit see ISO/IEC 11172-3, definition of data element original/copy +} + +// SyncWord, 同步头 总是0xFFF, all bits must be 1,代表着一个ADTS帧的开始 +// ID, MPEG Version: 0 for MPEG-4, 1 for MPEG-2 +// Layer, always: '00' +// ProtectionAbsent, 表示是否误码校验 +// Profile, 表示使用哪个级别的AAC,有些芯片只支持AAC LC 。在MPEG-2 AAC中定义了3种. +// SamplingFrequencyIndex, 表示使用的采样率下标,通过这个下标在 Sampling Frequencies[ ]数组中查找得知采样率的值 +// PrivateBit, +// ChannelConfiguration, 表示声道数 +// OriginalCopy, +// Home, + +// Profile: +// +// 0: Main profile +// 1: Low Complexity profile(LC) +// 2: Scalable Sampling Rate profile(SSR) +// 3: Reserved +// +var SamplingFrequencies = [...]int{96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350, 0, 0, 0} + +// Sampling Frequencies[]: +// +// 0: 96000 Hz +// 1: 88200 Hz +// 2: 64000 Hz +// 3: 48000 Hz +// 4: 44100 Hz +// 5: 32000 Hz +// 6: 24000 Hz +// 7: 22050 Hz +// 8: 16000 Hz +// 9: 12000 Hz +// 10: 11025 Hz +// 11: 8000 Hz +// 12: 7350 Hz +// 13: Reserved +// 14: Reserved +// 15: frequency is written explictly +// + +// ChannelConfiguration: +// +// 0: Defined in AOT Specifc Config +// 1: 1 channel: front-center +// 2: 2 channels: front-left, front-right +// 3: 3 channels: front-center, front-left, front-right +// 4: 4 channels: front-center, front-left, front-right, back-center +// 5: 5 channels: front-center, front-left, front-right, back-left, back-right +// 6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel +// 7: 8 channels: front-center, front-left, front-right, 
side-left, side-right, back-left, back-right, LFE-channel +// 8-15: Reserved +// + +// 28 bits +type ADTSVariableHeader struct { + CopyrightIdentificationBit byte // 1 bit One bit of the 72-bit copyright identification field (seecopyright_id above). The bits of this field are transmitted frame by frame; the first bit is indicated by the copyright_identification_start bit set to ‘1’. The field consists of an 8-bit copyright_identifier, followed by a 64-bit copyright_number.The copyright identifier is given by a Registration Authority as designated by SC29. The copyright_number is a value which identifies uniquely the copyrighted material. See ISO/IEC 13818-3, subclause 2.5.2.13 (Table 9) + CopyrightIdentificationStart byte // 1 bit One bit to indicate that the copyright_identification_bit in this audio frame is the first bit of the 72-bit copyright identification. If no copyright identification is transmitted, this bit should be kept '0'.'0' no start of copyright identification in this audio frame '1' start of copyright identification in this audio frame See ISO/IEC 13818-3, subclause 2.5.2.13 (Table 9) + AACFrameLength uint16 // 13 bits Length of the frame including headers and error_check in bytes(Table 9) + ADTSBufferFullness uint16 // 11 bits state of the bit reservoir in the course of encoding the ADTS frame, up to and including the first raw_data_block() and the optionally following adts_raw_data_block_error_check(). It is transmitted as the number of available bits in the bit reservoir divided by NCC divided by 32 and truncated to an integer value (Table 9). A value of hexadecimal 7FF signals that the bitstream is a variable rate bitstream. In this case, buffer fullness is not applicable + NumberOfRawDataBlockInFrame byte // 2 bits Number of raw_data_block()’s that are multiplexed in the adts_frame() is equal to number_of_raw_data_blocks_in_frame + 1. 
The minimum value is 0 indicating 1 raw_data_block()(Table 9) +} + +// CopyrightIdentificationBit, +// CopyrightIdentificationStart, +// AACFrameLength, 一个ADTS帧的长度包括ADTS头和raw data block. +// ADTSBufferFullness, 0x7FF 说明是码率可变的码流. +// NumberOfRawDataBlockInFrame, 表示ADTS帧中有number_of_raw_data_blocks_in_frame + 1个AAC原始帧 + +// 所以说number_of_raw_data_blocks_in_frame == 0 表示说ADTS帧中有一个AAC数据块并不是说没有。(一个AAC原始帧包含一段时间内1024个采样及相关数据) +func ADTSToAudioSpecificConfig(data []byte) []byte { + profile := ((data[2] & 0xc0) >> 6) + 1 + sampleRate := (data[2] & 0x3c) >> 2 + channel := ((data[2] & 0x1) << 2) | ((data[3] & 0xc0) >> 6) + config1 := (profile << 3) | ((sampleRate & 0xe) >> 1) + config2 := ((sampleRate & 0x1) << 7) | (channel << 3) + return []byte{0xAF, 0x00, config1, config2} +} +func AudioSpecificConfigToADTS(asc AudioSpecificConfig, rawDataLength int) (adts ADTS, adtsByte []byte, err error) { + if asc.ChannelConfiguration > 8 || asc.FrameLengthFlag > 13 { + err = errors.New("Reserved field.") + return + } + + // ADTSFixedHeader + adts.SyncWord = 0xfff + adts.ID = 0 + adts.Layer = 0 + adts.ProtectionAbsent = 1 + + // SyncWord(12) + ID(1) + Layer(2) + ProtectionAbsent(1) + adtsByte = append(adtsByte, 0xff) + adtsByte = append(adtsByte, 0xf1) + + if asc.AudioObjectType >= 3 || asc.AudioObjectType == 0 { + adts.Profile = 1 + } else { + adts.Profile = asc.AudioObjectType - 1 + } + + adts.SamplingFrequencyIndex = asc.SamplingFrequencyIndex + adts.PrivateBit = 0 + adts.ChannelConfiguration = asc.ChannelConfiguration + adts.OriginalCopy = 0 + adts.Home = 0 + + // Profile(2) + SamplingFrequencyIndex(4) + PrivateBit(1) + ChannelConfiguration(3)(取高1位) + byte3 := uint8(adts.Profile<<6) + uint8(adts.SamplingFrequencyIndex<<2) + uint8(adts.PrivateBit<<1) + uint8((adts.ChannelConfiguration&0x7)>>2) + adtsByte = append(adtsByte, byte3) + + // ADTSVariableHeader + adts.CopyrightIdentificationBit = 0 + adts.CopyrightIdentificationStart = 0 + adts.AACFrameLength = 7 + uint16(rawDataLength) + 
adts.ADTSBufferFullness = 0x7ff + adts.NumberOfRawDataBlockInFrame = 0 + + // ChannelConfiguration(3)(取低2位) + OriginalCopy(1) + Home(1) + CopyrightIdentificationBit(1) + CopyrightIdentificationStart(1) + AACFrameLength(13)(取高2位) + byte4 := uint8((adts.ChannelConfiguration&0x3)<<6) + uint8((adts.AACFrameLength&0x1fff)>>11) + adtsByte = append(adtsByte, byte4) + + // AACFrameLength(13) + // xx xxxxxxxx xxx + // 取中间的部分 + byte5 := uint8(((adts.AACFrameLength & 0x1fff) >> 3) & 0x0ff) + adtsByte = append(adtsByte, byte5) + + // AACFrameLength(13)(取低3位) + ADTSBufferFullness(11)(取高5位) + byte6 := uint8((adts.AACFrameLength&0x0007)<<5) + 0x1f + adtsByte = append(adtsByte, byte6) + + // ADTSBufferFullness(11)(取低6位) + NumberOfRawDataBlockInFrame(2) + adtsByte = append(adtsByte, 0xfc) + + return +} +func ParseRTPAAC(payload []byte) (result [][]byte) { + auHeaderLen := (int16(payload[0]) << 8) + int16(payload[1]) + auHeaderLen = auHeaderLen >> 3 + auHeaderCount := int(auHeaderLen / 2) + var auLenArray []int + for iIndex := 0; iIndex < int(auHeaderCount); iIndex++ { + auHeaderInfo := (int16(payload[2+2*iIndex]) << 8) + int16(payload[2+2*iIndex+1]) + auLen := auHeaderInfo >> 3 + auLenArray = append(auLenArray, int(auLen)) + } + startOffset := 2 + 2*auHeaderCount + for _, auLen := range auLenArray { + endOffset := startOffset + auLen + result = append(result, payload[startOffset:endOffset]) + startOffset = startOffset + auLen + } + return +} diff --git a/codec/flv.go b/codec/flv.go new file mode 100644 index 0000000..1276c34 --- /dev/null +++ b/codec/flv.go @@ -0,0 +1,110 @@ +package codec + +import ( + "io" + "net" + + "github.com/Monibuca/engine/v4/util" +) + +const ( + // FLV Tag Type + FLV_TAG_TYPE_AUDIO = 0x08 + FLV_TAG_TYPE_VIDEO = 0x09 + FLV_TAG_TYPE_SCRIPT = 0x12 +) + +var ( + Codec2SoundFormat = map[string]byte{ + "aac": 10, + "pcma": 7, + "pcmu": 8, + } + // 音频格式. 
4 bit + SoundFormat = map[byte]string{ + 0: "Linear PCM, platform endian", + 1: "ADPCM", + 2: "MP3", + 3: "Linear PCM, little endian", + 4: "Nellymoser 16kHz mono", + 5: "Nellymoser 8kHz mono", + 6: "Nellymoser", + 7: "PCMA", + 8: "PCMU", + 9: "reserved", + 10: "AAC", + 11: "Speex", + 14: "MP3 8Khz", + 15: "Device-specific sound"} + + // 采样频率. 2 bit + SoundRate = map[byte]int{ + 0: 5500, + 1: 11000, + 2: 22000, + 3: 44000} + + // 量化精度. 1 bit + SoundSize = map[byte]string{ + 0: "8Bit", + 1: "16Bit"} + + // 音频类型. 1bit + SoundType = map[byte]string{ + 0: "Mono", + 1: "Stereo"} + + // 视频帧类型. 4bit + FrameType = map[byte]string{ + 1: "keyframe (for AVC, a seekable frame)", + 2: "inter frame (for AVC, a non-seekable frame)", + 3: "disposable inter frame (H.263 only)", + 4: "generated keyframe (reserved for server use only)", + 5: "video info/command frame"} + + // 视频编码类型. 4bit + CodecID = map[byte]string{ + 1: "JPEG (currently unused)", + 2: "Sorenson H.263", + 3: "Screen video", + 4: "On2 VP6", + 5: "On2 VP6 with alpha channel", + 6: "Screen video version 2", + 7: "H264", + 12: "H265"} +) + +var FLVHeader = []byte{0x46, 0x4c, 0x56, 0x01, 0x05, 0, 0, 0, 9, 0, 0, 0, 0} + +func WriteFLVTag(w io.Writer, t byte, timestamp uint32, payload net.Buffers) (err error) { + head := make([]byte, 11) + tail := make([]byte, 4) + head[0] = t + dataSize := uint32(len(payload)) + util.PutBE(tail, dataSize+11) + util.PutBE(head[1:4], dataSize) + head[4] = byte(timestamp >> 16) + head[5] = byte(timestamp >> 8) + head[6] = byte(timestamp) + head[7] = byte(timestamp >> 24) + var tag = net.Buffers{head} + tag = append(tag, payload...) 
+ tag = append(tag, tail) + // Tag Data + _, err = tag.WriteTo(w) + return +} +func ReadFLVTag(r io.Reader) (t byte, timestamp uint32, payload []byte, err error) { + head := make([]byte, 11) + if _, err = io.ReadFull(r, head); err != nil { + return + } + t = head[0] + dataSize := util.ReadBE[int](head[1:4]) + timestamp = (uint32(head[7]) << 24) | (uint32(head[4]) << 16) | (uint32(head[5]) << 8) | uint32(head[6]) + payload = make([]byte, dataSize) + if _, err = io.ReadFull(r, payload); err == nil { + _, err = io.ReadFull(r, head[:4]) + } + return +} diff --git a/codec/h264.go b/codec/h264.go new file mode 100644 index 0000000..954809e --- /dev/null +++ b/codec/h264.go @@ -0,0 +1,293 @@ +package codec + +import ( + "bytes" + "errors" + "io" + + "github.com/Monibuca/engine/v4/util" + "github.com/Monibuca/engine/v4/util/bits/pio" +) + +// Start Code + NAL Unit -> NALU Header + NALU Body +// RTP Packet -> NALU Header + NALU Body + +// NALU Body -> Slice Header + Slice data +// Slice data -> flags + Macroblock layer1 + Macroblock layer2 + ... 
+// Macroblock layer1 -> mb_type + PCM Data +// Macroblock layer2 -> mb_type + Sub_mb_pred or mb_pred + Residual Data +// Residual Data -> + +const ( + // NALU Type + NALU_Unspecified byte = iota + NALU_Non_IDR_Picture // 1 + NALU_Data_Partition_A // 2 + NALU_Data_Partition_B // 3 + NALU_Data_Partition_C // 4 + NALU_IDR_Picture // 5 + NALU_SEI // 6 + NALU_SPS // 7 + NALU_PPS // 8 + NALU_Access_Unit_Delimiter // 9 + NALU_Sequence_End // 10 + NALU_Stream_End // 11 + NALU_Filler_Data // 12 + NALU_SPS_Extension // 13 + NALU_Prefix // 14 + NALU_SPS_Subset // 15 + NALU_DPS // 16 + NALU_Reserved1 // 17 + NALU_Reserved2 // 18 + NALU_Not_Auxiliary_Coded // 19 + NALU_Coded_Slice_Extension // 20 + NALU_Reserved3 // 21 + NALU_Reserved4 // 22 + NALU_Reserved5 // 23 + NALU_STAPA // 24 + NALU_STAPB + NALU_MTAP16 + NALU_MTAP24 + NALU_FUA // 28 + NALU_FUB + // 24 - 31 NALU_NotReserved + +) + +var ( + NALU_AUD_BYTE = []byte{0x00, 0x00, 0x00, 0x01, 0x09, 0xF0} + NALU_Delimiter1 = []byte{0x00, 0x00, 0x01} + NALU_Delimiter2 = []byte{0x00, 0x00, 0x00, 0x01} + // 0x17 keyframe 7:AVC + // 0x00 AVC sequence header + // 0x00 0x00 0x00 + // 0x01 configurationVersion + // 0x42 AVCProfileIndication + // 0x00 profile_compatibility + // 0x1E AVCLevelIndication + // 0xFF lengthSizeMinusOne + RTMP_AVC_HEAD = []byte{0x17, 0x00, 0x00, 0x00, 0x00, 0x01, 0x42, 0x00, 0x1E, 0xFF} + RTMP_KEYFRAME_HEAD = []byte{0x17, 0x01, 0x00, 0x00, 0x00} + RTMP_NORMALFRAME_HEAD = []byte{0x27, 0x01, 0x00, 0x00, 0x00} +) +var NALU_SEI_BYTE []byte + +// H.264/AVC视频编码标准中,整个系统框架被分为了两个层面:视频编码层面(VCL)和网络抽象层面(NAL) +// NAL - Network Abstract Layer +// raw byte sequence payload (RBSP) 原始字节序列载荷 + +// SplitH264 以0x00000001分割H264裸数据 +func SplitH264(payload []byte) (nalus [][]byte) { + for _, v := range bytes.SplitN(payload, NALU_Delimiter2, -1) { + if len(v) == 0 { + continue + } + nalus = append(nalus, bytes.SplitN(v, NALU_Delimiter1, -1)...) 
+ } + return +} + +func BuildH264SeqHeaderFromSpsPps(sps, pps []byte) (seqHeader []byte) { + lenSPS, lenPPS := len(sps), len(pps) + seqHeader = append([]byte{}, RTMP_AVC_HEAD...) + if lenSPS > 3 { + copy(seqHeader[6:], sps[1:4]) + } + seqHeader = append(seqHeader, 0xE1, byte(lenSPS>>8), byte(lenSPS)) + seqHeader = append(seqHeader, sps...) + seqHeader = append(append(seqHeader, 0x01, byte(lenPPS>>8), byte(lenPPS)), pps...) + return +} + +// ISO/IEC 14496-15 11(16)/page +// +// Advanced Video Coding +// + +// AVCC +type AVCDecoderConfigurationRecord struct { + ConfigurationVersion byte // 8 bits Version + AVCProfileIndication byte // 8 bits + ProfileCompatibility byte // 8 bits + AVCLevelIndication byte // 8 bits + Reserved1 byte // 6 bits + LengthSizeMinusOne byte // 2 bits 非常重要,每个NALU包前面都(lengthSizeMinusOne & 3)+1个字节的NAL包长度描述 + Reserved2 byte // 3 bits + NumOfSequenceParameterSets byte // 5 bits SPS 的个数,计算方法是 numOfSequenceParameterSets & 0x1F + NumOfPictureParameterSets byte // 8 bits PPS 的个数 + + SequenceParameterSetLength uint16 // 16 byte SPS Length + SequenceParameterSetNALUnit []byte // n byte SPS + PictureParameterSetLength uint16 // 16 byte PPS Length + PictureParameterSetNALUnit []byte // n byte PPS +} + +func (p *AVCDecoderConfigurationRecord) Marshal(b []byte) (n int) { + b[0] = 1 + b[1] = p.AVCProfileIndication + b[2] = p.ProfileCompatibility + b[3] = p.AVCLevelIndication + b[4] = p.LengthSizeMinusOne | 0xfc + b[5] = uint8(1) | 0xe0 + n += 6 + + pio.PutU16BE(b[n:], p.SequenceParameterSetLength) + n += 2 + copy(b[n:], p.SequenceParameterSetNALUnit) + n += len(p.SequenceParameterSetNALUnit) + b[n] = uint8(1) + n++ + + pio.PutU16BE(b[n:], p.PictureParameterSetLength) + n += 2 + copy(b[n:], p.PictureParameterSetNALUnit) + n += len(p.PictureParameterSetNALUnit) + + return +} + +var ErrDecconfInvalid = errors.New("decode error") + +func (p *AVCDecoderConfigurationRecord) Unmarshal(b []byte) (n int, err error) { + if len(b) < 7 { + err = errors.New("not enough 
len") + return + } + + p.AVCProfileIndication = b[1] + p.ProfileCompatibility = b[2] + p.AVCLevelIndication = b[3] + p.LengthSizeMinusOne = b[4] & 0x03 + spscount := int(b[5] & 0x1f) + n += 6 + var sps, pps [][]byte + for i := 0; i < spscount; i++ { + if len(b) < n+2 { + err = ErrDecconfInvalid + return + } + spslen := util.ReadBE[int](b[n : n+2]) + n += 2 + + if len(b) < n+spslen { + err = ErrDecconfInvalid + return + } + sps = append(sps, b[n:n+spslen]) + n += spslen + } + p.SequenceParameterSetLength = uint16(len(sps[0])) + p.SequenceParameterSetNALUnit = sps[0] + if len(b) < n+1 { + err = ErrDecconfInvalid + return + } + ppscount := int(b[n]) + n++ + + for i := 0; i < ppscount; i++ { + if len(b) < n+2 { + err = ErrDecconfInvalid + return + } + ppslen := util.ReadBE[int](b[n : n+2]) + n += 2 + + if len(b) < n+ppslen { + err = ErrDecconfInvalid + return + } + pps = append(pps, b[n:n+ppslen]) + n += ppslen + } + if ppscount >= 1 { + p.PictureParameterSetLength = uint16(len(pps[0])) + p.PictureParameterSetNALUnit = pps[0] + } else { + err = ErrDecconfInvalid + } + return +} + +type NALUnit struct { + NALUHeader + RBSP +} + +type NALUHeader struct { + forbidden_zero_bit byte // 1 bit 0 + nal_ref_idc byte // 2 bits nal_unit_type等于6,9,10,11或12的NAL单元其nal_ref_idc都应等于 0 + nal_uint_type byte // 5 bits 包含在 NAL 单元中的 RBSP 数据结构的类型 +} + +type RBSP interface { +} + +/* +0 Unspecified non-VCL +1 Coded slice of a non-IDR picture VCL +2 Coded slice data partition A VCL +3 Coded slice data partition B VCL +4 Coded slice data partition C VCL +5 Coded slice of an IDR picture VCL +6 Supplemental enhancement information (SEI) non-VCL +7 Sequence parameter set non-VCL +8 Picture parameter set non-VCL +9 Access unit delimiter non-VCL +10 End of sequence non-VCL +11 End of stream non-VCL +12 Filler data non-VCL +13 Sequence parameter set extension non-VCL +14 Prefix NAL unit non-VCL +15 Subset sequence parameter set non-VCL +16 Depth parameter set non-VCL +17..18 Reserved non-VCL +19 
Coded slice of an auxiliary coded picture without partitioning non-VCL +20 Coded slice extension non-VCL +21 Coded slice extension for depth view components non-VCL +22..23 Reserved non-VCL +24..31 Unspecified non-VCL + +0:未规定 +1:非IDR图像中不采用数据划分的片段 +2:非IDR图像中A类数据划分片段 +3:非IDR图像中B类数据划分片段 +4:非IDR图像中C类数据划分片段 +5:IDR图像的片段 +6:补充增强信息(SEI) +7:序列参数集(SPS) +8:图像参数集(PPS) +9:分割符 +10:序列结束符 +11:流结束符 +12:填充数据 +13:序列参数集扩展 +14:带前缀的NAL单元 +15:子序列参数集 +16 – 18:保留 +19:不采用数据划分的辅助编码图像片段 +20:编码片段扩展 +21 – 23:保留 +24 – 31:未规定 + +nal_unit_type NAL类型 nal_reference_bit +0 未使用 0 +1 非IDR的片 此片属于参考帧,则不等于0,不属于参考帧,则等与0 +2 片数据A分区 同上 +3 片数据B分区 同上 +4 片数据C分区 同上 +5 IDR图像的片 5 +6 补充增强信息单元(SEI) 0 +7 序列参数集 非0 +8 图像参数集 非0 +9 分界符 0 +10 序列结束 0 +11 码流结束 0 +12 填充 0 +13..23 保留 0 +24..31 不保留 0 +*/ + +func ReadPPS(w io.Writer) { + +} diff --git a/codec/h265.go b/codec/h265.go new file mode 100644 index 0000000..d53ea30 --- /dev/null +++ b/codec/h265.go @@ -0,0 +1,531 @@ +package codec + +import ( + "bytes" + "errors" + + "github.com/Monibuca/engine/v4/util" + "github.com/q191201771/naza/pkg/nazabits" +) + +const ( + // HEVC_VPS = 0x40 + // HEVC_SPS = 0x42 + // HEVC_PPS = 0x44 + // HEVC_SEI = 0x4E + // HEVC_IDR = 0x26 + // HEVC_PSLICE = 0x02 + + NAL_UNIT_CODED_SLICE_TRAIL_N byte = iota // 0 + NAL_UNIT_CODED_SLICE_TRAIL_R // 1 + NAL_UNIT_CODED_SLICE_TSA_N // 2 + NAL_UNIT_CODED_SLICE_TLA // 3 // Current name in the spec: TSA_R + NAL_UNIT_CODED_SLICE_STSA_N // 4 + NAL_UNIT_CODED_SLICE_STSA_R // 5 + NAL_UNIT_CODED_SLICE_RADL_N // 6 + NAL_UNIT_CODED_SLICE_DLP // 7 // Current name in the spec: RADL_R + NAL_UNIT_CODED_SLICE_RASL_N // 8 + NAL_UNIT_CODED_SLICE_TFD // 9 // Current name in the spec: RASL_R + NAL_UNIT_RESERVED_10 + NAL_UNIT_RESERVED_11 + NAL_UNIT_RESERVED_12 + NAL_UNIT_RESERVED_13 + NAL_UNIT_RESERVED_14 + NAL_UNIT_RESERVED_15 + NAL_UNIT_CODED_SLICE_BLA // 16 // Current name in the spec: BLA_W_LP + NAL_UNIT_CODED_SLICE_BLANT // 17 // Current name in the spec: BLA_W_DLP + NAL_UNIT_CODED_SLICE_BLA_N_LP // 18 + 
NAL_UNIT_CODED_SLICE_IDR // 19// Current name in the spec: IDR_W_DLP + NAL_UNIT_CODED_SLICE_IDR_N_LP // 20 + NAL_UNIT_CODED_SLICE_CRA // 21 + NAL_UNIT_RESERVED_22 + NAL_UNIT_RESERVED_23 + NAL_UNIT_RESERVED_24 + NAL_UNIT_RESERVED_25 + NAL_UNIT_RESERVED_26 + NAL_UNIT_RESERVED_27 + NAL_UNIT_RESERVED_28 + NAL_UNIT_RESERVED_29 + NAL_UNIT_RESERVED_30 + NAL_UNIT_RESERVED_31 + NAL_UNIT_VPS // 32 + NAL_UNIT_SPS // 33 + NAL_UNIT_PPS // 34 + NAL_UNIT_ACCESS_UNIT_DELIMITER // 35 + NAL_UNIT_EOS // 36 + NAL_UNIT_EOB // 37 + NAL_UNIT_FILLER_DATA // 38 + NAL_UNIT_SEI // 39 Prefix SEI + NAL_UNIT_SEI_SUFFIX // 40 Suffix SEI + NAL_UNIT_RESERVED_41 + NAL_UNIT_RESERVED_42 + NAL_UNIT_RESERVED_43 + NAL_UNIT_RESERVED_44 + NAL_UNIT_RESERVED_45 + NAL_UNIT_RESERVED_46 + NAL_UNIT_RESERVED_47 + NAL_UNIT_UNSPECIFIED_48 + NAL_UNIT_UNSPECIFIED_49 + NAL_UNIT_UNSPECIFIED_50 + NAL_UNIT_UNSPECIFIED_51 + NAL_UNIT_UNSPECIFIED_52 + NAL_UNIT_UNSPECIFIED_53 + NAL_UNIT_UNSPECIFIED_54 + NAL_UNIT_UNSPECIFIED_55 + NAL_UNIT_UNSPECIFIED_56 + NAL_UNIT_UNSPECIFIED_57 + NAL_UNIT_UNSPECIFIED_58 + NAL_UNIT_UNSPECIFIED_59 + NAL_UNIT_UNSPECIFIED_60 + NAL_UNIT_UNSPECIFIED_61 + NAL_UNIT_UNSPECIFIED_62 + NAL_UNIT_UNSPECIFIED_63 + NAL_UNIT_INVALID +) + +var AudNalu = []byte{0x00, 0x00, 0x00, 0x01, 0x46, 0x01, 0x10} +var ErrHevc = errors.New("m7s.hevc: fxxk") + +//HVCC +type HVCDecoderConfigurationRecord struct { + PicWidthInLumaSamples uint32 // sps + PicHeightInLumaSamples uint32 // sps + + configurationVersion uint8 + + generalProfileSpace uint8 + generalTierFlag uint8 + generalProfileIdc uint8 + generalProfileCompatibilityFlags uint32 + generalConstraintIndicatorFlags uint64 + generalLevelIdc uint8 + + lengthSizeMinusOne uint8 + + numTemporalLayers uint8 + temporalIdNested uint8 + + chromaFormat uint8 + bitDepthLumaMinus8 uint8 + bitDepthChromaMinus8 uint8 +} + +func ParseVpsSpsPpsFromSeqHeaderWithoutMalloc(payload []byte) (vps, sps, pps []byte, err error) { + if len(payload) < 5 { + return nil, nil, nil, ErrHevc + } + 
+ if payload[0] != 0x1c || payload[1] != 0x00 || payload[2] != 0 || payload[3] != 0 || payload[4] != 0 { + return nil, nil, nil, ErrHevc + } + + if len(payload) < 33 { + return nil, nil, nil, ErrHevc + } + + index := 27 + if numOfArrays := payload[index]; numOfArrays != 3 && numOfArrays != 4 { + return nil, nil, nil, ErrHevc + } + index++ + + if payload[index] != byte(NAL_UNIT_VPS)&0x3f { + return nil, nil, nil, ErrHevc + } + if numNalus := util.ReadBE[int](payload[index+1 : index+3]); numNalus != 1 { + return nil, nil, nil, ErrHevc + } + vpsLen := util.ReadBE[int](payload[index+3 : index+5]) + + if len(payload) < 33+vpsLen { + return nil, nil, nil, ErrHevc + } + + vps = payload[index+5 : index+5+vpsLen] + index += 5 + vpsLen + + if len(payload) < 38+vpsLen { + return nil, nil, nil, ErrHevc + } + if payload[index] != byte(NAL_UNIT_SPS)&0x3f { + return nil, nil, nil, ErrHevc + } + if numNalus := util.ReadBE[int](payload[index+1 : index+3]); numNalus != 1 { + return nil, nil, nil, ErrHevc + } + spsLen := util.ReadBE[int](payload[index+3 : index+5]) + if len(payload) < 38+vpsLen+spsLen { + return nil, nil, nil, ErrHevc + } + sps = payload[index+5 : index+5+spsLen] + index += 5 + spsLen + + if len(payload) < 43+vpsLen+spsLen { + return nil, nil, nil, ErrHevc + } + if payload[index] != byte(NAL_UNIT_PPS)&0x3f { + return nil, nil, nil, ErrHevc + } + if numNalus := util.ReadBE[int](payload[index+1 : index+3]); numNalus != 1 { + return nil, nil, nil, ErrHevc + } + ppsLen := util.ReadBE[int](payload[index+3 : index+5]) + if len(payload) < 43+vpsLen+spsLen+ppsLen { + return nil, nil, nil, ErrHevc + } + pps = payload[index+5 : index+5+ppsLen] + + return +} +func BuildH265SeqHeaderFromVpsSpsPps(vps, sps, pps []byte) ([]byte, error) { + sh := make([]byte, 43+len(vps)+len(sps)+len(pps)) + sh[0] = 0x1c + sh[1] = 0x0 + sh[2] = 0x0 + sh[3] = 0x0 + sh[4] = 0x0 + + // unsigned int(8) configurationVersion = 1; + sh[5] = 0x1 + + ctx := HVCDecoderConfigurationRecord{ + 
configurationVersion: 1, + lengthSizeMinusOne: 3, // 4 bytes + generalProfileCompatibilityFlags: 0xffffffff, + generalConstraintIndicatorFlags: 0xffffffffffff, + } + if err := ctx.ParseVps(vps); err != nil { + return nil, err + } + if err := ctx.ParseSps(sps); err != nil { + return nil, err + } + + // unsigned int(2) general_profile_space; + // unsigned int(1) general_tier_flag; + // unsigned int(5) general_profile_idc; + sh[6] = ctx.generalProfileSpace<<6 | ctx.generalTierFlag<<5 | ctx.generalProfileIdc + // unsigned int(32) general_profile_compatibility_flags + util.PutBE(sh[7:7+4], ctx.generalProfileCompatibilityFlags) + // unsigned int(48) general_constraint_indicator_flags + util.PutBE(sh[11:11+4], uint32(ctx.generalConstraintIndicatorFlags>>16)) + util.PutBE(sh[15:15+2], uint16(ctx.generalConstraintIndicatorFlags)) + // unsigned int(8) general_level_idc; + sh[17] = ctx.generalLevelIdc + + // bit(4) reserved = ‘1111’b; + // unsigned int(12) min_spatial_segmentation_idc; + // bit(6) reserved = ‘111111’b; + // unsigned int(2) parallelismType; + // TODO chef: 这两个字段没有解析 + util.PutBE(sh[18:20], 0xf000) + sh[20] = 0xfc + + // bit(6) reserved = ‘111111’b; + // unsigned int(2) chromaFormat; + sh[21] = ctx.chromaFormat | 0xfc + + // bit(5) reserved = ‘11111’b; + // unsigned int(3) bitDepthLumaMinus8; + sh[22] = ctx.bitDepthLumaMinus8 | 0xf8 + + // bit(5) reserved = ‘11111’b; + // unsigned int(3) bitDepthChromaMinus8; + sh[23] = ctx.bitDepthChromaMinus8 | 0xf8 + + // bit(16) avgFrameRate; + util.PutBE(sh[24:26], 0) + + // bit(2) constantFrameRate; + // bit(3) numTemporalLayers; + // bit(1) temporalIdNested; + // unsigned int(2) lengthSizeMinusOne; + sh[26] = 0<<6 | ctx.numTemporalLayers<<3 | ctx.temporalIdNested<<2 | ctx.lengthSizeMinusOne + + // num of vps sps pps + sh[27] = 0x03 + i := 28 + sh[i] = byte(NAL_UNIT_VPS) + // num of vps + util.PutBE(sh[i+1:i+3], 1) + // length + util.PutBE(sh[i+3:i+5], len(vps)) + copy(sh[i+5:], vps) + i = i + 5 + len(vps) + sh[i] = 
byte(NAL_UNIT_SPS) + util.PutBE(sh[i+1:i+3], 1) + util.PutBE(sh[i+3:i+5], len(sps)) + copy(sh[i+5:], sps) + i = i + 5 + len(sps) + sh[i] = byte(NAL_UNIT_PPS) + util.PutBE(sh[i+1:i+3], 1) + util.PutBE(sh[i+3:i+5], len(pps)) + copy(sh[i+5:], pps) + + return sh, nil +} +func (ctx *HVCDecoderConfigurationRecord) ParseVps(vps []byte) error { + if len(vps) < 2 { + return ErrHevc + } + + rbsp := nal2rbsp(vps[2:]) + br := nazabits.NewBitReader(rbsp) + + // skip + // vps_video_parameter_set_id u(4) + // vps_reserved_three_2bits u(2) + // vps_max_layers_minus1 u(6) + if _, err := br.ReadBits16(12); err != nil { + return ErrHevc + } + + vpsMaxSubLayersMinus1, err := br.ReadBits8(3) + if err != nil { + return ErrHevc + } + if vpsMaxSubLayersMinus1+1 > ctx.numTemporalLayers { + ctx.numTemporalLayers = vpsMaxSubLayersMinus1 + 1 + } + + // skip + // vps_temporal_id_nesting_flag u(1) + // vps_reserved_0xffff_16bits u(16) + if _, err := br.ReadBits32(17); err != nil { + return ErrHevc + } + + return ctx.parsePtl(&br, vpsMaxSubLayersMinus1) +} + +func (ctx *HVCDecoderConfigurationRecord) ParseSps(sps []byte) error { + var err error + + if len(sps) < 2 { + return ErrHevc + } + + rbsp := nal2rbsp(sps[2:]) + br := nazabits.NewBitReader(rbsp) + + // sps_video_parameter_set_id + if _, err = br.ReadBits8(4); err != nil { + return err + } + + spsMaxSubLayersMinus1, err := br.ReadBits8(3) + if err != nil { + return err + } + + if spsMaxSubLayersMinus1+1 > ctx.numTemporalLayers { + ctx.numTemporalLayers = spsMaxSubLayersMinus1 + 1 + } + + // sps_temporal_id_nesting_flag + if ctx.temporalIdNested, err = br.ReadBit(); err != nil { + return err + } + + if err = ctx.parsePtl(&br, spsMaxSubLayersMinus1); err != nil { + return err + } + + // sps_seq_parameter_set_id + if _, err = br.ReadGolomb(); err != nil { + return err + } + + var cf uint32 + if cf, err = br.ReadGolomb(); err != nil { + return err + } + ctx.chromaFormat = uint8(cf) + if ctx.chromaFormat == 3 { + if _, err = br.ReadBit(); err != 
nil { + return err + } + } + + if ctx.PicWidthInLumaSamples, err = br.ReadGolomb(); err != nil { + return err + } + if ctx.PicHeightInLumaSamples, err = br.ReadGolomb(); err != nil { + return err + } + + conformanceWindowFlag, err := br.ReadBit() + if err != nil { + return err + } + if conformanceWindowFlag != 0 { + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + } + + var bdlm8 uint32 + if bdlm8, err = br.ReadGolomb(); err != nil { + return err + } + ctx.bitDepthChromaMinus8 = uint8(bdlm8) + var bdcm8 uint32 + if bdcm8, err = br.ReadGolomb(); err != nil { + return err + } + ctx.bitDepthChromaMinus8 = uint8(bdcm8) + + _, err = br.ReadGolomb() + if err != nil { + return err + } + spsSubLayerOrderingInfoPresentFlag, err := br.ReadBit() + if err != nil { + return err + } + var i uint8 + if spsSubLayerOrderingInfoPresentFlag != 0 { + i = 0 + } else { + i = spsMaxSubLayersMinus1 + } + for ; i <= spsMaxSubLayersMinus1; i++ { + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + } + + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + if _, err = br.ReadGolomb(); err != nil { + return err + } + + return nil +} + +func (ctx *HVCDecoderConfigurationRecord) parsePtl(br *nazabits.BitReader, maxSubLayersMinus1 uint8) error { + var err error + var ptl HVCDecoderConfigurationRecord + if ptl.generalProfileSpace, err = br.ReadBits8(2); err != nil { + return err + } + if ptl.generalTierFlag, err = br.ReadBit(); 
err != nil { + return err + } + if ptl.generalProfileIdc, err = br.ReadBits8(5); err != nil { + return err + } + if ptl.generalProfileCompatibilityFlags, err = br.ReadBits32(32); err != nil { + return err + } + if ptl.generalConstraintIndicatorFlags, err = br.ReadBits64(48); err != nil { + return err + } + if ptl.generalLevelIdc, err = br.ReadBits8(8); err != nil { + return err + } + ctx.updatePtl(&ptl) + + if maxSubLayersMinus1 == 0 { + return nil + } + + subLayerProfilePresentFlag := make([]uint8, maxSubLayersMinus1) + subLayerLevelPresentFlag := make([]uint8, maxSubLayersMinus1) + for i := uint8(0); i < maxSubLayersMinus1; i++ { + if subLayerProfilePresentFlag[i], err = br.ReadBit(); err != nil { + return err + } + if subLayerLevelPresentFlag[i], err = br.ReadBit(); err != nil { + return err + } + } + if maxSubLayersMinus1 > 0 { + for i := maxSubLayersMinus1; i < 8; i++ { + if _, err = br.ReadBits8(2); err != nil { + return err + } + } + } + + for i := uint8(0); i < maxSubLayersMinus1; i++ { + if subLayerProfilePresentFlag[i] != 0 { + if _, err = br.ReadBits32(32); err != nil { + return err + } + if _, err = br.ReadBits32(32); err != nil { + return err + } + if _, err = br.ReadBits32(24); err != nil { + return err + } + } + + if subLayerLevelPresentFlag[i] != 0 { + if _, err = br.ReadBits8(8); err != nil { + return err + } + } + } + + return nil +} + +func (ctx *HVCDecoderConfigurationRecord) updatePtl(ptl *HVCDecoderConfigurationRecord) { + ctx.generalProfileSpace = ptl.generalProfileSpace + + if ptl.generalTierFlag > ctx.generalTierFlag { + ctx.generalLevelIdc = ptl.generalLevelIdc + + ctx.generalTierFlag = ptl.generalTierFlag + } else { + if ptl.generalLevelIdc > ctx.generalLevelIdc { + ctx.generalLevelIdc = ptl.generalLevelIdc + } + } + + if ptl.generalProfileIdc > ctx.generalProfileIdc { + ctx.generalProfileIdc = ptl.generalProfileIdc + } + + ctx.generalProfileCompatibilityFlags &= ptl.generalProfileCompatibilityFlags + + ctx.generalConstraintIndicatorFlags 
&= ptl.generalConstraintIndicatorFlags +} + +func nal2rbsp(nal []byte) []byte { + // TODO chef: + // 1. 输出应该可由外部申请 + // 2. 替换性能 + // 3. 该函数应该放入avc中 + return bytes.Replace(nal, []byte{0x0, 0x0, 0x3}, []byte{0x0, 0x0}, -1) +} diff --git a/codec/mp4.go b/codec/mp4.go new file mode 100644 index 0000000..27d590f --- /dev/null +++ b/codec/mp4.go @@ -0,0 +1,2419 @@ +package codec + +import "github.com/Monibuca/engine/v4/util" + +type MP4 interface { +} + +type MP4Box interface { + Header() *MP4Header + Body() *MP4Body +} + +// +// ISO_IEC_14496-12_2012.pdf Page/17 +// +// The standard boxes all use compact types (32-bit) and most boxes will use the compact (32-bit) size +// standard header +type MP4BoxHeader struct { + BoxSize uint32 // 32 bits, is an integer that specifies the number of bytes in this box, including all its fields and contained boxes; if size is 1 then the actual size is in the field largesize; if size is 0, then this box is the last one in the file, and its contents extend to the end of the file (normally only used for a Media Data Box) + BoxType uint32 // 32 bits, identifies the box type; standard boxes use a compact type, which is normally four printable characters, to permit ease of identification, and is shown so in the boxes below. User extensions use an extended type; in this case, the type field is set to ‘uuid’. +} + +// +// ISO_IEC_14496-12_2012.pdf Page/17 +// +// Many objects also contain a version number and flags field +// full box header +type MP4FullBoxHeader struct { + Version uint8 // 8 bits, is an integer that specifies the version of this format of the box. + Flags [3]byte // 24 bits, is a map of flags +} + +// +// ISO_IEC_14496-12_2012.pdf Page/17 +// +// Typically only the Media Data Box(es) need the 64-bit size. 
+// lagesize box header +type MP4BoxLargeHeader struct { + LargeSize uint64 // 64 bits + UUIDs [16]uint8 // 128 bits +} + +// if(size == 1) +// { +// unsigned int(64) largesize; +// } +// else if(size == 0) +// { +// // box extends to end of file +// } +// if(boxtype == ‘uuid’) +// { +// unsigned int(8)[16] usertype = extended_type; +// } + +type MP4Header struct { + MP4BoxHeader +} + +type MP4Body struct{} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/18 +// +// Box Type: ftyp +// Container: File +// Mandatory: Yes +// Quantity: Exactly one (but see below) +// +// Each brand is a printable four-character code, registered with ISO, that identifies a precise specification +type FileTypeBox struct { + MP4BoxHeader // standard header + + MajorBrand uint32 // 32 bits, is a brand identifier + MinorVersion uint32 // 32 bits, is an informative integer for the minor version of the major brand + CompatibleBrands []uint32 // 32 bits array, is a list, to the end of the box, of brands +} + +func NewFileTypeBox() (box *FileTypeBox) { + box = new(FileTypeBox) + box.MP4BoxHeader.BoxType = util.ReadBE[uint32]([]byte("ftyp")) + return +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/30 +// +// Box Types: pdin +// Container: File +// Mandatory: No +// Quantity: Zero or One +type ProgressiveDownloadInformationBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Rate uint32 // 32 bits, is a download rate expressed in bytes/second + InitialDelay uint32 // 32 bits, is the suggested delay to use when playing the file, such that if download continues at the given rate, all data within the file will arrive in time for its use and playback should not need to stall. 
+} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/30 +// +// Box Type: moov +// Container: File +// Mandatory: Yes +// Quantity: Exactly one +// +// The metadata for a presentation is stored in the single Movie Box which occurs at the top-level of a file. +// Normally this box is close to the beginning or end of the file, though this is not required +type MovieBox struct { + MP4BoxHeader // standard header + + //Mhb MovieHeaderBox // the first child box(header box) +} + +func NewMovieBox() (box *MovieBox) { + box = new(MovieBox) + + box.MP4BoxHeader.BoxType = util.ReadBE[uint32]([]byte("moov")) + + return +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/31 +// +// Box Type: mvhd +// Container: Movie Box ('moov') +// Mandatory: Yes +// Quantity: Exactly one +// +// This box defines overall information which is media-independent, and relevant to the entire presentation +// considered as a whole +type MovieHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + CreationTime interface{} // uint64 or uint32, is an integer that declares the creation time of the presentation (in seconds since midnight, Jan. 1, 1904, in UTC time) + ModificationTime interface{} // uint64 or uint32, is an integer that declares the most recent time the presentation was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) + TimeScale uint32 // 32 bits, is an integer that specifies the time-scale for the entire presentation; this is the number of time units that pass in one second. For example, a time coordinate system that measures time in sixtieths of a second has a time scale of 60. + Duration interface{} // uint64 or uint32, is an integer that declares length of the presentation (in the indicated timescale). 
This property is derived from the presentation's tracks: the value of this field corresponds to the duration of the longest track in the presentation. If the duration cannot be determined then duration is set to all 1s. + Rate int32 // 32 bits, is a fixed point 16.16 number that indicates the preferred rate to play the presentation; 1.0 (0x00010000) is normal forward playback + Volume int16 // 16 bits, is a fixed point 8.8 number that indicates the preferred playback volume. 1.0 (0x0100) is full volume. + Reserved1 int16 // 16 bits, bit[16] + Reserved2 [2]uint32 // 32 bits array, const unsigned int(32)[2] + Matrix [9]int32 // 32 bits array, provides a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex values(0,0,0x40000000). + PreDefined [6]int32 // 32 bits array, bit(32)[6] + NextTrackID uint32 // 32 bits, is a non-zero integer that indicates a value to use for the track ID of the next track to be added to this presentation. Zero is not a valid track ID value. The value of next_track_ID shall be larger than the largest track-ID in use. If this value is equal to all 1s (32-bit maxint), and a new media track is to be added, then a search must be made in the file for an unused track identifier. +} + +// CreationTime : 创建时间(相对于UTC时间1904-01-01零点的秒数) +// ModificationTime : 修改时间 +// TimeScale : 文件媒体在1秒时间内的刻度值,可以理解为1秒长度的时间单元数 +// Duration : 该track的时间长度,用duration和time scale值可以计算track时长,比如audio track的time scale = 8000, duration = 560128,时长为70.016,video track的time scale = 600, duration = 42000,时长为70 +// Rate : 推荐播放速率,高16位和低16位分别为小数点整数部分和小数部分,即[16.16] 格式,该值为1.0(0x00010000)表示正常前向播放 +// Volume : 与rate类似,[8.8] 格式,1.0(0x0100)表示最大音量 +// Matrix : 视频变换矩阵 { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 } +// NextTrackID : 下一个track使用的id号 + +// PreDefined: +// Preview Time : 开始预览此movie的时间 +// Preview Duration : 以movie的time scale为单位,预览的duration +// Poster Time : The time value of the time of the movie poster. 
+// Selection Time : The time value for the start time of the current selection. +// Selection Duration : The duration of the current selection in movie time scale units. +// Current Time : 当前时间 + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/32 +// +// Box Type: trak +// Container: Movie Box ('moov') +// Mandatory: Yes +// Quantity: One or more +type TrackBox struct { + MP4BoxHeader // standard header + + Thb TrackHeaderBox // the first child box(header box) +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/32 +// +// Box Type: tkhd +// Container: Track Box ('trak') +// Mandatory: Yes +// Quantity: Exactly one +type TrackHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + CreationTime interface{} // uint64 or uint32, + ModificationTime interface{} // uint64 or uint32, + TrackID uint32 // 32 bits, is an integer that uniquely identifies this track over the entire life-time of this presentation. Track IDs are never re-used and cannot be zero + Reserved1 uint32 // 32 bits, + Duration interface{} // uint64 or uint32, + Reserved2 [2]uint32 // 32 bits array, + Layer int16 // 16 bits, specifies the front-to-back ordering of video tracks; tracks with lower numbers are closer to the viewer. 0 is the normal value, and -1 would be in front of track 0, and so on + AlternateGroup int16 // 16 bits, + Volume int16 // 16 bits, if track_is_audio 0x0100 else 0 + Reserved3 uint16 // 16 bits, + Matrix [9]int32 // 32 bits array, provides a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex (0,0,0x40000000). 
{ 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 } + Width uint32 // 32 bits, + Height uint32 // 32 bits, +} + +// CreationTime : 创建时间 +// ModificationTime : 修改时间 +// TrackID : id号,不能重复且不能为0 +// Reserved1 : 保留位 +// Duration : track的时间长度 +// Reserved2 : 保留位 +// Layer : 视频层,默认为0,值小的在上层 +// AlternateGroup : track分组信息,默认为0表示该track未与其他track有群组关系 +// Volume : [8.8] 格式,如果为音频track,1.0(0x0100)表示最大音量;否则为0 +// Reserved3 : 保留位 +// Matrix : 视频变换矩阵 { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 } +// Width : 宽 +// Height : 高,均为 [16.16] 格式值,与sample描述中的实际画面大小比值,用于播放时的展示宽高 + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/34 +// +// Box Type: tref +// Container: Track Box (‘trak’) +// Mandatory: No +// Quantity: Zero or one +type TrackReferenceBox struct { + MP4BoxHeader // standard header +} + +type TrackReferenceTypeBox struct { + MP4BoxHeader // standard header + + TrackIDs []uint32 // 32 bits, is an integer that provides a reference from the containing track to another track in the presentation. 
track_IDs are never re-used and cannot be equal to zero +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/35 +// +// Box Type: trgr +// Container: Track Box (‘trak’) +// Mandatory: No +// Quantity: Zero or one +type TrackGroupBox struct { + MP4BoxHeader // standard header +} + +type TrackGroupTypeBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + TrackGroupID uint32 // 32 bits, indicates the grouping type and shall be set to one of the following values, or a value registered, or a value from a derived specification or registration +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/54 +// +// Box Type: edts +// Container: Track Box (‘trak’) +// Mandatory: No +// Quantity: Zero or one +type EditBox struct { + MP4BoxHeader // standard header +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/54 +// +// Box Type: elst +// Container: Edit Box (‘edts’) +// Mandatory: No +// Quantity: Zero or one +type EditListBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + Tables []EditListTable // Edit List Table +} + +type EditListTable struct { + SegmentDuration interface{} // uint64 or uint32, is an integer that specifies the duration of this edit segment in units of the timescale in the Movie Header Box + MediaTime interface{} // uint64 or uint32, is an integer containing the starting time within the media of this edit segment (in media time scale units, in composition time). If this field is set to –1, it is an empty edit. The last edit in a track shall never be an empty edit. 
Any difference between the duration in the Movie Header Box, and the track’s duration is expressed as an implicit empty edit at the end. + MediaRateInteger int16 // 16 bits, + MediaRateFraction int16 // 16 bits, +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/36 +// +// Box Type: mdia +// Container: Track Box ('trak') +// Mandatory: Yes +// Quantity: Exactly one +// +// The media declaration container contains all the objects that declare information about the media data within a track. +type MediaBox struct { + MP4BoxHeader // standard header + + Mhb MediaHeaderBox // the first child box(header box) +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/36 +// +// Box Type: mdhd +// Container: Media Box ('mdia') +// Mandatory: Yes +// Quantity: Exactly one +// +// The media header declares overall information that is media-independent, and relevant to characteristics of the media in a track. +type MediaHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + CreationTime interface{} // int64 or int32, is an integer that declares the creation time of the presentation (in seconds since midnight, Jan. 1, 1904, in UTC time) + ModificationTime interface{} // int64 or int32, is an integer that declares the most recent time the presentation was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) + TimeScale uint32 // 32 bits, is an integer that specifies the time-scale for the entire presentation; this is the number of time units that pass in one second. For example, a time coordinate system that measures time in sixtieths of a second has a time scale of 60. + Duration interface{} // int64 or int32, is an integer that declares length of the presentation (in the indicated timescale). 
This property is derived from the presentation's tracks: the value of this field corresponds to the duration of the longest track in the presentation. If the duration cannot be determined then duration is set to all 1s. + Pad byte // 1 bit, + Language [2]byte // 15 bits, unsigned int(5)[3], declares the language code for this media. See ISO 639-2/T for the set of three charactercodes. Each character is packed as the difference between its ASCII value and 0x60. Since the code is confined to being three lower-case letters, these values are strictly positive + PreDefined uint16 // 16 bits, +} + +// Language : 媒体的语言码 +// PreDefined : 媒体的回放质量???怎样生成此质量,什么是参照点 + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/37 +// +// Box Type: hdlr +// Container: Media Box ('mdia') or Meta Box ('meta') +// Mandatory: Yes +// Quantity: Exactly one +type HandlerBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + PreDefined uint32 // 32 bits, + HandlerType uint32 // 32 bits, when present in a meta box, contains an appropriate value to indicate the format of the meta box contents. The value 'null' can be used in the primary meta box to indicate that it is merely being used to hold resources + Reserved [3]uint32 // 32 bits, + Name string // string, is a null-terminated string in UTF-8 characters which gives a human-readable name for the track type (for debugging and inspection purposes). 
+} + +// handler_type when present in a media box, is an integer containing one of the following values, or a value from a derived specification: +// 'vide' Video track +// 'soun' Audio track +// 'hint' Hint track +// 'meta' Timed Metadata track +// 'auxv' Auxiliary Video track + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/38 +// +// Box Type: minf +// Container: Media Box ('mdia') +// Mandatory: Yes +// Quantity: Exactly one +// +// This box contains all the objects that declare characteristic information of the media in the track. +type MediaInformationBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/38 +// +// Box Types: vmhd, smhd, hmhd, nmhd +// Container: Media Information Box (‘minf’) +// Mandatory: Yes +// Quantity: Exactly one specific media header shall be present +// +// There is a different media information header for each track type (corresponding to the media handler-type); +// the matching header shall be present, which may be one of those defined here, or one defined in a derived specification +type MediaInformationHeaderBoxes struct { + // VideoMediaHeaderBox + // +} + +// Box Types: vmhd +// The video media header contains general presentation information, independent of the coding, for video media. +// Note that the flags field has the value 1. 
+type VideoMediaHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + GraphicsMode uint16 // 16 bits, specifies a composition mode for this video track, from the following enumerated set, which may be extended by derived specifications: copy = 0 copy over the existing image + Opcolor [3]uint16 // 16 bits array, is a set of 3 colour values (red, green, blue) available for use by graphics modes +} + +// Box Types: smhd +// The sound media header contains general presentation information, independent of the coding, for audio media. +// This header is used for all tracks containing audio. +type SoundMediaHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Balance int16 // 16 bits, is a fixed-point 8.8 number that places mono audio tracks in a stereo space; 0 is centre (the normal value); full left is -1.0 and full right is 1.0 + Reserved uint16 // 16 bits, +} + +// Box Types: hmhd +// The hint media header contains general information, independent of the protocol, for hint tracks. +// (A PDU is a Protocol Data Unit.) +type HintMediaHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + MaxPDUSize uint16 // 16 bits, gives the size in bytes of the largest PDU in this (hint) stream + AvgPDUSize uint16 // 16 bits, gives the average size of a PDU over the entire presentation + MaxBitrate uint32 // 32 bits, gives the maximum rate in bits/second over any window of one second + AvgBitrate uint32 // 32 bits, gives the average rate in bits/second over the entire presentation + Reserved uint32 // 32 bits, +} + +// Box Types: nmhd +// Streams other than visual and audio (e.g., timed metadata streams) may use a null Media Header Box, as defined here. 
+type NullMediaHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/56 +// +// Box Type: dinf +// Container: Media Information Box ('minf') or Meta Box ('meta') +// Mandatory: Yes (required within 'minf' box) and No (optional within 'meta' box) +// Quantity: Exactly one +// +// The data information box contains objects that declare the location of the media information in a track +type DataInformationBox struct { + MP4BoxHeader // standard header +} + +// ------------------------------------------------------------------------------------------------------- +// +// ISO_IEC_14496-12_2012.pdf Page/56 +// +// Box Types: url, urn, dref +// Container: Data Information Box ('dinf') +// Mandatory: Yes +// Quantity: Exactly one +type DataReferenceBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + DataEntry interface{} // DataEntryUrlBox or DataEntryUrnBox. 
+} + +// aligned(8) class DataReferenceBox +// extends FullBox('dref', version = 0, 0) { +// unsigned int(32) entry_count; +// for (i=1; i <= entry_count; i++) { +// DataEntryBox(entry_version, entry_flags) data_entry; +// } +// } + +type DataEntryUrlBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Location string // string, +} + +type DataEntryUrnBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Name string // string, + Location string // string, +} + +// ------------------------------------------------------------------------------------------------------- +// +// ISO_IEC_14496-12_2012.pdf Page/40 +// +// Box Type: stbl +// Container: Media Information Box ('minf') +// Mandatory: Yes +// Quantity: Exactly one +type SampleTableBox struct { + MP4BoxHeader // standard header +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/40 +// +// Box Types: stsd +// Container: Sample Table Box ('stbl') +// Mandatory: Yes +// Quantity: Exactly one +type SampleDescriptionBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table +} + +// for (i = 1 ; i <= entry_count ; i++) { +// switch (handler_type){ +// case ‘soun’: // for audio tracks +// AudioSampleEntry(); +// break; +// case ‘vide’: // for video tracks +// VisualSampleEntry(); +// break; +// case ‘hint’: // Hint track +// HintSampleEntry(); +// break; +// case ‘meta’: // Metadata track +// MetadataSampleEntry(); +// break; +// } +// } + +// box header和version字段后会有一个entry count字段,根据entry的个数,每个entry会有type信息,如“vide”、“soun”等, +// 根据type不同sample description会提供不同的信息,例如对于video track,会有“VisualSampleEntry”类型信息, +// 对于audio track会有“AudioSampleEntry”类型信息. 
+// 视频的编码类型、宽高、长度,音频的声道、采样等信息都会出现在这个box中 + +// is the appropriate sample entry +type SampleEntry struct { + Reserved [6]uint8 // 48 bits, + DataReferenceIndex uint16 // 16 bits, is an integer that contains the index of the data reference to use to retrieve data associated with samples that use this sample description. Data references are stored in Data Reference Boxes. The index ranges from 1 to the number of data references. +} + +type HintSampleEntry struct { + Data []uint8 // 8 bits array, +} + +// Box Types: btrt +type BitRateBox struct { + MP4BoxHeader // standard header + + BufferSizeDB uint32 // 32 bits, gives the size of the decoding buffer for the elementary stream in bytes. + MaxBitrate uint32 // 32 bits, gives the maximum rate in bits/second over any window of one second. + AvgBitrate uint32 // 32 bits, gives the average rate in bits/second over the entire presentation. +} + +type MetaDataSampleEntry struct{} + +type XMLMetaDataSampleEntry struct { + ContentEncoding string // optional, is a null-terminated string in UTF-8 characters, and provides a MIME type which identifies the content encoding of the timed metadata + NameSpace string // string, gives the namespace of the schema for the timed XML metadata + SchemaLocation string // optional, optionally provides an URL to find the schema corresponding to the namespace. This is needed for decoding of the timed metadata by XML aware encoding mechanisms such as BiM. + Brb BitRateBox // optional +} + +type TextMetaDataSampleEntry struct { + ContentEncoding string // optional, is a null-terminated string in UTF-8 characters, and provides a MIME type which identifies the content encoding of the timed metadata + MimeFormat string // string, provides a MIME type which identifies the content format of the timed metadata. Examples for this field are ‘text/html’ and ‘text/plain’. 
+ Brb BitRateBox // optional +} + +type URIBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + TheURI string // string, is a URI formatted according to the rules in 6.2.4 +} + +type URIInitBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + UriInitializationData []uint8 // 8 bits array, is opaque data whose form is defined in the documentation of the URI form. +} + +type URIMetaSampleEntry struct { + TheLabel URIBox + Init URIInitBox // optional + //Mpeg4 MPEG4BitRateBox // optional +} + +// Box Types: pasp +type PixelAspectRatioBox struct { + MP4BoxHeader // standard header + + HSpacing uint32 // 32 bits, define the relative width and height of a pixel; + VSpacing uint32 // 32 bits, define the relative width and height of a pixel; +} + +// Box Types: clap +// Visual Sequences +type CleanApertureBox struct { + MP4BoxHeader // standard header + + CleanApertureWidthN uint32 // 32 bits, a fractional number which defines the exact clean aperture width, in counted pixels, of the video image + CleanApertureWidthD uint32 // 32 bits, a fractional number which defines the exact clean aperture width, in counted pixels, of the video image + CleanApertureHeightN uint32 // 32 bits, a fractional number which defines the exact clean aperture height, in counted pixels, of the video image + CleanApertureHeightD uint32 // 32 bits, a fractional number which defines the exact clean aperture height, in counted pixels, of the video image + HorizOffN uint32 // 32 bits, a fractional number which defines the horizontal offset of clean aperture centre minus (width-1)/2. Typically 0 + HorizOffD uint32 // 32 bits, a fractional number which defines the horizontal offset of clean aperture centre minus (width-1)/2. Typically 0 + VertOffN uint32 // 32 bits, a fractional number which defines the vertical offset of clean aperture centre minus (height-1)/2. 
Typically 0 + VertOffD uint32 // 32 bits, a fractional number which defines the vertical offset of clean aperture centre minus (height-1)/2. Typically 0 +} + +// Box Types: colr +type ColourInformationBox struct { + MP4BoxHeader // standard header + + ColourType uint32 // 32 bits, an indication of the type of colour information supplied. For colour_type ‘nclx’: these fields are exactly the four bytes defined for PTM_COLOR_INFO( ) in A.7.2 of ISO/IEC 29199-2 but note that the full range flag is here in a different bit position +} + +// if (colour_type == ‘nclx’) /* on-screen colours */ +// { +// unsigned int(16) colour_primaries; +// unsigned int(16) transfer_characteristics; +// unsigned int(16) matrix_coefficients; +// unsigned int(1) full_range_flag; +// unsigned int(7) reserved = 0; +// } +// else if (colour_type == ‘rICC’) +// { +// ICC_profile; // restricted ICC profile +// } +// else if (colour_type == ‘prof’) +// { +// ICC_profile; // unrestricted ICC profile +// } + +// ICC_profile : an ICC profile as defined in ISO 15076-1 or ICC.1:2010 is supplied. + +type VisualSampleEntry struct { + PreDefined1 uint16 // 16 bits, + Reserved1 uint16 // 16 bits, + PreDefined2 [3]uint32 // 96 bits, + Width uint16 // 16 bits, are the maximum visual width and height of the stream described by this sample description, in pixels + Height uint16 // 16 bits, are the maximum visual width and height of the stream described by this sample description, in pixels + HorizreSolution uint32 // 32 bits, fields give the resolution of the image in pixels-per-inch, as a fixed 16.16 number + VertreSolution uint32 // 32 bits, fields give the resolution of the image in pixels-per-inch, as a fixed 16.16 number + Reserved3 uint32 // 32 bits, + FrameCount uint16 // 16 bits, indicates how many frames of compressed video are stored in each sample. 
The default is 1, for one frame per sample; it may be more than 1 for multiple frames per sample + CompressorName [32]string // 32 string, is a name, for informative purposes. It is formatted in a fixed 32-byte field, with the first byte set to the number of bytes to be displayed, followed by that number of bytes of displayable data, and then padding to complete 32 bytes total (including the size byte). The field may be set to 0. + Depth uint16 // 16 bits, takes one of the following values 0x0018 – images are in colour with no alpha + PreDefined3 int16 // 16 bits, + Cab CleanApertureBox // optional, other boxes from derived specifications + Parb PixelAspectRatioBox // optional, other boxes from derived specifications +} + +// Audio Sequences +type AudioSampleEntry struct { + Reserved1 [2]uint32 // 32 bits array, + ChannelCount uint16 // 16 bits, is the number of channels such as 1 (mono) or 2 (stereo) + SampleSize uint16 // 16 bits, is in bits, and takes the default value of 16 + PreDefined uint16 // 16 bits, + Reserved2 uint16 // 16 bits, + SampleRate uint32 // 32 bits, is the sampling rate expressed as a 16.16 fixed-point number (hi.lo) +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/48 +// +// Box Type: stts +// Container: Sample Table Box ('stbl') +// Mandatory: Yes +// Quantity: Exactly one +type TimeToSampleBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + Table []TimeToSampleTable // Time To Sample Table , EntryCount elements +} + +type TimeToSampleTable struct { + SampleCount []uint32 // 32 bits, is an integer that counts the number of consecutive samples that have the given duration + SampleDelta []uint32 // 32 bits, is an integer that gives the delta of these samples in the time-scale of the media. 
+} + +// “stts”存储了sample的duration,描述了sample时序的映射方法,我们通过它可以找到任何时间的sample. +// “stts”可以包含一个压缩的表来映射时间和sample序号,用其他的表来提供每个sample的长度和指针. +// 表中每个条目提供了在同一个时间偏移量里面连续的sample序号,以及samples的偏移量. +// 递增这些偏移量,就可以建立一个完整的time to sample表 + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/49 +// +// Box Type: ctts +// Container: Sample Table Box (‘stbl’) +// Mandatory: No +// Quantity: Zero or one +type CompositionOffsetBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + Table []CompositionOffsetTable // Composition Offset Table, EntryCount elements. +} + +type CompositionOffsetTable struct { + SampleCount uint32 // 32 bits, is an integer that counts the number of consecutive samples that have the given offset. + SampleOffset interface{} // int32 or uint32, is an integer that gives the offset between CT and DT, such that CT(n) = DT(n) + CTTS(n). 
+} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/50 +// +// Box Type: cslg +// Container: Sample Table Box (‘stbl’) +// Mandatory: No +// Quantity: Zero or one +type CompositionToDecodeBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + CompositionToDTSShift int32 // 32 bits, signed, if this value is added to the composition times (as calculated by the CTS offsets from the DTS), then for all samples, their CTS is guaranteed to be greater than or equal to their DTS, and the buffer model implied by the indicated profile/level will be honoured; if leastDecodeToDisplayDelta is positive or zero, this field can be 0; otherwise it should be at least (- leastDecodeToDisplayDelta) + LeastDecodeToDisplayDelta int32 // 32 bits, signed, the smallest composition offset in the CompositionTimeToSample box in this track + GreatestDecodeToDisplayDelta int32 // 32 bits, signed, the largest composition offset in the CompositionTimeToSample box in this track + CompositionStartTime int32 // 32 bits, signed, the smallest computed composition time (CTS) for any sample in the media of this track + CompositionEndTime int32 // 32 bits, signed, the composition time plus the composition duration, of the sample with the largest computed composition time (CTS) in the media of this track; if this field takes the value 0, the composition end time is unknown. 
+} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/58 +// +// Box Type: stsc +// Container: Sample Table Box ('stbl') +// Mandatory: Yes +// Quantity: Exactly one +type SampleToChunkBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + Table []SampleToChunkTable // Sample To Chunk Table, entry count elements. +} + +type SampleToChunkTable struct { + FirstChunk []uint32 // 32 bits, is an integer that gives the index of the first chunk in this run of chunks that share the same samples-per-chunk and sample-description-index; the index of the first chunk in a track has the value 1 (the first_chunk field in the first record of this box has the value 1, identifying that the first sample maps to the first chunk). + SamplesPerChunk []uint32 // 32 bits, is an integer that gives the number of samples in each of these chunks + SampleDescriptionIndex []uint32 // 32 bits, is an integer that gives the index of the sample entry that describes the samples in this chunk. The index ranges from 1 to the number of sample entries in the Sample Description Box +} + +// 用chunk组织sample可以方便优化数据获取,一个chunk包含一个或多个sample. +// “stsc”中用一个表描述了sample与chunk的映射关系,查看这张表就可以找到包含指定sample的chunk,从而找到这个sample + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/57 +// +// Box Type: stsz, stz2 +// Container: Sample Table Box (‘stbl’) +// Mandatory: Yes +// Quantity: Exactly one variant must be present +type SampleSizeBoxes struct{} + +// Box Type: stsz +type SampleSizeBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + SampleSize uint32 // 32 bits, is integer specifying the default sample size. 
If all the samples are the same size, this field contains that size value. If this field is set to 0, then the samples have different sizes, and those sizes are stored in the sample size table. If this field is not 0, it specifies the constant sample size, and no array follows. + SampleCount uint32 // 32 bits, is an integer that gives the number of samples in the track; if sample-size is 0, then it is also the number of entries in the following table. + EntrySize interface{} // 32 bits array, SampleCount elements, is an integer specifying the size of a sample, indexed by its number. +} + +// if (sample_size == 0) { +// for (i = 1; i <= sample_count; i++) { +// unsigned int(32) entry_size; +// } +// } + +// Box Type: stz2 +type CompactSampleSizeBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Reserved [3]uint8 // 24 bits, + FieldSize uint8 // 8 bits, is an integer specifying the size in bits of the entries in the following table; it shall take the value 4, 8 or 16. If the value 4 is used, then each byte contains two values: entry[i]<<4 + entry[i+1]; if the sizes do not fill an integral number of bytes, the last byte is padded with zeros. + SampleCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + EntrySize interface{} // +} + +// for (i = 1; i <= sample_count; i++) { +// unsigned int(field_size) entry_size; +// } + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/59 +// +// Box Type: stco, co64 +// Container: Sample Table Box (‘stbl’) +// Mandatory: Yes +// Quantity: Exactly one variant must be present +type ChunkOffsetBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + ChunkOffset []uint32 // 32 bits array, entry count elements. 
+} + +// “stco”定义了每个chunk在媒体流中的位置。位置有两种可能,32位的和64位的,后者对非常大的电影很有用。 +// 在一个表中只会有一种可能,这个位置是在整个文件中的,而不是在任何box中的,这样做就可以直接在文件中找到媒体数据, +// 而不用解释box。需要注意的是一旦前面的box有了任何改变,这张表都要重新建立,因为位置信息已经改变了 + +// Box Type: co64 +type ChunkLargeOffsetBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table + ChunkOffset []uint64 // 64 bits array, entry count elements. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/51 +// +// Box Type: stss +// Container: Sample Table Box (‘stbl’) +// Mandatory: No +// Quantity: Zero or one +// +// This box provides a compact marking of the sync samples within the stream. The table is arranged in strictly increasing order of sample number. +// If the sync sample box is not present, every sample is a sync sample. +type SyncSampleBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table. If entry_count is zero, there are no sync samples within the stream and the following table is empty + SampleNumber []uint32 // 32 bits array, entry count elements. gives the numbers of the samples that are sync samples in the stream. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/52 +// +// Box Type: stsh +// Container: Sample Table Box (‘stbl’) +// Mandatory: No +// Quantity: Zero or one +type ShadowSyncSampleBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table. + Table []ShadowSyncSampleTable // Shadow Sync Sample Table, entry count elements. 
+} + +type ShadowSyncSampleTable struct { + ShadowedSampleNumber uint32 // 32 bits, gives the number of a sample for which there is an alternative sync sample. + SyncSampleNumber uint32 // 32 bits, gives the number of the alternative sync sample. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/60 +// +// Box Type: padb +// Container: Sample Table (‘stbl’) +// Mandatory: No +// Quantity: Zero or one +// +// In some streams the media samples do not occupy all bits of the bytes given by the sample size, and are +// padded at the end to a byte boundary. In some cases, it is necessary to record externally the number of +// padding bits used. This table supplies that information. +type PaddingBitsBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + SampleCount uint32 // 32 bits, counts the number of samples in the track; it should match the count in other tables + Table []PaddingBitsTable // Padding Bits Table, (sample count + 1) / 2 elements. +} + +type PaddingBitsTable struct { + Reserved1 byte // 1 bit, + Pad1 byte // 3 bits, a value from 0 to 7, indicating the number of bits at the end of sample (i*2)+1. + Reserved2 byte // 1 bit, + Pad2 byte // 3 bits, a value from 0 to 7, indicating the number of bits at the end of sample (i*2)+2. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/46 +// +// Box Type: stdp +// Container: Sample Table Box (‘stbl’). +// Mandatory: No. +// Quantity: Zero or one. +// +// This box contains the degradation priority of each sample. The values are stored in the table, one for each +// sample. The size of the table, sample_count is taken from the sample_count in the Sample Size Box +// ('stsz'). Specifications derived from this define the exact meaning and acceptable range of the priority field. 
+type DegradationPriorityBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Priority []uint16 // 16 bits array, sample count elements, is integer specifying the degradation priority for each sample. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/53 +// +// Box Types: sdtp +// Container: Sample Table Box (‘stbl’) +// Mandatory: No +// Quantity: Zero or one +type IndependentAndDisposableSamplesBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Table []IndependentAndDisposableSamplesTable // Independent And Disposable Samples Table, sample count elements +} + +type IndependentAndDisposableSamplesTable struct { + IsLeading byte // 2 bits, + SampleDependsOn byte // 2 bits, + SampleIsDependedOn byte // 2 bits, + SampleHasTedundancy byte // 2 bits, +} + +// is_leading takes one of the following four values: +// 0: the leading nature of this sample is unknown; +// 1: this sample is a leading sample that has a dependency before the referenced I-picture (and is +// therefore not decodable); +// 2: this sample is not a leading sample; +// 3: this sample is a leading sample that has no dependency before the referenced I-picture (and is +// therefore decodable); +// sample_depends_on takes one of the following four values: +// 0: the dependency of this sample is unknown; +// 1: this sample does depend on others (not an I picture); +// 2: this sample does not depend on others (I picture); +// 3: reserved +// sample_is_depended_on takes one of the following four values: +// 0: the dependency of other samples on this sample is unknown; +// 1: other samples may depend on this one (not disposable); +// 2: no other sample depends on this one (disposable); +// 3: reserved +// sample_has_redundancy takes one of the following four values: +// 0: it is unknown whether there is redundant coding in this 
sample; +// 1: there is redundant coding in this sample; +// 2: there is no redundant coding in this sample; +// 3: reserved + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/75 +// +// Box Type: sbgp +// Container: Sample Table Box (‘stbl’) or Track Fragment Box (‘traf’) +// Mandatory: No +// Quantity: Zero or more. +type SampleToGroupBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + GroupingType uint32 // 32 bits, is an integer that identifies the type (i.e. criterion used to form the sample groups) of the sample grouping and links it to its sample group description table with the same value for grouping type. At most one occurrence of this box with the same value for grouping_type (and, if used, grouping_type_parameter) shall exist for a track. + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table. + Table []SampleToGroupTable // Sample To Group Table, entry count elements. +} + +type SampleToGroupTable struct { + SampleCount uint32 // 32 bits, is an integer that gives the number of consecutive samples with the same sample group descriptor. If the sum of the sample count in this box is less than the total sample count, then the reader should effectively extend it with an entry that associates the remaining samples with no group. It is an error for the total in this box to be greater than the sample_count documented elsewhere, and the reader behaviour would then be undefined. + GroupDescriptionIndex uint32 // 32 bits, is an integer that gives the index of the sample group entry which describes the samples in this group. The index ranges from 1 to the number of sample group entries in the SampleGroupDescription Box, or takes the value 0 to indicate that this sample is a member of no group of this type. 
+} + +// unsigned int(32) grouping_type; +// if (version == 1) { +// unsigned int(32) grouping_type_parameter; +// } +// unsigned int(32) entry_count; + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/76 +// +// Box Type: sgpd +// Container: Sample Table Box (‘stbl’) or Track Fragment Box (‘traf’) +// Mandatory: No +// Quantity: Zero or more, with one for each Sample to Group Box. +type SampleGroupDescriptionBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + GroupingType uint32 // 32 bits, is an integer that identifies the SampleToGroup box that is associated with this sample group description. + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table. +} + +// default_length : indicates the length of every group entry (if the length is constant), or zero (0) if it is variable +// description_length : indicates the length of an individual group entry, in the case it varies from entry to entry and default_length is therefore 0 + +// if (version==1) { unsigned int(32) default_length; } + +// for (i = 1 ; i <= entry_count ; i++){ +// if (version==1) { +// if (default_length==0) { +// unsigned int(32) description_length; +// } +// } +// switch (handler_type){ +// case ‘vide’: // for video tracks +// VisualSampleGroupEntry (grouping_type); +// break; +// case ‘soun’: // for audio tracks +// AudioSampleGroupEntry(grouping_type); +// break; +// case ‘hint’: // for hint tracks +// HintSampleGroupEntry(grouping_type); +// break; +// } +// } + +type SampleGroupDescriptionEntry struct{} + +type VisualSampleGroupEntry struct{} + +type AudioSampleGroupEntry struct{} + +type HintSampleGroupEntry struct{} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/61 +// +// Box Type: subs +// Container: Sample 
Table Box (‘stbl’) or Track Fragment Box (‘traf’) +// Mandatory: No +// Quantity: Zero or one +type SubSampleInformationBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table. + Table []SubSampleInformationTable // Sub-Sample Information Table, entry count elements. + +} + +type SubSampleInformationTable struct { + SampleDelta uint32 // 32 bits, is an integer that specifies the sample number of the sample having sub-sample structure. It is coded as the difference between the desired sample number, and the sample number indicated in the previous entry. If the current entry is the first entry, the value indicates the sample number of the first sample having sub-sample information, that is, the value is the difference between the sample number and zero (0). + SubsampleCount uint16 // 16 bits, is an integer that specifies the number of sub-sample for the current sample. If there is no sub-sample structure, then this field takes the value 0. + CountTable []SubSampleCountTable // Sub-Sample Information Table1, subsample count elements. +} + +type SubSampleCountTable struct { + SubsampleSize interface{} // uint16 or uint32, is an integer that specifies the size, in bytes, of the current sub-sample + SubsamplePriority uint8 // 8 bits, is an integer specifying the degradation priority for each sub-sample. Higher values of subsample_priority, indicate sub-samples which are important to, and have a greater impact on, the decoded quality. + DiscardAble uint8 // 8 bits, equal to 0 means that the sub-sample is required to decode the current sample, while equal to 1 means the sub-sample is not required to decode the current sample but may be used for enhancements, e.g., the sub-sample consists of supplemental enhancement information (SEI) messages. 
+ Reserved uint32 // 32 bits, +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/62 +// +// Box Type: saiz +// Container: Sample Table Box (‘stbl’) or Track Fragment Box ('traf') +// Mandatory: No +// Quantity: Zero or More +type SampleAuxiliaryInformationSizesBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + Table interface{} // SampleAuxiliaryInformationSizesTable1 or SampleAuxiliaryInformationSizesTable2. +} + +type SampleAuxiliaryInformationSizesTable1 struct { + AuxInfoType uint32 // 32 bits, + AuxInfoTypeParameter uint32 // 32 bits, + DefaultSampleInfoSize uint8 // 8 bits, is an integer specifying the sample auxiliary information size for the case where all the indicated samples have the same sample auxiliary information size. If the size varies then this field shall be zero. + SampleCount uint32 // 32 bits, +} + +type SampleAuxiliaryInformationSizesTable2 struct { + DefaultSampleInfoSize uint8 // 8 bits, is an integer specifying the sample auxiliary information size for the case where all the indicated samples have the same sample auxiliary information size. If the size varies then this field shall be zero. 
+ SampleCount uint32 // 32 bits, +} + +// if (flags & 1) { +// unsigned int(32) aux_info_type; +// unsigned int(32) aux_info_type_parameter; +// } +// unsigned int(8) default_sample_info_size; +// unsigned int(32) sample_count; +// if (default_sample_info_size == 0) { +// unsigned int(8) sample_info_size[ sample_count ]; +// } + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/63 +// +// Box Type: saio +// Container: Sample Table Box (‘stbl’) or Track Fragment Box ('traf') +// Mandatory: No +// Quantity: Zero or More +type SampleAuxiliaryInformationOffsetsBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + //EntryCount uint32 // 32 bits, is an integer that gives the number of entries in the following table. +} + +type AuxInfo struct { + AuxInfoType uint32 // 32 bits, + AuxInfoTypeParameter uint32 // 32 bits, +} + +// if (flags & 1) { +// unsigned int(32) aux_info_type; +// unsigned int(32) aux_info_type_parameter; +// } +// unsigned int(32) entry_count; +// if ( version == 0 ) { +// unsigned int(32) offset[ entry_count ]; +// } +// else { +// unsigned int(64) offset[ entry_count ]; +// } + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/78 +// +// Box Type: udta +// Container: Movie Box (‘moov’) or Track Box (‘trak’) +// Mandatory: No +// Quantity: Zero or one +type UserDataBox struct { + MP4BoxHeader // standard header +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/64 +// +// Box Type: mvex +// Container: Movie Box (‘moov’) +// Mandatory: No +// Quantity: Zero or one +type MovieExtendsBox struct { + MP4BoxHeader // standard header +} + +// 
------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/65 +// +// Box Type: mehd +// Container: Movie Extends Box(‘mvex’) +// Mandatory: No +// Quantity: Zero or one +// +// The Movie Extends Header is optional, and provides the overall duration, including fragments, of a fragmented +// movie. If this box is not present, the overall duration must be computed by examining each fragment. +type MovieExtendsHeaderBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header +} + +// if (version==1) { +// unsigned int(64) fragment_duration; +// } else { // version==0 +// unsigned int(32) fragment_duration; +// } + +// fragment_duration : is an integer that declares length of the presentation of the whole movie including +// fragments (in the timescale indicated in the Movie Header Box). The value of this field corresponds to +// the duration of the longest track, including movie fragments. If an MP4 file is created in real-time, such +// as used in live streaming, it is not likely that the fragment_duration is known in advance and this +// box may be omitted. + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/65 +// +// Box Type: trex +// Container: Movie Extends Box (‘mvex’) +// Mandatory: Yes +// Quantity: Exactly one for each track in the Movie Box +type TrackExtendsBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + TrackID uint32 // 32 bits, identifies the track; this shall be the track ID of a track in the Movie Box + DefaultSampleDescriptionIndex uint32 // 32 bits, + DefaultSampleDuration uint32 // 32 bits, + DefaultSampleSize uint32 // 32 bits, + DefaultSampleFlags uint32 // 32 bits, +} + +// default_ : these fields set up defaults used in the track fragments. 
+
+// -------------------------------------------------------------------------------------------------------
+
+// LevelAssignmentBox is the 'leva' box: a level count followed by one LevelAssignmentTable
+// entry per level.
+//
+// ISO_IEC_14496-12_2012.pdf Page/72
+//
+// Box Type: leva
+// Container: Movie Extends Box (‘mvex’)
+// Mandatory: No
+// Quantity: Zero or one
+type LevelAssignmentBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	LevelCount uint8                  // 8 bits, specifies the number of levels each fraction is grouped into. level_count shall be greater than or equal to 2.
+	Table      []LevelAssignmentTable // Level Assignment Table, level count elements.
+}
+
+// LevelAssignmentTable is one loop entry of the 'leva' box. Only the fixed leading fields are
+// declared; the assignment_type-dependent fields listed in the syntax below (grouping_type,
+// grouping_type_parameter, sub_track_id) have no field in this struct.
+type LevelAssignmentTable struct {
+	TrackId        uint32 // 32 bits, for loop entry j specifies the track identifier of the track assigned to level j.
+	PaddingFlag    byte   // 1 bit, equal to 1 indicates that a conforming fraction can be formed by concatenating any positive integer number of levels within a fraction and padding the last Media Data box by zero bytes up to the full size that is indicated in the header of the last Media Data box. The semantics of padding_flag equal to 0 are that this is not assured.
+	AssignmentType byte   // 7 bits,
+}
+
+// Per-level syntax:
+//
+// for (j=1; j <= level_count; j++) {
+//     unsigned int(32) track_id;
+//     unsigned int(1) padding_flag;
+//     unsigned int(7) assignment_type;
+//     if (assignment_type == 0) {
+//         unsigned int(32) grouping_type;
+//     }
+//     else if (assignment_type == 1) {
+//         unsigned int(32) grouping_type;
+//         unsigned int(32) grouping_type_parameter;
+//     }
+//     else if (assignment_type == 2) {} // no further syntax elements needed
+//     else if (assignment_type == 3) {} // no further syntax elements needed
+//     else if (assignment_type == 4) {
+//         unsigned int(32) sub_track_id;
+//     }
+//     // other assignment_type values are reserved
+// }
+
+// assignment_type : indicates the mechanism used to specify the assignment to a level.
+// assignment_type values greater than 4 are reserved, while the semantics for the other values are
+// specified as follows.
The sequence of assignment_types is restricted to be a set of zero or more of
+// type 2 or 3, followed by zero or more of exactly one type.
+// • 0: sample groups are used to specify levels, i.e., samples mapped to different sample group
+// description indexes of a particular sample grouping lie in different levels within the identified track;
+// other tracks are not affected and must have all their data in precisely one level;
+// • 1: as for assignment_type 0 except assignment is by a parameterized sample group;
+// • 2, 3: level assignment is by track (see the Subsegment Index Box for the difference in processing
+// of these levels)
+// • 4: the respective level contains the samples for a sub-track. The sub-tracks are specified through
+// the Sub Track box; other tracks are not affected and must have all their data in precisely one
+// level;
+
+// -------------------------------------------------------------------------------------------------------
+
+// MovieFragmentBox is the 'moof' container box; only the standard box header is declared.
+//
+// ISO_IEC_14496-12_2012.pdf Page/66
+//
+// Box Type: moof
+// Container: File
+// Mandatory: No
+// Quantity: Zero or more
+type MovieFragmentBox struct {
+	MP4BoxHeader // standard header
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// MovieFragmentHeaderBox is the 'mfhd' box carrying the fragment sequence number.
+//
+// ISO_IEC_14496-12_2012.pdf Page/66
+//
+// Box Type: mfhd
+// Container: Movie Fragment Box ('moof')
+// Mandatory: Yes
+// Quantity: Exactly one
+//
+// The movie fragment header contains a sequence number, as a safety check. The sequence number usually
+// starts at 1 and must increase for each movie fragment in the file, in the order in which they occur. This allows
+// readers to verify integrity of the sequence; it is an error to construct a file where the fragments are out of sequence.
+type MovieFragmentHeaderBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	SequenceNumber uint32 // 32 bits, the ordinal number of this fragment, in increasing order
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// TrackFragmentBox is the 'traf' container box; only the standard box header is declared.
+//
+// ISO_IEC_14496-12_2012.pdf Page/67
+//
+// Box Type: traf
+// Container: Movie Fragment Box ('moof')
+// Mandatory: No
+// Quantity: Zero or more
+type TrackFragmentBox struct {
+	MP4BoxHeader // standard header
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// TrackFragmentHeaderBox is the 'tfhd' box: a track ID followed by optional per-fragment defaults.
+//
+// ISO_IEC_14496-12_2012.pdf Page/67
+//
+// Box Type: tfhd
+// Container: Track Fragment Box ('traf')
+// Mandatory: Yes
+// Quantity: Exactly one
+type TrackFragmentHeaderBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	TrackID uint32 // 32 bits,
+
+	// all the following are optional fields
+	BaseDataOffset         uint64 // 64 bits, the base offset to use when calculating data offsets
+	SampleDescriptionIndex uint32 // 32 bits,
+	DefaultSampleDuration  uint32 // 32 bits,
+	DefaultSampleSize      uint32 // 32 bits,
+	DefaultSampleFlags     uint32 // 32 bits,
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// TrackFragmentRunBox is the 'trun' box: a sample count, two optional run-level fields and a
+// table with one TrackFragmentRunTable row per sample.
+//
+// ISO_IEC_14496-12_2012.pdf Page/68
+//
+// Box Type: trun
+// Container: Track Fragment Box ('traf')
+// Mandatory: No
+// Quantity: Zero or more
+type TrackFragmentRunBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	SampleCount uint32 // 32 bits, the number of samples being added in this run; also the number of rows in the following table (the rows can be empty)
+
+	// the following are optional fields
+	DataOffset       int32  // 32 bits, signed, is added to the implicit or explicit data_offset established in the track fragment header.
+	FirstSampleFlags uint32 // 32 bits, provides a set of flags for the first sample only of this run.
+
+	// all fields in the following array are optional
+	Table []TrackFragmentRunTable // Track Fragment Run Table 1, SampleCount elements.
+}
+
+// TrackFragmentRunTable is one row of the 'trun' sample table. SampleCompositionTimeOffset is
+// an interface{} because its signedness depends on the box version (see the syntax below).
+type TrackFragmentRunTable struct {
+	SampleDuration              uint32      // 32 bits,
+	SampleSize                  uint32      // 32 bits,
+	SampleFlags                 uint32      // 32 bits,
+	SampleCompositionTimeOffset interface{} // uint32 or int32,
+}
+
+// if (version == 0){
+//     unsigned int(32) sample_composition_time_offset;
+// }
+// else{
+//     signed int(32) sample_composition_time_offset;
+// }
+
+// -------------------------------------------------------------------------------------------------------
+
+// TrackFragmentBaseMediaDecodeTimeBox is the 'tfdt' box carrying the base media decode time
+// of the enclosing track fragment.
+//
+// ISO_IEC_14496-12_2012.pdf Page/71
+//
+// Box Type: tfdt
+// Container: Track Fragment box (‘traf’)
+// Mandatory: No
+// Quantity: Zero or one
+type TrackFragmentBaseMediaDecodeTimeBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	BaseMediaDecodeTime interface{} // uint32 or uint64, is an integer equal to the sum of the decode durations of all earlier samples in the media, expressed in the media's timescale. It does not include the samples added in the enclosing track fragment.
+}
+
+// if (version==1) {
+//     unsigned int(64) baseMediaDecodeTime;
+// } else { // version==0
+//     unsigned int(32) baseMediaDecodeTime;
+// }
+
+// -------------------------------------------------------------------------------------------------------
+
+// MovieFragmentRandomAccessBox is the 'mfra' container box; only the standard box header is declared.
+//
+// ISO_IEC_14496-12_2012.pdf Page/69
+//
+// Box Type: mfra
+// Container: File
+// Mandatory: No
+// Quantity: Zero or one
+type MovieFragmentRandomAccessBox struct {
+	MP4BoxHeader // standard header
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// TrackFragmentRandomAccessBox is the 'tfra' box: per-track field widths followed by
+// NumberOfEntry TrackFragmentRandomAccessTable rows.
+//
+// ISO_IEC_14496-12_2012.pdf Page/70
+//
+// Box Type: tfra
+// Container: Movie Fragment Random Access Box (‘mfra’)
+// Mandatory: No
+// Quantity: Zero or one per track
+type TrackFragmentRandomAccessBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	TrackID               uint32                           // 32 bits, is an integer identifying the track_ID.
+	Reserved              uint32                           // 26 bits,
+	LengthSizeOfTrafNum   byte                             // 2 bits, indicates the length in byte of the traf_number field minus one.
+	LengthSizeOfTrunNum   byte                             // 2 bits, indicates the length in byte of the trun_number field minus one.
+	LengthSizeOfSampleNum byte                             // 2 bits, indicates the length in byte of the sample_number field minus one.
+	NumberOfEntry         uint32                           // 32 bits, is an integer that gives the number of the entries for this track. If this value is zero, it indicates that every sample is a sync sample and no table entry follows.
+	Table                 []TrackFragmentRandomAccessTable // Track Fragment RandomAccess Table 1, NumberOfEntry elements.
+}
+
+// TrackFragmentRandomAccessTable is one 'tfra' entry. Every field is an interface{} because its
+// width depends on the box version or on the LengthSizeOf* fields of the enclosing box.
+type TrackFragmentRandomAccessTable struct {
+	Time         interface{} // uint32 or uint64, is 32 or 64 bits integer that indicates the presentation time of the sync sample in units defined in the ‘mdhd’ of the associated track.
+	Moofoffset   interface{} // uint32 or uint64, is 32 or 64 bits integer that gives the offset of the ‘moof’ used in this entry. Offset is the byte-offset between the beginning of the file and the beginning of the ‘moof’.
+	TrafNumber   interface{} // unsigned int((length_size_of_traf_num+1) * 8). indicates the ‘traf’ number that contains the sync sample. The number ranges from 1 (the first ‘traf’ is numbered 1) in each ‘moof’.
+	TrunNumber   interface{} // unsigned int((length_size_of_trun_num+1) * 8). indicates the ‘trun’ number that contains the sync sample. The number ranges from 1 in each ‘traf’
+	SampleNumber interface{} // unsigned int((length_size_of_sample_num+1) * 8) . indicates the sample number of the sync sample. The number ranges from 1 in each ‘trun’.
+}
+
+// for(i=1; i <= number_of_entry; i++){
+//     if(version==1){
+//         unsigned int(64) time;
+//         unsigned int(64) moof_offset;
+//     }else{
+//         unsigned int(32) time;
+//         unsigned int(32) moof_offset;
+//     }
+//     unsigned int((length_size_of_traf_num+1) * 8) traf_number;
+//     unsigned int((length_size_of_trun_num+1) * 8) trun_number;
+//     unsigned int((length_size_of_sample_num+1) * 8) sample_number;
+// }
+
+// -------------------------------------------------------------------------------------------------------
+
+// MovieFragmentRandomAccessOffsetBox is the 'mfro' box holding a copy of the enclosing 'mfra' size.
+//
+// ISO_IEC_14496-12_2012.pdf Page/71
+//
+// Box Type: mfro
+// Container: Movie Fragment Random Access Box (‘mfra’)
+// Mandatory: Yes
+// Quantity: Exactly one
+//
+// The Movie Fragment Random Access Offset Box provides a copy of the length field from the enclosing Movie
+// Fragment Random Access Box. It is placed last within that box, so that the size field is also last in the
+// enclosing Movie Fragment Random Access Box. When the Movie Fragment Random Access Box is also last
+// in the file this permits its easy location. The size field here must be correct. However, neither the presence of
+// the Movie Fragment Random Access Box, nor its placement last in the file, are assured.
+type MovieFragmentRandomAccessOffsetBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	Size uint32 // 32 bits, is an integer that gives the number of bytes of the enclosing ‘mfra’ box. This field is placed at the last of the enclosing box to assist readers scanning from the end of the file in finding the ‘mfra’ box.
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// MediaDataBox is the 'mdat' box: the standard header followed by the raw media bytes.
+//
+// ISO_IEC_14496-12_2012.pdf Page/29
+//
+// Box Type: mdat
+// Container: File
+// Mandatory: No
+// Quantity: Zero or more
+type MediaDataBox struct {
+	MP4BoxHeader // standard header
+
+	Data []byte // 8 bits array, is the contained media data.
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// FreeSpaceBox is the 'free' / 'skip' box: the standard header followed by ignorable bytes.
+//
+// ISO_IEC_14496-12_2012.pdf Page/29
+//
+// Box Types: free, skip
+// Container: File or other box
+// Mandatory: No
+// Quantity: Zero or more
+//
+// The contents of a free-space box are irrelevant and may be ignored, or the object deleted, without affecting
+// the presentation. (Care should be exercised when deleting the object, as this may invalidate the offsets used
+// in the sample table, unless this object is after all the media data).
+type FreeSpaceBox struct {
+	MP4BoxHeader // standard header
+
+	Data []uint8 // 8 bits array,
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// CopyrightBox is the 'cprt' box: a packed language code followed by the copyright notice text.
+//
+// ISO_IEC_14496-12_2012.pdf Page/78
+//
+// Box Type: cprt
+// Container: User data box (‘udta’)
+// Mandatory: No
+// Quantity: Zero or more
+//
+// The Copyright box contains a copyright declaration which applies to the entire presentation, when contained
+// within the Movie Box, or, when contained in a track, to that entire track. There may be multiple copyright
+// boxes using different language codes.
+type CopyrightBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	Pad      byte    // 1 bit,
+	Language [2]byte // 15 bits, declares the language code for the following text. See ISO 639-2/T for the set of three character codes. Each character is packed as the difference between its ASCII value and 0x60. The code is confined to being three lower-case letters, so these values are strictly positive.
+	Notice   string  // string, is a null-terminated string in either UTF-8 or UTF-16 characters, giving a copyright notice. If UTF-16 is used, the string shall start with the BYTE ORDER MARK (0xFEFF), to distinguish it from a UTF-8 string. This mark does not form part of the final string.
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// TrackSelectionBox is the 'tsel' box: a switch group followed by an attribute list that
+// runs to the end of the box.
+//
+// ISO_IEC_14496-12_2012.pdf Page/79
+//
+// Box Type: tsel
+// Container: User Data Box (‘udta’)
+// Mandatory: No
+// Quantity: Zero or One
+//
+// The track selection box is contained in the user data box of the track it modifies.
+type TrackSelectionBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	SwitchGroup   int32    // 32 bits, is an integer that specifies a group or collection of tracks. If this field is 0 (default value) or if the Track Selection box is absent there is no information on whether the track can be used for switching during playing or streaming. If this integer is not 0 it shall be the same for tracks that can be used for switching between each other. Tracks that belong to the same switch group shall belong to the same alternate group. A switch group may have only one member.
+	AttributeList []uint32 // 32 bits array, is a list, to the end of the box, of attributes. The attributes in this list should be used as descriptions of tracks or differentiation criteria for tracks in the same alternate or switch group. Each differentiating attribute is associated with a pointer to the field or information that distinguishes the track.
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// SubTrack is the 'strk' container box; only the standard box header is declared.
+//
+// ISO_IEC_14496-12_2012.pdf Page/100
+//
+// Box Type: strk
+// Container: User Data box (‘udta’) of the corresponding Track box (‘trak’)
+// Mandatory: No
+// Quantity: Zero or more
+//
+// This box contains objects that define and provide information about a sub track in the present track.
+type SubTrack struct {
+	MP4BoxHeader // standard header
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// SubTrackInformation is the 'stri' box: switch/alternate group, sub track ID and an
+// attribute list that runs to the end of the box.
+//
+// ISO_IEC_14496-12_2012.pdf Page/100
+//
+// Box Type: stri
+// Container: Sub Track box (‘strk’)
+// Mandatory: Yes
+// Quantity: One
+type SubTrackInformation struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	SwitchGroup    int16    // 16 bits,
+	AlternateGroup int16    // 16 bits,
+	SubTrackID     uint32   // 32 bits, is an integer. A non-zero value uniquely identifies the sub track locally within the track. A zero value (default) means that sub track ID is not assigned.
+	AttributeList  []uint32 // 32 bits array, is a list, to the end of the box, of attributes. The attributes in this list should be used as descriptions of sub tracks or differentiating criteria for tracks and sub tracks in the same alternate or switch group
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// SubTrackDefinition is the 'strd' container box; only the standard box header is declared.
+//
+// ISO_IEC_14496-12_2012.pdf Page/101
+//
+// Box Type: strd
+// Container: Sub Track box (‘strk’)
+// Mandatory: Yes
+// Quantity: One
+//
+// This box contains objects that provide a definition of the sub track.
+type SubTrackDefinition struct {
+	MP4BoxHeader // standard header
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// MetaBox is the 'meta' box. The child boxes are held by value, so an absent optional
+// child is represented by its zero value rather than by nil.
+//
+// ISO_IEC_14496-12_2012.pdf Page/81
+//
+// Box Type: meta
+// Container: File, Movie Box (‘moov’), Track Box (‘trak’), or Additional Metadata Container Box (‘meco’)
+// Mandatory: No
+// Quantity: Zero or one (in File, ‘moov’, and ‘trak’), One or more (in ‘meco’)
+type MetaBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	PrimaryResource PrimaryItemBox     // optional
+	FileLocations   DataInformationBox // optional
+	ItemLocations   ItemLocationBox    // optional
+	Protections     ItemProtectionBox  // optional
+	ItemInfos       ItemInfoBox        // optional
+	IPMPControl     IPMPControlBox     // optional
+	ItemRefs        ItemReferenceBox   // optional
+	ItemData        ItemDataBox        // optional
+	//OtherBoxes []Box // optional
+}
+
+// IPMPControlBox is an empty placeholder type used by MetaBox.IPMPControl.
+type IPMPControlBox struct{}
+
+// -------------------------------------------------------------------------------------------------------
+
+// ItemLocationBox is the 'iloc' box: four 4-bit size selectors, an item count and a
+// version-dependent item table.
+//
+// ISO_IEC_14496-12_2012.pdf Page/82
+//
+// Box Type: iloc
+// Container: Meta box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+type ItemLocationBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	OffsetSize     byte        // 4 bits,
+	LengthSize     byte        // 4 bits,
+	BaseOffsetSize byte        // 4 bits,
+	IndexSize      byte        // 4 bits, index_size if version == 1. NOTE(review): the original comment read "index_size replace to reserved"; presumably these 4 bits are reserved for other versions - confirm against the spec.
+	ItemCount      uint16      // 16 bits,
+	Table          interface{} // version == 1 -> ItemLocationTable1 , version == 2 -> ItemLocationTable2, ItemCount elements. NOTE(review): ItemLocationTable2 is the layout without the version-1-only fields; confirm whether "version == 2" should read "version == 0".
+}
+
+// ItemLocationTable1 is an item entry that includes the construction method
+// (the version == 1 fields in the syntax below).
+type ItemLocationTable1 struct {
+	ItemID             uint16                     // 16 bits,
+	Reserved           uint16                     // 12 bits,
+	ConstructionMethod byte                       // 4 bits,
+	DataReferenceIndex uint16                     // 16 bits,
+	BaseOffset         interface{}                // unsigned int(base_offset_size*8),
+	ExtentCount        uint16                     // 16 bits,
+	ExtentTable        []ItemLocationExtentTable1 // Item Location Extent Table1, ExtentCount elements.
+}
+
+// ItemLocationTable2 is an item entry without the reserved/construction-method fields.
+type ItemLocationTable2 struct {
+	ItemID             uint16                     // 16 bits,
+	DataReferenceIndex uint16                     // 16 bits,
+	BaseOffset         interface{}                // unsigned int(base_offset_size*8),
+	ExtentCount        uint16                     // 16 bits,
+	ExtentTable        []ItemLocationExtentTable2 // Item Location Extent Table2, ExtentCount elements.
+}
+
+// ItemLocationExtentTable1 is an extent entry prefixed by an extent index.
+type ItemLocationExtentTable1 struct {
+	ExtentIndex interface{} // unsigned int(index_size*8)
+	ItemLocationExtentTable2
+}
+
+// ItemLocationExtentTable2 is an extent entry holding only offset and length.
+type ItemLocationExtentTable2 struct {
+	ExtentOffset interface{} // unsigned int(offset_size*8)
+	ExtentLength interface{} // unsigned int(length_size*8)
+}
+
+// NOTE(review): the loop below was garbled in this file ("for (i=0; i 0)) {" - the text between
+// '<' and '>' had been stripped). It is reconstructed from the struct layouts above; confirm
+// against ISO/IEC 14496-12:2012, Item Location Box syntax.
+//
+// for (i=0; i<item_count; i++) {
+//     unsigned int(16) item_ID;
+//     if (version == 1) {
+//         unsigned int(12) reserved = 0;
+//         unsigned int(4) construction_method;
+//     }
+//     unsigned int(16) data_reference_index;
+//     unsigned int(base_offset_size*8) base_offset;
+//     unsigned int(16) extent_count;
+//     for (j=0; j<extent_count; j++) {
+//         if ((version == 1) && (index_size > 0)) {
+//             unsigned int(index_size*8) extent_index;
+//         }
+//
+//         unsigned int(offset_size*8) extent_offset;
+//         unsigned int(length_size*8) extent_length;
+//     }
+// }
+
+// offset_size : is taken from the set {0, 4, 8} and indicates the length in bytes of the offset field.
+// length_size : is taken from the set {0, 4, 8} and indicates the length in bytes of the length field.
+// base_offset_size : is taken from the set {0, 4, 8} and indicates the length in bytes of the base_offset field.
+// index_size : is taken from the set {0, 4, 8} and indicates the length in bytes of the extent_index field.
+// item_count : counts the number of resources in the following array.
+// item_ID : is an arbitrary integer ‘name’ for this resource which can be used to refer to it (e.g. in a URL).
+// construction_method : is taken from the set 0 (file), 1 (idat) or 2 (item)
+// data-reference-index : is either zero (‘this file’) or a 1-based index into the data references in the data information box.
+// base_offset : provides a base value for offset calculations within the referenced data. If
+// base_offset_size is 0, base_offset takes the value 0, i.e. it is unused.
+// extent_count : provides the count of the number of extents into which the resource is fragmented; it must have the value 1 or greater
+// extent_index : provides an index as defined for the construction method
+// extent_offset : provides the absolute offset in bytes from the beginning of the containing file, of this item. If offset_size is 0, offset takes the value 0
+// extent_length : provides the absolute length in bytes of this metadata item. If length_size is 0, length takes the value 0. If the value is 0, then length of the item is the length of the entire referenced file.
+
+// -------------------------------------------------------------------------------------------------------
+
+// ItemProtectionBox is the 'ipro' box: a count followed by that many ItemProtectionTable entries.
+//
+// ISO_IEC_14496-12_2012.pdf Page/85
+//
+// Box Type: ipro
+// Container: Meta box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+//
+// The item protection box provides an array of item protection information, for use by the Item Information Box.
+type ItemProtectionBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	ProtectionCount uint16                // 16 bits,
+	Table           []ItemProtectionTable // Item Protection Table, ProtectionCount elements.
+}
+
+// ItemProtectionTable is one 'ipro' entry wrapping a ProtectionSchemeInfoBox.
+type ItemProtectionTable struct {
+	ProtectionInformation ProtectionSchemeInfoBox
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// ProtectionSchemeInfoBox is the 'sinf' box grouping the original-format, scheme-type and
+// scheme-information child boxes (held by value).
+//
+// ISO_IEC_14496-12_2012.pdf Page/92
+//
+// Box Types: sinf
+// Container: Protected Sample Entry, or Item Protection Box (‘ipro’)
+// Mandatory: Yes
+// Quantity: One or More
+type ProtectionSchemeInfoBox struct {
+	MP4BoxHeader // standard header
+
+	OriginalFormat OriginalFormatBox    // original format box
+	Type           SchemeTypeBox        // optional
+	Info           SchemeInformationBox // optional
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+//
+// ISO_IEC_14496-12_2012.pdf Page/93
+//
+// Box Types: frma
+// Container: Protection Scheme Information Box (‘sinf’) or Restricted Scheme Information Box (‘rinf’)
+// Mandatory: Yes when used in a protected sample entry or in a restricted sample entry
+// Quantity: Exactly one
+//
+// The Original Format Box ‘frma’ contains the four-character-code of the original un-transformed sample description:
+
+// OriginalFormatBox is the 'frma' box holding the original sample entry's four-character code.
+type OriginalFormatBox struct {
+	MP4BoxHeader // standard header
+
+	DataFormat uint32 // 32 bits, is the four-character-code of the original un-transformed sample entry (e.g. “mp4v” if the stream contains protected or restricted MPEG-4 visual material).
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// SchemeTypeBox is the 'schm' box identifying the protection or restriction scheme.
+//
+// ISO_IEC_14496-12_2012.pdf Page/93
+//
+// Box Types: schm
+// Container: Protection Scheme Information Box (‘sinf’), Restricted Scheme Information Box (‘rinf’),
+// or SRTP Process box (‘srpp‘)
+// Mandatory: No
+// Quantity: Zero or one in ‘sinf’, depending on the protection structure; Exactly one in ‘rinf’ and ‘srpp’
+//
+// The Scheme Type Box (‘schm’) identifies the protection or restriction scheme.
+type SchemeTypeBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	SchemeType    uint32 // 32 bits, is the code defining the protection or restriction scheme.
+	SchemeVersion uint32 // 32 bits, is the version of the scheme (used to create the content)
+}
+
+// Optional trailing syntax (no field in the struct above holds scheme_uri):
+//
+// if (flags & 0x000001) {
+//     unsigned int(8) scheme_uri[]; // browser uri
+// }
+
+// scheme_URI : allows for the option of directing the user to a web-page if they do not have the scheme installed on their system. It is an absolute URI formed as a null-terminated string in UTF-8 characters.
+
+// -------------------------------------------------------------------------------------------------------
+
+// SchemeInformationBox is the 'schi' container box; its content is declared here as a
+// slice of SchemeTypeBox.
+//
+// ISO_IEC_14496-12_2012.pdf Page/94
+//
+// Box Types: schi
+// Container: Protection Scheme Information Box (‘sinf’), Restricted Scheme Information Box (‘rinf’),
+// or SRTP Process box (‘srpp‘)
+// Mandatory: No
+// Quantity: Zero or one
+// The Scheme Information Box is a container Box that is only interpreted by the scheme being used. Any
+// information the encryption or restriction system needs is stored here. The content of this box is a series of
+// boxes whose type and format are defined by the scheme declared in the Scheme Type Box.
+type SchemeInformationBox struct {
+	MP4BoxHeader // standard header
+
+	SchemeSpecificData []SchemeTypeBox
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+//
+// ISO_IEC_14496-12_2012.pdf Page/85
+//
+// Box Type: iinf
+// Container: Meta Box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+
+// ItemInfoBox is the 'iinf' box: an entry count followed by that many ItemInfoEntry boxes.
+type ItemInfoBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	EntryCount uint16          // 16 bits,
+	ItemInfos  []ItemInfoEntry // EntryCount elements.
+}
+
+// ItemInfoEntry is one 'infe' box. The struct declares the union of the fields used by the
+// different versions; which ones are present on the wire is given by the syntax below.
+//
+// Box Type: infe
+type ItemInfoEntry struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	ItemID              uint16 // 16 bits
+	ItemProtectionIndex uint16 // 16 bits
+	ItemType            uint32 // 32 bits,
+	ItemName            string // string,
+	ContentType         string // string,
+	ContentEncoding     string // string, optional
+	ItemUriType         string // string,
+	ExtensionType       uint32 // 32 bits, optional
+	ItemInfoExtension          // optional
+}
+
+// ItemInfoExtension is an empty placeholder embedded in ItemInfoEntry.
+type ItemInfoExtension struct {
+}
+
+// if ((version == 0) || (version == 1)) {
+//     unsigned int(16) item_ID;
+//     unsigned int(16) item_protection_index
+//     string item_name;
+//     string content_type;
+//     string content_encoding; //optional
+// }
+
+// if (version == 1) {
+//     unsigned int(32) extension_type; //optional
+//     ItemInfoExtension(extension_type); //optional
+// }
+
+// if (version == 2) {
+//     unsigned int(16) item_ID;
+//     unsigned int(16) item_protection_index;
+//     unsigned int(32) item_type;
+//     string item_name;
+
+//     if (item_type==’mime’) {
+//         string content_type;
+//         string content_encoding; //optional
+//     } else if (item_type == ‘uri ‘) {
+//         string item_uri_type;
+//     }
+// }
+
+// item_id : contains either 0 for the primary resource (e.g., the XML contained in an ‘xml ‘ box) or the ID of the item for which the following information is defined.
+// item_protection_index : contains either 0 for an unprotected item, or the one-based index into the item protection box defining the protection applied to this item (the first box in the item protection box has the index 1).
+// item_name : is a null-terminated string in UTF-8 characters containing a symbolic name of the item (source file for file delivery transmissions).
+// item_type : is a 32-bit value, typically 4 printable characters, that is a defined valid item type indicator, such as ‘mime’
+// content_type : is a null-terminated string in UTF-8 characters with the MIME type of the item.
If the item is content encoded (see below), then the content type refers to the item after content decoding.
+// item_uri_type : is a string that is an absolute URI, that is used as a type indicator.
+// content_encoding : is an optional null-terminated string in UTF-8 characters used to indicate that the binary file is encoded and needs to be decoded before interpreted. The values are as defined for Content-Encoding for HTTP/1.1. Some possible values are “gzip”, “compress” and “deflate”. An empty string indicates no content encoding. Note that the item is stored after the content encoding has been applied.
+// extension_type : is a printable four-character code that identifies the extension fields of version 1 with respect to version 0 of the Item information entry.
+// content_location : is a null-terminated string in UTF-8 characters containing the URI of the file as defined in HTTP/1.1 (RFC 2616).
+// content_MD5 : is a null-terminated string in UTF-8 characters containing an MD5 digest of the file. See HTTP/1.1 (RFC 2616) and RFC 1864.
+// content_length : gives the total length (in bytes) of the (un-encoded) file.
+// transfer_length : gives the total length (in bytes) of the (encoded) file. Note that transfer length is equal to content length if no content encoding is applied (see above).
+// entry_count : provides a count of the number of entries in the following array.
+// group_ID : indicates a file group to which the file item (source file) belongs. See 3GPP TS 26.346 for more details on file groups.
+
+// -------------------------------------------------------------------------------------------------------
+
+//
+// ISO_IEC_14496-12_2012.pdf Page/82
+//
+// Box Type: ‘xml ‘ or ‘bxml’
+// Container: Meta box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+//
+// When the primary data is in XML format and it is desired that the XML be stored directly in the meta-box, one
+// of these forms may be used.
The Binary XML Box may only be used when there is a single well-defined
+// binarization of the XML for that defined format as identified by the handler.
+// Within an XML box the data is in UTF-8 format unless the data starts with a byte-order-mark (BOM), which
+// indicates that the data is in UTF-16 format.
+//
+// XMLBox declares the textual form: the two box headers plus the XML document as a string.
+type XMLBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	XML string // string,
+}
+
+// BinaryXMLBox declares the binary form: the two box headers plus the raw data bytes.
+type BinaryXMLBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	Data []uint8 // 8 bits array,
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// PrimaryItemBox is the 'pitm' box naming the primary item by its ID.
+//
+// ISO_IEC_14496-12_2012.pdf Page/93
+//
+// Box Type: pitm
+// Container: Meta box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+//
+// For a given handler, the primary data may be one of the referenced items when it is desired that it be stored
+// elsewhere, or divided into extents; or the primary metadata may be contained in the meta-box (e.g. in an XML
+// box). Either this box must occur, or there must be a box within the meta-box (e.g. an XML box) containing the
+// primary information in the format required by the identified handler.
+type PrimaryItemBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	ItemID uint16 // 16 bits, is the identifier of the primary item
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// FDItemInformationBox is the 'fiin' box: a count of partition entries followed by the
+// entries and two optional child boxes.
+//
+// ISO_IEC_14496-12_2012.pdf Page/95
+//
+// Box Type: fiin
+// Container: Meta Box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+//
+// The FD item information box is optional, although it is mandatory for files using FD hint tracks. It provides
+// information on the partitioning of source files and how FD hint tracks are combined into FD sessions.
Each
+// partition entry provides details on a particular file partitioning, FEC encoding and associated File and FEC
+// reservoirs. It is possible to provide multiple entries for one source file (identified by its item ID) if alternative
+// FEC encoding schemes or partitionings are used in the file. All partition entries are implicitly numbered and
+// the first entry has number 1.
+type FDItemInformationBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	EntryCount uint16            // 16 bits,
+	PE         []PartitionEntry  // EntryCount elements.
+	FDSGB      FDSessionGroupBox // optional
+	GidToNameB GroupIdToNameBox  // optional
+}
+
+// PartitionEntry groups the child boxes of one 'paen' partition entry (held by value).
+//
+// Box Type: paen
+type PartitionEntry struct {
+	FPB   FilePartitionBox //
+	FECRB FECReservoirBox  //optional
+	FRB   FileReservoirBox //optional
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// FileReservoirBox is the 'fire' box: an entry count followed by that many FileReservoirTable rows.
+//
+// ISO_IEC_14496-12_2012.pdf Page/99
+//
+// Box Type: fire
+// Container: Partition Entry (‘paen’)
+// Mandatory: No
+// Quantity: Zero or One
+//
+// The File reservoir box associates the source file identified in the file partition box ('fpar') with File reservoirs
+// stored as additional items. It contains a list that starts with the first File reservoir associated with the first
+// source block of the source file and continues sequentially through the source blocks of the source file.
+type FileReservoirBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	EntryCount uint16               // 16 bits, gives the number of entries in the following list. An entry count here should match the total number of blocks in the corresponding file partition box.
+	Table      []FileReservoirTable // EntryCount elements.
+}
+
+// FileReservoirTable is one 'fire' list entry.
+type FileReservoirTable struct {
+	ItemID      uint16 // 16 bits, indicates the location of the File reservoir associated with a source block.
+	SymbolCount uint32 // 32 bits, indicates the number of source symbols contained in the File reservoir.
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// FilePartitionBox is the 'fpar' box: FEC parameters for one source file followed by
+// EntryCount (block_count, block_size) pairs.
+//
+// ISO_IEC_14496-12_2012.pdf Page/95
+//
+// Box Type: fpar
+// Container: Partition Entry (‘paen’)
+// Mandatory: Yes
+// Quantity: Exactly one
+//
+// The File Partition box identifies the source file and provides a partitioning of that file into source blocks and
+// symbols. Further information about the source file, e.g., filename, content location and group IDs, is contained
+// in the Item Information box ('iinf'), where the Item Information entry corresponding to the item ID of the
+// source file is of version 1 and includes a File Delivery Item Information Extension ('fdel').
+type FilePartitionBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	ItemID                     uint16               // 16 bits,
+	PacketPayloadSize          uint16               // 16 bits,
+	Reserved                   uint8                // 8 bits,
+	FECEncodingID              uint8                // 8 bits,
+	FECInstanceID              uint16               // 16 bits,
+	MaxSourceBlockLength       uint16               // 16 bits,
+	EncodingSymbolLength       uint16               // 16 bits,
+	MaxNumberOfEncodingSymbols uint16               // 16 bits,
+	SchemeSpecificInfo         string               // string,
+	EntryCount                 uint16               // 16 bits,
+	Tanble                     []FilePartitionTable // File Partition Table, EntryCount elements. NOTE(review): "Tanble" looks like a typo of "Table"; renaming it would change the exported field, so it is left as is.
+}
+
+// FilePartitionTable is one (block_count, block_size) pair of the 'fpar' partitioning list.
+type FilePartitionTable struct {
+	BlockCount uint16 // 16 bits,
+	BlockSize  uint32 // 32 bits,
+}
+
+// item_ID : references the item in the item location box ('iloc') that the file partitioning applies to.
+// packet_payload_size : gives the target ALC/LCT or FLUTE packet payload size of the partitioning algorithm. Note that UDP packet payloads are larger, as they also contain ALC/LCT or FLUTE headers.
+// FEC_encoding_ID : identifies the FEC encoding scheme and is subject to IANA registration (see RFC 5052).
Note that i) value zero corresponds to the "Compact No-Code FEC scheme" also known as "Null-FEC" (RFC 3695); ii) value one corresponds to the “MBMS FEC” (3GPP TS 26.346); iii) for values in the range of 0 to 127, inclusive, the FEC scheme is Fully-Specified, whereas for values in the range of 128 to 255, inclusive, the FEC scheme is Under-Specified. +// FEC_instance_ID : provides a more specific identification of the FEC encoder being used for an UnderSpecified FEC scheme. This value should be set to zero for Fully-Specified FEC schemes and shall be ignored when parsing a file with FEC_encoding_ID in the range of 0 to 127, inclusive. FEC_instance_ID is scoped by the FEC_encoding_ID. See RFC 5052 for further details. +// max_source_block_length : gives the maximum number of source symbols per source block. +// encoding_symbol_length : gives the size (in bytes) of one encoding symbol. All encoding symbols of one item have the same length, except the last symbol which may be shorter. +// max_number_of_encoding_symbols : gives the maximum number of encoding symbols that can be generated for a source block for those FEC schemes in which the maximum number of encoding symbols is relevant, such as FEC encoding ID 129 defined in RFC 5052. For those FEC schemes in which the maximum number of encoding symbols is not relevant, the semantics of this field is unspecified. +// scheme_specific_info : is a base64-encoded null-terminated string of the scheme-specific object transfer information (FEC-OTI-Scheme-Specific-Info). The definition of the information depends on the FEC encoding ID. +// entry_count : gives the number of entries in the list of (block_count, block_size) pairs that provides a partitioning of the source file. Starting from the beginning of the file, each entry indicates how the next segment of the file is divided into source blocks and source symbols. +// block_count : indicates the number of consecutive source blocks of size block_size. 
+// block_size : indicates the size of a block (in bytes). A block_size that is not a multiple of the encoding_symbol_length symbol size indicates with Compact No-Code FEC that the last source symbols includes padding that is not stored in the item. With MBMS FEC (3GPP TS 26.346) the padding may extend across multiple symbols but the size of padding should never be more than encoding_symbol_length. + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/97 +// +// Box Type: fecr +// Container: Partition Entry (‘paen’) +// Mandatory: No +// Quantity: Zero or One +// +// The FEC reservoir box associates the source file identified in the file partition box ('fpar') with FEC +// reservoirs stored as additional items. It contains a list that starts with the first FEC reservoir associated with +// the first source block of the source file and continues sequentially through the source blocks of the source file. +type FECReservoirBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + EntryCount uint16 // 16 bits, + Table []FECReservoirTable // FEC Reservoir Table, EntryCount elements. +} + +type FECReservoirTable struct { + ItemID uint16 // 16 bits, indicates the location of the FEC reservoir associated with a source block. + SymbolCount uint32 // 32 bits, indicates the number of repair symbols contained in the FEC reservoir. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/97 +// +// Box Type: segr +// Container: FD Information Box (‘fiin’) +// Mandatory: No +// Quantity: Zero or One +type FDSessionGroupBox struct { + MP4BoxHeader // standard header + + NumSessionGroups uint16 // 16 bits, + Table []FDSessionGroupTable // FD Session Group Table, NumSessionGroups elements. 
+} + +type FDSessionGroupTable struct { + EntryCount uint8 // 8 bits, + GIDTable []FDSessionGroupIDTable // FDSession Group ID Table, EntryCount elements. + NumChannelsInSessionGroup uint16 // 16 bits + HTIDTable []FDSessionHintTrackIDTable // FDSession Hint Track ID Table, NumChannelsInSessionGroup elements. +} + +type FDSessionGroupIDTable struct { + GroupID uint32 // 32 bits +} + +type FDSessionHintTrackIDTable struct { + HintTrackID uint32 // 32 bits +} + +// for(i=0; i < num_session_groups; i++) { +// unsigned int(8) entry_count; + +// for (j=0; j < entry_count; j++) { +// unsigned int(32) group_ID; +// } + +// unsigned int(16) num_channels_in_session_group; + +// for(k=0; k < num_channels_in_session_group; k++) { +// unsigned int(32) hint_track_id; +// } +// } + +// num_session_groups : specifies the number of session groups. +// entry_count : gives the number of entries in the following list comprising all file groups that the session group complies with. The session group contains all files included in the listed file groups as specified by the item information entry of each source file. Note that the FDT for the session group should only contain those groups that are listed in this structure. +// group_ID : indicates a file group that the session group complies with. +// num_channels_in_session_groups : specifies the number of channels in the session group. The value of num_channels_in_session_groups shall be a positive integer. +// hint_track_ID : specifies the track ID of the FD hint track belonging to a particular session group. Note that one FD hint track corresponds to one LCT channel. 
+
+// -------------------------------------------------------------------------------------------------------
+
+//
+// ISO_IEC_14496-12_2012.pdf Page/98
+//
+// Box Type: gitn
+// Container: FD Information Box (‘fiin’)
+// Mandatory: No
+// Quantity: Zero or One
+//
+// The Group ID to Name box associates file group names to file group IDs used in the version 1 item
+// information entries in the item information box ('iinf').
+type GroupIdToNameBox struct {
+	MP4BoxHeader     // standard header
+	MP4FullBoxHeader // full box header
+
+	EntryCount uint16               // 16 bits, gives the number of entries in the following list.
+	Table      []GroupIdToNameTable // Group Id To Name Table, EntryCount elements.
+}
+
+type GroupIdToNameTable struct {
+	GroupID   uint32 // 32 bits, indicates a file group.
+	GroupName string // string, is a null-terminated string in UTF-8 characters containing a file group name.
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+//
+// ISO_IEC_14496-12_2012.pdf Page/90
+//
+// Box Type: idat
+// Container: Metadata box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+//
+// This box contains the data of metadata items that use the construction method indicating that an item’s data
+// extents are stored within this box.
+type ItemDataBox struct {
+	MP4BoxHeader // standard header
+
+	Data []byte // 8 bits array, is the contained meta data
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+//
+// ISO_IEC_14496-12_2012.pdf Page/91
+//
+// Box Type: iref
+// Container: Metadata box (‘meta’)
+// Mandatory: No
+// Quantity: Zero or one
+//
+// The item reference box allows the linking of one item to others via typed references.
All the references for one +// item of a specific type are collected into a single item type reference box, whose type is the reference type, +// and which has a ‘from item ID’ field indicating which item is linked. The items linked to are then represented by +// an array of ‘to item ID’s. All these single item type reference boxes are then collected into the item reference +// box. The reference types defined for the track reference box defined in 8.3.3 may be used here if appropriate, +// or other registered reference types. +type ItemReferenceBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + SITRB []SingleItemTypeReferenceBox +} + +type SingleItemTypeReferenceBox struct { + MP4BoxHeader // standard header + + FromItemID uint16 // 16 bits, contains the ID of the item that refers to other items + ReferenceCount uint16 // 16 bits, is the number of references + Table []SingleItemTypeReferenceTable // Single Item Type Reference Table, ReferenceCount elements. +} + +type SingleItemTypeReferenceTable struct { + ToItemID uint16 // 16 bits, contains the ID of the item referred to +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/87 +// +// Box Type: meco +// Container: File, Movie Box (‘moov’), or Track Box (‘trak’) +// Mandatory: No +// Quantity: Zero or one +type AdditionalMetadataContainerBox struct { + MP4BoxHeader // standard header +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/88 +// +// Box Type: mere +// Container: Additional Metadata Container Box (‘meco’) +// Mandatory: No +// Quantity: Zero or more +// +// The metabox relation box indicates a relation between two meta boxes at the same level, i.e., the top level of +// the file, the Movie Box, or Track Box. 
The relation between two meta boxes is unspecified if there is no +// metabox relation box for those meta boxes. Meta boxes are referenced by specifying their handler types. +type MetaboxRelationBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + FirstMetaboxHandlerType uint32 // 32 bits, indicates the first meta box to be related. + SecondMetaboxHandlerType uint32 // 32 bits, indicates the second meta box to be related. + MetaboxRelation uint8 // 8 bits, indicates the relation between the two meta boxes. +} + +// metabox_relation indicates the relation between the two meta boxes. The following values are defined: +// 1 The relationship between the boxes is unknown (which is the default when this box is not present); +// 2 the two boxes are semantically un-related (e.g., one is presentation, the other annotation); +// 3 the two boxes are semantically related but complementary (e.g., two disjoint sets of meta-data expressed in two different meta-data systems); +// 4 the two boxes are semantically related but overlap (e.g., two sets of meta-data neither of which is a subset of the other); neither is ‘preferred’ to the other; +// 5 the two boxes are semantically related but the second is a proper subset or weaker version of the first; the first is preferred; +// 6 the two boxes are semantically related and equivalent (e.g., two essentially identical sets of meta-data expressed in two different meta-data systems). + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/105 +// +// Box Type: styp +// Container: File +// Mandatory: No +// Quantity: Zero or more +// +// If segments are stored in separate files (e.g. 
on a standard HTTP server) it is recommended that these +// ‘segment files’ contain a segment-type box, which must be first if present, to enable identification of those files, +// and declaration of the specifications with which they are compliant. +// A segment type has the same format as an 'ftyp' box [4.3], except that it takes the box type 'styp'. The +// brands within it may include the same brands that were included in the 'ftyp' box that preceded the +// ‘moov’ box, and may also include additional brands to indicate the compatibility of this segment with various +// specification(s). +// Valid segment type boxes shall be the first box in a segment. Segment type boxes may be removed if +// segments are concatenated (e.g. to form a full file), but this is not required. Segment type boxes that are not +// first in their files may be ignored. + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/106 +// +// Box Type: sidx +// Container: File +// Mandatory: No +// Quantity: Zero or more +type SegmentIndexBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + ReferenceID uint32 // 32 bits, + TimeScale uint32 // 32 bits, + EarliestPresentationTime interface{} // uint32 or uint64, + FirstOffset interface{} // uint32 or uint64, + Reserved uint16 // 16 bits, + ReferenceCount uint16 // 16 bits, + Table []SegmentIndexTable // Segment Index Table, ReferenceCount elements +} + +type SegmentIndexTable struct { + ReferenceType byte // 1 bit + ReferencedSize uint32 // 32 bits + SubSegmentDuration uint32 // 32 bits, + StartsWithSAP byte // 1 bit + SAPType byte // 3 bits, + SAPDeltaTime uint32 // 28 bits, +} + +// if (version==0) { +// unsigned int(32) earliest_presentation_time; +// unsigned int(32) first_offset; +// } +// else { +// unsigned int(64) earliest_presentation_time; +// unsigned int(64) first_offset; +// } + +// unsigned int(16) 
reserved = 0; +// unsigned int(16) reference_count; + +// for(i=1; i <= reference_count; i++) +// { +// bit (1) reference_type; +// unsigned int(31) referenced_size; +// unsigned int(32) subsegment_duration; +// bit(1) starts_with_SAP; +// unsigned int(3) SAP_type; +// unsigned int(28) SAP_delta_time; +// } + +// reference_ID : provides the stream ID for the reference stream; if this Segment Index box is referenced from a “parent” Segment Index box, the value of reference_ID shall be the same as the value of reference_ID of the “parent” Segment Index box; +// timescale : provides the timescale, in ticks per second, for the time and duration fields within this box; it is recommended that this match the timescale of the reference stream or track; for files based on this specification, that is the timescale field of the Media Header Box of the track; +// earliest_presentation_time : is the earliest presentation time of any access unit in the reference stream in the first subsegment, in the timescale indicated in the timescale field; +// first_offset : is the distance in bytes, in the file containing media, from the anchor point, to the first byte of the indexed material; +// reference_count : provides the number of referenced items; +// reference_type : when set to 1 indicates that the reference is to a segment index (‘sidx’) box; otherwise the reference is to media content (e.g., in the case of files based on this specification, to a movie fragment box); if a separate index segment is used, then entries with reference type 1 are in the index segment, and entries with reference type 0 are in the media file; +// referenced_size : the distance in bytes from the first byte of the referenced item to the first byte of the next referenced item, or in the case of the last entry, the end of the referenced material; +// subsegment_duration : when the reference is to Segment Index box, this field carries the sum of the subsegment_duration fields in that box; when the reference 
is to a subsegment, this field carries the difference between the earliest presentation time of any access unit of the reference stream in the next subsegment (or the first subsegment of the next segment, if this is the last subsegment of the segment, or the end presentation time of the reference stream if this is the last subsegment of the stream) and the earliest presentation time of any access unit of the reference stream in the referenced subsegment; the duration is in the same units as earliest_presentation_time; +// starts_with_SAP : indicates whether the referenced subsegments start with a SAP. For the detailed semantics of this field in combination with other fields, see the table below. +// SAP_type : indicates a SAP type as specified in Annex I, or the value 0. Other type values are reserved. For the detailed semantics of this field in combination with other fields, see the table below. +// SAP_delta_time : indicates TSAP of the first SAP, in decoding order, in the referenced subsegment for the reference stream. If the referenced subsegments do not contain a SAP, SAP_delta_time is reserved with the value 0; otherwise SAP_delta_time is the difference between the earliest presentation time of the subsegment, and the TSAP (note that this difference may be zero, in the case that the subsegment starts with a SAP). + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/109 +// +// Box Type: ssix +// Container: File +// Mandatory: No +// Quantity: Zero or more +type SubsegmentIndexBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + SubSegmentCount uint32 // 32 bits, is a positive integer specifying the number of subsegments for which partial subsegment information is specified in this box. 
subsegment_count shall be equal to reference_count (i.e., the number of movie fragment references) in the immediately preceding Segment Index box. + Table []SubsegmentIndexTable // Subsegment Index Table, SubSegmentCount elements. +} + +type SubsegmentIndexTable struct { + RangesCount uint32 // 32 bits, specifies the number of partial subsegment levels into which the media data is grouped. This value shall be greater than or equal to 2. + Rtable []SubsegmentRangesTable // Subsegment Ranges Table, RangesCount elements. +} + +type SubsegmentRangesTable struct { + level uint8 // 8 bits, specifies the level to which this partial subsegment is assigned. + range_size [3]byte // 24 bits, indicates the size of the partial subsegment. +} + +// ------------------------------------------------------------------------------------------------------- + +// +// ISO_IEC_14496-12_2012.pdf Page/111 +// +// Box Type: prft +// Container: File +// Mandatory: No +// Quantity: Zero or more +type ProducerReferenceTimeBox struct { + MP4BoxHeader // standard header + MP4FullBoxHeader // full box header + + ReferenceTrackID uint32 // 32 bits, provides the track_ID for the reference track. + NtpTimestamp uint64 // 64 bits, indicates a UTC time in NTP format corresponding to decoding_time. + MediaTime interface{} // uint32 or uint64, corresponds to the same time as ntp_timestamp, but in the time units used for the reference track, and is measured on this media clock as the media is produced. 
+} + +// if (version==0) { +// unsigned int(32) media_time; +// } else { +// unsigned int(64) media_time; +// } + +// ------------------------------------------------------------------------------------------------------- diff --git a/codec/sps.go b/codec/sps.go new file mode 100644 index 0000000..32b50f4 --- /dev/null +++ b/codec/sps.go @@ -0,0 +1,227 @@ +package codec + +import ( + "bytes" + + "github.com/cnotch/ipchub/av/codec/hevc" + + "github.com/Monibuca/engine/v4/util/bits" +) + +type SPSInfo struct { + ProfileIdc uint + LevelIdc uint + + MbWidth uint + MbHeight uint + + CropLeft uint + CropRight uint + CropTop uint + CropBottom uint + + Width uint + Height uint +} + +func ParseSPS(data []byte) (self SPSInfo, err error) { + r := &bits.GolombBitReader{R: bytes.NewReader(data)} + + if _, err = r.ReadBits(8); err != nil { + return + } + + if self.ProfileIdc, err = r.ReadBits(8); err != nil { + return + } + + // constraint_set0_flag-constraint_set6_flag,reserved_zero_2bits + if _, err = r.ReadBits(8); err != nil { + return + } + + // level_idc + if self.LevelIdc, err = r.ReadBits(8); err != nil { + return + } + + // seq_parameter_set_id + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + if self.ProfileIdc == 100 || self.ProfileIdc == 110 || + self.ProfileIdc == 122 || self.ProfileIdc == 244 || + self.ProfileIdc == 44 || self.ProfileIdc == 83 || + self.ProfileIdc == 86 || self.ProfileIdc == 118 { + + var chroma_format_idc uint + if chroma_format_idc, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + if chroma_format_idc == 3 { + // residual_colour_transform_flag + if _, err = r.ReadBit(); err != nil { + return + } + } + + // bit_depth_luma_minus8 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + // bit_depth_chroma_minus8 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + // qpprime_y_zero_transform_bypass_flag + if _, err = r.ReadBit(); err != nil { + return + } + + var 
seq_scaling_matrix_present_flag uint + if seq_scaling_matrix_present_flag, err = r.ReadBit(); err != nil { + return + } + + if seq_scaling_matrix_present_flag != 0 { + for i := 0; i < 8; i++ { + var seq_scaling_list_present_flag uint + if seq_scaling_list_present_flag, err = r.ReadBit(); err != nil { + return + } + if seq_scaling_list_present_flag != 0 { + var sizeOfScalingList uint + if i < 6 { + sizeOfScalingList = 16 + } else { + sizeOfScalingList = 64 + } + lastScale := uint(8) + nextScale := uint(8) + for j := uint(0); j < sizeOfScalingList; j++ { + if nextScale != 0 { + var delta_scale uint + if delta_scale, err = r.ReadSE(); err != nil { + return + } + nextScale = (lastScale + delta_scale + 256) % 256 + } + if nextScale != 0 { + lastScale = nextScale + } + } + } + } + } + } + + // log2_max_frame_num_minus4 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + var pic_order_cnt_type uint + if pic_order_cnt_type, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if pic_order_cnt_type == 0 { + // log2_max_pic_order_cnt_lsb_minus4 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + } else if pic_order_cnt_type == 1 { + // delta_pic_order_always_zero_flag + if _, err = r.ReadBit(); err != nil { + return + } + // offset_for_non_ref_pic + if _, err = r.ReadSE(); err != nil { + return + } + // offset_for_top_to_bottom_field + if _, err = r.ReadSE(); err != nil { + return + } + var num_ref_frames_in_pic_order_cnt_cycle uint + if num_ref_frames_in_pic_order_cnt_cycle, err = r.ReadExponentialGolombCode(); err != nil { + return + } + for i := uint(0); i < num_ref_frames_in_pic_order_cnt_cycle; i++ { + if _, err = r.ReadSE(); err != nil { + return + } + } + } + + // max_num_ref_frames + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + // gaps_in_frame_num_value_allowed_flag + if _, err = r.ReadBit(); err != nil { + return + } + + if self.MbWidth, err = r.ReadExponentialGolombCode(); err != 
nil { + return + } + self.MbWidth++ + + if self.MbHeight, err = r.ReadExponentialGolombCode(); err != nil { + return + } + self.MbHeight++ + + var frame_mbs_only_flag uint + if frame_mbs_only_flag, err = r.ReadBit(); err != nil { + return + } + if frame_mbs_only_flag == 0 { + // mb_adaptive_frame_field_flag + if _, err = r.ReadBit(); err != nil { + return + } + } + + // direct_8x8_inference_flag + if _, err = r.ReadBit(); err != nil { + return + } + + var frame_cropping_flag uint + if frame_cropping_flag, err = r.ReadBit(); err != nil { + return + } + if frame_cropping_flag != 0 { + if self.CropLeft, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if self.CropRight, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if self.CropTop, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if self.CropBottom, err = r.ReadExponentialGolombCode(); err != nil { + return + } + } + + self.Width = (self.MbWidth * 16) - self.CropLeft*2 - self.CropRight*2 + self.Height = ((2 - frame_mbs_only_flag) * self.MbHeight * 16) - self.CropTop*2 - self.CropBottom*2 + + return +} + +func ParseHevcSPS(data []byte) (self SPSInfo, err error) { + var rawsps hevc.H265RawSPS + if err = rawsps.Decode(data); err == nil { + self.CropLeft, self.CropRight, self.CropTop, self.CropBottom = uint(rawsps.Conf_win_left_offset), uint(rawsps.Conf_win_right_offset), uint(rawsps.Conf_win_top_offset), uint(rawsps.Conf_win_bottom_offset) + self.Width = uint(rawsps.Pic_width_in_luma_samples) + self.Height = uint(rawsps.Pic_height_in_luma_samples) + } + return +} diff --git a/common/frame.go b/common/frame.go new file mode 100644 index 0000000..5caca1e --- /dev/null +++ b/common/frame.go @@ -0,0 +1,126 @@ +package common + +import ( + "net" + "time" + + "github.com/Monibuca/engine/v4/codec" + "github.com/pion/rtp" +) + +type NALUSlice net.Buffers +type H264Slice NALUSlice +type H265Slice NALUSlice +type BuffersType interface { + NALUSlice | net.Buffers +} + +func 
SizeOfBuffers[T BuffersType](buf T) (size int) { + for _, b := range buf { + size += len(b) + } + return +} + +type H264NALU []NALUSlice +type H265NALU []NALUSlice + +type AudioSlice []byte +type AACSlice AudioSlice +type G711Slice AudioSlice + +// 裸数据片段 +type RawSlice interface { + NALUSlice | AudioSlice +} + +func (nalu H264NALU) IFrame() bool { + return H264Slice(nalu[0]).Type() == codec.NALU_IDR_Picture +} +func (nalu *H264NALU) Append(slice ...NALUSlice) { + *nalu = append(*nalu, slice...) +} +func (nalu H264Slice) Type() byte { + return nalu[0][0] & 0b0001_1111 +} +func (nalu H265Slice) Type() byte { + return nalu[0][0] & 0x7E >> 1 +} +func (nalu *H265NALU) Append(slice ...NALUSlice) { + *nalu = append(*nalu, slice...) +} +func (nalu H265NALU) IFrame() bool { + switch H265Slice(nalu[0]).Type() { + case codec.NAL_UNIT_CODED_SLICE_BLA, + codec.NAL_UNIT_CODED_SLICE_BLANT, + codec.NAL_UNIT_CODED_SLICE_BLA_N_LP, + codec.NAL_UNIT_CODED_SLICE_IDR, + codec.NAL_UNIT_CODED_SLICE_IDR_N_LP, + codec.NAL_UNIT_CODED_SLICE_CRA: + return true + } + return false +} + +type AVCCFrame []byte // 一帧AVCC格式的数据 +type AnnexBFrame []byte // 一帧AnnexB格式数据 +type BaseFrame struct { + DeltaTime uint32 // 相对上一帧时间戳,毫秒 + SeqInStream uint32 //在一个流中的总序号 + SeqInTrack uint32 //在一个Track中的序号 + BytesIn int // 输入字节数用于计算BPS +} + +type DataFrame[T any] struct { + Timestamp time.Time // 写入时间 + BaseFrame + Value T +} +type AVFrame[T RawSlice] struct { + BaseFrame + IFrame bool + PTS uint32 + DTS uint32 + AVCC net.Buffers // 打包好的AVCC格式 + RTP net.Buffers // 打包好的RTP格式 + RTPPackets []rtp.Packet + Raw []T //裸数据 + canRead bool +} + +func (av *AVFrame[T]) AppendRaw(raw ...T) { + av.Raw = append(av.Raw, raw...) +} + +func (av *AVFrame[T]) AppendAVCC(avcc ...[]byte) { + av.AVCC = append(av.AVCC, avcc...) 
+}
+func (av *AVFrame[T]) AppendRTP(rtp []byte) {
+	av.RTP = append(av.RTP, rtp)
+}
+func (av *AVFrame[T]) AppendRTPPackets(rtp rtp.Packet) {
+	av.RTPPackets = append(av.RTPPackets, rtp)
+}
+
+func (av *AVFrame[T]) Reset() {
+	av.AVCC = nil
+	av.RTP = nil
+	av.RTPPackets = nil
+	av.Raw = nil
+}
+
+func (avcc AVCCFrame) IsIDR() bool {
+	return avcc[0]>>4 == 1
+}
+func (avcc AVCCFrame) IsSequence() bool {
+	return avcc[1] == 0
+}
+func (avcc AVCCFrame) CTS() uint32 { // 24-bit big-endian value held in bytes 2..4
+	return uint32(avcc[2])<<16 | uint32(avcc[3])<<8 | uint32(avcc[4])
+}
+func (avcc AVCCFrame) VideoCodecID() byte {
+	return avcc[0] & 0x0F
+}
+func (avcc AVCCFrame) AudioCodecID() byte {
+	return avcc[0] >> 4
+}
diff --git a/common/index.go b/common/index.go
new file mode 100644
index 0000000..1887e79
--- /dev/null
+++ b/common/index.go
@@ -0,0 +1,57 @@
+package common
+
+import "time"
+
+type Track interface {
+	Get(size int) (result []byte)
+	Put(b []byte)
+}
+
+type AVTrack interface {
+	Track
+	WriteAVCC(ts uint32, frame AVCCFrame) // writes data in AVCC format
+	Flush()
+}
+
+// BPS measures throughput: it accumulates byte counts and periodically turns
+// them into a bytes-per-second figure.
+type BPS struct {
+	ts    time.Time // start of the current measurement window
+	bytes int       // bytes accumulated since ts
+	BPS   int       // rate computed for the most recently completed window
+}
+
+// ComputeBPS adds bytes to the current window. Once more than one second has
+// passed since the window started it stores the average rate in BPS and
+// begins a new window.
+func (bps *BPS) ComputeBPS(bytes int) {
+	bps.bytes += bytes
+	if elapse := time.Since(bps.ts).Seconds(); elapse > 1 {
+		// Divide by the real elapsed time: truncating it to whole seconds
+		// would overstate the rate.
+		bps.BPS = int(float64(bps.bytes) / elapse)
+		// Restart the count, otherwise every later result would be the total
+		// since creation divided by the length of a single window.
+		bps.bytes = 0
+		bps.ts = time.Now()
+	}
+}
+
+// HZ is a clock rate in ticks per second.
+type HZ uint32
+
+// ToMini converts nts, a timestamp in clock ticks, to milliseconds.
+// It returns 0 when the clock rate is 0.
+func (hz HZ) ToMini(nts uint32) uint32 {
+	if hz == 0 {
+		return 0
+	}
+	// Multiply before dividing, in 64 bits, so that rates below 1000 or not a
+	// multiple of 1000 neither divide by zero nor lose precision.
+	return uint32(uint64(nts) * 1000 / uint64(hz))
+}
+
+// ToNTS converts mini, a timestamp in milliseconds, to clock ticks.
+func (hz HZ) ToNTS(mini uint32) uint32 {
+	return uint32(uint64(mini) * uint64(hz) / 1000)
+}
diff --git a/common/ring.go b/common/ring.go
new file mode 100644
index 0000000..7d5d69b
--- /dev/null
+++ b/common/ring.go
@@ -0,0 +1,36 @@
+package common
+
+import (
+	"github.com/Monibuca/engine/v4/util"
+)
+
+type RingBuffer[T any] struct {
+	*util.Ring[T]
+	Size      int
+	MoveCount uint32
+}
+
+func (rb *RingBuffer[T]) Init(n int) *RingBuffer[T] {
+	if rb == nil {
+		rb = new(RingBuffer[T])
+	}
+	rb.Ring = util.NewRing[T](n)
+	rb.Size = n
+	return rb
+}
+
+func (rb RingBuffer[T]) SubRing(rr *util.Ring[T]) *RingBuffer[T] {
+ rb.Ring = rr + rb.MoveCount = 0 + return &rb +} + +func (rb *RingBuffer[T]) MoveNext() *T { + rb.Ring = rb.Next() + rb.MoveCount++ + return &rb.Value +} + +func (rb *RingBuffer[T]) PreValue() *T { + return &rb.Prev().Value +} diff --git a/common/ring_av.go b/common/ring_av.go new file mode 100644 index 0000000..bd83bef --- /dev/null +++ b/common/ring_av.go @@ -0,0 +1,59 @@ +package common + +import ( + "context" + "runtime" + "time" + + "github.com/Monibuca/engine/v4/util" +) + +type AVRing[T RawSlice] struct { + RingBuffer[AVFrame[T]] + ctx context.Context + Poll time.Duration +} + +func (r *AVRing[T]) Init(ctx context.Context, n int) { + r.ctx = ctx + r.RingBuffer.Init(n) +} + +func (r AVRing[T]) SubRing(rr *util.Ring[AVFrame[T]]) *AVRing[T] { + r.Ring = rr + return &r +} + +func (r *AVRing[T]) Step() *AVFrame[T] { + last := &r.Value + current := r.MoveNext() + current.SeqInTrack = r.MoveCount + current.canRead = false + current.Reset() + last.canRead = true + return current +} + +func (r *AVRing[T]) wait() { + if r.Poll == 0 { + runtime.Gosched() + } else { + time.Sleep(r.Poll) + } +} + +func (r *AVRing[T]) Read() *AVFrame[T] { + item := &r.Value + for r.ctx.Err() == nil && !item.canRead { + r.wait() + } + return item +} + +func (r *AVRing[T]) TryRead() *AVFrame[T] { + item := &r.Value + if r.ctx.Err() == nil && !item.canRead { + return nil + } + return item +} diff --git a/ring.go b/common/ring_lock.go similarity index 51% rename from ring.go rename to common/ring_lock.go index 8c156de..78437b0 100644 --- a/ring.go +++ b/common/ring_lock.go @@ -1,92 +1,46 @@ -package engine +package common import ( - "container/ring" "context" "reflect" "sync" "sync/atomic" - "time" ) -type DataItem struct { - Timestamp time.Time - Sequence int - Value interface{} -} - -// TODO: 池化,泛型 - -type LockItem struct { - DataItem +type LockFrame[T any] struct { + DataFrame[T] sync.RWMutex } -type RingBuffer struct { - *ring.Ring - Size int +type LockRing[T any] struct { + 
RingBuffer[LockFrame[T]] + ctx context.Context Flag *int32 - context.Context } -func (rb *RingBuffer) Init(ctx context.Context, n int) *RingBuffer { +func (lr *LockRing[T]) Init(ctx context.Context, n int) *LockRing[T] { var flag int32 - if rb == nil { - rb = &RingBuffer{Context: ctx, Ring: ring.New(n), Flag: &flag} - } else { - rb.Ring = ring.New(n) - rb.Size = n - rb.Context = ctx - rb.Flag = &flag + if lr == nil { + lr = &LockRing[T]{} } - for x := rb.Ring; x.Value == nil; x = x.Next() { - x.Value = new(LockItem) - } - rb.Current().Lock() - return rb + lr.ctx = ctx + lr.RingBuffer.Init(n) + lr.Flag = &flag + lr.Value.Lock() + return lr } -func (rb RingBuffer) Clone() *RingBuffer { - return &rb -} - -func (rb RingBuffer) SubRing(rr *ring.Ring) *RingBuffer { - rb.Ring = rr - return &rb -} - -func (rb *RingBuffer) CurrentValue() interface{} { - return rb.Current().Value -} - -func (rb *RingBuffer) NextValue() interface{} { - return rb.Next().Value.(*LockItem).Value -} - -func (rb *RingBuffer) Current() *LockItem { - return rb.Ring.Value.(*LockItem) -} - -func (rb *RingBuffer) MoveNext() { - rb.Ring = rb.Next() -} - -func (rb *RingBuffer) GetNext() *LockItem { - rb.MoveNext() - return rb.Current() -} - -func (rb *RingBuffer) Read() interface{} { - current := rb.Current() +func (rb *LockRing[T]) Read() *DataFrame[T] { + current := &rb.Value current.RLock() defer current.RUnlock() - return current.Value + return &current.DataFrame } -func (rb *RingBuffer) Step() { - last := rb.Current() +func (rb *LockRing[T]) Step() { + last := &rb.Value if atomic.CompareAndSwapInt32(rb.Flag, 0, 1) { - current := rb.GetNext() + current := rb.MoveNext() current.Lock() last.Unlock() //Flag不为1代表被Dispose了,但尚未处理Done @@ -96,11 +50,11 @@ func (rb *RingBuffer) Step() { } } -func (rb *RingBuffer) Write(value interface{}) { - last := rb.Current() +func (rb *LockRing[T]) Write(value T) { + last := &rb.Value last.Value = value if atomic.CompareAndSwapInt32(rb.Flag, 0, 1) { - current := rb.GetNext() + 
current := rb.MoveNext() current.Lock() last.Unlock() //Flag不为1代表被Dispose了,但尚未处理Done @@ -110,8 +64,8 @@ func (rb *RingBuffer) Write(value interface{}) { } } -func (rb *RingBuffer) Dispose() { - current := rb.Current() +func (rb *LockRing[T]) Dispose() { + current := &rb.Value if atomic.CompareAndSwapInt32(rb.Flag, 0, 2) { current.Unlock() } else if atomic.CompareAndSwapInt32(rb.Flag, 1, 2) { @@ -121,22 +75,22 @@ func (rb *RingBuffer) Dispose() { } } -func (rb *RingBuffer) read() reflect.Value { +func (rb *LockRing[T]) read() reflect.Value { return reflect.ValueOf(rb.Read()) } -func (rb *RingBuffer) nextRead() reflect.Value { +func (rb *LockRing[T]) nextRead() reflect.Value { rb.MoveNext() return rb.read() } -func (rb *RingBuffer) condition() bool { - return rb.Err() == nil && *rb.Flag != 2 +func (rb *LockRing[T]) condition() bool { + return rb.ctx.Err() == nil && *rb.Flag != 2 } // ReadLoop 循环读取,采用了反射机制,不适用高性能场景 // handler入参可以传入回调函数或者channel -func (rb *RingBuffer) ReadLoop(handler interface{}, async bool) { +func (rb *LockRing[T]) ReadLoop(handler interface{}, async bool) { if async { rb.ReadLoopConditionalGo(handler, rb.condition) } else { @@ -145,7 +99,7 @@ func (rb *RingBuffer) ReadLoop(handler interface{}, async bool) { } // goon判断函数用来判断是否继续读取,返回false将终止循环 -func (rb *RingBuffer) ReadLoopConditional(handler interface{}, goon func() bool) { +func (rb *LockRing[T]) ReadLoopConditional(handler interface{}, goon func() bool) { switch t := reflect.ValueOf(handler); t.Kind() { case reflect.Chan: for v := rb.read(); goon(); v = rb.nextRead() { @@ -159,7 +113,7 @@ func (rb *RingBuffer) ReadLoopConditional(handler interface{}, goon func() bool) } // goon判断函数用来判断是否继续读取,返回false将终止循环 -func (r *RingBuffer) ReadLoopConditionalGo(handler interface{}, goon func() bool) { +func (r *LockRing[T]) ReadLoopConditionalGo(handler interface{}, goon func() bool) { switch t := reflect.ValueOf(handler); t.Kind() { case reflect.Chan: for v := r.read(); goon(); v = r.nextRead() { diff --git 
a/common/stream.go b/common/stream.go new file mode 100644 index 0000000..183c4a9 --- /dev/null +++ b/common/stream.go @@ -0,0 +1,9 @@ +package common + +import "context" + +type IStream interface { + context.Context + Update() uint32 + AddTrack(string, Track) +} diff --git a/data_track.go b/data_track.go index c4b0445..8298d99 100644 --- a/data_track.go +++ b/data_track.go @@ -1,58 +1,57 @@ package engine -import ( - "sync" - "time" - "unsafe" -) +// import ( +// "sync" +// "time" +// "unsafe" -type DataTrack struct { - RingBuffer - BaseTrack - *LockItem - sync.Locker // 写入锁,可选,单一写入可以不加锁 -} +// "github.com/Monibuca/engine/v4/util" +// ) -func (s *Stream) NewDataTrack(l sync.Locker) (dt *DataTrack) { - dt = &DataTrack{ - Locker: l, - } - dt.Stream = s - dt.Init(s.Context, 256) - dt.setCurrent() - return -} +// type DataTrack struct { +// LockRing[any] +// BaseTrack +// *LockFrame[any] +// sync.Locker // 写入锁,可选,单一写入可以不加锁 +// } -func (dt *DataTrack) Push(data interface{}) { - if dt.Locker != nil { - dt.Lock() - defer dt.Unlock() - } - dt.Timestamp = time.Now() - dt.addBytes(int(unsafe.Sizeof(data))) - dt.GetBPS() - if time.Since(dt.ts) > 1000 { - dt.resetBPS() - } - dt.Write(data) - dt.setCurrent() -} +// func (s *Stream) NewDataTrack(l sync.Locker) (dt *DataTrack) { +// dt = &DataTrack{ +// Locker: l, +// } +// dt.Stream = s +// dt.Init(s.Context, 256) +// dt.setCurrent() +// return +// } -func (at *DataTrack) setCurrent() { - at.LockItem = at.Current() -} +// func (dt *DataTrack) Push(data any) { +// if dt.Locker != nil { +// dt.Lock() +// defer dt.Unlock() +// } +// dt.Timestamp = time.Now() +// dt.bytesIn = (int(unsafe.Sizeof(data))) +// dt.GetBPS() +// dt.Write(data) +// dt.setCurrent() +// } -func (dt *DataTrack) Play(onData func(*DataItem), exit1, exit2 <-chan struct{}) { - dr := dt.Clone() - for dp := dr.Read(); ; dp = dr.Read() { - select { - case <-exit1: - return - case <-exit2: - return - default: - onData(dp.(*DataItem)) - dr.MoveNext() - } - } -} +// 
func (at *DataTrack) setCurrent() { +// at.LockFrame = at.Current() +// } + +// func (dt *DataTrack) Play(onData func(DataFrame[any]), exit1, exit2 <-chan struct{}) { +// dr := util.Clone(dt.LockRing) +// for dp := dr.Read(); ; dp = dr.Read() { +// select { +// case <-exit1: +// return +// case <-exit2: +// return +// default: +// onData(dp) +// dr.MoveNext() +// } +// } +// } diff --git a/events.go b/events.go new file mode 100644 index 0000000..dbee6fd --- /dev/null +++ b/events.go @@ -0,0 +1,20 @@ +package engine + +import ( + "github.com/asaskevich/EventBus" +) + +type TransCodeReq struct { + *Subscriber + RequestCodec string +} + +const ( + Event_SUBSCRIBE = "Subscribe" + Event_UNSUBSCRIBE = "UnSubscibe" + Event_STREAMCLOSE = "StreamClose" + Event_PUBLISH = "Publish" + Event_REQUEST_TRANSAUDIO = "RequestTransAudio" +) + +var Bus = EventBus.New() \ No newline at end of file diff --git a/go.mod b/go.mod index 735eda8..238e839 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,24 @@ -module github.com/Monibuca/engine/v3 +module github.com/Monibuca/engine/v4 -go 1.13 +go 1.18 require ( github.com/BurntSushi/toml v0.4.1 - github.com/Monibuca/utils/v3 v3.0.5 + github.com/cnotch/ipchub v1.1.0 github.com/google/uuid v1.3.0 github.com/logrusorgru/aurora v2.0.3+incompatible + github.com/mattn/go-colorable v0.1.8 github.com/pion/rtp v1.7.4 github.com/pkg/errors v0.9.1 + github.com/q191201771/naza v0.19.1 + golang.org/x/sync v0.0.0-20201207232520-09787c993a3a +) + +require ( + github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/mattn/go-isatty v0.0.12 // indirect + github.com/pion/randutil v0.1.0 // indirect + github.com/stretchr/testify v1.7.0 // indirect + golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359 // indirect ) diff --git a/go.sum b/go.sum index b7b5b5a..d491f8f 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw= github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= -github.com/Monibuca/utils/v3 v3.0.4 h1:PssGhww+qePzw4qpB3g2DCG5Buru0Cu64UiqtAPuHjc= -github.com/Monibuca/utils/v3 v3.0.4/go.mod h1:RpNS95gapWs6gimwh8Xn2x72FN5tO7Powabj7dTFyvE= -github.com/Monibuca/utils/v3 v3.0.5 h1:w14x0HkWTbF4MmHbINLlOwe4VJNoSOeaQChMk5E/4es= -github.com/Monibuca/utils/v3 v3.0.5/go.mod h1:RpNS95gapWs6gimwh8Xn2x72FN5tO7Powabj7dTFyvE= +github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM= +github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef/go.mod h1:JS7hed4L1fj0hXcyEejnW57/7LCetXggd+vwrRnYeII= github.com/cnotch/apirouter v0.0.0-20200731232942-89e243a791f3/go.mod h1:5deJPLON/x/s2dLOQfuKS0lenhOIT4xX0pvtN/OEIuY= github.com/cnotch/ipchub v1.1.0 h1:hH0lh2mU3AZXPiqMwA0pdtqrwo7PFIMRGush9OobMUs= github.com/cnotch/ipchub v1.1.0/go.mod h1:2PbeBs2q2VxxTVCn1eYCDwpAWuVXbq1+N0FU7GimOH4= @@ -13,14 +11,11 @@ github.com/cnotch/queue v0.0.0-20200326024423-6e88bdbf2ad4/go.mod h1:zOssjAlNusO github.com/cnotch/queue v0.0.0-20201224060551-4191569ce8f6/go.mod h1:zOssjAlNusOxvtaqT+EMA+Iyi8rrtKr4/XfzN1Fgoeg= github.com/cnotch/scheduler v0.0.0-20200522024700-1d2da93eefc5/go.mod h1:F4GE3SZkJZ8an1Y0ZCqvSM3jeozNuKzoC67erG1PhIo= github.com/cnotch/xlog v0.0.0-20201208005456-cfda439cd3a0/go.mod h1:RW9oHsR79ffl3sR3yMGgxYupMn2btzdtJUwoxFPUE5E= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emitter-io/address v1.0.0/go.mod h1:GfZb5+S/o8694B1GMGK2imUYQyn2skszMvGNA5D84Ug= 
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/funny/slab v0.0.0-20180511031532-b1fad5e5d478 h1:Db9StoJ6RZN3YttC0Pm0I4Y5izITRYch3RMbT59BYN0= -github.com/funny/slab v0.0.0-20180511031532-b1fad5e5d478/go.mod h1:0j1+svBH8ABEIPdUP0AIg4qedsybnXGJBakCEw8cfoo= -github.com/funny/utest v0.0.0-20161029064919-43870a374500 h1:Z0r1CZnoIWFB/Uiwh1BU5FYmuFe6L5NPi6XWQEmsTRg= -github.com/funny/utest v0.0.0-20161029064919-43870a374500/go.mod h1:mUn39tBov9jKnTWV1RlOYoNzxdBFHiSzXWdY1FoNGGg= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -53,8 +48,9 @@ github.com/q191201771/naza v0.19.1/go.mod h1:5LeGupZZFtYP1g/S203n9vXoUNVdlRnPIfM github.com/sqs/goreturns v0.0.0-20181028201513-538ac6014518/go.mod h1:CKI4AZ4XmGV240rTHfO0hfE83S6/a3/Q1siZJ/vXf7A= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -66,11 +62,11 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359 h1:2B5p2L5IfGiD7+b9BOoRMC6DgObAVZV+Fsp050NqXik= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= diff --git a/hook.go b/hook.go deleted file mode 100644 index bbeb65c..0000000 --- a/hook.go +++ /dev/null @@ -1,98 +0,0 @@ -package engine - -import ( - "context" - "reflect" - "runtime" - "sync" -) - -type TransCodeReq struct { - *Subscriber - RequestCodec string -} - -const ( - HOOK_SUBSCRIBE = "Subscribe" - HOOK_UNSUBSCRIBE = "UnSubscibe" - HOOK_STREAMCLOSE = "StreamClose" - HOOK_PUBLISH = "Publish" - HOOK_REQUEST_TRANSAUDIO = "RequestTransAudio" -) - -var Hooks = make(map[string]*RingBuffer) -var hookLocker sync.Mutex - -func addHookRing(name string) (r *RingBuffer) { - r = r.Init(context.TODO(), 10) - Hooks[name] = r - return -} - -func AddHooks(hooks map[string]interface{}) { - 
hookLocker.Lock() - for name, hook := range hooks { - rl, ok := Hooks[name] - if !ok { - rl = addHookRing(name) - } - vf := reflect.ValueOf(hook) - if vf.Kind() != reflect.Func { - panic("callback is not a function") - } - go rl.Clone().ReadLoop(vf.Call, false) - } - hookLocker.Unlock() -} - -func addHook(name string, callback interface{}, async bool) { - hookLocker.Lock() - rl, ok := Hooks[name] - if !ok { - rl = addHookRing(name) - } - hookLocker.Unlock() - vf := reflect.ValueOf(callback) - if vf.Kind() != reflect.Func { - panic("callback is not a function") - } - rl.Clone().ReadLoop(vf.Call, async) -} - -func AddHook(name string, callback interface{}) { - addHook(name, callback, false) -} - -func AddHookGo(name string, callback interface{}) { - addHook(name, callback, true) -} - -func AddHookConditional(name string, callback interface{}, goon func() bool) { - hookLocker.Lock() - rl, ok := Hooks[name] - if !ok { - rl = addHookRing(name) - } - hookLocker.Unlock() - vf := reflect.ValueOf(callback) - if vf.Kind() != reflect.Func { - panic("callback is not a function") - } - rl.Clone().ReadLoopConditional(vf.Call, goon) -} - -func TriggerHook(name string, payload ...interface{}) { - args := make([]reflect.Value, len(payload)) - for i, arg := range payload { - args[i] = reflect.ValueOf(arg) - } - defer runtime.Gosched() //防止连续写入 - hookLocker.Lock() - defer hookLocker.Unlock() - if rl, ok := Hooks[name]; ok { - rl.Write(args) - } else { - rl = addHookRing(name) - rl.Write(args) - } -} diff --git a/hook_test.go b/hook_test.go deleted file mode 100644 index 3ef55aa..0000000 --- a/hook_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package engine - -import ( - "fmt" - "sync" - "testing" - "time" -) - -func TestAddHook(t *testing.T) { - t.Run(t.Name(), func(t *testing.T) { - var wg sync.WaitGroup - wg.Add(1) - go AddHook("test", func(a, b int) { - fmt.Printf("on test,%d,%d", a, b) - }) - go AddHook("done", wg.Done) - TriggerHook("test", 2, 10) - go AddHook("test", func(a, b int) 
{ - fmt.Printf("on test,%d,%d", a, b) - }) - <-time.After(time.Millisecond * 100) - TriggerHook("test", 1, 12) - <-time.After(time.Millisecond * 100) - TriggerHook("done") - wg.Wait() - }) -} diff --git a/main.go b/main.go index b679533..14d5c24 100644 --- a/main.go +++ b/main.go @@ -13,33 +13,45 @@ import ( "strings" "time" // colorable - "github.com/Monibuca/utils/v3" "github.com/google/uuid" - "github.com/Monibuca/engine/v3/util" + "github.com/Monibuca/engine/v4/util" "github.com/BurntSushi/toml" . "github.com/logrusorgru/aurora" ) -var Version = "3.2.2" +var Version = "4.0.0" + +type Second int + +func (s Second) Duration() time.Duration { + return time.Duration(s) * time.Second +} + +// StreamConfig 流的三级覆盖配置(全局,插件,流) +type StreamConfig struct { + EnableAudio bool + EnableVideo bool + AutoReconnect bool // 自动重连 + PullOnStart bool // 启动时拉流 + PullOnSubscribe bool // 订阅时自动拉流 + PublishTimeout Second // 发布无数据超时 + WaitTimeout Second // 等待流超时 + WaitCloseTimeout Second // 延迟自动关闭(无订阅时) +} var ( config = &struct { - EnableAudio bool - EnableVideo bool - PublishTimeout time.Duration - MaxRingSize int - AutoCloseAfter int - RTPReorder bool - }{true, true, 60, 256, -1, false} + StreamConfig + RTPReorder bool + }{StreamConfig{true, true, true, true, true, 10, 10, 0}, false} // ConfigRaw 配置信息的原始数据 - ConfigRaw []byte - StartTime time.Time //启动时间 - Plugins = make(map[string]*PluginConfig) // Plugins 所有的插件配置 - HasTranscoder bool - Ctx context.Context - settingDir string + ConfigRaw []byte + StartTime time.Time //启动时间 + Plugins = make(map[string]*PluginConfig) // Plugins 所有的插件配置 + Ctx context.Context + settingDir string ) //PluginConfig 插件配置定义 @@ -61,11 +73,11 @@ func (opt *PluginConfig) Install(run func()) { opt.Version = parts[len(parts)-1] } Plugins[opt.Name] = opt - utils.Print(Green("install plugin"), BrightCyan(opt.Name), BrightBlue(opt.Version)) + util.Print(Green("install plugin"), BrightCyan(opt.Name), BrightBlue(opt.Version)) } func init() { - if parts := 
strings.Split(utils.CurrentDir(), "@"); len(parts) > 1 { + if parts := strings.Split(util.CurrentDir(), "@"); len(parts) > 1 { Version = parts[len(parts)-1] } } @@ -74,19 +86,19 @@ func init() { func Run(ctx context.Context, configFile string) (err error) { Ctx = ctx if err := util.CreateShutdownScript(); err != nil { - utils.Print(Red("create shutdown script error:"), err) + util.Print(Red("create shutdown script error:"), err) } StartTime = time.Now() if ConfigRaw, err = ioutil.ReadFile(configFile); err != nil { - utils.Print(Red("read config file error:"), err) + util.Print(Red("read config file error:"), err) return } settingDir = filepath.Join(filepath.Dir(configFile), ".m7s") if err = os.MkdirAll(settingDir, 0755); err != nil { - utils.Print(Red("create dir .m7s error:"), err) + util.Print(Red("create dir .m7s error:"), err) return } - utils.Print(BgGreen(Black("Ⓜ starting m7s ")), BrightBlue(Version)) + util.Print(BgGreen(Black("Ⓜ starting m7s ")), BrightBlue(Version)) var cg map[string]interface{} if _, err = toml.Decode(string(ConfigRaw), &cg); err == nil { if cfg, ok := cg["Engine"]; ok { @@ -94,7 +106,6 @@ func Run(ctx context.Context, configFile string) (err error) { if err = json.Unmarshal(b, config); err != nil { log.Println(err) } - config.PublishTimeout *= time.Second } for name, config := range Plugins { if cfg, ok := cg[name]; ok { @@ -112,7 +123,7 @@ func Run(ctx context.Context, configFile string) (err error) { } } } else { - utils.Print(Red("decode config file error:"), err) + util.Print(Red("decode config file error:"), err) } UUID := uuid.NewString() reportTimer := time.NewTimer(time.Minute) @@ -122,7 +133,7 @@ func Run(ctx context.Context, configFile string) (err error) { req.Header.Set("uuid", UUID) var c http.Client for { - req.Header.Set("streams", fmt.Sprintf("%d", len(Streams.m))) + req.Header.Set("streams", fmt.Sprintf("%d", Streams.Len())) c.Do(req) select { case <-ctx.Done(): diff --git a/publisher.go b/publisher.go new file mode 
100644 index 0000000..6e5cba5 --- /dev/null +++ b/publisher.go @@ -0,0 +1,5 @@ +package engine + +type Publisher interface { + OnStateChange(oldState StreamState, newState StreamState) bool +} diff --git a/ring_av.go b/ring_av.go deleted file mode 100644 index b8bae06..0000000 --- a/ring_av.go +++ /dev/null @@ -1,91 +0,0 @@ -package engine - -import ( - "container/ring" - "context" - "runtime" - "time" -) - -type AVItem struct { - DataItem - canRead bool -} - -type AVRing struct { - RingBuffer - poll time.Duration -} - -func (r *AVRing) Init(ctx context.Context, n int) *AVRing { - r.Ring = ring.New(n) - r.Context = ctx - r.Size = n - for x := r.Ring; x.Value == nil; x = x.Next() { - x.Value = new(AVItem) - } - return r -} -func (r AVRing) Clone() *AVRing { - return &r -} - -func (r AVRing) SubRing(rr *ring.Ring) *AVRing { - r.Ring = rr - return &r -} -func (r *AVRing) Write(value interface{}) { - last := r.Current() - last.Value = value - r.GetNext().canRead = false - last.canRead = true -} - -func (r *AVRing) Step() { - last := r.Current() - r.GetNext().canRead = false - last.canRead = true -} - -func (r *AVRing) wait() { - if r.poll == 0 { - runtime.Gosched() - } else { - time.Sleep(r.poll) - } -} - -func (r *AVRing) CurrentValue() interface{} { - return r.Current().Value -} - -func (r *AVRing) Current() *AVItem { - return r.Ring.Value.(*AVItem) -} - -func (r *AVRing) NextValue() interface{} { - return r.Next().Value.(*AVItem).Value -} -func (r *AVRing) PreItem() *AVItem { - return r.Prev().Value.(*AVItem) -} -func (r *AVRing) GetNext() *AVItem { - r.MoveNext() - return r.Current() -} - -func (r *AVRing) Read() (item *AVItem, value interface{}) { - current := r.Current() - for r.Err() == nil && !current.canRead { - r.wait() - } - return current, current.Value -} - -func (r *AVRing) TryRead() (item *AVItem, value interface{}) { - current := r.Current() - if r.Err() == nil && !current.canRead { - return nil, nil - } - return current, current.Value -} diff --git 
a/rtp.go b/rtp.go deleted file mode 100644 index db904b6..0000000 --- a/rtp.go +++ /dev/null @@ -1,106 +0,0 @@ -package engine - -import ( - "time" - - "github.com/Monibuca/utils/v3" - "github.com/pion/rtp" -) - -// 对rtp包进行解封装,并修复时间戳,包括时间戳跳跃 -type RTPDemuxer struct { - rtp.Packet - PTS uint32 // 修复后的时间戳(毫秒) - lastTs uint32 // 记录上一个收到的时间戳 - lastSeq uint16 // 记录上一个收到的序列号 - lastSeq2 uint16 // 记录上上一个收到的序列号 - timeBase *time.Duration // 采样率 - timestamp time.Time // 客观时间用于计算耗时 - orderMap map[uint16]RTPNalu // 缓存,用于乱序重排 - OnDemux func(uint32, []byte) -} - -func (r *RTPDemuxer) tryPop(ts uint32, payload []byte) { - for { - r.lastSeq++ - r.push(ts, payload) - if next, ok := r.orderMap[r.lastSeq+1]; ok { - delete(r.orderMap, r.lastSeq+1) - ts = next.PTS - payload = next.Payload - } else { - break - } - } -} -func (r *RTPDemuxer) push(ts uint32, payload []byte) { - if ts > r.lastTs { - delta := uint32(uint64(ts-r.lastTs) * 1000 / uint64(*r.timeBase)) - if delta > 1000 { // 时间戳跳跃 - r.PTS += uint32(time.Since(r.timestamp) / time.Millisecond) - } else { - r.PTS += delta - } - - } else if r.lastTs > ts { - delta := uint32(uint64(r.lastTs-ts) * 1000 / uint64(*r.timeBase)) - if delta > 1000 { // 时间戳跳跃 - r.PTS += uint32(time.Since(r.timestamp) / time.Millisecond) - } else { - r.PTS -= delta - } - } - r.timestamp = time.Now() - r.OnDemux(r.PTS, r.Payload) - r.lastTs = ts -} -func (r *RTPDemuxer) Push(rtpRaw []byte) { - if err := r.Unmarshal(rtpRaw); err != nil { - utils.Println("RTP Unmarshal error", err) - return - } - if config.RTPReorder { - if r.SequenceNumber < r.lastSeq { - return - } else if r.lastSeq == 0 { - r.timestamp = time.Now() - r.tryPop(r.Timestamp, r.Payload) - r.lastSeq = r.SequenceNumber - } else if r.lastSeq+1 == r.SequenceNumber { - r.tryPop(r.Timestamp, r.Payload) - } else if _, ok := r.orderMap[r.SequenceNumber]; !ok { - r.orderMap[r.SequenceNumber] = RTPNalu{ - Payload: r.Payload, - PTS: r.Timestamp, - } - // 20个包都没有出现,丢弃 - if len(r.orderMap) > 20 { - 
utils.Println("RTP SequenceNumber lost", r.lastSeq+1) - r.lastSeq++ - next, ok := r.orderMap[r.lastSeq] - for !ok { - r.lastSeq++ - next, ok = r.orderMap[r.lastSeq] - } - delete(r.orderMap, r.lastSeq) - r.tryPop(next.PTS, next.Payload) - } - } - return - } else { - if r.lastSeq == 0 { - r.timestamp = time.Now() - r.lastSeq = r.SequenceNumber - } else if r.SequenceNumber == r.lastSeq2+1 { // 本次序号是上上次的序号+1 说明中间隔了一个错误序号(某些rtsp流中的rtcp包写成了rtp包导致的) - r.lastSeq = r.SequenceNumber - } else { - r.lastSeq2 = r.lastSeq - r.lastSeq = r.SequenceNumber - if r.lastSeq != r.lastSeq2+1 { //序号不连续 - utils.Println("RTP SequenceNumber error", r.lastSeq2, r.lastSeq) - return - } - } - } - r.push(r.Timestamp, r.Payload) -} diff --git a/rtp_audio.go b/rtp_audio.go deleted file mode 100644 index 7cbace9..0000000 --- a/rtp_audio.go +++ /dev/null @@ -1,46 +0,0 @@ -package engine - -import ( - "time" - - "github.com/Monibuca/utils/v3" - "github.com/Monibuca/utils/v3/codec" -) - -type RTPAudio struct { - RTPDemuxer `json:"-"` - *AudioTrack -} - -func (s *Stream) NewRTPAudio(codec byte) (r *RTPAudio) { - r = &RTPAudio{ - AudioTrack: s.NewAudioTrack(codec), - } - if config.RTPReorder { - r.orderMap = make(map[uint16]RTPNalu) - } - r.timeBase = &r.timebase - r.OnDemux = r.push - return -} - -// 该函数只执行一次 -func (v *RTPAudio) push(ts uint32, payload []byte) { - switch v.CodecID { - case codec.CodecID_AAC: - v.OnDemux = func(ts uint32, payload []byte) { - for _, payload := range codec.ParseRTPAAC(payload) { - v.PushRaw(ts, payload) - } - } - case codec.CodecID_PCMA, codec.CodecID_PCMU: - v.OnDemux = func(ts uint32, payload []byte) { - v.PushRaw(ts, payload) - } - default: - utils.Println("RTP Publisher: Unsupported codec", v.CodecID) - return // TODO - } - v.timestamp = time.Now() - v.OnDemux(ts, payload) -} diff --git a/rtp_video.go b/rtp_video.go deleted file mode 100644 index 197fb8f..0000000 --- a/rtp_video.go +++ /dev/null @@ -1,343 +0,0 @@ -package engine - -import ( - "bytes" - 
"encoding/binary" - - "github.com/Monibuca/utils/v3" - "github.com/Monibuca/utils/v3/codec" - // "github.com/pion/rtp/codecs" -) - -const ( - fuaStartBitmask = 0b1000_0000 - fuaEndBitmask = 0b0100_0000 - stapaNALULengthSize = 2 - naluRefIdcBitmask = 0x60 -) - -var sizeMap = map[uint8]int{ - codec.NALU_STAPA: 1, - codec.NALU_STAPB: 3, - codec.NALU_MTAP16: 4, - codec.NALU_MTAP24: 5, - codec.NALU_FUA: 2, - codec.NALU_FUB: 4, -} - -type RTPNalu struct { - Payload []byte - PTS uint32 - Next *RTPNalu -} - -type RTPVideo struct { - RTPDemuxer `json:"-"` - *VideoTrack - fuaBuffer *bytes.Buffer - demuxNalu func([]byte) *RTPNalu -} - -func (s *Stream) NewRTPVideo(codecID byte) (r *RTPVideo) { - r = &RTPVideo{ - VideoTrack: s.NewVideoTrack(codecID), - } - if config.RTPReorder { - r.orderMap = make(map[uint16]RTPNalu) - } - r.timeBase = &r.timebase - switch codecID { - case codec.CodecID_H264: - r.demuxNalu = r.demuxH264 - case codec.CodecID_H265: - r.demuxNalu = r.demuxH265 - } - r.OnDemux = r._demux - return -} - -func (v *RTPVideo) demuxH264(payload []byte) (result *RTPNalu) { - naluLen := len(payload) - if naluLen == 0 { - return - } - naluType := payload[0] & naluTypeBitmask - lenSize := sizeMap[naluType] - switch naluType { - case codec.NALU_STAPA, codec.NALU_STAPB: - current := &result - for currOffset, naluSize := lenSize, 0; currOffset < naluLen; currOffset += naluSize { - naluSize = int(binary.BigEndian.Uint16(payload[currOffset:])) - if currOffset += stapaNALULengthSize; naluLen < currOffset+naluSize { - utils.Printf("STAP-A declared size(%d) is larger then buffer(%d)", naluSize, naluLen-currOffset) - return - } - *current = &RTPNalu{Payload: payload[currOffset : currOffset+naluSize], PTS: v.PTS} - current = &(*current).Next - } - case codec.NALU_MTAP16, codec.NALU_MTAP24: - current := &result - for currOffset, naluSize := 3, 0; currOffset < naluLen; currOffset += naluSize { - naluSize = int(binary.BigEndian.Uint16(payload[currOffset:])) - currOffset += lenSize - if 
naluLen < currOffset+naluSize { - utils.Printf("MTAP16 declared size(%d) is larger then buffer(%d)", naluSize, naluLen-currOffset) - return - } - ts := binary.BigEndian.Uint16(payload[currOffset+3:]) - if lenSize == 5 { - ts = (ts << 8) | uint16(payload[currOffset+5]) - } - *current = &RTPNalu{Payload: payload[currOffset : currOffset+naluSize], PTS: v.PTS + uint32(ts)} - current = &(*current).Next - } - /* - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PayloadHdr (Type=29) | FU header | DONL (cond) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-| - | DONL (cond) | | - |-+-+-+-+-+-+-+-+ | - | FU payload | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - */ - /* - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PayloadHdr (Type=28) | NALU 1 Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 HDR | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ NALU 1 Data | - | . . . | - | | - + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | . . . | NALU 2 Size | NALU 2 HDR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 HDR | | - +-+-+-+-+-+-+-+-+ NALU 2 Data | - | . . . 
| - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - */ - case codec.NALU_FUA, codec.NALU_FUB: - if naluLen < lenSize { - utils.Printf("Payload is not large enough to be FU-A") - return - } - if payload[1]&fuaStartBitmask != 0 { - v.fuaBuffer = bytes.NewBuffer([]byte{}) - v.fuaBuffer.WriteByte((payload[0] & naluRefIdcBitmask) | (payload[1] & naluTypeBitmask)) - } - if v.fuaBuffer != nil { - if v.fuaBuffer.Write(payload[lenSize:]); payload[1]&fuaEndBitmask != 0 { - result = &RTPNalu{Payload: v.fuaBuffer.Bytes(), PTS: v.PTS} - v.fuaBuffer = nil - } - } - default: - return &RTPNalu{Payload: payload, PTS: v.PTS} - } - return -} - -func (v *RTPVideo) demuxH265(payload []byte) (result *RTPNalu) { - naluLen := len(payload) - if naluLen == 0 { - return - } - naluType := payload[0] & naluTypeBitmask_hevc >> 1 - switch naluType { - // 4.4.2. Aggregation Packets (APs) (p25) - /* - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PayloadHdr (Type=48) | NALU 1 DONL | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 Size | NALU 1 HDR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | NALU 1 Data . . . | - | | - + . . . +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | NALU 2 DOND | NALU 2 Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 HDR | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ NALU 2 Data | - | | - | . . . 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : ...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - */ - case codec.NAL_UNIT_UNSPECIFIED_48: - currOffset := 2 - if v.UsingDonlField { - currOffset = 4 - } - current := &result - for naluSize := 0; currOffset < naluLen; currOffset += naluSize { - naluSize = int(binary.BigEndian.Uint16(payload[currOffset:])) - currOffset += 2 - if naluLen < currOffset+naluSize { - utils.Printf("STAP-A declared size(%d) is larger then buffer(%d)", naluSize, naluLen-currOffset) - return - } - *current = &RTPNalu{Payload: payload[currOffset : currOffset+naluSize], PTS: v.PTS} - current = &(*current).Next - if v.UsingDonlField { - currOffset += 1 - } - } - // 4.4.3. Fragmentation Units (p29) - /* - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PayloadHdr (Type=49) | FU header | DONL (cond) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-| - | DONL (cond) | | - |-+-+-+-+-+-+-+-+ | - | FU payload | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : ...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +---------------+ - |0|1|2|3|4|5|6|7| - +-+-+-+-+-+-+-+-+ - |S|E| FuType | - +---------------+ - */ - case codec.NAL_UNIT_UNSPECIFIED_49: - offset := 3 - if v.UsingDonlField { - offset = 5 - } - if naluLen < offset { - return - } - fuheader := payload[2] - if naluType = fuheader & 0b00111111; fuheader&fuaStartBitmask != 0 { - v.fuaBuffer = bytes.NewBuffer([]byte{}) - payload[0] = payload[0]&0b10000001 | (naluType << 1) - v.fuaBuffer.Write(payload[:2]) - } - if v.fuaBuffer != nil { - if v.fuaBuffer.Write(payload[offset:]); fuheader&fuaEndBitmask != 0 { - result = &RTPNalu{Payload: v.fuaBuffer.Bytes(), PTS: v.PTS} - v.fuaBuffer = nil - } - } - default: - return &RTPNalu{Payload: payload, PTS: v.PTS} - } - return -} - -// func (p *RTPVideo) 
demuxH265(payload []byte) (result *RTPNalu) { -// var h265 codecs.H265Packet -// if _, err := h265.Unmarshal(payload); err == nil { -// switch v := h265.Packet().(type) { -// case (*codecs.H265FragmentationUnitPacket): -// if v.FuHeader().S() { -// p.fuaBuffer = bytes.NewBuffer([]byte{}) -// payload[0] = payload[0]&0b10000001 | ((byte(v.FuHeader()) & 0b00111111) << 1) -// p.fuaBuffer.Write(payload[:2]) -// } -// p.fuaBuffer.Write(v.Payload()) -// if v.FuHeader().E() { -// result = &RTPNalu{Payload: p.fuaBuffer.Bytes(), PTS: p.Timestamp} -// p.fuaBuffer = nil -// } -// case (*codecs.H265AggregationPacket): -// head := &RTPNalu{Payload: v.FirstUnit().NalUnit(), PTS: p.Timestamp} -// for _, nalu := range v.OtherUnits() { -// head.Next = &RTPNalu{Payload: nalu.NalUnit(), PTS: p.Timestamp} -// head = head.Next -// } -// return head -// case (*codecs.H265PACIPacket): -// return &RTPNalu{Payload: v.Payload(), PTS: p.Timestamp} -// case (*codecs.H265SingleNALUnitPacket): -// return &RTPNalu{Payload: v.Payload(), PTS: p.Timestamp} -// } -// } -// return -// } - -// func (p *RTPVideo) _demux(ts uint32, payload []byte) { -// p.timestamp = time.Now() -// if last := p.demuxNalu(payload); last != nil { -// p.OnDemux = func(ts uint32, payload []byte) { -// if current := p.demuxNalu(payload); current != nil { -// if last.PTS > current.PTS { //有B帧 -// var b B -// utils.Println("rtp has B-frame!!") -// for heap.Push(&b, last); last.Next != nil; last = last.Next { -// heap.Push(&b, last.Next) -// } -// for heap.Push(&b, current); current.Next != nil; current = current.Next { -// heap.Push(&b, current.Next) -// } -// p.OnDemux = func(ts uint32, payload []byte) { -// if current := p.demuxNalu(payload); current != nil { -// if current.PTS > b.MaxTS { -// for b.Len() > 0 { -// el := heap.Pop(&b).(struct { -// DTS uint32 -// *RTPNalu -// }) -// p.PushNalu(el.DTS, (el.PTS - el.DTS), el.Payload) -// } -// b.MaxTS = 0 -// } -// for heap.Push(&b, current); current.Next != nil; current = 
current.Next { -// heap.Push(&b, current.Next) -// } -// } -// } -// return -// } -// p.PushNalu(p.PTS, 0, last.Payload) -// for last = current; last.Next != nil; last = last.Next { -// p.PushNalu(p.PTS, 0, last.Payload) -// } -// } -// } -// } -// } - -func (p *RTPVideo) _demux(ts uint32, payload []byte) { - if nalus := p.demuxNalu(payload); nalus != nil { - startPTS := nalus.PTS - dtsEst := NewDTSEstimator() - dts := dtsEst.Feed(0) - p.PushNalu(dts, 0, nalus.Payload) - var cache [][]byte - pts := startPTS - for nalus = nalus.Next; nalus != nil; nalus = nalus.Next { - pts = nalus.PTS - startPTS - dts = dtsEst.Feed(pts) - p.PushNalu(dts, pts-dts, nalus.Payload) - } - p.OnDemux = func(ts uint32, payload []byte) { - for nalus := p.demuxNalu(p.Payload); nalus != nil; nalus = nalus.Next { - if len(cache) == 0 { - pts = nalus.PTS - startPTS - dts = dtsEst.Feed(pts) - } - cache = append(cache, nalus.Payload) - if p.Marker { - p.PushNalu(dts, pts-dts, cache...) - cache = cache[:0] - } - } - } - } -} diff --git a/stream.go b/stream.go index 635f8ca..e12740c 100644 --- a/stream.go +++ b/stream.go @@ -2,259 +2,228 @@ package engine import ( "context" - "sync" + "sync/atomic" "time" - utils "github.com/Monibuca/utils/v3" + "github.com/Monibuca/engine/v4/track" + "github.com/Monibuca/engine/v4/util" . 
"github.com/logrusorgru/aurora" - "github.com/pkg/errors" ) -type StreamCollection struct { - sync.RWMutex - m map[string]*Stream +type StreamState byte +type StreamAction byte + +const ( + STATE_WAITPUBLISH StreamState = iota // 等待发布者状态 + STATE_WAITTRACK // 等待Track + STATE_PUBLISHING // 正在发布流状态 + STATE_WAITCLOSE // 等待关闭状态(自动关闭延时开启) + STATE_CLOSED +) + +const ( + ACTION_PUBLISH StreamAction = iota + ACTION_TIMEOUT // 发布流长时间没有数据/长时间没有发布者发布流/等待关闭时间到 + ACTION_PUBLISHLOST // 发布者意外断开 + ACTION_CLOSE // 主动关闭流 + ACTION_LASTLEAVE // 最后一个订阅者离开 + ACTION_FIRSTENTER // 第一个订阅者进入 +) + +var StreamFSM = [STATE_CLOSED + 1]map[StreamAction]StreamState{ + { + ACTION_PUBLISH: STATE_WAITTRACK, + ACTION_LASTLEAVE: STATE_CLOSED, + ACTION_CLOSE: STATE_CLOSED, + }, + { + ACTION_PUBLISHLOST: STATE_WAITPUBLISH, + ACTION_TIMEOUT: STATE_PUBLISHING, + ACTION_CLOSE: STATE_CLOSED, + }, + { + ACTION_PUBLISHLOST: STATE_WAITPUBLISH, + ACTION_TIMEOUT: STATE_WAITPUBLISH, + ACTION_LASTLEAVE: STATE_WAITCLOSE, + ACTION_CLOSE: STATE_CLOSED, + }, + { + ACTION_PUBLISHLOST: STATE_CLOSED, + ACTION_TIMEOUT: STATE_CLOSED, + ACTION_FIRSTENTER: STATE_PUBLISHING, + ACTION_CLOSE: STATE_CLOSED, + }, + {}, } -func (sc *StreamCollection) GetStream(streamPath string) *Stream { - sc.RLock() - defer sc.RUnlock() - if s, ok := sc.m[streamPath]; ok { - return s +// Streams 所有的流集合 +var Streams = util.Map[string, *Stream]{Map: make(map[string]*Stream)} + +type SubscribeAction *Subscriber +type UnSubscibeAction *Subscriber + +// Stream 流定义 +type Stream struct { + context.Context + cancel context.CancelFunc + Publisher + State StreamState + timeout *time.Timer //当前状态的超时定时器 + actionChan chan any + Config StreamConfig + URL string //远程地址,仅远程拉流有值 + StreamPath string + StartTime time.Time //流的创建时间 + Subscribers util.Slice[*Subscriber] // 订阅者 + Tracks + FrameCount uint32 //帧总数 +} + +func (r *Stream) Register(streamPath string) (result bool) { + if r == nil { + r = &Stream{ + Config: config.StreamConfig, + } } - return nil -} -func 
(sc *StreamCollection) Delete(streamPath string) { - sc.Lock() - delete(sc.m, streamPath) - sc.Unlock() -} - -func (sc *StreamCollection) ToList() (r []*Stream) { - sc.RLock() - defer sc.RUnlock() - for _, s := range sc.m { - r = append(r, s) + r.StreamPath = streamPath + if result = Streams.Add(streamPath, r); result { + r.actionChan = make(chan any, 1) + r.StartTime = time.Now() + r.timeout = time.NewTimer(r.Config.WaitTimeout.Duration()) + r.Context, r.cancel = context.WithCancel(Ctx) + r.Init(r) + go r.run() } return } -func (sc *StreamCollection) Range(f func(*Stream)) { - sc.RLock() - defer sc.RUnlock() - for _, s := range sc.m { - f(s) - } -} - -func init() { - Streams.m = make(map[string]*Stream) -} - -// Streams 所有的流集合 -var Streams StreamCollection -var StreamTimeoutError = errors.New("timeout") - -//FindStream 根据流路径查找流 -func FindStream(streamPath string) *Stream { - return Streams.GetStream(streamPath) -} - -// Publish 直接发布 -func Publish(streamPath, t string) *Stream { - var stream = &Stream{ - StreamPath: streamPath, - Type: t, - } - if stream.Publish() { - return stream - } - return nil -} - -type StreamContext struct { - context.Context - cancel context.CancelFunc - timeout *time.Timer //更新时间用来做超时处理 - IsTimeout bool -} - -func (r *StreamContext) Err() error { - if r.IsTimeout { - return StreamTimeoutError - } - return r.Context.Err() -} -func (r *StreamContext) Update() { - if r.timeout != nil { - r.timeout.Reset(config.PublishTimeout) - } -} - -// Stream 流定义 -type Stream struct { - URL string //远程地址,仅远程拉流有值 - StreamContext `json:"-"` - StreamPath string - Type string //流类型,来自发布者 - StartTime time.Time //流的创建时间 - Subscribers []*Subscriber // 订阅者 - VideoTracks Tracks - AudioTracks Tracks - DataTracks Tracks - AutoCloseAfter *int //当无人订阅时延迟N秒后自动停止发布 - Transcoding map[string]string //转码配置,key:目标编码,value:发布者提供的编码 - subscribeMutex sync.Mutex - OnClose func() `json:"-"` - ExtraProp interface{} //额外的属性,用于实现子类化,减少map的使用 - closeDelay *time.Timer -} - -func (r 
*Stream) Close() { - Streams.Lock() - //如果没有发布过,就不需要进行处理 - if r.cancel == nil { - Streams.Unlock() +// ForceRegister 强制注册流,会将已有的流踢掉 +func (r *Stream) ForceRegister(streamPath string) { + if ok := r.Register(streamPath); !ok { + if s := Streams.Get(streamPath); s != nil { + s.Close() + <-s.Done() + } + r.ForceRegister(streamPath) + } else { return } - if r.closeDelay != nil { - r.closeDelay.Stop() - } - r.cancel() - r.cancel = nil - delete(Streams.m, r.StreamPath) - Streams.Unlock() - r.VideoTracks.Dispose() - r.AudioTracks.Dispose() - r.DataTracks.Dispose() - if r.OnClose != nil { - r.OnClose() - } - TriggerHook(HOOK_STREAMCLOSE, r) - utils.Print(Yellow("Stream destoryed :"), BrightCyan(r.StreamPath)) } -// Publish 发布者进行发布操作 -func (r *Stream) Publish() bool { - Streams.Lock() - defer Streams.Unlock() - if _, ok := Streams.m[r.StreamPath]; ok { - return false - } - if r.AutoCloseAfter == nil { - r.AutoCloseAfter = &config.AutoCloseAfter - } - var closeChann <-chan time.Time - if *r.AutoCloseAfter > 0 { - r.closeDelay = time.NewTimer(time.Duration(*r.AutoCloseAfter) * time.Second) - r.closeDelay.Stop() - closeChann = r.closeDelay.C - } - r.Context, r.cancel = context.WithCancel(Ctx) - r.VideoTracks.Init(r) - r.AudioTracks.Init(r) - r.DataTracks.Init(r) - r.StartTime = time.Now() - Streams.m[r.StreamPath] = r - utils.Print(Green("Stream publish:"), BrightCyan(r.StreamPath)) - go r.waitClose(closeChann) - //触发钩子 - TriggerHook(HOOK_PUBLISH, r) - return true -} - -// 等待流关闭 -func (r *Stream) waitClose(closeChann <-chan time.Time) { - r.timeout = time.NewTimer(config.PublishTimeout) - defer r.timeout.Stop() - if r.closeDelay != nil { - defer r.closeDelay.Stop() - } - select { - case <-r.Done(): - case <-closeChann: - utils.Print(Yellow("Stream closeDelay:"), BrightCyan(r.StreamPath)) - r.Close() - case <-r.timeout.C: - utils.Print(Yellow("Stream timeout:"), BrightCyan(r.StreamPath)) - r.IsTimeout = true - r.Close() - } -} - -func (r *Stream) WaitDataTrack(names ...string) 
*DataTrack { - if !config.EnableVideo { - return nil - } - if track := r.DataTracks.WaitTrack(names...); track != nil { - return track.(*DataTrack) - } - return nil -} - -func (r *Stream) WaitVideoTrack(names ...string) *VideoTrack { - if !config.EnableVideo { - return nil - } - if track := r.VideoTracks.WaitTrack(names...); track != nil { - return track.(*VideoTrack) - } - return nil -} - -// TODO: 触发转码逻辑 -func (r *Stream) WaitAudioTrack(names ...string) *AudioTrack { - if !config.EnableAudio { - return nil - } - if track := r.AudioTracks.WaitTrack(names...); track != nil { - return track.(*AudioTrack) - } - return nil -} - -//Subscribe 订阅流 -func (r *Stream) Subscribe(s *Subscriber) { - if s.Stream = r; r.Err() == nil { - s.SubscribeTime = time.Now() - utils.Print(Sprintf(Yellow("subscribe :%s %s,to Stream %s"), Blue(s.Type), Cyan(s.ID), BrightCyan(r.StreamPath))) - s.Context, s.cancel = context.WithCancel(r) - r.subscribeMutex.Lock() - if *r.AutoCloseAfter > 0 { - r.closeDelay.Stop() - } - r.Subscribers = append(r.Subscribers, s) - TriggerHook(HOOK_SUBSCRIBE, s, len(r.Subscribers)) - r.subscribeMutex.Unlock() - utils.Print(Sprintf(Yellow("%s subscriber %s added remains:%d"), BrightCyan(r.StreamPath), Cyan(s.ID), Blue(len(r.Subscribers)))) - } -} - -//UnSubscribe 取消订阅流 -func (r *Stream) UnSubscribe(s *Subscriber) { - if r.Err() == nil { - var deleted bool - r.subscribeMutex.Lock() - defer r.subscribeMutex.Unlock() - r.Subscribers, deleted = DeleteSliceItem_Subscriber(r.Subscribers, s) - if deleted { - utils.Print(Sprintf(Yellow("%s subscriber %s removed remains:%d"), BrightCyan(r.StreamPath), Cyan(s.ID), Blue(len(r.Subscribers)))) - l := len(r.Subscribers) - TriggerHook(HOOK_UNSUBSCRIBE, s, l) - if l == 0 && *r.AutoCloseAfter >= 0 { - if *r.AutoCloseAfter == 0 { - r.Close() - } else { - r.closeDelay.Reset(time.Duration(*r.AutoCloseAfter) * time.Second) - } +func (r *Stream) action(action StreamAction) { + if next, ok := StreamFSM[r.State][action]; ok { + if 
r.Publisher == nil || r.OnStateChange(r.State, next) { + util.Print(Yellow("Stream "), BrightCyan(r.StreamPath), " state changed :", r.State, "->", next) + r.State = next + switch next { + case STATE_WAITPUBLISH: + r.timeout.Reset(r.Config.WaitTimeout.Duration()) + case STATE_WAITTRACK: + r.timeout.Reset(time.Second * 5) + case STATE_PUBLISHING: + r.WaitDone() + r.timeout.Reset(r.Config.PublishTimeout.Duration()) + case STATE_WAITCLOSE: + r.timeout.Reset(r.Config.WaitCloseTimeout.Duration()) + case STATE_CLOSED: + r.cancel() + r.WaitDone() + close(r.actionChan) + Streams.Delete(r.StreamPath) + fallthrough + default: + r.timeout.Stop() } } } } -func DeleteSliceItem_Subscriber(slice []*Subscriber, item *Subscriber) ([]*Subscriber, bool) { - for i, val := range slice { - if val == item { - return append(slice[:i], slice[i+1:]...), true + +func (r *Stream) Close() { + r.actionChan <- ACTION_CLOSE +} +func (r *Stream) UnSubscribe(sub *Subscriber) { + r.actionChan <- UnSubscibeAction(sub) +} +func (r *Stream) Subscribe(sub *Subscriber) { + r.actionChan <- SubscribeAction(sub) +} +func (r *Stream) run() { + for { + select { + case <-r.timeout.C: + util.Print(Yellow("Stream "), BrightCyan(r.StreamPath), "timeout:", r.State) + r.action(ACTION_TIMEOUT) + case <-r.Done(): + r.action(ACTION_CLOSE) + case action, ok := <-r.actionChan: + if ok { + switch v := action.(type) { + case StreamAction: + r.action(v) + case SubscribeAction: + v.Stream = r + v.Context, v.cancel = context.WithCancel(r) + r.Subscribers.Add(v) + util.Print(Sprintf(Yellow("%s subscriber %s added remains:%d"), BrightCyan(r.StreamPath), Cyan(v.ID), Blue(len(r.Subscribers)))) + if r.Subscribers.Len() == 1 { + r.action(ACTION_FIRSTENTER) + } + case UnSubscibeAction: + if r.Subscribers.Delete(v) { + util.Print(Sprintf(Yellow("%s subscriber %s removed remains:%d"), BrightCyan(r.StreamPath), Cyan(v.ID), Blue(len(r.Subscribers)))) + if r.Subscribers.Len() == 0 && r.Config.WaitCloseTimeout > 0 { + 
r.action(ACTION_LASTLEAVE) + } + } + } + } else { + return + } } } - return slice, false +} + +// Update 更新数据重置超时定时器 +func (r *Stream) Update() uint32 { + if r.State == STATE_PUBLISHING { + r.timeout.Reset(r.Config.PublishTimeout.Duration()) + } + return atomic.AddUint32(&r.FrameCount, 1) +} + +// 如果暂时不知道编码格式可以用这个 +func (r *Stream) NewVideoTrack() (vt *track.UnknowVideo) { + vt = &track.UnknowVideo{ + Stream: r, + } + return +} + +func (r *Stream) NewH264Track() (vt *track.H264) { + return track.NewH264(r) +} + +func (r *Stream) NewH265Track() (vt *track.H265) { + return track.NewH265(r) +} + +// func (r *Stream) WaitDataTrack(names ...string) DataTrack { +// t := <-r.WaitTrack(names...) +// return t.(DataTrack) +// } + +func (r *Stream) WaitVideoTrack(names ...string) track.Video { + if !r.Config.EnableVideo { + return nil + } + t := <-r.WaitTrack(names...) + return t.(track.Video) +} + +func (r *Stream) WaitAudioTrack(names ...string) track.Audio { + if !r.Config.EnableAudio { + return nil + } + t := <-r.WaitTrack(names...) + return t.(track.Audio) } diff --git a/subscriber.go b/subscriber.go index 6673711..d99962f 100644 --- a/subscriber.go +++ b/subscriber.go @@ -6,14 +6,18 @@ import ( "sync" "time" + . 
"github.com/Monibuca/engine/v4/common" + "github.com/Monibuca/engine/v4/track" "github.com/pkg/errors" ) +type AudioFrame AVFrame[AudioSlice] +type VideoFrame AVFrame[NALUSlice] + // Subscriber 订阅者实体定义 type Subscriber struct { context.Context `json:"-"` cancel context.CancelFunc - Ctx2 context.Context `json:"-"` *Stream `json:"-"` ID string TotalDrop int //总丢帧 @@ -23,8 +27,8 @@ type Subscriber struct { Delay uint32 SubscribeTime time.Time SubscribeArgs url.Values - OnAudio func(uint32, *AudioPack) `json:"-"` - OnVideo func(uint32, *VideoPack) `json:"-"` + OnAudio func(*AudioFrame) bool `json:"-"` + OnVideo func(*VideoFrame) bool `json:"-"` closeOnce sync.Once } @@ -51,7 +55,7 @@ func (s *Subscriber) Subscribe(streamPath string) error { } else { streamPath = u.Path } - if stream := FindStream(streamPath); stream == nil { + if stream := Streams.Get(streamPath); stream == nil { return errors.Errorf("subscribe %s faild :stream not found", streamPath) } else { if stream.Subscribe(s); s.Context == nil { @@ -62,7 +66,7 @@ func (s *Subscriber) Subscribe(streamPath string) error { } //Play 开始播放 -func (s *Subscriber) Play(at *AudioTrack, vt *VideoTrack) { +func (s *Subscriber) Play(at track.Audio, vt track.Video) { defer s.Close() if vt == nil && at == nil { return @@ -74,71 +78,42 @@ func (s *Subscriber) Play(at *AudioTrack, vt *VideoTrack) { s.PlayVideo(vt) return } - var extraExit <-chan struct{} - if s.Ctx2 != nil { - extraExit = s.Ctx2.Done() - } - streamExit := s.Context.Done() - select { - case <-vt.WaitIDR: //等待获取到第一个关键帧 - case <-streamExit: //可能等不到关键帧就退出了 - return - case <-extraExit: //可能等不到关键帧就退出了 - return - } - vr := vt.SubRing(vt.IDRing) //从关键帧开始读取,首屏秒开 - realSt := vt.PreItem().Timestamp // 当前时间戳 - ar := at.Clone() - iv, vp := vr.Read() - ia, ap := ar.TryRead() - vst := iv.Timestamp - chase := true + vr := vt.ReadRing() //从关键帧开始读取,首屏秒开 + ar := at.ReadRing() + vp := vr.Read() + ap := ar.TryRead() + // chase := true for { - select { - case <-extraExit: - return - 
case <-streamExit: - return - default: - if ia == nil && iv == nil { - time.Sleep(time.Millisecond * 10) - } else if ia != nil && (iv == nil || iv.Timestamp.After(ia.Timestamp)) { - s.OnAudio(uint32(ia.Timestamp.Sub(vst).Milliseconds()), ap.(*AudioPack)) - ar.MoveNext() - } else if iv != nil && (ia == nil || ia.Timestamp.After(iv.Timestamp)) { - s.OnVideo(uint32(iv.Timestamp.Sub(vst).Milliseconds()), vp.(*VideoPack)) - if chase { - if add10 := vst.Add(time.Millisecond * 10); realSt.After(add10) { - vst = add10 - } else { - vst = realSt - chase = false - } - } - vr.MoveNext() - } - ia, ap = ar.TryRead() - iv, vp = vr.TryRead() + if ap == nil && vp == nil { + time.Sleep(time.Millisecond * 10) + } else if ap != nil && (vp == nil || vp.SeqInStream > ap.SeqInStream) { + s.onAudio(ap) + ar.MoveNext() + } else if vp != nil && (ap == nil || ap.SeqInStream > vp.SeqInStream) { + s.onVideo(vp) + // if chase { + // if add10 := vst.Add(time.Millisecond * 10); realSt.After(add10) { + // vst = add10 + // } else { + // vst = realSt + // chase = false + // } + // } + vr.MoveNext() } + ap = ar.TryRead() + vp = vr.TryRead() } } -func (s *Subscriber) onAudio(ts uint32, ap *AudioPack) { - s.OnAudio(ts, ap) +func (s *Subscriber) onAudio(af *AVFrame[AudioSlice]) bool { + return s.OnAudio((*AudioFrame)(af)) } -func (s *Subscriber) onVideo(ts uint32, vp *VideoPack) { - s.OnVideo(ts, vp) +func (s *Subscriber) onVideo(vf *AVFrame[NALUSlice]) bool { + return s.OnVideo((*VideoFrame)(vf)) } -func (s *Subscriber) PlayAudio(at *AudioTrack) { - if s.Ctx2 != nil { - at.Play(s.onAudio, s.Done(), s.Ctx2.Done()) - } else { - at.Play(s.onAudio, s.Done(), nil) - } +func (s *Subscriber) PlayAudio(vt track.Audio) { + vt.Play(s.onAudio) } -func (s *Subscriber) PlayVideo(vt *VideoTrack) { - if s.Ctx2 != nil { - vt.Play(s.onVideo, s.Done(), s.Ctx2.Done()) - } else { - vt.Play(s.onVideo, s.Done(), nil) - } +func (s *Subscriber) PlayVideo(vt track.Video) { + vt.Play(s.onVideo) } diff --git a/track/aac.go 
b/track/aac.go new file mode 100644 index 0000000..b92f494 --- /dev/null +++ b/track/aac.go @@ -0,0 +1,38 @@ +package track + +import ( + "github.com/Monibuca/engine/v4/codec" + . "github.com/Monibuca/engine/v4/common" + "time" +) + +func NewAAC(stream IStream) (aac *AAC) { + aac = &AAC{} + aac.Stream = stream + aac.CodecID = codec.CodecID_AAC + aac.Init(stream, 32) + aac.Poll = time.Millisecond * 20 + return +} + +type AAC struct { + BaseAudio +} + +func (aac *AAC) WriteAVCC(ts uint32, frame AVCCFrame) { + if frame.IsSequence() { + aac.DecoderConfiguration.Reset() + aac.DecoderConfiguration.AppendAVCC(frame) + config1, config2 := frame[2], frame[3] + //audioObjectType = (config1 & 0xF8) >> 3 + // 1 AAC MAIN ISO/IEC 14496-3 subpart 4 + // 2 AAC LC ISO/IEC 14496-3 subpart 4 + // 3 AAC SSR ISO/IEC 14496-3 subpart 4 + // 4 AAC LTP ISO/IEC 14496-3 subpart 4 + aac.Channels = ((config2 >> 3) & 0x0F) //声道 + aac.SampleRate = HZ(codec.SamplingFrequencies[((config1&0x7)<<1)|(config2>>7)]) + aac.DecoderConfiguration.AppendRaw(AudioSlice(frame[2:])) + } else { + aac.BaseAudio.WriteAVCC(ts, frame) + } +} diff --git a/track/audio.go b/track/audio.go new file mode 100644 index 0000000..7122fc6 --- /dev/null +++ b/track/audio.go @@ -0,0 +1,89 @@ +package track + +import ( + "strings" + + "github.com/Monibuca/engine/v4/codec" + . 
"github.com/Monibuca/engine/v4/common" + "github.com/Monibuca/engine/v4/util" +) + +type Audio interface { + AVTrack + ReadRing() *AVRing[AudioSlice] + Play(onAudio func(*AVFrame[AudioSlice]) bool) +} + +type BaseAudio struct { + Media[AudioSlice] + Channels byte + avccHead []byte +} + +func (at *BaseAudio) ReadRing() *AVRing[AudioSlice] { + return util.Clone(at.AVRing) +} +func (at *BaseAudio) Play(onAudio func(*AVFrame[AudioSlice]) bool) { + ar := at.ReadRing() + for ap := ar.Read(); at.Stream.Err() == nil; ap = ar.Read() { + if !onAudio(ap) { + break + } + ar.MoveNext() + } +} + +func (at *BaseAudio) WriteAVCC(ts uint32, frame AVCCFrame) { + at.Media.WriteAVCC(ts, frame) + at.Flush() +} + +func (at *BaseAudio) Flush() { + if at.Value.AVCC == nil { + at.Value.AppendAVCC(at.avccHead) + for _, raw := range at.Value.Raw { + at.Value.AppendAVCC(raw) + } + } + at.Media.Flush() +} + +type UnknowAudio struct { + Name string + Stream IStream + Know Audio +} + +func (at *UnknowAudio) WriteAVCC(ts uint32, frame AVCCFrame) { + if at.Know == nil { + codecID := frame.AudioCodecID() + if at.Name == "" { + at.Name = strings.ToLower(codec.SoundFormat[codecID]) + } + switch codecID { + case codec.CodecID_AAC: + if !frame.IsSequence() { + return + } + a := NewAAC(at.Stream) + at.Know = a + a.avccHead = []byte{frame[0], 1} + a.WriteAVCC(0, frame) + a.Stream.AddTrack(a.Name, a) + case codec.CodecID_PCMA, + codec.CodecID_PCMU: + alaw := true + if codecID == codec.CodecID_PCMU { + alaw = false + } + a := NewG711(at.Stream, alaw) + at.Know = a + a.SampleRate = HZ(codec.SoundRate[(frame[0]&0x0c)>>2]) + a.Channels = frame[0]&0x01 + 1 + a.avccHead = frame[:1] + a.Stream.AddTrack(a.Name, a) + } + } else { + at.Know.WriteAVCC(ts, frame) + } +} diff --git a/track/base.go b/track/base.go new file mode 100644 index 0000000..1c154df --- /dev/null +++ b/track/base.go @@ -0,0 +1,56 @@ +package track + +import ( + . 
"github.com/Monibuca/engine/v4/common" + "github.com/Monibuca/engine/v4/util" + "github.com/pion/rtp" +) + +// Base 基础Track类 +type Base struct { + Name string + Stream IStream `json:"-"` + BPS +} + +func (bt *Base) Flush(bf *BaseFrame) { + bt.ComputeBPS(bf.BytesIn) + bf.SeqInStream = bt.Stream.Update() +} + +// Media 基础媒体Track类 +type Media[T RawSlice] struct { + Base + AVRing[T] `json:"-"` + CodecID byte + SampleRate HZ + DecoderConfiguration AVFrame[T] `json:"-"` //H264(SPS、PPS) H265(VPS、SPS、PPS) AAC(config) + util.BytesPool //无锁内存池,用于发布者(在同一个协程中)复用小块的内存,通常是解包时需要临时使用 +} + +func (av *Media[T]) WriteRTP(raw []byte) { + av.Value.AppendRTP(raw) + var packet rtp.Packet + if err := packet.Unmarshal(raw); err != nil { + return + } + av.Value.AppendRTPPackets(packet) + if packet.Marker { + av.Flush() + } +} + +func (av *Media[T]) WriteSlice(slice T) { + av.Value.AppendRaw(slice) +} +func (av *Media[T]) WriteAVCC(ts uint32, frame AVCCFrame) { + av.Value.BytesIn = len(frame) + av.Value.AppendAVCC(frame) + av.Value.DTS = av.SampleRate.ToNTS(ts) + av.Value.PTS = av.SampleRate.ToNTS(ts + frame.CTS()) +} + +func (av *Media[T]) Flush() { + av.Base.Flush(&av.Value.BaseFrame) + av.Step() +} diff --git a/track/g711.go b/track/g711.go new file mode 100644 index 0000000..7b88125 --- /dev/null +++ b/track/g711.go @@ -0,0 +1,30 @@ +package track + +import ( + "time" + + "github.com/Monibuca/engine/v4/codec" + . 
"github.com/Monibuca/engine/v4/common" +) + +func NewG711(stream IStream, alaw bool) (g711 *G711) { + g711 = &G711{} + g711.Stream = stream + if alaw { + g711.CodecID = codec.CodecID_PCMA + } else { + g711.CodecID = codec.CodecID_PCMU + } + g711.Init(stream, 32) + g711.Poll = time.Millisecond * 20 + return +} + +type G711 struct { + BaseAudio +} + +func (g711 *G711) WriteAVCC(ts uint32, frame AVCCFrame) { + g711.Value.AppendRaw(AudioSlice(frame[1:])) + g711.BaseAudio.WriteAVCC(ts, frame) +} diff --git a/track/h264.go b/track/h264.go new file mode 100644 index 0000000..a60d29d --- /dev/null +++ b/track/h264.go @@ -0,0 +1,78 @@ +package track + +import ( + "time" + + "github.com/Monibuca/engine/v4/codec" + . "github.com/Monibuca/engine/v4/common" + "github.com/Monibuca/engine/v4/util" +) + +type H264 struct { + H264H265 +} + +func NewH264(stream IStream) (vt *H264) { + vt = &H264{} + vt.CodecID = codec.CodecID_H264 + vt.SampleRate = 90000 + vt.Stream = stream + vt.Init(stream, 256) + vt.Poll = time.Millisecond * 20 + return +} + +func (vt *H264) WriteSlice(slice NALUSlice) { + switch H264Slice(slice).Type() { + case codec.NALU_SPS: + vt.DecoderConfiguration.Reset() + vt.DecoderConfiguration.AppendRaw(slice) + case codec.NALU_PPS: + vt.DecoderConfiguration.AppendRaw(slice) + vt.SPSInfo, _ = codec.ParseSPS(slice[0]) + lenSPS := SizeOfBuffers(vt.DecoderConfiguration.Raw[0]) + lenPPS := SizeOfBuffers(vt.DecoderConfiguration.Raw[1]) + if lenSPS > 3 { + vt.DecoderConfiguration.AppendAVCC(codec.RTMP_AVC_HEAD[:6], vt.DecoderConfiguration.Raw[0][0][1:4]) + } else { + vt.DecoderConfiguration.AppendAVCC(codec.RTMP_AVC_HEAD) + } + tmp := []byte{0xE1, 0, 0, 0x01, 0, 0} + vt.DecoderConfiguration.AppendAVCC(tmp[:1], util.PutBE(tmp[1:3], lenSPS), vt.DecoderConfiguration.Raw[0][0], tmp[3:4], util.PutBE(tmp[3:6], lenPPS), vt.DecoderConfiguration.Raw[1][0]) + case codec.NALU_IDR_Picture: + case codec.NALU_Non_IDR_Picture: + case codec.NALU_SEI: + vt.Media.WriteSlice(slice) + } +} + 
+func (vt *H264) WriteAVCC(ts uint32, frame AVCCFrame) { + if frame.IsSequence() { + vt.DecoderConfiguration.Reset() + vt.DecoderConfiguration.SeqInTrack = vt.Value.SeqInTrack + vt.DecoderConfiguration.AppendAVCC(frame) + var info codec.AVCDecoderConfigurationRecord + if _, err := info.Unmarshal(frame[5:]); err == nil { + vt.SPSInfo, _ = codec.ParseSPS(info.SequenceParameterSetNALUnit) + vt.nalulenSize = int(info.LengthSizeMinusOne&3 + 1) + vt.DecoderConfiguration.AppendRaw(NALUSlice{info.SequenceParameterSetNALUnit}, NALUSlice{info.PictureParameterSetNALUnit}) + } + } else { + vt.H264H265.WriteAVCC(ts, frame) + } +} + +func (vt *H264) Flush() { + if H264NALU(vt.Value.Raw).IFrame() { + vt.Value.IFrame = true + if vt.IDRing == nil { + defer vt.Stream.AddTrack(vt.Name, vt) + } + vt.ComputeGOP() + } + // RTP格式补完 + if vt.Value.RTP == nil { + + } + vt.H264H265.Flush() +} diff --git a/track/h265.go b/track/h265.go new file mode 100644 index 0000000..28dab6a --- /dev/null +++ b/track/h265.go @@ -0,0 +1,75 @@ +package track + +import ( + "time" + + "github.com/Monibuca/engine/v4/codec" + . 
"github.com/Monibuca/engine/v4/common" +) + +type H265 struct { + H264H265 +} + +func NewH265(stream IStream) (vt *H265) { + vt = &H265{} + vt.CodecID = codec.CodecID_H265 + vt.SampleRate = 90000 + vt.Stream = stream + vt.Init(stream, 256) + vt.Poll = time.Millisecond * 20 + return +} +func (vt *H265) WriteSlice(slice NALUSlice) { + switch H265Slice(slice).Type() { + case codec.NAL_UNIT_VPS: + vt.DecoderConfiguration.Reset() + vt.DecoderConfiguration.AppendRaw(slice) + case codec.NAL_UNIT_SPS: + vt.DecoderConfiguration.AppendRaw(slice) + vt.SPSInfo, _ = codec.ParseHevcSPS(slice[0]) + case codec.NAL_UNIT_PPS: + vt.DecoderConfiguration.AppendRaw(slice) + extraData, err := codec.BuildH265SeqHeaderFromVpsSpsPps(vt.DecoderConfiguration.Raw[0][0], vt.DecoderConfiguration.Raw[1][0], vt.DecoderConfiguration.Raw[2][0]) + if err == nil { + vt.DecoderConfiguration.AppendAVCC(extraData) + } + case 0, 1, 2, 3, 4, 5, 6, 7, 9, + codec.NAL_UNIT_CODED_SLICE_BLA, + codec.NAL_UNIT_CODED_SLICE_BLANT, + codec.NAL_UNIT_CODED_SLICE_BLA_N_LP, + codec.NAL_UNIT_CODED_SLICE_IDR, + codec.NAL_UNIT_CODED_SLICE_IDR_N_LP, + codec.NAL_UNIT_CODED_SLICE_CRA: + vt.Media.WriteSlice(slice) + } +} +func (vt *H265) WriteAVCC(ts uint32, frame AVCCFrame) { + if frame.IsSequence() { + vt.DecoderConfiguration.Reset() + vt.DecoderConfiguration.SeqInTrack = vt.Value.SeqInTrack + vt.DecoderConfiguration.AppendAVCC(frame) + if vps, sps, pps, err := codec.ParseVpsSpsPpsFromSeqHeaderWithoutMalloc(frame); err == nil { + vt.SPSInfo, _ = codec.ParseHevcSPS(frame) + vt.nalulenSize = int(frame[26]) & 0x03 + vt.DecoderConfiguration.AppendRaw(NALUSlice{vps}, NALUSlice{sps}, NALUSlice{pps}) + } + } else { + vt.H264H265.WriteAVCC(ts, frame) + } +} + +func (vt *H265) Flush() { + if H265NALU(vt.Value.Raw).IFrame() { + vt.Value.IFrame = true + if vt.IDRing == nil { + defer vt.Stream.AddTrack(vt.Name, vt) + } + vt.ComputeGOP() + } + // RTP格式补完 + if vt.Value.RTP == nil { + + } + vt.H264H265.Flush() +} diff --git 
a/track/video.go b/track/video.go new file mode 100644 index 0000000..13e0814 --- /dev/null +++ b/track/video.go @@ -0,0 +1,137 @@ +package track + +import ( + "strings" + + "github.com/Monibuca/engine/v4/codec" + . "github.com/Monibuca/engine/v4/common" + "github.com/Monibuca/engine/v4/util" +) + +type Video interface { + AVTrack + ReadRing() *AVRing[NALUSlice] + Play(onVideo func(*AVFrame[NALUSlice]) bool) +} + +type H264H265 struct { + Media[NALUSlice] + IDRing *util.Ring[AVFrame[NALUSlice]] `json:"-"` //最近的关键帧位置,首屏渲染 + SPSInfo codec.SPSInfo + GOP int //关键帧间隔 + nalulenSize int //avcc格式中表示nalu长度的字节数,通常为4 + idrCount int //缓存中包含的idr数量 +} + +func (t *H264H265) ComputeGOP() { + t.idrCount++ + if t.IDRing != nil { + t.GOP = int(t.Value.SeqInTrack - t.IDRing.Value.SeqInTrack) + if l := t.Size - t.GOP - 5; l > 5 { + t.Size -= l + //缩小缓冲环节省内存 + t.Unlink(l).Do(func(v AVFrame[NALUSlice]) { + if v.IFrame { + t.idrCount-- + } + v.Reset() + }) + } + } + t.IDRing = t.Ring +} + +func (vt *H264H265) WriteAVCC(ts uint32, frame AVCCFrame) { + vt.Media.WriteAVCC(ts, frame) + for nalus := frame[5:]; len(nalus) > vt.nalulenSize; { + nalulen := util.ReadBE[int](nalus[:vt.nalulenSize]) + if end := nalulen + vt.nalulenSize; len(nalus) >= end { + vt.Value.AppendRaw(NALUSlice{nalus[vt.nalulenSize:end]}) + nalus = nalus[end:] + } else { + util.Printf("WriteAVCC error,len %d,nalulenSize:%d,end:%d", len(nalus), vt.nalulenSize, end) + break + } + } + vt.Flush() +} + +func (vt *H264H265) Flush() { + // AVCC格式补完 + if vt.Value.AVCC == nil { + b := []byte{vt.CodecID, 1, 0, 0, 0} + if vt.Value.IFrame { + b[0] |= 0x10 + } else { + b[0] |= 0x20 + } + // 写入CTS + util.PutBE(b[2:5], vt.SampleRate.ToMini(vt.Value.PTS-vt.Value.DTS)) + vt.Value.AppendAVCC(b) + for _, nalu := range vt.Value.Raw { + vt.Value.AppendAVCC(util.PutBE(make([]byte, 4), SizeOfBuffers(nalu))) + vt.Value.AppendAVCC(nalu...) 
+ } + } + // 下一帧为I帧,即将覆盖 + if vt.Next().Value.IFrame { + // 仅存一枚I帧,需要扩环 + if vt.idrCount == 1 { + if vt.Size < 256 { + vt.Link(util.NewRing[AVFrame[NALUSlice]](5)) // 扩大缓冲环 + } + } else { + vt.idrCount-- + } + } + vt.Media.Flush() +} +func (vt *H264H265) ReadRing() *AVRing[NALUSlice] { + vr := util.Clone(vt.AVRing) + vr.Ring = vt.IDRing + return vr +} +func (vt *H264H265) Play(onVideo func(*AVFrame[NALUSlice]) bool) { + vr := vt.ReadRing() + for vp := vr.Read(); vt.Stream.Err() == nil; vp = vr.Read() { + if !onVideo(vp) { + break + } + vr.MoveNext() + } +} + +type UnknowVideo struct { + Name string + Stream IStream + Know Video +} + +func (vt *UnknowVideo) WriteAnnexB(pts uint32, dts uint32, frame AnnexBFrame) { + +} + +func (vt *UnknowVideo) WriteAVCC(ts uint32, frame AVCCFrame) { + if vt.Know == nil { + if frame.IsSequence() { + codecID := frame.VideoCodecID() + if vt.Name == "" { + vt.Name = strings.ToLower(codec.CodecID[codecID]) + } + switch codecID { + case codec.CodecID_H264: + v := NewH264(vt.Stream) + vt.Know = v + v.WriteAVCC(0, frame) + v.Stream.AddTrack(v.Name, v) + case codec.CodecID_H265: + v := NewH265(vt.Stream) + vt.Know = v + v.WriteAVCC(0, frame) + v.Stream.AddTrack(v.Name, v) + } + } + } else { + vt.Know.WriteAVCC(ts, frame) + } +} diff --git a/tracks.go b/tracks.go new file mode 100644 index 0000000..25c9bbe --- /dev/null +++ b/tracks.go @@ -0,0 +1,83 @@ +package engine + +import ( + "context" + "encoding/json" + "sync" + + . 
"github.com/Monibuca/engine/v4/common" +) + +type Tracks struct { + context.Context + sync.RWMutex + m map[string]Track + waiters map[string][]*chan Track +} + +func (ts *Tracks) MarshalJSON() ([]byte, error) { + ts.RLock() + defer ts.RUnlock() + return json.Marshal(ts.m) +} + +func (ts *Tracks) Init(ctx context.Context) { + ts.m = make(map[string]Track) + ts.waiters = make(map[string][]*chan Track) + ts.Context = ctx +} + +func (ts *Tracks) AddTrack(name string, t Track) { + ts.Lock() + defer ts.Unlock() + if _, ok := ts.m[name]; !ok { + if ts.m[name] = t; ts.Err() == nil { + for i, ch := range ts.waiters[name] { + if ch != nil { + *ch <- t + close(*ch) + ts.waiters[name][i] = nil //通过设置为nil,防止重复通知 + } + } + } + } +} + +func (ts *Tracks) GetTrack(name string) Track { + ts.RLock() + defer ts.RUnlock() + return ts.m[name] +} + +// WaitDone 当等待结束时需要调用该函数,防止订阅者无限等待Track +func (ts *Tracks) WaitDone() { + ts.Lock() + defer ts.Unlock() + for _, chs := range ts.waiters { + for i, ch := range chs { + if ch != nil { + close(*ch) + chs[i] = nil //通过设置为nil,防止重复关闭 + } + } + } +} +func (ts *Tracks) WaitTrack(names ...string) (ch chan Track) { + ch = make(chan Track, 1) + ts.Lock() + defer ts.Unlock() + for _, name := range names { + if t, ok := ts.m[name]; ok { + ch <- t + return + } + } + if ts.Err() == nil { //在等待时间范围内 + for _, name := range names { + ts.waiters[name] = append(ts.waiters[name], &ch) + } + } else { + close(ch) + } + return +} diff --git a/util/big_endian.go b/util/big_endian.go new file mode 100644 index 0000000..598dc3d --- /dev/null +++ b/util/big_endian.go @@ -0,0 +1,17 @@ +package util + +import "constraints" + +func PutBE[T constraints.Integer](b []byte, num T) []byte { + for i, n := 0, len(b); i < n; i++ { + b[i] = byte(num >> ((n - i - 1) << 3)) + } + return b +} + +func ReadBE[T constraints.Integer](b []byte) (num T) { + for i, n := 0, len(b); i < n; i++ { + num += T(b[i]) << ((n - i - 1) << 3) + } + return +} diff --git a/util/bits/bits.go 
b/util/bits/bits.go new file mode 100644 index 0000000..4a09f0a --- /dev/null +++ b/util/bits/bits.go @@ -0,0 +1,118 @@ +package bits + +import ( + "io" +) + +type Reader struct { + R io.Reader + n int + bits uint64 +} + +func (self *Reader) ReadBits64(n int) (bits uint64, err error) { + if self.n < n { + var b [8]byte + var got int + want := (n - self.n + 7) / 8 + if got, err = self.R.Read(b[:want]); err != nil { + return + } + if got < want { + err = io.EOF + return + } + for i := 0; i < got; i++ { + self.bits <<= 8 + self.bits |= uint64(b[i]) + } + self.n += got * 8 + } + bits = self.bits >> uint(self.n-n) + self.bits ^= bits << uint(self.n-n) + self.n -= n + return +} + +func (self *Reader) ReadBits(n int) (bits uint, err error) { + var bits64 uint64 + if bits64, err = self.ReadBits64(n); err != nil { + return + } + bits = uint(bits64) + return +} + +func (self *Reader) Read(p []byte) (n int, err error) { + for n < len(p) { + want := 8 + if len(p)-n < want { + want = len(p) - n + } + var bits uint64 + if bits, err = self.ReadBits64(want * 8); err != nil { + break + } + for i := 0; i < want; i++ { + p[n+i] = byte(bits >> uint((want-i-1)*8)) + } + n += want + } + return +} + +type Writer struct { + W io.Writer + n int + bits uint64 +} + +func (self *Writer) WriteBits64(bits uint64, n int) (err error) { + if self.n+n > 64 { + move := uint(64 - self.n) + mask := bits >> move + self.bits = (self.bits << move) | mask + self.n = 64 + if err = self.FlushBits(); err != nil { + return + } + n -= int(move) + bits ^= (mask << move) + } + self.bits = (self.bits << uint(n)) | bits + self.n += n + return +} + +func (self *Writer) WriteBits(bits uint, n int) (err error) { + return self.WriteBits64(uint64(bits), n) +} + +func (self *Writer) Write(p []byte) (n int, err error) { + for n < len(p) { + if err = self.WriteBits64(uint64(p[n]), 8); err != nil { + return + } + n++ + } + return +} + +func (self *Writer) FlushBits() (err error) { + if self.n > 0 { + var b [8]byte + bits 
:= self.bits + if self.n%8 != 0 { + bits <<= uint(8 - (self.n % 8)) + } + want := (self.n + 7) / 8 + for i := 0; i < want; i++ { + b[i] = byte(bits >> uint((want-i-1)*8)) + } + if _, err = self.W.Write(b[:want]); err != nil { + return + } + self.n = 0 + } + return +} diff --git a/util/bits/bits_test.go b/util/bits/bits_test.go new file mode 100644 index 0000000..37d5545 --- /dev/null +++ b/util/bits/bits_test.go @@ -0,0 +1,61 @@ +package bits + +import ( + "bytes" + "testing" +) + +func TestBits(t *testing.T) { + rdata := []byte{0xf3, 0xb3, 0x45, 0x60} + rbuf := bytes.NewReader(rdata[:]) + r := &Reader{R: rbuf} + var u32 uint + if u32, _ = r.ReadBits(4); u32 != 0xf { + t.FailNow() + } + if u32, _ = r.ReadBits(4); u32 != 0x3 { + t.FailNow() + } + if u32, _ = r.ReadBits(2); u32 != 0x2 { + t.FailNow() + } + if u32, _ = r.ReadBits(2); u32 != 0x3 { + t.FailNow() + } + b := make([]byte, 2) + if r.Read(b); b[0] != 0x34 || b[1] != 0x56 { + t.FailNow() + } + + wbuf := &bytes.Buffer{} + w := &Writer{W: wbuf} + w.WriteBits(0xf, 4) + w.WriteBits(0x3, 4) + w.WriteBits(0x2, 2) + w.WriteBits(0x3, 2) + n, _ := w.Write([]byte{0x34, 0x56}) + if n != 2 { + t.FailNow() + } + w.FlushBits() + wdata := wbuf.Bytes() + if wdata[0] != 0xf3 || wdata[1] != 0xb3 || wdata[2] != 0x45 || wdata[3] != 0x60 { + t.FailNow() + } + + b = make([]byte, 8) + PutUInt64BE(b, 0x11223344) + if b[0] != 0x11 || b[1] != 0x22 || b[2] != 0x33 || b[3] != 0x44 { + t.FailNow() + } +} +func PutUInt64BE(b []byte, v uint64) { + b[0] = byte(v >> 56) + b[1] = byte(v >> 48) + b[2] = byte(v >> 40) + b[3] = byte(v >> 32) + b[4] = byte(v >> 24) + b[5] = byte(v >> 16) + b[6] = byte(v >> 8) + b[7] = byte(v) +} diff --git a/util/bits/bufio/bufio.go b/util/bits/bufio/bufio.go new file mode 100644 index 0000000..fdaa88e --- /dev/null +++ b/util/bits/bufio/bufio.go @@ -0,0 +1,22 @@ +package bufio + +import ( + "io" +) + +type Reader struct { + buf [][]byte + R io.ReadSeeker +} + +func NewReaderSize(r io.ReadSeeker, size int) 
*Reader { + buf := make([]byte, size*2) + return &Reader{ + R: r, + buf: [][]byte{buf[0:size], buf[size:]}, + } +} + +func (self *Reader) ReadAt(b []byte, off int64) (n int, err error) { + return +} diff --git a/util/bits/golomb_reader.go b/util/bits/golomb_reader.go new file mode 100644 index 0000000..da57cb2 --- /dev/null +++ b/util/bits/golomb_reader.go @@ -0,0 +1,65 @@ +package bits + +import ( + "io" +) + +type GolombBitReader struct { + R io.Reader + buf [1]byte + left byte +} + +func (self *GolombBitReader) ReadBit() (res uint, err error) { + if self.left == 0 { + if _, err = self.R.Read(self.buf[:]); err != nil { + return + } + self.left = 8 + } + self.left-- + res = uint(self.buf[0]>>self.left) & 1 + return +} + +func (self *GolombBitReader) ReadBits(n int) (res uint, err error) { + for i := 0; i < n; i++ { + var bit uint + if bit, err = self.ReadBit(); err != nil { + return + } + res |= bit << uint(n-i-1) + } + return +} + +func (self *GolombBitReader) ReadExponentialGolombCode() (res uint, err error) { + i := 0 + for { + var bit uint + if bit, err = self.ReadBit(); err != nil { + return + } + if !(bit == 0 && i < 32) { + break + } + i++ + } + if res, err = self.ReadBits(i); err != nil { + return + } + res += (1 << uint(i)) - 1 + return +} + +func (self *GolombBitReader) ReadSE() (res uint, err error) { + if res, err = self.ReadExponentialGolombCode(); err != nil { + return + } + if res&0x01 != 0 { + res = (res + 1) / 2 + } else { + res = -res / 2 + } + return +} diff --git a/util/bits/pio/pio.go b/util/bits/pio/pio.go new file mode 100644 index 0000000..4a73aa9 --- /dev/null +++ b/util/bits/pio/pio.go @@ -0,0 +1,3 @@ +package pio + +var RecommendBufioSize = 1024 * 64 diff --git a/util/bits/pio/reader.go b/util/bits/pio/reader.go new file mode 100644 index 0000000..c28a119 --- /dev/null +++ b/util/bits/pio/reader.go @@ -0,0 +1,121 @@ +package pio + +func U8(b []byte) (i uint8) { + return b[0] +} + +func U16BE(b []byte) (i uint16) { + i = uint16(b[0]) + i 
<<= 8 + i |= uint16(b[1]) + return +} + +func I16BE(b []byte) (i int16) { + i = int16(b[0]) + i <<= 8 + i |= int16(b[1]) + return +} + +func I24BE(b []byte) (i int32) { + i = int32(int8(b[0])) + i <<= 8 + i |= int32(b[1]) + i <<= 8 + i |= int32(b[2]) + return +} + +func U24BE(b []byte) (i uint32) { + i = uint32(b[0]) + i <<= 8 + i |= uint32(b[1]) + i <<= 8 + i |= uint32(b[2]) + return +} + +func I32BE(b []byte) (i int32) { + i = int32(int8(b[0])) + i <<= 8 + i |= int32(b[1]) + i <<= 8 + i |= int32(b[2]) + i <<= 8 + i |= int32(b[3]) + return +} + +func U32LE(b []byte) (i uint32) { + i = uint32(b[3]) + i <<= 8 + i |= uint32(b[2]) + i <<= 8 + i |= uint32(b[1]) + i <<= 8 + i |= uint32(b[0]) + return +} + +func U32BE(b []byte) (i uint32) { + i = uint32(b[0]) + i <<= 8 + i |= uint32(b[1]) + i <<= 8 + i |= uint32(b[2]) + i <<= 8 + i |= uint32(b[3]) + return +} + +func U40BE(b []byte) (i uint64) { + i = uint64(b[0]) + i <<= 8 + i |= uint64(b[1]) + i <<= 8 + i |= uint64(b[2]) + i <<= 8 + i |= uint64(b[3]) + i <<= 8 + i |= uint64(b[4]) + return +} + +func U64BE(b []byte) (i uint64) { + i = uint64(b[0]) + i <<= 8 + i |= uint64(b[1]) + i <<= 8 + i |= uint64(b[2]) + i <<= 8 + i |= uint64(b[3]) + i <<= 8 + i |= uint64(b[4]) + i <<= 8 + i |= uint64(b[5]) + i <<= 8 + i |= uint64(b[6]) + i <<= 8 + i |= uint64(b[7]) + return +} + +func I64BE(b []byte) (i int64) { + i = int64(int8(b[0])) + i <<= 8 + i |= int64(b[1]) + i <<= 8 + i |= int64(b[2]) + i <<= 8 + i |= int64(b[3]) + i <<= 8 + i |= int64(b[4]) + i <<= 8 + i |= int64(b[5]) + i <<= 8 + i |= int64(b[6]) + i <<= 8 + i |= int64(b[7]) + return +} diff --git a/util/bits/pio/vec.go b/util/bits/pio/vec.go new file mode 100644 index 0000000..7a1c229 --- /dev/null +++ b/util/bits/pio/vec.go @@ -0,0 +1,68 @@ +package pio + +func VecLen(vec [][]byte) (n int) { + for _, b := range vec { + n += len(b) + } + return +} + +func VecSliceTo(in [][]byte, out [][]byte, s int, e int) (n int) { + if s < 0 { + s = 0 + } + + if e >= 0 && e < s { + 
panic("pio: VecSlice start > end") + } + + i := 0 + off := 0 + for s > 0 && i < len(in) { + left := len(in[i]) + read := s + if left < read { + read = left + } + left -= read + off += read + s -= read + e -= read + if left == 0 { + i++ + off = 0 + } + } + if s > 0 { + panic("pio: VecSlice start out of range") + } + + for e != 0 && i < len(in) { + left := len(in[i]) - off + read := left + if e > 0 && e < read { + read = e + } + out[n] = in[i][off : off+read] + n++ + left -= read + e -= read + off += read + if left == 0 { + i++ + off = 0 + } + } + if e > 0 { + panic("pio: VecSlice end out of range") + } + + return +} + +func VecSlice(in [][]byte, s int, e int) (out [][]byte) { + out = make([][]byte, len(in)) + n := VecSliceTo(in, out, s, e) + out = out[:n] + return +} diff --git a/util/bits/pio/vec_test.go b/util/bits/pio/vec_test.go new file mode 100644 index 0000000..b435466 --- /dev/null +++ b/util/bits/pio/vec_test.go @@ -0,0 +1,22 @@ +package pio + +import ( + "fmt" + "testing" +) + +func TestExampleVec(t *testing.T) { + vec := [][]byte{[]byte{1, 2, 3}, []byte{4, 5, 6, 7, 8, 9}, []byte{10, 11, 12, 13}} + println(VecLen(vec)) + + vec = VecSlice(vec, 1, -1) + fmt.Println(vec) + + vec = VecSlice(vec, 2, -1) + fmt.Println(vec) + + vec = VecSlice(vec, 8, 8) + fmt.Println(vec) + + // Output: +} diff --git a/util/bits/pio/writer.go b/util/bits/pio/writer.go new file mode 100644 index 0000000..fdbb1b6 --- /dev/null +++ b/util/bits/pio/writer.go @@ -0,0 +1,87 @@ +package pio + +func PutU8(b []byte, v uint8) { + b[0] = v +} + +func PutI16BE(b []byte, v int16) { + b[0] = byte(v >> 8) + b[1] = byte(v) +} + +func PutU16BE(b []byte, v uint16) { + b[0] = byte(v >> 8) + b[1] = byte(v) +} + +func PutI24BE(b []byte, v int32) { + b[0] = byte(v >> 16) + b[1] = byte(v >> 8) + b[2] = byte(v) +} + +func PutU24BE(b []byte, v uint32) { + b[0] = byte(v >> 16) + b[1] = byte(v >> 8) + b[2] = byte(v) +} + +func PutI32BE(b []byte, v int32) { + b[0] = byte(v >> 24) + b[1] = byte(v >> 16) + 
b[2] = byte(v >> 8) + b[3] = byte(v) +} + +func PutU32BE(b []byte, v uint32) { + b[0] = byte(v >> 24) + b[1] = byte(v >> 16) + b[2] = byte(v >> 8) + b[3] = byte(v) +} + +func PutU32LE(b []byte, v uint32) { + b[3] = byte(v >> 24) + b[2] = byte(v >> 16) + b[1] = byte(v >> 8) + b[0] = byte(v) +} + +func PutU40BE(b []byte, v uint64) { + b[0] = byte(v >> 32) + b[1] = byte(v >> 24) + b[2] = byte(v >> 16) + b[3] = byte(v >> 8) + b[4] = byte(v) +} + +func PutU48BE(b []byte, v uint64) { + b[0] = byte(v >> 40) + b[1] = byte(v >> 32) + b[2] = byte(v >> 24) + b[3] = byte(v >> 16) + b[4] = byte(v >> 8) + b[5] = byte(v) +} + +func PutU64BE(b []byte, v uint64) { + b[0] = byte(v >> 56) + b[1] = byte(v >> 48) + b[2] = byte(v >> 40) + b[3] = byte(v >> 32) + b[4] = byte(v >> 24) + b[5] = byte(v >> 16) + b[6] = byte(v >> 8) + b[7] = byte(v) +} + +func PutI64BE(b []byte, v int64) { + b[0] = byte(v >> 56) + b[1] = byte(v >> 48) + b[2] = byte(v >> 40) + b[3] = byte(v >> 32) + b[4] = byte(v >> 24) + b[5] = byte(v >> 16) + b[6] = byte(v >> 8) + b[7] = byte(v) +} diff --git a/util/buffer.go b/util/buffer.go new file mode 100644 index 0000000..d82f225 --- /dev/null +++ b/util/buffer.go @@ -0,0 +1,30 @@ +package util + +type Buffer []byte + +func (b *Buffer) Write(a []byte) (n int, err error) { + *b = append(*b, a...) 
+ return len(a), nil +} +func (b Buffer) Len() int { + return len(b) +} +func (b Buffer) Cap() int { + return cap(b) +} +func (b Buffer) SubBuf(start int, length int) Buffer { + return b[start : start+length] +} + +func (b *Buffer) Malloc(count int) Buffer { + l := b.Len() + if l+count > b.Cap() { + n := make(Buffer, l+count) + copy(n, *b) + *b = n + } + return b.SubBuf(l, count) +} +func (b *Buffer) Reset() { + *b = b.SubBuf(0, 0) +} diff --git a/util/buffer_test.go b/util/buffer_test.go new file mode 100644 index 0000000..383d71e --- /dev/null +++ b/util/buffer_test.go @@ -0,0 +1,18 @@ +package util + +import ( + "testing" +) + +func TestBuffer(t *testing.T) { + t.Run(t.Name(), func(t *testing.T) { + var b Buffer + t.Log(b == nil) + b.Write([]byte{1, 2, 3}) + if b == nil { + t.Fail() + } else { + t.Logf("b:% x", b) + } + }) +} diff --git a/util/bytes_pool.go b/util/bytes_pool.go new file mode 100644 index 0000000..2bb5c4a --- /dev/null +++ b/util/bytes_pool.go @@ -0,0 +1,17 @@ +package util + +type BytesPool [][]byte + +func (pool *BytesPool) Get(size int) (result []byte) { + if l := len(*pool); l > 0 { + result = (*pool)[l-1] + *pool = (*pool)[:l-1] + } else { + result = make([]byte, size, 10) + } + return +} + +func (pool *BytesPool) Put(b []byte) { + *pool = append(*pool, b) +} diff --git a/util/index.go b/util/index.go new file mode 100644 index 0000000..9e461db --- /dev/null +++ b/util/index.go @@ -0,0 +1,5 @@ +package util + +func Clone[T any](x T) *T { + return &x +} \ No newline at end of file diff --git a/util/logger.go b/util/logger.go new file mode 100644 index 0000000..28c2b78 --- /dev/null +++ b/util/logger.go @@ -0,0 +1,87 @@ +package util + +import ( + "context" + "fmt" + "io" + "log" + "os" + "time" + + colorable "github.com/mattn/go-colorable" + + "github.com/logrusorgru/aurora" +) + +// MultiLogWriter 多端写日志类 +type MultiLogWriter struct { + writers []io.Writer + io.Writer +} + +var logWriter MultiLogWriter +var multiLogger = log.New(&logWriter, 
"", log.LstdFlags) +var colorLogger = log.New(colorable.NewColorableStdout(), "", log.LstdFlags) + +func init() { + log.SetOutput(io.MultiWriter(os.Stdout, &logWriter)) + logWriter.Writer = io.MultiWriter() +} + +// AddWriter 添加日志输出端 +func AddWriter(wn io.Writer) { + logWriter.writers = append(logWriter.writers, wn) + logWriter.Writer = io.MultiWriter(logWriter.writers...) +} + +// MayBeError 优雅错误判断加日志辅助函数 +func MayBeError(info error) (hasError bool) { + if hasError = info != nil; hasError { + Print(aurora.Red(info)) + } + return +} +func getNoColor(v ...interface{}) (noColor []interface{}) { + noColor = append(noColor, v...) + for i, value := range v { + if vv, ok := value.(aurora.Value); ok { + noColor[i] = vv.Value() + } + } + return +} + +// Print 带颜色识别 +func Print(v ...interface{}) { + noColor := getNoColor(v...) + colorLogger.Output(2, fmt.Sprint(v...)) + multiLogger.Output(2, fmt.Sprint(noColor...)) +} + +// Printf calls Output to print to the standard logger. +// Arguments are handled in the manner of fmt.Printf. +func Printf(format string, v ...interface{}) { + noColor := getNoColor(v...) + colorLogger.Output(2, fmt.Sprintf(format, v...)) + multiLogger.Output(2, fmt.Sprintf(format, noColor...)) +} + +// Println calls Output to print to the standard logger. +// Arguments are handled in the manner of fmt.Println. +func Println(v ...interface{}) { + noColor := getNoColor(v...) 
+ colorLogger.Output(2, fmt.Sprintln(v...)) + multiLogger.Output(2, fmt.Sprintln(noColor...)) +} + +type Event struct { + Timestamp time.Time + Level int + Label string + Tag string +} +type EventContext struct { + Name string + context.Context + EventChan chan *Event +} diff --git a/util/map.go b/util/map.go new file mode 100644 index 0000000..03ed1d8 --- /dev/null +++ b/util/map.go @@ -0,0 +1,70 @@ +package util + +import "sync" + +type Map[K comparable, V any] struct { + sync.RWMutex + Map map[K]V +} + +func (m *Map[K, V]) Init() { + m.Map = make(map[K]V) +} + +func (m *Map[K, V]) Add(k K, v V) bool { + m.Lock() + defer m.Unlock() + if _, ok := m.Map[k]; ok { + return false + } + m.Map[k] = v + return true +} + +func (m *Map[K, V]) Set(k K, v V) { + m.Lock() + m.Map[k] = v + m.Unlock() +} + +func (m *Map[K, V]) Has(k K) (ok bool) { + m.RLock() + defer m.RUnlock() + _, ok = m.Map[k] + return +} + +func (m *Map[K, V]) Len() int { + m.RLock() + defer m.RUnlock() + return len(m.Map) +} + +func (m *Map[K, V]) Get(k K) V { + m.RLock() + defer m.RUnlock() + return m.Map[k] +} + +func (m *Map[K, V]) Delete(k K) { + m.Lock() + delete(m.Map, k) + m.Unlock() +} + +func (m *Map[K, V]) ToList() (r []V) { + m.RLock() + defer m.RUnlock() + for _, s := range m.Map { + r = append(r, s) + } + return +} + +func (m *Map[K, V]) Range(f func(V)) { + m.RLock() + defer m.RUnlock() + for _, s := range m.Map { + f(s) + } +} diff --git a/util/ring.go b/util/ring.go new file mode 100644 index 0000000..4c0e066 --- /dev/null +++ b/util/ring.go @@ -0,0 +1,136 @@ +package util + +// A Ring is an element of a circular list, or ring. +// Rings do not have a beginning or end; a pointer to any ring element +// serves as reference to the entire ring. Empty rings are represented +// as nil Ring pointers. The zero value for a Ring is a one-element +// ring with a nil Value. 
+// +type Ring[T any] struct { + next, prev *Ring[T] + Value T // for use by client; untouched by this library +} + +func (r *Ring[T]) init() *Ring[T] { + r.next = r + r.prev = r + return r +} + +// Next returns the next ring element. r must not be empty. +func (r *Ring[T]) Next() *Ring[T] { + if r.next == nil { + return r.init() + } + return r.next +} + +// Prev returns the previous ring element. r must not be empty. +func (r *Ring[T]) Prev() *Ring[T] { + if r.next == nil { + return r.init() + } + return r.prev +} + +// Move moves n % r.Len() elements backward (n < 0) or forward (n >= 0) +// in the ring and returns that ring element. r must not be empty. +// +func (r *Ring[T]) Move(n int) *Ring[T] { + if r.next == nil { + return r.init() + } + switch { + case n < 0: + for ; n < 0; n++ { + r = r.prev + } + case n > 0: + for ; n > 0; n-- { + r = r.next + } + } + return r +} + +// New creates a ring of n elements. +func NewRing[T any](n int) *Ring[T] { + if n <= 0 { + return nil + } + r := new(Ring[T]) + p := r + for i := 1; i < n; i++ { + p.next = &Ring[T]{prev: p} + p = p.next + } + p.next = r + r.prev = p + return r +} + +// Link connects ring r with ring s such that r.Next() +// becomes s and returns the original value for r.Next(). +// r must not be empty. +// +// If r and s point to the same ring, linking +// them removes the elements between r and s from the ring. +// The removed elements form a subring and the result is a +// reference to that subring (if no elements were removed, +// the result is still the original value for r.Next(), +// and not nil). +// +// If r and s point to different rings, linking +// them creates a single ring with the elements of s inserted +// after r. The result points to the element following the +// last element of s after insertion. +// +func (r *Ring[T]) Link(s *Ring[T]) *Ring[T] { + n := r.Next() + if s != nil { + p := s.Prev() + // Note: Cannot use multiple assignment because + // evaluation order of LHS is not specified. 
+ r.next = s + s.prev = r + n.prev = p + p.next = n + } + return n +} + +// Unlink removes n % r.Len() elements from the ring r, starting +// at r.Next(). If n % r.Len() == 0, r remains unchanged. +// The result is the removed subring. r must not be empty. +// +func (r *Ring[T]) Unlink(n int) *Ring[T] { + if n <= 0 { + return nil + } + return r.Link(r.Move(n + 1)) +} + +// Len computes the number of elements in ring r. +// It executes in time proportional to the number of elements. +// +func (r *Ring[T]) Len() int { + n := 0 + if r != nil { + n = 1 + for p := r.Next(); p != r; p = p.next { + n++ + } + } + return n +} + +// Do calls function f on each element of the ring, in forward order. +// The behavior of Do is undefined if f changes *r. +func (r *Ring[T]) Do(f func(T)) { + if r != nil { + f(r.Value) + for p := r.Next(); p != r; p = p.next { + f(p.Value) + } + } +} diff --git a/util/slice.go b/util/slice.go new file mode 100644 index 0000000..fbdf52b --- /dev/null +++ b/util/slice.go @@ -0,0 +1,21 @@ +package util + +type Slice[T comparable] []T + +func (s Slice[T]) Len() int { + return len(s) +} + +func (s *Slice[T]) Add(v T) { + *s = append(*s, v) +} + +func (s *Slice[T]) Delete(v T) bool { + for i, val := range *s { + if val == v { + *s = append((*s)[:i], (*s)[i+1:]...) 
+ return true + } + } + return false +} diff --git a/util/socket.go b/util/socket.go new file mode 100644 index 0000000..8ec1dcf --- /dev/null +++ b/util/socket.go @@ -0,0 +1,92 @@ +package util + +import ( + "log" + "net" + "net/http" + "os" + "time" + + "golang.org/x/sync/errgroup" +) + +// ListenAddrs Listen http and https +func ListenAddrs(addr, addTLS, cert, key string, handler http.Handler) { + var g errgroup.Group + if addTLS != "" { + g.Go(func() error { + return http.ListenAndServeTLS(addTLS, cert, key, handler) + }) + } + if addr != "" { + g.Go(func() error { return http.ListenAndServe(addr, handler) }) + } + if err := g.Wait(); err != nil { + log.Fatal(err) + } +} + +func ListenTCP(addr string, process func(net.Conn)) error { + listener, err := net.Listen("tcp", addr) + if err != nil { + return err + } + var tempDelay time.Duration + for { + conn, err := listener.Accept() + conn.(*net.TCPConn).SetNoDelay(false) + if err != nil { + if ne, ok := err.(net.Error); ok && ne.Temporary() { + if tempDelay == 0 { + tempDelay = 5 * time.Millisecond + } else { + tempDelay *= 2 + } + if max := 1 * time.Second; tempDelay > max { + tempDelay = max + } + Printf("%s: Accept error: %v; retrying in %v", addr, err, tempDelay) + time.Sleep(tempDelay) + continue + } + return err + } + tempDelay = 0 + go process(conn) + } +} + +func ListenUDP(address string, networkBuffer int) (*net.UDPConn, error) { + addr, err := net.ResolveUDPAddr("udp", address) + if err != nil { + log.Fatalf("udp server ResolveUDPAddr :%s error, %v", address, err) + } + conn, err := net.ListenUDP("udp", addr) + if err != nil { + log.Fatalf("udp server ListenUDP :%s error, %v", address, err) + } + if err = conn.SetReadBuffer(networkBuffer); err != nil { + Printf("udp server video conn set read buffer error, %v", err) + } + if err = conn.SetWriteBuffer(networkBuffer); err != nil { + Printf("udp server video conn set write buffer error, %v", err) + } + return conn, err +} + +func CORS(w http.ResponseWriter, 
r *http.Request) { + w.Header().Set("Access-Control-Allow-Credentials", "true") + origin := r.Header["Origin"] + if len(origin) == 0 { + w.Header().Set("Access-Control-Allow-Origin", "*") + } else { + w.Header().Set("Access-Control-Allow-Origin", origin[0]) + } +} + +// 检查文件或目录是否存在 +// 如果由 filename 指定的文件或目录存在则返回 true,否则返回 false +func Exist(filename string) bool { + _, err := os.Stat(filename) + return err == nil || os.IsExist(err) +} diff --git a/util/sse.go b/util/sse.go new file mode 100644 index 0000000..842800e --- /dev/null +++ b/util/sse.go @@ -0,0 +1,70 @@ +package util + +import ( + "context" + "encoding/json" + "net/http" + "os/exec" +) + +var ( + sseEent = []byte("event: ") + sseBegin = []byte("data: ") + sseEnd = []byte("\n\n") +) + +type SSE struct { + http.ResponseWriter + context.Context +} + +func (sse *SSE) Write(data []byte) (n int, err error) { + if err = sse.Err(); err != nil { + return + } + _, err = sse.ResponseWriter.Write(sseBegin) + n, err = sse.ResponseWriter.Write(data) + _, err = sse.ResponseWriter.Write(sseEnd) + if err != nil { + return + } + sse.ResponseWriter.(http.Flusher).Flush() + return +} + +func (sse *SSE) WriteEvent(event string, data []byte) (err error) { + if err = sse.Err(); err != nil { + return + } + _, err = sse.ResponseWriter.Write(sseEent) + _, err = sse.ResponseWriter.Write([]byte(event)) + _, err = sse.ResponseWriter.Write([]byte("\n")) + _, err = sse.Write(data) + return +} + +func NewSSE(w http.ResponseWriter, ctx context.Context) *SSE { + header := w.Header() + header.Set("Content-Type", "text/event-stream") + header.Set("Cache-Control", "no-cache") + header.Set("Connection", "keep-alive") + header.Set("X-Accel-Buffering", "no") + header.Set("Access-Control-Allow-Origin", "*") + return &SSE{ + w, + ctx, + } +} + +func (sse *SSE) WriteJSON(data interface{}) error { + jsonData, err := json.Marshal(data) + if err == nil { + _, err = sse.Write(jsonData) + } + return err +} +func (sse *SSE) WriteExec(cmd *exec.Cmd) 
error { + cmd.Stderr = sse + cmd.Stdout = sse + return cmd.Run() +} diff --git a/video_track.go b/video_track.go deleted file mode 100644 index def8a52..0000000 --- a/video_track.go +++ /dev/null @@ -1,444 +0,0 @@ -package engine - -import ( - "container/list" - "container/ring" - "encoding/binary" - "time" - - "github.com/Monibuca/utils/v3" - "github.com/Monibuca/utils/v3/codec" -) - -const ( - naluTypeBitmask = 0b0001_1111 - naluTypeBitmask_hevc = 0x7E -) - -type VideoPack struct { - AVPack - CompositionTime uint32 - NALUs [][]byte - IDR bool // 是否关键帧 -} - -func (v *VideoPack) ResetNALUs() { - if cap(v.NALUs) > 0 { - v.NALUs = v.NALUs[:0] - } -} - -func (v *VideoPack) SetNalu0(nalu []byte) { - if cap(v.NALUs) > 0 { - v.NALUs = v.NALUs[:1] - v.NALUs[0] = nalu - } else { - v.NALUs = [][]byte{nalu} - } -} - -type VideoTrack struct { - IDRing *ring.Ring `json:"-"` //最近的关键帧位置,首屏渲染 - AVTrack - SPSInfo codec.SPSInfo - GOP int //关键帧间隔 - ExtraData *VideoPack `json:"-"` //H264(SPS、PPS) H265(VPS、SPS、PPS) - WaitIDR chan struct{} `json:"-"` - revIDR func() - PushNalu func(ts uint32, cts uint32, nalus ...[]byte) `json:"-"` - UsingDonlField bool - writeByteStream func() - idrCount int //处于缓冲中的关键帧数量 - nalulenSize int - *VideoPack `json:"-"` //当前写入的视频数据 - keyFrameBuffers *list.List //用于作为关键帧缓存的对象池,缓冲中每个节点都有buffer,但是关键帧的长度较长,会导致每个节点都可能增长空间 -} - -func (s *Stream) NewVideoTrack(codec byte) (vt *VideoTrack) { - vt = &VideoTrack{ - WaitIDR: make(chan struct{}), - revIDR: func() { - vt.IDRing = vt.Ring - close(vt.WaitIDR) - idrSequence := vt.Sequence - vt.ts = vt.Timestamp - vt.idrCount++ - vt.revIDR = func() { - vt.idrCount++ - vt.GOP = vt.Sequence - idrSequence - if l := vt.Size - vt.GOP - 5; l > 5 { - vt.Size -= l - //缩小缓冲环节省内存 - vt.Unlink(l).Do(func(v interface{}) { - if v.(*AVItem).Value.(*VideoPack).IDR { - // 将关键帧的缓存放入对象池 - vt.keyFrameBuffers.PushBack(v.(*AVItem).Value) - vt.idrCount-- - } - }) - } - vt.IDRing = vt.Ring - idrSequence = vt.Sequence - vt.resetBPS() - } - }, - 
keyFrameBuffers: list.New(), - } - vt.timebase = 90000 - vt.PushNalu = vt.pushNalu - vt.Stream = s - vt.CodecID = codec - vt.Init(s.Context, 256) - vt.poll = time.Millisecond * 20 - vt.Do(func(v interface{}) { - v.(*AVItem).Value = new(VideoPack) - }) - vt.setCurrent() - return -} - -func (vt *VideoTrack) PushAnnexB(ts uint32, cts uint32, payload []byte) { - vt.PushNalu(ts, cts, codec.SplitH264(payload)...) -} - -func (vt *VideoTrack) pushNalu(ts uint32, cts uint32, nalus ...[]byte) { - idrBit := 0x10 | vt.CodecID - nIdrBit := 0x20 | vt.CodecID - tmp := make([]byte, 4) - // 缓冲中只包含Nalu数据所以写入rtmp格式时需要按照ByteStream格式写入 - vt.writeByteStream = func() { - vt.Reset() - if vt.IDR { - tmp[0] = idrBit - } else { - tmp[0] = nIdrBit - } - tmp[1] = 1 - vt.Buffer.Write(tmp[:2]) - utils.BigEndian.PutUint24(tmp, vt.CompositionTime) - vt.Buffer.Write(tmp[:3]) - for _, nalu := range vt.NALUs { - binary.Write(&vt.Buffer, binary.BigEndian, uint32(len(nalu))) - vt.Buffer.Write(nalu) - } - vt.Bytes2Payload() - } - switch vt.CodecID { - case codec.CodecID_H264: - { - var info codec.AVCDecoderConfigurationRecord - vt.PushNalu = func(ts uint32, cts uint32, nalus ...[]byte) { - // 等待接收SPS和PPS数据 - for _, nalu := range nalus { - if len(nalu) == 0 { - continue - } - switch nalu[0] & naluTypeBitmask { - case codec.NALU_SPS: - info.SequenceParameterSetNALUnit = nalu - info.SequenceParameterSetLength = uint16(len(nalu)) - vt.SPSInfo, _ = codec.ParseSPS(nalu) - case codec.NALU_PPS: - info.PictureParameterSetNALUnit = nalu - info.PictureParameterSetLength = uint16(len(nalu)) - } - } - if info.SequenceParameterSetNALUnit != nil && info.PictureParameterSetNALUnit != nil { - vt.ExtraData = &VideoPack{ - NALUs: [][]byte{info.SequenceParameterSetNALUnit, info.PictureParameterSetNALUnit}, - } - vt.ExtraData.Payload = codec.BuildH264SeqHeaderFromSpsPps(info.SequenceParameterSetNALUnit, info.PictureParameterSetNALUnit) - } - if vt.ExtraData == nil { - return - } - vt.Stream.VideoTracks.AddTrack("h264", vt) 
- //已完成SPS和PPS 组装,重置push函数,接收视频数据 - vt.PushNalu = func(ts uint32, cts uint32, nalus ...[]byte) { - var nonIDRs int - var IDRs int - for _, nalu := range nalus { - naluLen := len(nalu) - if naluLen == 0 { - continue - } - naluType := nalu[0] & naluTypeBitmask - switch naluType { - case codec.NALU_SPS: - vt.ExtraData.NALUs[0] = nalu - vt.SPSInfo, _ = codec.ParseSPS(nalu) - case codec.NALU_PPS: - vt.ExtraData.NALUs[1] = nalu - vt.ExtraData.Payload = codec.BuildH264SeqHeaderFromSpsPps(vt.ExtraData.NALUs[0], vt.ExtraData.NALUs[1]) - case codec.NALU_Access_Unit_Delimiter: - case codec.NALU_IDR_Picture: - vt.addBytes(naluLen) - if IDRs == 0 { - vt.setIDR(true) - vt.SetNalu0(nalu) - } else { - vt.NALUs = append(vt.NALUs, nalu) - } - IDRs++ - case codec.NALU_Non_IDR_Picture: - vt.addBytes(naluLen) - if nonIDRs == 0 { - vt.setIDR(false) - vt.SetNalu0(nalu) - } else { - vt.NALUs = append(vt.NALUs, nalu) - } - nonIDRs++ - case codec.NALU_SEI: - case codec.NALU_Filler_Data: - default: - utils.Printf("%s,nalType not support yet:%d,[0]=0x%X", vt.Stream.StreamPath, naluType, nalu[0]) - } - } - if nonIDRs + IDRs > 0 { - vt.setTS(ts) - vt.CompositionTime = cts - vt.push() - } - } - } - } - case codec.CodecID_H265: - var vps, sps, pps []byte - vt.PushNalu = func(ts uint32, cts uint32, nalus ...[]byte) { - // 等待接收SPS和PPS数据 - for _, nalu := range nalus { - if len(nalu) == 0 { - continue - } - switch nalu[0] & naluTypeBitmask_hevc >> 1 { - case codec.NAL_UNIT_VPS: - vps = nalu - case codec.NAL_UNIT_SPS: - sps = nalu - vt.SPSInfo, _ = codec.ParseHevcSPS(nalu) - case codec.NAL_UNIT_PPS: - pps = nalu - } - } - if vps != nil && sps != nil && pps != nil { - extraData, err := codec.BuildH265SeqHeaderFromVpsSpsPps(vps, sps, pps) - if err != nil { - return - } - vt.ExtraData = &VideoPack{ - NALUs: [][]byte{vps, sps, pps}, - } - vt.ExtraData.Payload = extraData - } - if vt.ExtraData != nil { - vt.Stream.VideoTracks.AddTrack("h265", vt) - vt.PushNalu = func(ts uint32, cts uint32, nalus ...[]byte) 
{ - var nonIDRs [][]byte - for _, nalu := range nalus { - naluLen := len(nalu) - if naluLen == 0 { - continue - } - /* - 0 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |F| Type | LayerId | TID | - +-------------+-----------------+ - Forbidden zero(F) : 1 bit - NAL unit type(Type) : 6 bits - NUH layer ID(LayerId) : 6 bits - NUH temporal ID plus 1 (TID) : 3 bits - */ - - naluType := nalu[0] & naluTypeBitmask_hevc >> 1 - switch naluType { - case codec.NAL_UNIT_VPS: - vps = nalu - vt.ExtraData.NALUs[0] = vps - case codec.NAL_UNIT_SPS: - sps = nalu - vt.ExtraData.NALUs[1] = sps - vt.SPSInfo, _ = codec.ParseHevcSPS(nalu) - case codec.NAL_UNIT_PPS: - pps = nalu - vt.ExtraData.NALUs[2] = pps - extraData, err := codec.BuildH265SeqHeaderFromVpsSpsPps(vps, sps, pps) - if err != nil { - return - } - vt.ExtraData.Payload = extraData - case codec.NAL_UNIT_CODED_SLICE_BLA, - codec.NAL_UNIT_CODED_SLICE_BLANT, - codec.NAL_UNIT_CODED_SLICE_BLA_N_LP, - codec.NAL_UNIT_CODED_SLICE_IDR, - codec.NAL_UNIT_CODED_SLICE_IDR_N_LP, - codec.NAL_UNIT_CODED_SLICE_CRA: - vt.setIDR(true) - vt.setTS(ts) - vt.CompositionTime = cts - vt.SetNalu0(nalu) - vt.addBytes(naluLen) - vt.push() - case 0, 1, 2, 3, 4, 5, 6, 7, 9: - nonIDRs = append(nonIDRs, nalu) - vt.addBytes(naluLen) - } - } - if len(nonIDRs) > 0 { - vt.setIDR(false) - vt.setTS(ts) - vt.CompositionTime = cts - vt.NALUs = nonIDRs - vt.push() - } - } - } - } - } - vt.PushNalu(ts, cts, nalus...) 
-} - -func (vt *VideoTrack) setCurrent() { - vt.AVTrack.setCurrent() - vt.VideoPack = vt.Value.(*VideoPack) -} - -func (vt *VideoTrack) PushByteStream(ts uint32, payload []byte) { - if payload[1] == 0 { - vt.CodecID = payload[0] & 0x0F - switch vt.CodecID { - case 7: - var info codec.AVCDecoderConfigurationRecord - if _, err := info.Unmarshal(payload[5:]); err == nil { - vt.SPSInfo, _ = codec.ParseSPS(info.SequenceParameterSetNALUnit) - vt.nalulenSize = int(info.LengthSizeMinusOne&3 + 1) - vt.ExtraData = &VideoPack{ - NALUs: [][]byte{info.SequenceParameterSetNALUnit, info.PictureParameterSetNALUnit}, - } - vt.ExtraData.Payload = payload - vt.Stream.VideoTracks.AddTrack("h264", vt) - } - case 12: - if vps, sps, pps, err := codec.ParseVpsSpsPpsFromSeqHeaderWithoutMalloc(payload); err == nil { - vt.SPSInfo, _ = codec.ParseSPS(sps) - vt.nalulenSize = int(payload[26]) & 0x03 - vt.ExtraData = &VideoPack{ - NALUs: [][]byte{vps, sps, pps}, - } - vt.ExtraData.Payload = payload - vt.Stream.VideoTracks.AddTrack("h265", vt) - } - } - } else { - if len(payload) < 4 { - return - } - vt.addBytes(len(payload)) - vt.IDR = payload[0]>>4 == 1 - vt.setTS(ts) - vt.Payload = payload - vt.CompositionTime = utils.BigEndian.Uint24(payload[2:]) - vt.ResetNALUs() - for nalus := payload[5:]; len(nalus) > vt.nalulenSize; { - nalulen := 0 - for i := 0; i < vt.nalulenSize; i++ { - nalulen += int(nalus[i]) << ((vt.nalulenSize - i - 1) << 3) - } - if end := nalulen + vt.nalulenSize; len(nalus) >= end { - vt.NALUs = append(vt.NALUs, nalus[vt.nalulenSize:end]) - nalus = nalus[end:] - } else { - utils.Printf("PushByteStream error,len %d,nalulenSize:%d,end:%d", len(nalus), vt.nalulenSize, end) - break - } - } - if len(vt.NALUs) > 0 { - vt.push() - } - } -} - -// 设置关键帧信息,主要是为了判断缓存之前是否是关键帧,用来调度缓存 -func (vt *VideoTrack) setIDR(idr bool) { - // 如果当前帧的类型和需要设置的类型相同,则不需要操作 - if idr == vt.IDR { - return - } - // 原来是非关键帧,现在是关键帧,需要从关键帧池里面拿出一个缓存 - if idr { - if cache := vt.keyFrameBuffers.Back(); cache != nil { 
- vt.AVItem.Value = vt.keyFrameBuffers.Remove(cache) - vt.VideoPack = vt.AVItem.Value.(*VideoPack) //设置当前操作的指针 - } - } else { //原来是关键帧,现在是非关键帧,把原来的关键帧缓存放回去 - vt.keyFrameBuffers.PushBack(vt.AVItem.Value) - vt.VideoPack = new(VideoPack) //设置当前操作的指针 - vt.AVItem.Value = vt.VideoPack - } - vt.IDR = idr -} -func (vt *VideoTrack) push() { - if len(vt.NALUs) == 0 { - panic("push error,nalus is empty") - } - if vt.Stream != nil { - vt.Stream.Update() - } - if vt.writeByteStream != nil { - vt.writeByteStream() - } - if vt.GetBPS(); vt.IDR { - vt.revIDR() - } - if nextPack := vt.NextValue().(*VideoPack); nextPack.IDR { - if vt.idrCount == 1 { - if vt.Size < config.MaxRingSize { - exRing := ring.New(5) - for x := exRing; x.Value == nil; x = x.Next() { - x.Value = &AVItem{DataItem: DataItem{Value: new(VideoPack)}} - } - vt.Link(exRing) // 扩大缓冲环 - } - } else { - vt.idrCount-- - } - } - vt.Step() - vt.setCurrent() -} - -func (vt *VideoTrack) Play(onVideo func(uint32, *VideoPack), exit1, exit2 <-chan struct{}) { - select { - case <-vt.WaitIDR: - case <-exit1: - return - case <-exit2: //可能等不到关键帧就退出了 - return - } - vr := vt.SubRing(vt.IDRing) //从关键帧开始读取,首屏秒开 - realSt := vt.PreItem().Timestamp // 当前时间戳 - item, vp := vr.Read() - startTimestamp := item.Timestamp - for chase := true; ; item, vp = vr.Read() { - select { - case <-exit1: - return - case <-exit2: - return - default: - onVideo(uint32(item.Timestamp.Sub(startTimestamp).Milliseconds()), vp.(*VideoPack)) - if chase { - add10 := startTimestamp.Add(time.Millisecond * 10) - if realSt.After(add10) { - startTimestamp = add10 - } else { - startTimestamp = realSt - chase = false - } - } - vr.MoveNext() - } - } -} diff --git a/video_track_test.go b/video_track_test.go deleted file mode 100644 index cd21c9d..0000000 --- a/video_track_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package engine - -import ( - "encoding/json" - "testing" -) - -func TestJSON(t *testing.T) { - t.Run(t.Name(), func(t *testing.T) { - var s Stream - s.StreamPath = 
"test" - s.Publish() - s.NewVideoTrack(7) - bytes, err := json.Marshal(&s) - if err == nil { - str := string(bytes) - t.Logf("%s", str) - } - }) -}