flv support h265(experiment)

This commit is contained in:
notch
2021-01-15 11:13:36 +08:00
parent d3c4ab4012
commit 8917825571
7 changed files with 516 additions and 68 deletions

View File

@@ -18,44 +18,61 @@ import (
"github.com/cnotch/xlog"
)
func TestFlvWriter(t *testing.T) {
sdpraw, err := ioutil.ReadFile("../../../test/asserts/game.sdp")
if err != nil {
panic("Couldn't open sdp")
}
file, err := os.Open("../../../test/asserts/game.rtp")
if err != nil {
panic("Couldn't open rtp")
}
defer file.Close()
reader := bufio.NewReader(file)
out, err := os.OpenFile("game.flv", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
if err != nil {
panic("Couldn't open flv")
}
defer out.Close()
var video codec.VideoMeta
var audio codec.AudioMeta
sdp.ParseMetadata(string(sdpraw), &video, &audio)
writer, err := NewWriter(out, 5)
flvMuxer,_ := NewMuxer(&video, &audio, writer, xlog.L())
rtpDemuxer,_ := rtp.NewDemuxer(&video,&audio,flvMuxer, xlog.L())
channels := []int{int(rtp.ChannelVideo), int(rtp.ChannelVideoControl), int(rtp.ChannelAudio), int(rtp.ChannelAudioControl)}
for {
packet, err := rtp.ReadPacket(reader, channels)
if err == io.EOF {
break
}
if err != nil {
t.Logf("read packet error :%s", err.Error())
}
rtpDemuxer.WriteRtpPacket(packet)
}
<-time.After(time.Millisecond * 1000)
rtpDemuxer.Close()
flvMuxer.Close()
// muxerTestCases names the fixture files used by TestFlvWriter: for each
// case the SDP description and RTP capture that are read, and the FLV file
// that is written.
var muxerTestCases = []struct {
	sdpFile, rtpFile, flvFile string
}{
	{sdpFile: "game.sdp", rtpFile: "game.rtp", flvFile: "game.flv"},
	{sdpFile: "music.sdp", rtpFile: "music.rtp", flvFile: "music.flv"},
	{sdpFile: "265.sdp", rtpFile: "265.rtp", flvFile: "265.flv"},
}
// TestFlvWriter runs every entry of muxerTestCases as a subtest: it parses
// the SDP fixture, feeds the RTP capture through an RTP demuxer into an FLV
// muxer, and writes the result to the case's FLV file next to the fixtures.
//
// Setup failures abort the subtest with t.Fatalf instead of panicking or
// being ignored, so a failing constructor can no longer lead to a nil
// dereference further down.
func TestFlvWriter(t *testing.T) {
	assertsPath := "../../../test/asserts/"

	for _, tt := range muxerTestCases {
		t.Run(tt.rtpFile, func(t *testing.T) {
			sdpraw, err := ioutil.ReadFile(assertsPath + tt.sdpFile)
			if err != nil {
				t.Fatalf("read sdp %q: %v", tt.sdpFile, err)
			}

			file, err := os.Open(assertsPath + tt.rtpFile)
			if err != nil {
				t.Fatalf("open rtp %q: %v", tt.rtpFile, err)
			}
			defer file.Close()
			reader := bufio.NewReader(file)

			out, err := os.OpenFile(assertsPath+tt.flvFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
			if err != nil {
				t.Fatalf("open flv %q: %v", tt.flvFile, err)
			}
			defer out.Close()

			var video codec.VideoMeta
			var audio codec.AudioMeta
			sdp.ParseMetadata(string(sdpraw), &video, &audio)

			writer, err := NewWriter(out, 5)
			if err != nil {
				t.Fatalf("new flv writer: %v", err)
			}
			flvMuxer, err := NewMuxer(&video, &audio, writer, xlog.L())
			if err != nil {
				t.Fatalf("new flv muxer: %v", err)
			}
			rtpDemuxer, err := rtp.NewDemuxer(&video, &audio, flvMuxer, xlog.L())
			if err != nil {
				t.Fatalf("new rtp demuxer: %v", err)
			}

			channels := []int{int(rtp.ChannelVideo), int(rtp.ChannelVideoControl), int(rtp.ChannelAudio), int(rtp.ChannelAudioControl)}
			for {
				packet, err := rtp.ReadPacket(reader, channels)
				if err == io.EOF {
					break
				}
				if err != nil {
					// The packet is not usable after a failed read; log and
					// skip it rather than handing it to the demuxer.
					t.Logf("read packet error :%s", err.Error())
					continue
				}
				rtpDemuxer.WriteRtpPacket(packet)
			}

			// NOTE(review): presumably gives the demuxer/muxer time to drain
			// queued frames before they are closed — confirm.
			<-time.After(time.Millisecond * 1000)
			rtpDemuxer.Close()
			flvMuxer.Close()
		})
	}
}

View File

@@ -53,7 +53,7 @@ func (h264p *h264Packetizer) PacketizeSequenceHeader() error {
videoData := &VideoData{
FrameType: FrameTypeKeyFrame,
CodecID: CodecIDAVC,
AVCPacketType: AVCPacketTypeSequenceHeader,
H2645PacketType: H2645PacketTypeSequenceHeader,
CompositionTime: 0,
Body: body,
}
@@ -89,7 +89,7 @@ func (h264p *h264Packetizer) Packetize(basePts int64, frame *codec.Frame) error
videoData := &VideoData{
FrameType: FrameTypeInterFrame,
CodecID: CodecIDAVC,
AVCPacketType: AVCPacketTypeNALU,
H2645PacketType: H2645PacketTypeNALU,
CompositionTime: uint32(pts - dts),
Body: frame.Payload,
}

View File

@@ -0,0 +1,110 @@
// Copyright (c) 2019,CAOHONGJU All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package flv
import (
	"errors"

	"github.com/cnotch/ipchub/av/codec"
	"github.com/cnotch/ipchub/av/codec/hevc"
)
// h265Packetizer turns H.265 (HEVC) frames into FLV video tags and hands
// them to a TagWriter.
type h265Packetizer struct {
	meta      *codec.VideoMeta // video metadata; source of the VPS/SPS/PPS and the frame rate
	tagWriter TagWriter        // destination for every FLV tag this packetizer produces
	spsMuxed  bool             // set when the sequence header is built, so it is emitted only once
	nextDts   float64          // DTS, in milliseconds, assigned to the next frame
	dtsStep   float64          // amount nextDts advances per frame, in milliseconds (1000 / frame rate)
}
// NewH265Packetizer returns a Packetizer that writes H.265 video tags to
// tagWriter. When meta already carries a positive frame rate, the per-frame
// DTS step is derived from it; otherwise the step starts at zero.
func NewH265Packetizer(meta *codec.VideoMeta, tagWriter TagWriter) Packetizer {
	var step float64
	if meta.FrameRate > 0 {
		step = 1000.0 / meta.FrameRate
	}

	return &h265Packetizer{
		meta:      meta,
		tagWriter: tagWriter,
		dtsStep:   step,
	}
}
// PacketizeSequenceHeader builds an HEVCDecoderConfigurationRecord from the
// VPS/SPS/PPS held in the metadata and writes it as a key-frame video tag
// with timestamp 0.
//
// It is a no-op (returning nil) when the header has already been produced
// or when hevc.MetadataIsReady reports that the metadata is not ready yet.
// Marshal errors and the TagWriter's error are returned to the caller.
func (h265p *h265Packetizer) PacketizeSequenceHeader() error {
	if h265p.spsMuxed {
		return nil
	}

	if !hevc.MetadataIsReady(h265p.meta) {
		// Not enough metadata yet; try again on a later call.
		return nil
	}

	h265p.spsMuxed = true

	// Only trust the advertised frame rate when it is a usable divisor;
	// dividing by zero would give an infinite step and a DTS that cannot be
	// converted to an integer timestamp.
	if h265p.meta.FixedFrameRate && h265p.meta.FrameRate > 0 {
		h265p.dtsStep = 1000.0 / h265p.meta.FrameRate
	} else { // TODO: derive the step for variable frame rate streams
		h265p.dtsStep = 1000.0 / 30
	}

	record := NewHEVCDecoderConfigurationRecord(h265p.meta.Vps, h265p.meta.Sps, h265p.meta.Pps)
	body, err := record.Marshal()
	if err != nil {
		return err
	}

	videoData := &VideoData{
		FrameType:       FrameTypeKeyFrame,
		CodecID:         CodecIDHEVC,
		H2645PacketType: H2645PacketTypeSequenceHeader,
		CompositionTime: 0,
		Body:            body,
	}
	data, err := videoData.Marshal()
	if err != nil {
		return err
	}

	tag := &Tag{
		TagType:   TagTypeVideo,
		DataSize:  uint32(len(data)),
		Timestamp: 0,
		StreamID:  0,
		Data:      data,
	}

	return h265p.tagWriter.WriteFlvTag(tag)
}
// Packetize wraps one H.265 frame in an FLV video tag and writes it.
//
// VPS, SPS and PPS NAL units are not written as frames; they trigger
// PacketizeSequenceHeader instead. For any other NAL unit the DTS comes from
// the packetizer's running counter, the PTS is the frame timestamp relative
// to basePts plus ptsDelay (raised to the DTS if it would fall behind), and
// their difference becomes the composition time. NAL types from
// hevc.NalBlaWLp through hevc.NalCraNut are marked as key frames.
//
// An empty payload is rejected with an error instead of panicking on the
// NAL header read.
func (h265p *h265Packetizer) Packetize(basePts int64, frame *codec.Frame) error {
	if len(frame.Payload) == 0 {
		return errors.New("flv: empty H265 frame payload")
	}

	// The NAL unit type sits in bits 1..6 of the first header byte.
	nalType := (frame.Payload[0] >> 1) & 0x3f
	if nalType == hevc.NalVps ||
		nalType == hevc.NalSps ||
		nalType == hevc.NalPps {
		return h265p.PacketizeSequenceHeader()
	}

	dts := int64(h265p.nextDts)
	h265p.nextDts += h265p.dtsStep
	pts := frame.AbsTimestamp - basePts + ptsDelay
	if dts > pts {
		// Keep the composition time non-negative.
		pts = dts
	}

	videoData := &VideoData{
		FrameType:       FrameTypeInterFrame,
		CodecID:         CodecIDHEVC,
		H2645PacketType: H2645PacketTypeNALU,
		CompositionTime: uint32(pts - dts),
		Body:            frame.Payload,
	}

	if nalType >= hevc.NalBlaWLp && nalType <= hevc.NalCraNut {
		videoData.FrameType = FrameTypeKeyFrame
	}

	data, err := videoData.Marshal()
	if err != nil {
		return err
	}

	tag := &Tag{
		TagType:   TagTypeVideo,
		DataSize:  uint32(len(data)),
		Timestamp: uint32(dts),
		StreamID:  0,
		Data:      data,
	}

	return h265p.tagWriter.WriteFlvTag(tag)
}

View File

@@ -62,6 +62,8 @@ func NewMuxer(videoMeta *codec.VideoMeta, audioMeta *codec.AudioMeta, tagWriter
switch videoMeta.Codec {
case "H264":
muxer.vp = NewH264Packetizer(videoMeta, tagWriter)
case "H265":
muxer.vp = NewH265Packetizer(videoMeta, tagWriter)
default:
return nil, fmt.Errorf("flv muxer unsupport video codec type:%s", videoMeta.Codec)
}

View File

@@ -73,28 +73,27 @@ func (tag *Tag) IsMetadata() bool {
return false
}
// IsH264KeyFrame 判断是否是 H264 关键帧 Tag
func (tag *Tag) IsH264KeyFrame() bool {
// IsH2645KeyFrame 判断是否是 H264/H265 关键帧 Tag
func (tag *Tag) IsH2645KeyFrame() bool {
if len(tag.Data) < 2 {
return false
}
return tag.TagType == TagTypeVideo &&
(tag.Data[0]&0x0f) == CodecIDAVC &&
((tag.Data[0]&0x0f) == CodecIDAVC || ((tag.Data[0] & 0x0f) == CodecIDHEVC)) &&
((tag.Data[0]>>4)&0x0f) == FrameTypeKeyFrame
}
// IsH264SequenceHeader 判断是否是 H264 序列头 Tag
func (tag *Tag) IsH264SequenceHeader() bool {
// IsH2645SequenceHeader 判断是否是 H264/H265 序列头 Tag
func (tag *Tag) IsH2645SequenceHeader() bool {
if len(tag.Data) < 2 {
return false
}
return tag.TagType == TagTypeVideo &&
(tag.Data[0]&0x0f) == CodecIDAVC &&
((tag.Data[0]&0x0f) == CodecIDAVC || ((tag.Data[0] & 0x0f) == CodecIDHEVC)) &&
((tag.Data[0]>>4)&0x0f) == FrameTypeKeyFrame &&
tag.Data[1] == AVCPacketTypeSequenceHeader
tag.Data[1] == H2645PacketTypeSequenceHeader
}
// IsAACSequenceHeader 判断是否是 AAC 序列头 Tag

View File

@@ -8,6 +8,8 @@ import (
"encoding/binary"
"errors"
"fmt"
"github.com/cnotch/ipchub/av/codec/hevc"
)
// E.4.3.1 VIDEODATA
@@ -73,17 +75,17 @@ func CodecIDName(codecID int32) string {
}
}
// AVCPacketType IF CodecID == 7 UI8
// H2645PacketType IF CodecID == 7 or 12 UI8
// The following values are defined:
// 0 = AVC sequence header
// 1 = AVC NALU
// 2 = AVC end of sequence (lower level NALU sequence ender is
// not required or supported)
const (
AVCPacketTypeSequenceHeader = 0
AVCPacketTypeNALU = 1
AVCPacketTypeSequenceHeaderEOF = 2
AVCPacketTypeReserved = 3
H2645PacketTypeSequenceHeader = 0
H2645PacketTypeNALU = 1
H2645PacketTypeSequenceHeaderEOF = 2
H2645PacketTypeReserved = 3
)
// VideoData flv Tag 中的的视频数据
@@ -93,11 +95,23 @@ const (
// AVCDecoderConfigurationRecord
// ELSE
//  One or more NALUs (Full frames are required)
//
// 对于 CodecID == CodecIDAVCBody 值:
// IF H2645PacketType == H2645PacketTypeSequenceHeader
// AVCDecoderConfigurationRecord
// ELSE
//  One or more NALUs (Full frames are required)
//
// 对于 CodecID == CodecIDHEVCBody 值:
// IF H2645PacketType == H2645PacketTypeSequenceHeader
// HEVCDecoderConfigurationRecord
// ELSE
//  One or more NALUs (Full frames are required)
type VideoData struct {
FrameType byte // 4 bits; 帧类型
CodecID byte // 4 bits; 编解码器标识
AVCPacketType byte // 8 bits; 仅 AVC 编码有效AVC 包类型
CompositionTime uint32 // 24 bits; 仅 AVC 编码有效表示PTS 与 DTS 的时间偏移值,单位 ms记作 CTS。
H2645PacketType byte // 8 bits; 仅 AVC/HEVC 编码有效AVC 包类型
CompositionTime uint32 // 24 bits; 仅 AVC/HEVC 编码有效表示PTS 与 DTS 的时间偏移值,单位 ms记作 CTS。
Body []byte // 原始视频
}
@@ -106,8 +120,8 @@ var _ TagData = &VideoData{}
// Unmarshal .
// Note: Unmarshal not copy the data
func (videoData *VideoData) Unmarshal(data []byte) error {
if len(data) < 9 {
return errors.New("data.length < 9")
if len(data) < 1 {
return errors.New("data.length < 1")
}
offset := 0
@@ -116,13 +130,19 @@ func (videoData *VideoData) Unmarshal(data []byte) error {
videoData.CodecID = data[offset] & 0x0f
offset++
if videoData.CodecID == CodecIDAVC {
if videoData.CodecID == CodecIDAVC || videoData.CodecID == CodecIDHEVC {
if len(data) < 5 {
return errors.New("data.length < 5")
}
temp := binary.BigEndian.Uint32(data[offset:])
videoData.AVCPacketType = byte(temp >> 24)
videoData.H2645PacketType = byte(temp >> 24)
videoData.CompositionTime = temp & 0x00ffffff
offset += 4
if videoData.AVCPacketType == AVCPacketTypeNALU {
if videoData.H2645PacketType == H2645PacketTypeNALU {
if len(data) < 9 {
return errors.New("data.length < 9")
}
size := int(binary.BigEndian.Uint32(data[offset:]))
offset += 4
if size > len(data)-offset {
@@ -137,7 +157,7 @@ func (videoData *VideoData) Unmarshal(data []byte) error {
// MarshalSize .
func (videoData *VideoData) MarshalSize() int {
if videoData.AVCPacketType == AVCPacketTypeNALU {
if videoData.H2645PacketType == H2645PacketTypeNALU {
return 9 + len(videoData.Body)
}
return 5 + len(videoData.Body)
@@ -150,12 +170,12 @@ func (videoData *VideoData) Marshal() ([]byte, error) {
buff[offset] = (videoData.FrameType << 4) | (videoData.CodecID & 0x0f)
offset++
if videoData.CodecID == CodecIDAVC {
if videoData.CodecID == CodecIDAVC || videoData.CodecID == CodecIDHEVC {
binary.BigEndian.PutUint32(buff[offset:],
(uint32(videoData.AVCPacketType)<<24)|(videoData.CompositionTime&0x00ffffff))
(uint32(videoData.H2645PacketType)<<24)|(videoData.CompositionTime&0x00ffffff))
offset += 4
if videoData.AVCPacketType == AVCPacketTypeNALU {
if videoData.H2645PacketType == H2645PacketTypeNALU {
binary.BigEndian.PutUint32(buff[offset:], uint32(len(videoData.Body)))
offset += 4
}
@@ -307,3 +327,303 @@ func (record *AVCDecoderConfigurationRecord) Marshal() ([]byte, error) {
return buff, nil
}
// HEVCDecoderConfigurationRecord holds the fields of an HEVC decoder
// configuration record that Unmarshal reads and Marshal writes, together
// with the VPS, SPS and PPS NAL units it carries.
type HEVCDecoderConfigurationRecord struct {
	ConfigurationVersion             uint8  // configurationVersion; the constructor sets it to 1
	GeneralProfileSpace              uint8  // general_profile_space, 2 bits
	GeneralTierFlag                  uint8  // general_tier_flag, 1 bit
	GeneralProfileIDC                uint8  // general_profile_idc, 5 bits
	GeneralProfileCompatibilityFlags uint32 // general_profile_compatibility_flags, 32 bits
	GeneralConstraintIndicatorFlags  uint64 // general_constraint_indicator_flags, low 48 bits are used
	GeneralLevelIDC                  uint8  // general_level_idc
	LengthSizeMinusOne               uint8  // lengthSizeMinusOne, 2 bits; the constructor sets 3 (4-byte lengths)
	MaxSubLayers                     uint8  // 3-bit sub-layer count stored next to lengthSizeMinusOne
	TemporalIdNestingFlag            uint8  // temporalIdNested, 1 bit
	ChromaFormatIDC                  uint8  // chromaFormat, 2 bits
	BitDepthLumaMinus8               uint8  // bitDepthLumaMinus8, 3 bits
	BitDepthChromaMinus8             uint8  // bitDepthChromaMinus8, 3 bits
	VPS                              []byte // video parameter set NAL unit
	SPS                              []byte // sequence parameter set NAL unit
	PPS                              []byte // picture parameter set NAL unit
}
// NewHEVCDecoderConfigurationRecord builds a configuration record around the
// given VPS, SPS and PPS and then fills the remaining fields by decoding the
// VPS and SPS. The slices are stored as given, not copied.
//
// A decode failure is not reported: the record is returned with whatever
// fields were populated before the failure.
func NewHEVCDecoderConfigurationRecord(vps, sps, pps []byte) *HEVCDecoderConfigurationRecord {
	record := new(HEVCDecoderConfigurationRecord)
	record.ConfigurationVersion = 1
	record.LengthSizeMinusOne = 3 // 4 bytes

	// Start with every flag bit set; applyPLT narrows these with &=.
	record.GeneralProfileCompatibilityFlags = 0xffffffff
	record.GeneralConstraintIndicatorFlags = 0xffffffffffff

	record.VPS, record.SPS, record.PPS = vps, sps, pps

	record.init()
	return record
}
// init decodes the record's VPS and then its SPS and copies the values the
// configuration record needs out of them: the largest sub-layer count, the
// merged profile/tier/level, the temporal-id nesting flag, the chroma format
// and the luma/chroma bit depths.
//
// It stops at the first decode error and returns it; fields set before that
// point keep their new values.
func (record *HEVCDecoderConfigurationRecord) init() error {
	var vps hevc.H265RawVPS
	if err := vps.Decode(record.VPS); err != nil {
		return err
	}
	if layers := vps.Vps_max_sub_layers_minus1 + 1; layers > record.MaxSubLayers {
		record.MaxSubLayers = layers
	}
	record.applyPLT(&vps.Profile_tier_level)

	var sps hevc.H265RawSPS
	if err := sps.Decode(record.SPS); err != nil {
		return err
	}
	if layers := sps.Sps_max_sub_layers_minus1 + 1; layers > record.MaxSubLayers {
		record.MaxSubLayers = layers
	}
	record.applyPLT(&sps.Profile_tier_level)

	// Values taken straight from the SPS.
	record.TemporalIdNestingFlag = sps.Sps_temporal_id_nesting_flag
	record.ChromaFormatIDC = sps.Chroma_format_idc
	record.BitDepthLumaMinus8 = sps.Bit_depth_luma_minus8
	record.BitDepthChromaMinus8 = sps.Bit_depth_chroma_minus8

	return nil
}
// applyPLT merges one profile/tier/level structure into the record.
//
// The profile space is overwritten. A higher tier flag replaces both tier
// and level; otherwise the level only ever increases. The profile IDC only
// ever increases. The compatibility and constraint flags are intersected
// with the record's current values.
func (record *HEVCDecoderConfigurationRecord) applyPLT(ptl *hevc.H265RawProfileTierLevel) {
	record.GeneralProfileSpace = ptl.General_profile_space

	switch {
	case ptl.General_tier_flag > record.GeneralTierFlag:
		record.GeneralLevelIDC = ptl.General_level_idc
		record.GeneralTierFlag = ptl.General_tier_flag
	case ptl.General_level_idc > record.GeneralLevelIDC:
		record.GeneralLevelIDC = ptl.General_level_idc
	}

	if record.GeneralProfileIDC < ptl.General_profile_idc {
		record.GeneralProfileIDC = ptl.General_profile_idc
	}

	record.GeneralProfileCompatibilityFlags &= ptl.GeneralProfileCompatibilityFlags
	record.GeneralConstraintIndicatorFlags &= ptl.GeneralConstraintIndicatorFlags
}
// Unmarshal parses an HEVC decoder configuration record from data.
//
// The 23-byte fixed part is read into the record's fields;
// min_spatial_segmentation_idc, parallelismType, avgFrameRate and
// constantFrameRate are skipped. Each following parameter-set array must
// hold exactly one NAL unit of type VPS, SPS or PPS; anything else is an
// error. The array_completeness and reserved bits that share a byte with the
// NAL unit type are masked off before the type is examined.
//
// Note: VPS, SPS and PPS are sub-slices of data, not copies.
func (record *HEVCDecoderConfigurationRecord) Unmarshal(data []byte) error {
	if len(data) < 23 {
		return errors.New("data.length < 23")
	}

	offset := 0

	// unsigned int(8) configurationVersion = 1;
	record.ConfigurationVersion = data[offset]
	offset++

	// unsigned int(2) general_profile_space;
	// unsigned int(1) general_tier_flag;
	// unsigned int(5) general_profile_idc;
	record.GeneralProfileSpace = data[offset] >> 6
	record.GeneralTierFlag = (data[offset] >> 5) & 0x01
	record.GeneralProfileIDC = data[offset] & 0x1f
	offset++

	// unsigned int(32) general_profile_compatibility_flags
	record.GeneralProfileCompatibilityFlags = binary.BigEndian.Uint32(data[offset:])
	offset += 4

	// unsigned int(48) general_constraint_indicator_flags
	record.GeneralConstraintIndicatorFlags = uint64(binary.BigEndian.Uint32(data[offset:]))
	record.GeneralConstraintIndicatorFlags <<= 16
	offset += 4
	record.GeneralConstraintIndicatorFlags |= uint64(binary.BigEndian.Uint16(data[offset:]))
	offset += 2

	// unsigned int(8) general_level_idc;
	record.GeneralLevelIDC = data[offset]
	offset++

	// bit(4) reserved = 1111b;
	// unsigned int(12) min_spatial_segmentation_idc;
	// bit(6) reserved = 111111b;
	// unsigned int(2) parallelismType;
	// Neither field is kept.
	offset += 2
	offset++

	// bit(6) reserved = 111111b;
	// unsigned int(2) chromaFormat;
	record.ChromaFormatIDC = data[offset] & 0x03
	offset++

	// bit(5) reserved = 11111b;
	// unsigned int(3) bitDepthLumaMinus8;
	record.BitDepthLumaMinus8 = data[offset] & 0x07
	offset++

	// bit(5) reserved = 11111b;
	// unsigned int(3) bitDepthChromaMinus8;
	record.BitDepthChromaMinus8 = data[offset] & 0x07
	offset++

	// bit(16) avgFrameRate; not kept.
	offset += 2

	// bit(2) constantFrameRate;
	// bit(3) MaxSubLayers;
	// bit(1) temporalIdNested;
	// unsigned int(2) lengthSizeMinusOne;
	record.MaxSubLayers = (data[offset] >> 3) & 0x07
	record.TemporalIdNestingFlag = (data[offset] >> 2) & 0x01
	record.LengthSizeMinusOne = data[offset] & 0x03
	offset++

	// Number of parameter-set arrays (VPS, SPS, PPS).
	numNals := int(data[offset])
	offset++

	for i := 0; i < numNals; i++ {
		// Array header: 1 byte type, 2 bytes NAL count, 2 bytes NAL length.
		if len(data) < offset+5 {
			return errors.New("Insufficient data")
		}

		// bit(1) array_completeness;
		// bit(1) reserved = 0;
		// unsigned int(6) NAL_unit_type;
		// Only the low six bits identify the NAL unit type.
		nalType := data[offset] & 0x3f
		offset++

		// Number of NAL units in this array.
		num := binary.BigEndian.Uint16(data[offset:])
		offset += 2

		// Length of the (single) NAL unit.
		length := binary.BigEndian.Uint16(data[offset:])
		offset += 2
		if num != 1 {
			return errors.New("Multiple VPS or SPS or PPS NAL is not supported")
		}

		if len(data) < offset+int(length) {
			return errors.New("Insufficient raw data")
		}
		raw := data[offset : offset+int(length)]
		offset += int(length)

		switch nalType {
		case hevc.NalVps:
			record.VPS = raw
		case hevc.NalSps:
			record.SPS = raw
		case hevc.NalPps:
			record.PPS = raw
		default:
			return errors.New("Only VPS SPS PPS NAL is supported")
		}
	}

	return nil
}
// MarshalSize returns the number of bytes Marshal produces: the 23-byte
// fixed part plus, for each of VPS, SPS and PPS, a 5-byte array header and
// the parameter set itself.
func (record *HEVCDecoderConfigurationRecord) MarshalSize() int {
	const (
		fixedPartSize   = 23
		arrayHeaderSize = 5
	)

	size := fixedPartSize
	size += arrayHeaderSize + len(record.VPS)
	size += arrayHeaderSize + len(record.SPS)
	size += arrayHeaderSize + len(record.PPS)
	return size
}
// Marshal serializes the record into a newly allocated buffer of
// MarshalSize bytes.
//
// The configuration version is always written as 1.
// min_spatial_segmentation_idc, parallelismType, avgFrameRate and
// constantFrameRate are written as zero (with their reserved bits set).
// Exactly three parameter-set arrays follow, in the order VPS, SPS, PPS,
// each holding one NAL unit.
//
// An error is returned when a parameter set is longer than 65535 bytes,
// because its length would not fit the 16-bit length field.
func (record *HEVCDecoderConfigurationRecord) Marshal() ([]byte, error) {
	pset := []struct {
		nalType uint8
		data    []byte
	}{
		{hevc.NalVps, record.VPS},
		{hevc.NalSps, record.SPS},
		{hevc.NalPps, record.PPS},
	}
	for _, ps := range pset {
		if len(ps.data) > 0xffff {
			return nil, errors.New("hevc parameter set exceeds 65535 bytes")
		}
	}

	buff := make([]byte, record.MarshalSize())
	offset := 0

	// unsigned int(8) configurationVersion = 1;
	buff[offset] = 0x1
	offset++

	// unsigned int(2) general_profile_space;
	// unsigned int(1) general_tier_flag;
	// unsigned int(5) general_profile_idc;
	buff[offset] = record.GeneralProfileSpace<<6 | record.GeneralTierFlag<<5 | record.GeneralProfileIDC
	offset++

	// unsigned int(32) general_profile_compatibility_flags
	binary.BigEndian.PutUint32(buff[offset:], record.GeneralProfileCompatibilityFlags)
	offset += 4

	// unsigned int(48) general_constraint_indicator_flags
	binary.BigEndian.PutUint32(buff[offset:], uint32(record.GeneralConstraintIndicatorFlags>>16))
	offset += 4
	binary.BigEndian.PutUint16(buff[offset:], uint16(record.GeneralConstraintIndicatorFlags))
	offset += 2

	// unsigned int(8) general_level_idc;
	buff[offset] = record.GeneralLevelIDC
	offset++

	// bit(4) reserved = 1111b;
	// unsigned int(12) min_spatial_segmentation_idc;
	// bit(6) reserved = 111111b;
	// unsigned int(2) parallelismType;
	// TODO chef: these two fields are not parsed, so they are written as 0.
	binary.BigEndian.PutUint16(buff[offset:], 0xf000)
	offset += 2
	buff[offset] = 0xfc
	offset++

	// bit(6) reserved = 111111b;
	// unsigned int(2) chromaFormat;
	buff[offset] = record.ChromaFormatIDC | 0xfc
	offset++

	// bit(5) reserved = 11111b;
	// unsigned int(3) bitDepthLumaMinus8;
	buff[offset] = record.BitDepthLumaMinus8 | 0xf8
	offset++

	// bit(5) reserved = 11111b;
	// unsigned int(3) bitDepthChromaMinus8;
	buff[offset] = record.BitDepthChromaMinus8 | 0xf8
	offset++

	// bit(16) avgFrameRate;
	binary.BigEndian.PutUint16(buff[offset:], 0)
	offset += 2

	// bit(2) constantFrameRate;
	// bit(3) numTemporalLayers;
	// bit(1) temporalIdNested;
	// unsigned int(2) lengthSizeMinusOne;
	buff[offset] = 0<<6 | record.MaxSubLayers<<3 | record.TemporalIdNestingFlag<<2 | record.LengthSizeMinusOne
	offset++

	// Number of parameter-set arrays: VPS, SPS, PPS.
	buff[offset] = 0x03
	offset++

	for _, ps := range pset {
		buff[offset] = ps.nalType
		offset++

		// Number of NAL units in this array.
		binary.BigEndian.PutUint16(buff[offset:], 1)
		offset += 2

		// Length of the NAL unit; checked above to fit in 16 bits.
		binary.BigEndian.PutUint16(buff[offset:], uint16(len(ps.data)))
		offset += 2

		copy(buff[offset:], ps.data)
		offset += len(ps.data)
	}

	return buff, nil
}

View File

@@ -42,7 +42,7 @@ func (cache *FlvCache) CachePack(pack Pack) {
cache.metaData = tag
return
}
if tag.IsH264SequenceHeader() {
if tag.IsH2645SequenceHeader() {
cache.videoSequenceHeader = tag
return
}
@@ -52,7 +52,7 @@ func (cache *FlvCache) CachePack(pack Pack) {
}
if cache.cacheGop { // 如果启用 FlvCache
if tag.IsH264KeyFrame() { // 关键帧重置GOP
if tag.IsH2645KeyFrame() { // 关键帧重置GOP
cache.gop.Reset()
cache.gop.Push(pack)
} else if cache.gop.Len() > 0 { // 必须关键帧作为cache的第一个包