Files
gortsplib/pkg/codecs/h264/sps.go
2023-03-31 12:34:07 +02:00

729 lines
16 KiB
Go

package h264
import (
"fmt"
"github.com/bluenviron/gortsplib/v3/pkg/bits"
)
const (
// maxRefFrames is the largest num_ref_frames_in_pic_order_cnt_cycle value
// accepted while decoding a SPS; larger values are rejected with an error
// before the corresponding slice is allocated.
maxRefFrames = 255
)
// readScalingList decodes one scaling list made of size entries from buf,
// reading from bit position *pos and advancing it.
//
// It returns the decoded list together with a flag that is true when the very
// first decoded delta yields a next scale of zero (useDefaultScalingMatrixFlag
// in the H.264 scaling_list() syntax).
//
// An error is returned when a delta cannot be read or when it lies outside
// the [-128, 127] range.
func readScalingList(buf []byte, pos *int, size int) ([]int32, bool, error) {
	// delta_scale is constrained to this range by the H.264 specification.
	const (
		minDeltaScale = -128
		maxDeltaScale = 127
	)

	lastScale := int32(8)
	nextScale := int32(8)
	scalingList := make([]int32, size)
	var useDefaultScalingMatrixFlag bool

	for j := range scalingList {
		// once nextScale reaches zero, no further deltas are present in the
		// bitstream and the last scale is repeated until the end of the list.
		if nextScale != 0 {
			deltaScale, err := bits.ReadGolombSigned(buf, pos)
			if err != nil {
				return nil, false, err
			}

			// an out-of-range delta would overflow the sum below or make the
			// modulo negative, producing meaningless scale values.
			if deltaScale < minDeltaScale || deltaScale > maxDeltaScale {
				return nil, false, fmt.Errorf("invalid delta_scale: %d", deltaScale)
			}

			nextScale = (lastScale + deltaScale + 256) % 256
			useDefaultScalingMatrixFlag = (j == 0 && nextScale == 0)
		}

		if nextScale == 0 {
			scalingList[j] = lastScale
		} else {
			scalingList[j] = nextScale
		}

		lastScale = scalingList[j]
	}

	return scalingList, useDefaultScalingMatrixFlag, nil
}
// SPS_HRD holds the parameters of a hypothetical reference decoder (HRD).
type SPS_HRD struct { //nolint:revive
// CpbCntMinus1 is the number of per-CPB entries minus one: unmarshal reads
// CpbCntMinus1+1 entries into each of the three slices below.
CpbCntMinus1 uint32
// BitRateScale is read as a 4-bit value.
BitRateScale uint8
// CpbSizeScale is read as a 4-bit value.
CpbSizeScale uint8
// BitRateValueMinus1 contains one entry per CPB.
BitRateValueMinus1 []uint32
// CpbSizeValueMinus1 contains one entry per CPB.
CpbSizeValueMinus1 []uint32
// CbrFlag contains one entry per CPB.
CbrFlag []bool
// The four fields below are each read as a 5-bit value.
InitialCpbRemovalDelayLengthMinus1 uint8
CpbRemovalDelayLengthMinus1 uint8
DpbOutputDelayLengthMinus1 uint8
TimeOffsetLength uint8
}
// unmarshal decodes HRD parameters from buf, reading from bit position *pos
// and advancing it.
//
// The per-CPB slices are rebuilt from scratch on every call, so the receiver
// can safely be reused. An error is returned when the buffer ends prematurely
// or when cpb_cnt_minus1 is greater than 31.
func (h *SPS_HRD) unmarshal(buf []byte, pos *int) error {
	// cpb_cnt_minus1 is constrained to 0..31 by the H.264 specification.
	// Enforcing it keeps the number of loop iterations below bounded
	// regardless of the input.
	const maxCpbCntMinus1 = 31

	var err error
	h.CpbCntMinus1, err = bits.ReadGolombUnsigned(buf, pos)
	if err != nil {
		return err
	}

	if h.CpbCntMinus1 > maxCpbCntMinus1 {
		return fmt.Errorf("cpb_cnt_minus1 exceeds %d", maxCpbCntMinus1)
	}

	// bit_rate_scale (4 bits) + cpb_size_scale (4 bits)
	err = bits.HasSpace(buf, *pos, 4+4)
	if err != nil {
		return err
	}

	h.BitRateScale = uint8(bits.ReadBitsUnsafe(buf, pos, 4))
	h.CpbSizeScale = uint8(bits.ReadBitsUnsafe(buf, pos, 4))

	// drop entries left by a previous call and size the slices once.
	cpbCnt := int(h.CpbCntMinus1) + 1
	h.BitRateValueMinus1 = make([]uint32, 0, cpbCnt)
	h.CpbSizeValueMinus1 = make([]uint32, 0, cpbCnt)
	h.CbrFlag = make([]bool, 0, cpbCnt)

	for i := 0; i < cpbCnt; i++ {
		bitRateValue, err := bits.ReadGolombUnsigned(buf, pos)
		if err != nil {
			return err
		}
		h.BitRateValueMinus1 = append(h.BitRateValueMinus1, bitRateValue)

		cpbSizeValue, err := bits.ReadGolombUnsigned(buf, pos)
		if err != nil {
			return err
		}
		h.CpbSizeValueMinus1 = append(h.CpbSizeValueMinus1, cpbSizeValue)

		cbrFlag, err := bits.ReadFlag(buf, pos)
		if err != nil {
			return err
		}
		h.CbrFlag = append(h.CbrFlag, cbrFlag)
	}

	// four consecutive 5-bit length fields
	err = bits.HasSpace(buf, *pos, 5+5+5+5)
	if err != nil {
		return err
	}

	h.InitialCpbRemovalDelayLengthMinus1 = uint8(bits.ReadBitsUnsafe(buf, pos, 5))
	h.CpbRemovalDelayLengthMinus1 = uint8(bits.ReadBitsUnsafe(buf, pos, 5))
	h.DpbOutputDelayLengthMinus1 = uint8(bits.ReadBitsUnsafe(buf, pos, 5))
	h.TimeOffsetLength = uint8(bits.ReadBitsUnsafe(buf, pos, 5))

	return nil
}
// SPS_TimingInfo is a timing info.
// SPS.FPS derives the frame rate from it as TimeScale / (2 * NumUnitsInTick).
type SPS_TimingInfo struct { //nolint:revive
// NumUnitsInTick is read as a 32-bit value.
NumUnitsInTick uint32
// TimeScale is read as a 32-bit value.
TimeScale uint32
// FixedFrameRateFlag is read as a single bit.
FixedFrameRateFlag bool
}
// unmarshal fills the timing info from buf, reading from bit position *pos.
// The availability of all the required bits is checked once, up front, and
// an error is returned when that check fails.
func (t *SPS_TimingInfo) unmarshal(buf []byte, pos *int) error {
	// two 32-bit values followed by a single flag bit
	const requiredBits = 32 + 32 + 1

	if err := bits.HasSpace(buf, *pos, requiredBits); err != nil {
		return err
	}

	numUnitsInTick := bits.ReadBitsUnsafe(buf, pos, 32)
	timeScale := bits.ReadBitsUnsafe(buf, pos, 32)
	fixedFrameRate := bits.ReadFlagUnsafe(buf, pos)

	t.NumUnitsInTick = uint32(numUnitsInTick)
	t.TimeScale = uint32(timeScale)
	t.FixedFrameRateFlag = fixedFrameRate

	return nil
}
// SPS_BitstreamRestriction are bitstream restriction infos.
// Fields are listed in the order in which unmarshal reads them.
type SPS_BitstreamRestriction struct { //nolint:revive
// MotionVectorsOverPicBoundariesFlag is read as a single flag.
MotionVectorsOverPicBoundariesFlag bool
// The fields below are each read with bits.ReadGolombUnsigned.
MaxBytesPerPicDenom uint32
MaxBitsPerMbDenom uint32
Log2MaxMvLengthHorizontal uint32
Log2MaxMvLengthVertical uint32
MaxNumReorderFrames uint32
MaxDecFrameBuffering uint32
}
// unmarshal fills the bitstream restriction infos from buf, reading from bit
// position *pos and advancing it. Decoding stops at the first read error,
// which is returned as is.
func (r *SPS_BitstreamRestriction) unmarshal(buf []byte, pos *int) error {
	var err error

	r.MotionVectorsOverPicBoundariesFlag, err = bits.ReadFlag(buf, pos)
	if err != nil {
		return err
	}

	// the remaining fields are all decoded the same way, in bitstream order.
	for _, field := range []*uint32{
		&r.MaxBytesPerPicDenom,
		&r.MaxBitsPerMbDenom,
		&r.Log2MaxMvLengthHorizontal,
		&r.Log2MaxMvLengthVertical,
		&r.MaxNumReorderFrames,
		&r.MaxDecFrameBuffering,
	} {
		*field, err = bits.ReadGolombUnsigned(buf, pos)
		if err != nil {
			return err
		}
	}

	return nil
}
// SPS_VUI is a video usability information.
// Fields listed under a "X == true" comment are only read from the bitstream
// when flag X is set; pointer fields are left nil when the corresponding
// section is absent.
type SPS_VUI struct { //nolint:revive
AspectRatioInfoPresentFlag bool
// AspectRatioInfoPresentFlag == true
AspectRatioIdc uint8
// SarWidth and SarHeight are only read when AspectRatioIdc is 255 (Extended_SAR).
SarWidth uint16
SarHeight uint16
OverscanInfoPresentFlag bool
// OverscanInfoPresentFlag == true
OverscanAppropriateFlag bool
VideoSignalTypePresentFlag bool
// VideoSignalTypePresentFlag == true
VideoFormat uint8
VideoFullRangeFlag bool
ColourDescriptionPresentFlag bool
// ColourDescriptionPresentFlag == true
ColourPrimaries uint8
TransferCharacteristics uint8
MatrixCoefficients uint8
ChromaLocInfoPresentFlag bool
// ChromaLocInfoPresentFlag == true
ChromaSampleLocTypeTopField uint32
ChromaSampleLocTypeBottomField uint32
// TimingInfo is nil when the timing info section is absent.
TimingInfo *SPS_TimingInfo
// NalHRD is nil when the NAL HRD parameters are absent.
NalHRD *SPS_HRD
// VclHRD is nil when the VCL HRD parameters are absent.
VclHRD *SPS_HRD
// LowDelayHrdFlag is only read when NalHRD or VclHRD is present.
LowDelayHrdFlag bool
PicStructPresentFlag bool
// BitstreamRestriction is nil when the bitstream restriction section is absent.
BitstreamRestriction *SPS_BitstreamRestriction
}
// unmarshal fills the VUI from buf, reading from bit position *pos and
// advancing it. Optional sections are decoded only when their presence flag
// is set. Decoding stops at the first error, which is returned as is.
func (v *SPS_VUI) unmarshal(buf []byte, pos *int) error {
	// aspect_ratio_idc value that signals explicit SAR width and height
	const extendedSAR = 255

	// readFlag stores the next flag into dst.
	readFlag := func(dst *bool) error {
		var err error
		*dst, err = bits.ReadFlag(buf, pos)
		return err
	}

	// readGolomb stores the next unsigned Golomb-coded value into dst.
	readGolomb := func(dst *uint32) error {
		var err error
		*dst, err = bits.ReadGolombUnsigned(buf, pos)
		return err
	}

	// aspect ratio
	if err := readFlag(&v.AspectRatioInfoPresentFlag); err != nil {
		return err
	}
	if v.AspectRatioInfoPresentFlag {
		idc, err := bits.ReadBits(buf, pos, 8)
		if err != nil {
			return err
		}
		v.AspectRatioIdc = uint8(idc)

		if v.AspectRatioIdc == extendedSAR {
			if err := bits.HasSpace(buf, *pos, 32); err != nil {
				return err
			}
			v.SarWidth = uint16(bits.ReadBitsUnsafe(buf, pos, 16))
			v.SarHeight = uint16(bits.ReadBitsUnsafe(buf, pos, 16))
		}
	}

	// overscan
	if err := readFlag(&v.OverscanInfoPresentFlag); err != nil {
		return err
	}
	if v.OverscanInfoPresentFlag {
		if err := readFlag(&v.OverscanAppropriateFlag); err != nil {
			return err
		}
	}

	// video signal type
	if err := readFlag(&v.VideoSignalTypePresentFlag); err != nil {
		return err
	}
	if v.VideoSignalTypePresentFlag {
		if err := bits.HasSpace(buf, *pos, 5); err != nil {
			return err
		}
		v.VideoFormat = uint8(bits.ReadBitsUnsafe(buf, pos, 3))
		v.VideoFullRangeFlag = bits.ReadFlagUnsafe(buf, pos)
		v.ColourDescriptionPresentFlag = bits.ReadFlagUnsafe(buf, pos)

		if v.ColourDescriptionPresentFlag {
			if err := bits.HasSpace(buf, *pos, 24); err != nil {
				return err
			}
			v.ColourPrimaries = uint8(bits.ReadBitsUnsafe(buf, pos, 8))
			v.TransferCharacteristics = uint8(bits.ReadBitsUnsafe(buf, pos, 8))
			v.MatrixCoefficients = uint8(bits.ReadBitsUnsafe(buf, pos, 8))
		}
	}

	// chroma sample location
	if err := readFlag(&v.ChromaLocInfoPresentFlag); err != nil {
		return err
	}
	if v.ChromaLocInfoPresentFlag {
		if err := readGolomb(&v.ChromaSampleLocTypeTopField); err != nil {
			return err
		}
		if err := readGolomb(&v.ChromaSampleLocTypeBottomField); err != nil {
			return err
		}
	}

	// timing info
	var timingInfoPresent bool
	if err := readFlag(&timingInfoPresent); err != nil {
		return err
	}
	if timingInfoPresent {
		v.TimingInfo = &SPS_TimingInfo{}
		if err := v.TimingInfo.unmarshal(buf, pos); err != nil {
			return err
		}
	}

	// NAL HRD parameters
	var nalHRDPresent bool
	if err := readFlag(&nalHRDPresent); err != nil {
		return err
	}
	if nalHRDPresent {
		v.NalHRD = &SPS_HRD{}
		if err := v.NalHRD.unmarshal(buf, pos); err != nil {
			return err
		}
	}

	// VCL HRD parameters
	var vclHRDPresent bool
	if err := readFlag(&vclHRDPresent); err != nil {
		return err
	}
	if vclHRDPresent {
		v.VclHRD = &SPS_HRD{}
		if err := v.VclHRD.unmarshal(buf, pos); err != nil {
			return err
		}
	}

	// the low delay flag is present when at least one HRD is present
	if nalHRDPresent || vclHRDPresent {
		if err := readFlag(&v.LowDelayHrdFlag); err != nil {
			return err
		}
	}

	if err := readFlag(&v.PicStructPresentFlag); err != nil {
		return err
	}

	// bitstream restriction
	var bitstreamRestrictionPresent bool
	if err := readFlag(&bitstreamRestrictionPresent); err != nil {
		return err
	}
	if bitstreamRestrictionPresent {
		v.BitstreamRestriction = &SPS_BitstreamRestriction{}
		if err := v.BitstreamRestriction.unmarshal(buf, pos); err != nil {
			return err
		}
	}

	return nil
}
// SPS_FrameCropping is the frame cropping part of a SPS.
// Offsets are expressed in crop units, not in pixels: SPS.Width and
// SPS.Height scale them before subtracting them from the coded size.
type SPS_FrameCropping struct { //nolint:revive
LeftOffset uint32
RightOffset uint32
TopOffset uint32
BottomOffset uint32
}
// unmarshal fills the four cropping offsets from buf, reading from bit
// position *pos and advancing it. Decoding stops at the first read error,
// which is returned as is.
func (c *SPS_FrameCropping) unmarshal(buf []byte, pos *int) error {
	// offsets in the order in which they appear in the bitstream
	offsets := [...]*uint32{
		&c.LeftOffset,
		&c.RightOffset,
		&c.TopOffset,
		&c.BottomOffset,
	}

	for _, offset := range offsets {
		var err error
		*offset, err = bits.ReadGolombUnsigned(buf, pos)
		if err != nil {
			return err
		}
	}

	return nil
}
// SPS is a H264 sequence parameter set.
// Fields are listed in the order in which Unmarshal decodes them. Fields
// listed under a condition comment are only read from the bitstream when the
// condition holds.
type SPS struct {
// ProfileIdc, the constraint flags and LevelIdc are taken from bytes 1 to 3
// of the buffer passed to Unmarshal.
ProfileIdc uint8
ConstraintSet0Flag bool
ConstraintSet1Flag bool
ConstraintSet2Flag bool
ConstraintSet3Flag bool
ConstraintSet4Flag bool
ConstraintSet5Flag bool
LevelIdc uint8
ID uint32
// only for selected ProfileIdcs
// ChromeFormatIdc is set to 0 by Unmarshal for the other ProfileIdcs.
ChromeFormatIdc uint32
// SeparateColourPlaneFlag is only read when ChromeFormatIdc is 3.
SeparateColourPlaneFlag bool
BitDepthLumaMinus8 uint32
BitDepthChromaMinus8 uint32
QpprimeYZeroTransformBypassFlag bool
// seqScalingListPresentFlag == true
// Only the lists that are present in the bitstream are appended, each one
// together with its UseDefaultScalingMatrix flag.
ScalingList4x4 [][]int32
UseDefaultScalingMatrix4x4Flag []bool
ScalingList8x8 [][]int32
UseDefaultScalingMatrix8x8Flag []bool
Log2MaxFrameNumMinus4 uint32
// PicOrderCntType must be 0, 1 or 2; Unmarshal rejects any other value.
PicOrderCntType uint32
// PicOrderCntType == 0
Log2MaxPicOrderCntLsbMinus4 uint32
// PicOrderCntType == 1
DeltaPicOrderAlwaysZeroFlag bool
OffsetForNonRefPic int32
OffsetForTopToBottomField int32
// OffsetForRefFrames holds at most maxRefFrames entries.
OffsetForRefFrames []int32
MaxNumRefFrames uint32
GapsInFrameNumValueAllowedFlag bool
PicWidthInMbsMinus1 uint32
PicHeightInMapUnitsMinus1 uint32
FrameMbsOnlyFlag bool
// FrameMbsOnlyFlag == false
MbAdaptiveFrameFieldFlag bool
Direct8x8InferenceFlag bool
// FrameCropping is nil when the frame cropping section is absent.
FrameCropping *SPS_FrameCropping
// VUI is nil when the VUI section is absent.
VUI *SPS_VUI
}
// Unmarshal decodes a SPS from bytes.
//
// Emulation prevention bytes are removed first. The result must contain at
// least 4 bytes: byte 0 is skipped (presumably the NAL unit header), bytes 1
// to 3 carry the profile, the constraint flags and the level, and everything
// that follows is decoded as a bitstream.
//
// Fields that are not carried by the bitstream are reset, so that the same
// SPS can be reused across calls. When an error is returned, the SPS may have
// been partially updated.
func (s *SPS) Unmarshal(buf []byte) error {
	buf = EmulationPreventionRemove(buf)

	if len(buf) < 4 {
		return fmt.Errorf("not enough bits")
	}

	s.ProfileIdc = buf[1]
	s.ConstraintSet0Flag = (buf[2] >> 7) == 1
	s.ConstraintSet1Flag = (buf[2] >> 6 & 0x01) == 1
	s.ConstraintSet2Flag = (buf[2] >> 5 & 0x01) == 1
	s.ConstraintSet3Flag = (buf[2] >> 4 & 0x01) == 1
	s.ConstraintSet4Flag = (buf[2] >> 3 & 0x01) == 1
	s.ConstraintSet5Flag = (buf[2] >> 2 & 0x01) == 1
	s.LevelIdc = buf[3]

	buf = buf[4:]
	pos := 0

	var err error
	s.ID, err = bits.ReadGolombUnsigned(buf, &pos)
	if err != nil {
		return err
	}

	// scaling lists are appended below: drop the ones left by a previous
	// call, otherwise they would accumulate or survive as stale data.
	s.ScalingList4x4 = nil
	s.UseDefaultScalingMatrix4x4Flag = nil
	s.ScalingList8x8 = nil
	s.UseDefaultScalingMatrix8x8Flag = nil

	switch s.ProfileIdc {
	case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135:
		s.ChromeFormatIdc, err = bits.ReadGolombUnsigned(buf, &pos)
		if err != nil {
			return err
		}

		if s.ChromeFormatIdc == 3 {
			s.SeparateColourPlaneFlag, err = bits.ReadFlag(buf, &pos)
			if err != nil {
				return err
			}
		} else {
			s.SeparateColourPlaneFlag = false
		}

		s.BitDepthLumaMinus8, err = bits.ReadGolombUnsigned(buf, &pos)
		if err != nil {
			return err
		}

		s.BitDepthChromaMinus8, err = bits.ReadGolombUnsigned(buf, &pos)
		if err != nil {
			return err
		}

		s.QpprimeYZeroTransformBypassFlag, err = bits.ReadFlag(buf, &pos)
		if err != nil {
			return err
		}

		seqScalingMatrixPresentFlag, err := bits.ReadFlag(buf, &pos)
		if err != nil {
			return err
		}

		if seqScalingMatrixPresentFlag {
			// 6 lists of 16 entries, followed by 2 lists of 64 entries
			// (6 when ChromeFormatIdc is 3).
			var lim int
			if s.ChromeFormatIdc != 3 {
				lim = 8
			} else {
				lim = 12
			}

			for i := 0; i < lim; i++ {
				seqScalingListPresentFlag, err := bits.ReadFlag(buf, &pos)
				if err != nil {
					return err
				}

				if seqScalingListPresentFlag {
					if i < 6 {
						scalingList, useDefaultScalingMatrixFlag, err := readScalingList(buf, &pos, 16)
						if err != nil {
							return err
						}

						s.ScalingList4x4 = append(s.ScalingList4x4, scalingList)
						s.UseDefaultScalingMatrix4x4Flag = append(s.UseDefaultScalingMatrix4x4Flag,
							useDefaultScalingMatrixFlag)
					} else {
						scalingList, useDefaultScalingMatrixFlag, err := readScalingList(buf, &pos, 64)
						if err != nil {
							return err
						}

						s.ScalingList8x8 = append(s.ScalingList8x8, scalingList)
						s.UseDefaultScalingMatrix8x8Flag = append(s.UseDefaultScalingMatrix8x8Flag,
							useDefaultScalingMatrixFlag)
					}
				}
			}
		}

	default:
		// NOTE(review): the H.264 specification infers chroma_format_idc = 1
		// when it is absent; 0 is kept to preserve the existing behavior.
		s.ChromeFormatIdc = 0
		s.SeparateColourPlaneFlag = false
		s.BitDepthLumaMinus8 = 0
		s.BitDepthChromaMinus8 = 0
		s.QpprimeYZeroTransformBypassFlag = false
	}

	s.Log2MaxFrameNumMinus4, err = bits.ReadGolombUnsigned(buf, &pos)
	if err != nil {
		return err
	}

	s.PicOrderCntType, err = bits.ReadGolombUnsigned(buf, &pos)
	if err != nil {
		return err
	}

	switch s.PicOrderCntType {
	case 0:
		s.Log2MaxPicOrderCntLsbMinus4, err = bits.ReadGolombUnsigned(buf, &pos)
		if err != nil {
			return err
		}

		s.DeltaPicOrderAlwaysZeroFlag = false
		s.OffsetForNonRefPic = 0
		s.OffsetForTopToBottomField = 0
		s.OffsetForRefFrames = nil

	case 1:
		s.Log2MaxPicOrderCntLsbMinus4 = 0

		s.DeltaPicOrderAlwaysZeroFlag, err = bits.ReadFlag(buf, &pos)
		if err != nil {
			return err
		}

		s.OffsetForNonRefPic, err = bits.ReadGolombSigned(buf, &pos)
		if err != nil {
			return err
		}

		s.OffsetForTopToBottomField, err = bits.ReadGolombSigned(buf, &pos)
		if err != nil {
			return err
		}

		numRefFramesInPicOrderCntCycle, err := bits.ReadGolombUnsigned(buf, &pos)
		if err != nil {
			return err
		}

		// bound the allocation below, since the count comes from the input.
		if numRefFramesInPicOrderCntCycle > maxRefFrames {
			return fmt.Errorf("num_ref_frames_in_pic_order_cnt_cycle exceeds %d", maxRefFrames)
		}

		s.OffsetForRefFrames = make([]int32, numRefFramesInPicOrderCntCycle)
		for i := uint32(0); i < numRefFramesInPicOrderCntCycle; i++ {
			v, err := bits.ReadGolombSigned(buf, &pos)
			if err != nil {
				return err
			}

			s.OffsetForRefFrames[i] = v
		}

	case 2:
		s.Log2MaxPicOrderCntLsbMinus4 = 0
		s.DeltaPicOrderAlwaysZeroFlag = false
		s.OffsetForNonRefPic = 0
		s.OffsetForTopToBottomField = 0
		s.OffsetForRefFrames = nil

	default:
		return fmt.Errorf("invalid pic_order_cnt_type: %d", s.PicOrderCntType)
	}

	s.MaxNumRefFrames, err = bits.ReadGolombUnsigned(buf, &pos)
	if err != nil {
		return err
	}

	s.GapsInFrameNumValueAllowedFlag, err = bits.ReadFlag(buf, &pos)
	if err != nil {
		return err
	}

	s.PicWidthInMbsMinus1, err = bits.ReadGolombUnsigned(buf, &pos)
	if err != nil {
		return err
	}

	s.PicHeightInMapUnitsMinus1, err = bits.ReadGolombUnsigned(buf, &pos)
	if err != nil {
		return err
	}

	s.FrameMbsOnlyFlag, err = bits.ReadFlag(buf, &pos)
	if err != nil {
		return err
	}

	if !s.FrameMbsOnlyFlag {
		s.MbAdaptiveFrameFieldFlag, err = bits.ReadFlag(buf, &pos)
		if err != nil {
			return err
		}
	} else {
		s.MbAdaptiveFrameFieldFlag = false
	}

	s.Direct8x8InferenceFlag, err = bits.ReadFlag(buf, &pos)
	if err != nil {
		return err
	}

	frameCroppingFlag, err := bits.ReadFlag(buf, &pos)
	if err != nil {
		return err
	}

	if frameCroppingFlag {
		s.FrameCropping = &SPS_FrameCropping{}
		err := s.FrameCropping.unmarshal(buf, &pos)
		if err != nil {
			return err
		}
	} else {
		s.FrameCropping = nil
	}

	vuiParametersPresentFlag, err := bits.ReadFlag(buf, &pos)
	if err != nil {
		return err
	}

	if vuiParametersPresentFlag {
		s.VUI = &SPS_VUI{}
		err := s.VUI.unmarshal(buf, &pos)
		if err != nil {
			return err
		}
	} else {
		s.VUI = nil
	}

	return nil
}
// Width returns the video width.
// It is the coded width (16 pixels per macroblock) minus the left and right
// cropping offsets, each of which counts for 2 pixels.
func (s SPS) Width() int {
	// computed in uint32, like the fields involved
	width := (s.PicWidthInMbsMinus1 + 1) * 16

	if crop := s.FrameCropping; crop != nil {
		width -= (crop.LeftOffset + crop.RightOffset) * 2
	}

	return int(width)
}
// Height returns the video height.
//
// The coded height is 16 pixels per map unit row, doubled when the stream is
// not frame-only (FrameMbsOnlyFlag == false), since map units then describe
// fields. Top and bottom cropping offsets are expressed in vertical crop
// units, which are 2 pixels tall for frame-only streams and 4 pixels tall
// otherwise.
//
// The crop unit assumes 4:2:0 chroma subsampling; other chroma formats are
// not taken into account.
func (s SPS) Height() int {
	// number of fields each map unit row accounts for: 2 - frame_mbs_only_flag
	fieldFactor := uint32(2)
	if s.FrameMbsOnlyFlag {
		fieldFactor = 1
	}

	height := fieldFactor * (s.PicHeightInMapUnitsMinus1 + 1) * 16

	if s.FrameCropping != nil {
		// CropUnitY = SubHeightC * (2 - frame_mbs_only_flag), with SubHeightC = 2
		cropUnitY := 2 * fieldFactor
		height -= (s.FrameCropping.TopOffset + s.FrameCropping.BottomOffset) * cropUnitY
	}

	return int(height)
}
// FPS returns the frames per second of the video.
// It is computed as TimeScale / (2 * NumUnitsInTick).
//
// 0 is returned when the frame rate is unknown, that is when the SPS carries
// no timing info or when NumUnitsInTick is zero (the division would otherwise
// yield an infinite or NaN value).
func (s SPS) FPS() float64 {
	if s.VUI == nil || s.VUI.TimingInfo == nil {
		return 0
	}

	timing := s.VUI.TimingInfo
	if timing.NumUnitsInTick == 0 {
		return 0
	}

	return float64(timing.TimeScale) / (2 * float64(timing.NumUnitsInTick))
}