mirror of
https://github.com/aler9/gortsplib
synced 2025-10-04 14:52:46 +08:00
![dependabot[bot]](/assets/img/avatar_default.png)
Bumps [github.com/bluenviron/mediacommon](https://github.com/bluenviron/mediacommon) from 1.13.4 to 1.14.0. - [Commits](https://github.com/bluenviron/mediacommon/compare/v1.13.4...v1.14.0) --- updated-dependencies: - dependency-name: github.com/bluenviron/mediacommon dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
273 lines
6.2 KiB
Go
273 lines
6.2 KiB
Go
package rtph264
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
|
|
"github.com/pion/rtp"
|
|
|
|
"github.com/bluenviron/mediacommon/pkg/codecs/h264"
|
|
)
|
|
|
|
// Sentinel errors returned by the decoder.
var (
	// ErrMorePacketsNeeded is returned when the packets received so far
	// are not enough to produce a result and more packets are needed.
	ErrMorePacketsNeeded = errors.New("need more packets")

	// ErrNonStartingPacketAndNoPrevious is returned when a non-starting
	// packet of a fragmented NALU is received and nothing has been
	// received before it.
	// It's normal to get this error when decoding a stream that has
	// already been running for some time.
	ErrNonStartingPacketAndNoPrevious = errors.New(
		"received a non-starting fragment without any previous starting fragment")
)
|
|
|
|
// joinFragments concatenates fragments into a newly allocated buffer whose
// length is exactly size. Bytes that do not fit into size are dropped, and
// any space left over after the last fragment stays zeroed.
func joinFragments(fragments [][]byte, size int) []byte {
	joined := make([]byte, size)

	// dst is the not-yet-written tail of joined; it shrinks after each copy.
	dst := joined
	for _, frag := range fragments {
		dst = dst[copy(dst, frag):]
	}

	return joined
}
|
|
|
|
// isAllZero reports whether every byte of buf is zero.
// An empty or nil buffer is considered all-zero.
func isAllZero(buf []byte) bool {
	// advance past the leading run of zero bytes.
	idx := 0
	for idx < len(buf) && buf[idx] == 0 {
		idx++
	}

	// the buffer is all-zero only if the run covers it entirely.
	return idx == len(buf)
}
|
|
|
|
// Decoder is a RTP/H264 decoder.
// Specification: https://datatracker.ietf.org/doc/html/rfc6184
type Decoder struct {
	// indicates the packetization mode.
	// Init rejects values greater than or equal to 2.
	PacketizationMode int

	// set once a packet other than a non-starting FU-A fragment has been
	// processed; used to tell a stream joined midway from a broken one.
	firstPacketReceived bool

	// state of the FU-A NALU currently being reassembled:
	// the pieces collected so far, their total size in bytes
	// (zero when no reassembly is in progress) and the RTP sequence
	// number expected for the next fragment.
	fragments          [][]byte
	fragmentsSize      int
	fragmentNextSeqNum uint16

	// set once a single-NALU payload containing a 4-byte Annex-B start
	// code has been seen; from then on such payloads are parsed as Annex-B.
	annexBMode bool

	// for Decode()
	// NALUs of the access unit being assembled, their count and their
	// total size in bytes.
	frameBuffer     [][]byte
	frameBufferLen  int
	frameBufferSize int
}
|
|
|
|
// Init initializes the decoder.
|
|
func (d *Decoder) Init() error {
|
|
if d.PacketizationMode >= 2 {
|
|
return fmt.Errorf("PacketizationMode >= 2 is not supported")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *Decoder) resetFragments() {
|
|
d.fragments = d.fragments[:0]
|
|
d.fragmentsSize = 0
|
|
}
|
|
|
|
func (d *Decoder) decodeNALUs(pkt *rtp.Packet) ([][]byte, error) {
|
|
if len(pkt.Payload) < 1 {
|
|
d.resetFragments()
|
|
return nil, fmt.Errorf("payload is too short")
|
|
}
|
|
|
|
typ := h264.NALUType(pkt.Payload[0] & 0x1F)
|
|
var nalus [][]byte
|
|
|
|
switch typ {
|
|
case h264.NALUTypeFUA:
|
|
if len(pkt.Payload) < 2 {
|
|
return nil, fmt.Errorf("invalid FU-A packet (invalid size)")
|
|
}
|
|
|
|
start := pkt.Payload[1] >> 7
|
|
end := (pkt.Payload[1] >> 6) & 0x01
|
|
|
|
if start == 1 {
|
|
d.resetFragments()
|
|
|
|
nri := (pkt.Payload[0] >> 5) & 0x03
|
|
typ := pkt.Payload[1] & 0x1F
|
|
d.fragmentsSize = len(pkt.Payload[1:])
|
|
d.fragments = append(d.fragments, []byte{(nri << 5) | typ}, pkt.Payload[2:])
|
|
d.fragmentNextSeqNum = pkt.SequenceNumber + 1
|
|
d.firstPacketReceived = true
|
|
|
|
// RFC 6184 clearly states:
|
|
//
|
|
// A fragmented NAL unit MUST NOT be transmitted in one FU; that is, the
|
|
// Start bit and End bit MUST NOT both be set to one in the same FU
|
|
// header.
|
|
//
|
|
// However, some vendors camera (e.g. CostarHD) have been observed to nevertheless
|
|
// emit one fragmented NAL unit for sufficiently small P-frames.
|
|
if end != 0 {
|
|
nalus = [][]byte{joinFragments(d.fragments, d.fragmentsSize)}
|
|
d.resetFragments()
|
|
break
|
|
}
|
|
|
|
return nil, ErrMorePacketsNeeded
|
|
}
|
|
|
|
if d.fragmentsSize == 0 {
|
|
if !d.firstPacketReceived {
|
|
return nil, ErrNonStartingPacketAndNoPrevious
|
|
}
|
|
|
|
return nil, fmt.Errorf("invalid FU-A packet (non-starting)")
|
|
}
|
|
|
|
if pkt.SequenceNumber != d.fragmentNextSeqNum {
|
|
d.resetFragments()
|
|
return nil, fmt.Errorf("discarding frame since a RTP packet is missing")
|
|
}
|
|
|
|
d.fragmentsSize += len(pkt.Payload[2:])
|
|
|
|
if d.fragmentsSize > h264.MaxAccessUnitSize {
|
|
d.resetFragments()
|
|
return nil, fmt.Errorf("NALU size (%d) is too big, maximum is %d", d.fragmentsSize, h264.MaxAccessUnitSize)
|
|
}
|
|
|
|
d.fragments = append(d.fragments, pkt.Payload[2:])
|
|
d.fragmentNextSeqNum++
|
|
|
|
if end != 1 {
|
|
return nil, ErrMorePacketsNeeded
|
|
}
|
|
|
|
nalus = [][]byte{joinFragments(d.fragments, d.fragmentsSize)}
|
|
d.resetFragments()
|
|
|
|
case h264.NALUTypeSTAPA:
|
|
d.resetFragments()
|
|
|
|
payload := pkt.Payload[1:]
|
|
|
|
for {
|
|
if len(payload) < 2 {
|
|
return nil, fmt.Errorf("invalid STAP-A packet (invalid size)")
|
|
}
|
|
|
|
size := uint16(payload[0])<<8 | uint16(payload[1])
|
|
payload = payload[2:]
|
|
|
|
// discard padding
|
|
if size == 0 && isAllZero(payload) {
|
|
break
|
|
}
|
|
|
|
if int(size) > len(payload) {
|
|
return nil, fmt.Errorf("invalid STAP-A packet (invalid size)")
|
|
}
|
|
|
|
nalus = append(nalus, payload[:size])
|
|
payload = payload[size:]
|
|
|
|
if len(payload) == 0 {
|
|
break
|
|
}
|
|
}
|
|
|
|
if nalus == nil {
|
|
return nil, fmt.Errorf("STAP-A packet doesn't contain any NALU")
|
|
}
|
|
|
|
d.firstPacketReceived = true
|
|
|
|
case h264.NALUTypeSTAPB, h264.NALUTypeMTAP16,
|
|
h264.NALUTypeMTAP24, h264.NALUTypeFUB:
|
|
d.resetFragments()
|
|
d.firstPacketReceived = true
|
|
return nil, fmt.Errorf("packet type not supported (%v)", typ)
|
|
|
|
default:
|
|
d.resetFragments()
|
|
d.firstPacketReceived = true
|
|
nalus = [][]byte{pkt.Payload}
|
|
}
|
|
|
|
nalus, err := d.removeAnnexB(nalus)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nalus, nil
|
|
}
|
|
|
|
// Decode decodes an access unit from a RTP packet.
|
|
func (d *Decoder) Decode(pkt *rtp.Packet) ([][]byte, error) {
|
|
nalus, err := d.decodeNALUs(pkt)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
l := len(nalus)
|
|
|
|
if (d.frameBufferLen + l) > h264.MaxNALUsPerAccessUnit {
|
|
d.frameBuffer = nil
|
|
d.frameBufferLen = 0
|
|
d.frameBufferSize = 0
|
|
return nil, fmt.Errorf("NALU count exceeds maximum allowed (%d)",
|
|
h264.MaxNALUsPerAccessUnit)
|
|
}
|
|
|
|
addSize := 0
|
|
|
|
for _, nalu := range nalus {
|
|
addSize += len(nalu)
|
|
}
|
|
|
|
if (d.frameBufferSize + addSize) > h264.MaxAccessUnitSize {
|
|
d.frameBuffer = nil
|
|
d.frameBufferLen = 0
|
|
d.frameBufferSize = 0
|
|
return nil, fmt.Errorf("access unit size (%d) is too big, maximum is %d",
|
|
d.frameBufferSize+addSize, h264.MaxAccessUnitSize)
|
|
}
|
|
|
|
d.frameBuffer = append(d.frameBuffer, nalus...)
|
|
d.frameBufferLen += l
|
|
d.frameBufferSize += addSize
|
|
|
|
if !pkt.Marker {
|
|
return nil, ErrMorePacketsNeeded
|
|
}
|
|
|
|
ret := d.frameBuffer
|
|
|
|
// do not reuse frameBuffer to avoid race conditions
|
|
d.frameBuffer = nil
|
|
d.frameBufferLen = 0
|
|
d.frameBufferSize = 0
|
|
|
|
return ret, nil
|
|
}
|
|
|
|
// some cameras / servers wrap NALUs into Annex-B
|
|
func (d *Decoder) removeAnnexB(nalus [][]byte) ([][]byte, error) {
|
|
if len(nalus) == 1 {
|
|
nalu := nalus[0]
|
|
|
|
if !d.annexBMode && bytes.Contains(nalu, []byte{0x00, 0x00, 0x00, 0x01}) {
|
|
d.annexBMode = true
|
|
}
|
|
|
|
if d.annexBMode {
|
|
if !bytes.HasPrefix(nalu, []byte{0x00, 0x00, 0x00, 0x01}) {
|
|
nalu = append([]byte{0x00, 0x00, 0x00, 0x01}, nalu...)
|
|
}
|
|
|
|
var annexb h264.AnnexB
|
|
err := annexb.Unmarshal(nalu)
|
|
return annexb, err
|
|
}
|
|
}
|
|
|
|
return nalus, nil
|
|
}
|