From a17e1f776e34e80c163af6961a0df433a90227d3 Mon Sep 17 00:00:00 2001 From: Alessandro Ros Date: Wed, 19 Feb 2025 22:00:49 +0100 Subject: [PATCH] improve examples (#703) * add client-play-format-av1-to-jpeg * improve client-play-format-av1 to decode frames * improve speed of sample decoders by using pointers instead of copies * improve client-record-format-h264 and client-record-format-h265 to encode frames * add client-record-format-av1 --- README.md | 7 +- .../av1_decoder.go | 153 ++++++++++++++ .../client-play-format-av1-to-jpeg/main.go | 149 ++++++++++++++ .../client-play-format-av1/av1_decoder.go | 153 ++++++++++++++ examples/client-play-format-av1/main.go | 44 ++++- .../h264_decoder.go | 137 ------------- .../h264_decoder.go | 153 ++++++++++++++ .../main.go | 70 ++++--- .../client-play-format-h264/h264_decoder.go | 100 ++++++---- examples/client-play-format-h264/main.go | 48 +++-- .../h265_decoder.go | 137 ------------- .../h265_decoder.go | 153 ++++++++++++++ .../main.go | 72 ++++--- .../client-play-format-h265/h265_decoder.go | 100 ++++++---- examples/client-play-format-h265/main.go | 50 +++-- .../client-record-format-av1/av1_encoder.go | 186 ++++++++++++++++++ .../main.go | 82 +++++--- .../main.go | 13 +- .../client-record-format-h264/h264_encoder.go | 179 +++++++++++++++++ examples/client-record-format-h264/main.go | 160 +++++++++++---- .../client-record-format-h265/h265_encoder.go | 179 +++++++++++++++++ examples/client-record-format-h265/main.go | 157 +++++++++++---- examples/client-record-format-mjpeg/main.go | 130 ++++++++---- go.mod | 2 +- go.sum | 4 +- pkg/format/rtpav1/encoder.go | 7 +- 26 files changed, 1978 insertions(+), 647 deletions(-) create mode 100644 examples/client-play-format-av1-to-jpeg/av1_decoder.go create mode 100644 examples/client-play-format-av1-to-jpeg/main.go create mode 100644 examples/client-play-format-av1/av1_decoder.go delete mode 100644 examples/client-play-format-h264-convert-to-jpeg/h264_decoder.go create mode 100644 examples/client-play-format-h264-to-jpeg/h264_decoder.go rename examples/{client-play-format-h264-convert-to-jpeg => client-play-format-h264-to-jpeg}/main.go (72%) delete mode 100644 examples/client-play-format-h265-convert-to-jpeg/h265_decoder.go create mode 100644 examples/client-play-format-h265-to-jpeg/h265_decoder.go rename examples/{client-play-format-h265-convert-to-jpeg => client-play-format-h265-to-jpeg}/main.go (72%) create mode 100644 examples/client-record-format-av1/av1_encoder.go rename examples/{client-record-format-mjpeg-from-image => client-record-format-av1}/main.go (58%) create mode 100644 examples/client-record-format-h264/h264_encoder.go create mode 100644 examples/client-record-format-h265/h265_encoder.go diff --git a/README.md b/README.md index 6eadee04..5a593b46 100644 --- a/README.md +++ b/README.md @@ -62,14 +62,15 @@ Features: * [client-play-to-record](examples/client-play-to-record/main.go) * [client-play-backchannel](examples/client-play-backchannel/main.go) * [client-play-format-av1](examples/client-play-format-av1/main.go) +* [client-play-format-av1-to-jpeg](examples/client-play-format-av1-to-jpeg/main.go) * [client-play-format-g711](examples/client-play-format-g711/main.go) * [client-play-format-g722](examples/client-play-format-g722/main.go) * [client-play-format-h264](examples/client-play-format-h264/main.go) -* [client-play-format-h264-convert-to-jpeg](examples/client-play-format-h264-convert-to-jpeg/main.go) +* [client-play-format-h264-to-jpeg](examples/client-play-format-h264-to-jpeg/main.go) * 
[client-play-format-h264-save-to-disk](examples/client-play-format-h264-save-to-disk/main.go)
 * [client-play-format-h264-mpeg4audio-save-to-disk](examples/client-play-format-h264-mpeg4audio-save-to-disk/main.go)
 * [client-play-format-h265](examples/client-play-format-h265/main.go)
-* [client-play-format-h265-convert-to-jpeg](examples/client-play-format-h265-convert-to-jpeg/main.go)
+* [client-play-format-h265-to-jpeg](examples/client-play-format-h265-to-jpeg/main.go)
 * [client-play-format-h265-save-to-disk](examples/client-play-format-h265-save-to-disk/main.go)
 * [client-play-format-lpcm](examples/client-play-format-lpcm/main.go)
 * [client-play-format-mjpeg](examples/client-play-format-mjpeg/main.go)
@@ -81,6 +82,7 @@ Features:
 * [client-play-format-vp9](examples/client-play-format-vp9/main.go)
 * [client-record-options](examples/client-record-options/main.go)
 * [client-record-pause](examples/client-record-pause/main.go)
+* [client-record-format-av1](examples/client-record-format-av1/main.go)
 * [client-record-format-g711](examples/client-record-format-g711/main.go)
 * [client-record-format-g722](examples/client-record-format-g722/main.go)
 * [client-record-format-h264](examples/client-record-format-h264/main.go)
@@ -88,7 +90,6 @@ Features:
 * [client-record-format-h265](examples/client-record-format-h265/main.go)
 * [client-record-format-lpcm](examples/client-record-format-lpcm/main.go)
 * [client-record-format-mjpeg](examples/client-record-format-mjpeg/main.go)
-* [client-record-format-mjpeg-from-image](examples/client-record-format-mjpeg-from-image/main.go)
 * [client-record-format-mpeg4audio](examples/client-record-format-mpeg4audio/main.go)
 * [client-record-format-opus](examples/client-record-format-opus/main.go)
 * [client-record-format-vp8](examples/client-record-format-vp8/main.go)
diff --git a/examples/client-play-format-av1-to-jpeg/av1_decoder.go b/examples/client-play-format-av1-to-jpeg/av1_decoder.go
new file mode 100644
index 00000000..8ae5ac1c
--- /dev/null
+++ b/examples/client-play-format-av1-to-jpeg/av1_decoder.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+	"image"
+	"runtime"
+	"unsafe"
+
+	"github.com/bluenviron/mediacommon/v2/pkg/codecs/av1"
+)
+
+// #cgo pkg-config: libavcodec libavutil libswscale
+// #include <libavcodec/avcodec.h>
+// #include <libavutil/imgutils.h>
+// #include <libswscale/swscale.h>
+import "C"
+
+func frameData(frame *C.AVFrame) **C.uint8_t {
+	return (**C.uint8_t)(unsafe.Pointer(&frame.data[0]))
+}
+
+func frameLineSize(frame *C.AVFrame) *C.int {
+	return (*C.int)(unsafe.Pointer(&frame.linesize[0]))
+}
+
+// av1Decoder is a wrapper around FFmpeg's AV1 decoder.
+type av1Decoder struct {
+	codecCtx     *C.AVCodecContext
+	yuv420Frame  *C.AVFrame
+	rgbaFrame    *C.AVFrame
+	rgbaFramePtr []uint8
+	swsCtx       *C.struct_SwsContext
+}
+
+// initialize initializes a av1Decoder.
+func (d *av1Decoder) initialize() error {
+	codec := C.avcodec_find_decoder(C.AV_CODEC_ID_AV1)
+	if codec == nil {
+		return fmt.Errorf("avcodec_find_decoder() failed")
+	}
+
+	d.codecCtx = C.avcodec_alloc_context3(codec)
+	if d.codecCtx == nil {
+		return fmt.Errorf("avcodec_alloc_context3() failed")
+	}
+
+	res := C.avcodec_open2(d.codecCtx, codec, nil)
+	if res < 0 {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("avcodec_open2() failed")
+	}
+
+	d.yuv420Frame = C.av_frame_alloc()
+	if d.yuv420Frame == nil {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	return nil
+}
+
+// close closes the decoder.
+func (d *av1Decoder) close() {
+	if d.swsCtx != nil {
+		C.sws_freeContext(d.swsCtx)
+	}
+
+	if d.rgbaFrame != nil {
+		C.av_frame_free(&d.rgbaFrame)
+	}
+
+	C.av_frame_free(&d.yuv420Frame)
+	C.avcodec_close(d.codecCtx)
+}
+
+// decode decodes a RGBA image from AV1.
+func (d *av1Decoder) decode(tu [][]byte) (*image.RGBA, error) {
+	// encode temporal unit into bytestream
+	bs, err := av1.Bitstream(tu).Marshal()
+	if err != nil {
+		panic(err)
+	}
+
+	// send temporal unit to decoder
+	var pkt C.AVPacket
+	ptr := &bs[0]
+	var p runtime.Pinner
+	p.Pin(ptr)
+	pkt.data = (*C.uint8_t)(ptr)
+	pkt.size = (C.int)(len(bs))
+	res := C.avcodec_send_packet(d.codecCtx, &pkt)
+	p.Unpin()
+	if res < 0 {
+		return nil, nil
+	}
+
+	// receive frame if available
+	res = C.avcodec_receive_frame(d.codecCtx, d.yuv420Frame)
+	if res < 0 {
+		return nil, nil
+	}
+
+	// if frame size has changed, allocate needed objects
+	if d.rgbaFrame == nil || d.rgbaFrame.width != d.yuv420Frame.width || d.rgbaFrame.height != d.yuv420Frame.height {
+		if d.swsCtx != nil {
+			C.sws_freeContext(d.swsCtx)
+		}
+
+		if d.rgbaFrame != nil {
+			C.av_frame_free(&d.rgbaFrame)
+		}
+
+		d.rgbaFrame = C.av_frame_alloc()
+		if d.rgbaFrame == nil {
+			return nil, fmt.Errorf("av_frame_alloc() failed")
+		}
+
+		d.rgbaFrame.format = C.AV_PIX_FMT_RGBA
+		d.rgbaFrame.width = d.yuv420Frame.width
+		d.rgbaFrame.height = d.yuv420Frame.height
+		d.rgbaFrame.color_range = C.AVCOL_RANGE_JPEG
+
+		res = C.av_frame_get_buffer(d.rgbaFrame, 1)
+		if res < 0 {
+			return nil, fmt.Errorf("av_frame_get_buffer() failed")
+		}
+
+		d.swsCtx = C.sws_getContext(d.yuv420Frame.width, d.yuv420Frame.height, int32(d.yuv420Frame.format),
+			d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format), C.SWS_BILINEAR, nil, nil, nil)
+		if d.swsCtx == nil {
+			return nil, fmt.Errorf("sws_getContext() failed")
+		}
+
+		rgbaFrameSize := C.av_image_get_buffer_size((int32)(d.rgbaFrame.format), d.rgbaFrame.width, d.rgbaFrame.height, 1)
+		d.rgbaFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.rgbaFrame.data[0]))[:rgbaFrameSize:rgbaFrameSize]
+	}
+
+	// convert color space from YUV420 to RGBA
+	res = C.sws_scale(d.swsCtx, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame),
+		0, d.yuv420Frame.height, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame))
+	if res < 0 {
+		return nil, fmt.Errorf("sws_scale() failed")
+	}
+
+	// embed frame into an image.RGBA
+	return &image.RGBA{
+		Pix:    d.rgbaFramePtr,
+		Stride: 4 * (int)(d.rgbaFrame.width),
+		Rect: image.Rectangle{
+			Max: image.Point{(int)(d.rgbaFrame.width), (int)(d.rgbaFrame.height)},
+		},
+	}, nil
+}
diff --git a/examples/client-play-format-av1-to-jpeg/main.go b/examples/client-play-format-av1-to-jpeg/main.go
new file mode 100644
index 00000000..1efad69e
--- /dev/null
+++ b/examples/client-play-format-av1-to-jpeg/main.go
@@ -0,0 +1,149 @@
+//go:build cgo
+
+package main
+
+import (
+	"image"
+	"image/jpeg"
+	"log"
+	"os"
+	"strconv"
+	"time"
+
+	"github.com/bluenviron/gortsplib/v4"
+	"github.com/bluenviron/gortsplib/v4/pkg/base"
+	"github.com/bluenviron/gortsplib/v4/pkg/format"
+	"github.com/bluenviron/gortsplib/v4/pkg/format/rtpav1"
+	"github.com/bluenviron/mediacommon/v2/pkg/codecs/av1"
+	"github.com/pion/rtp"
+)
+
+// This example shows how to
+// 1. connect to a RTSP server
+// 2. check if there's a AV1 format
+// 3. decode the AV1 stream into RGBA frames
+// 4. convert RGBA frames to JPEG images and save them on disk
+
+// This example requires the FFmpeg libraries, that can be installed with this command:
+// apt install -y libavformat-dev libswscale-dev gcc pkg-config
+
+func saveToFile(img image.Image) error {
+	// create file
+	fname := strconv.FormatInt(time.Now().UnixNano()/int64(time.Millisecond), 10) + ".jpg"
+	f, err := os.Create(fname)
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+
+	log.Println("saving", fname)
+
+	// convert to jpeg
+	return jpeg.Encode(f, img, &jpeg.Options{
+		Quality: 60,
+	})
+}
+
+func main() {
+	c := gortsplib.Client{}
+
+	// parse URL
+	u, err := base.ParseURL("rtsp://myuser:mypass@localhost:8554/mystream")
+	if err != nil {
+		panic(err)
+	}
+
+	// connect to the server
+	err = c.Start(u.Scheme, u.Host)
+	if err != nil {
+		panic(err)
+	}
+	defer c.Close()
+
+	// find available medias
+	desc, _, err := c.Describe(u)
+	if err != nil {
+		panic(err)
+	}
+
+	// find the AV1 media and format
+	var forma *format.AV1
+	medi := desc.FindFormat(&forma)
+	if medi == nil {
+		panic("media not found")
+	}
+
+	// setup RTP -> AV1 decoder
+	rtpDec, err := forma.CreateDecoder()
+	if err != nil {
+		panic(err)
+	}
+
+	// setup AV1 -> RGBA decoder
+	av1Dec := &av1Decoder{}
+	err = av1Dec.initialize()
+	if err != nil {
+		panic(err)
+	}
+	defer av1Dec.close()
+
+	// setup a single media
+	_, err = c.Setup(desc.BaseURL, medi, 0, 0)
+	if err != nil {
+		panic(err)
+	}
+
+	firstRandomReceived := false
+	saveCount := 0
+
+	// called when a RTP packet arrives
+	c.OnPacketRTP(medi, forma, func(pkt *rtp.Packet) {
+		// extract AV1 temporal units from RTP packets
+		tu, err := rtpDec.Decode(pkt)
+		if err != nil {
+			if err != rtpav1.ErrNonStartingPacketAndNoPrevious && err != rtpav1.ErrMorePacketsNeeded {
+				log.Printf("ERR: %v", err)
+			}
+			return
+		}
+
+		// wait for a random access unit
+		if !firstRandomReceived && !av1.IsRandomAccess2(tu) {
+			log.Printf("waiting for a random access unit")
+			return
+		}
+		firstRandomReceived = true
+
+		// convert AV1 temporal units into RGBA frames
+		img, err := av1Dec.decode(tu)
+		if err != nil {
+			panic(err)
+		}
+
+		// wait for a frame
+		if img == nil {
+			return
+		}
+
+		// convert frame to JPEG and save to file
+		err = saveToFile(img)
+		if err != nil {
+			panic(err)
+		}
+
+		saveCount++
+		if saveCount == 5 {
+			log.Printf("saved 5 images, exiting")
+			os.Exit(1)
+		}
+	})
+
+	// start playing
+	_, err = c.Play(nil)
+	if err != nil {
+		panic(err)
+	}
+
+	// wait until a fatal error
+	panic(c.Wait())
+}
diff --git a/examples/client-play-format-av1/av1_decoder.go b/examples/client-play-format-av1/av1_decoder.go
new file mode 100644
index 00000000..cb983075
--- /dev/null
+++ b/examples/client-play-format-av1/av1_decoder.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+	"image"
+	"runtime"
+	"unsafe"
+
+	"github.com/bluenviron/mediacommon/v2/pkg/codecs/av1"
+)
+
+// #cgo pkg-config: libavcodec libavutil libswscale
+// #include <libavcodec/avcodec.h>
+// #include <libavutil/imgutils.h>
+// #include <libswscale/swscale.h>
+import "C"
+
+func frameData(frame *C.AVFrame) **C.uint8_t {
+	return (**C.uint8_t)(unsafe.Pointer(&frame.data[0]))
+}
+
+func frameLineSize(frame *C.AVFrame) *C.int {
+	return (*C.int)(unsafe.Pointer(&frame.linesize[0]))
+}
+
+// av1Decoder is a wrapper around FFmpeg's AV1 decoder.
+type av1Decoder struct {
+	codecCtx     *C.AVCodecContext
+	yuv420Frame  *C.AVFrame
+	rgbaFrame    *C.AVFrame
+	rgbaFramePtr []uint8
+	swsCtx       *C.struct_SwsContext
+}
+
+// initialize initializes a av1Decoder.
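+// It allocates the FFmpeg codec context and the frame that receives decoded pictures.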
+func (d *av1Decoder) initialize() error { + codec := C.avcodec_find_decoder(C.AV_CODEC_ID_AV1) + if codec == nil { + return fmt.Errorf("avcodec_find_decoder() failed") + } + + d.codecCtx = C.avcodec_alloc_context3(codec) + if d.codecCtx == nil { + return fmt.Errorf("avcodec_alloc_context3() failed") + } + + res := C.avcodec_open2(d.codecCtx, codec, nil) + if res < 0 { + C.avcodec_close(d.codecCtx) + return fmt.Errorf("avcodec_open2() failed") + } + + d.yuv420Frame = C.av_frame_alloc() + if d.yuv420Frame == nil { + C.avcodec_close(d.codecCtx) + return fmt.Errorf("av_frame_alloc() failed") + } + + return nil +} + +// close closes the decoder. +func (d *av1Decoder) close() { + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + C.av_frame_free(&d.yuv420Frame) + C.avcodec_close(d.codecCtx) +} + +// decode decodes a RGBA image from AV1. +func (d *av1Decoder) decode(tu [][]byte) (*image.RGBA, error) { + // encode temporal unit into bytestream + bs, err := av1.Bitstream(tu).Marshal() + if err != nil { + panic(err) + } + + // send temporal unit to decoder + var pkt C.AVPacket + ptr := &bs[0] + var p runtime.Pinner + p.Pin(ptr) + pkt.data = (*C.uint8_t)(ptr) + pkt.size = (C.int)(len(bs)) + res := C.avcodec_send_packet(d.codecCtx, &pkt) + p.Unpin() + if res < 0 { + return nil, nil + } + + // receive frame if available + res = C.avcodec_receive_frame(d.codecCtx, d.yuv420Frame) + if res < 0 { + return nil, nil + } + + // if frame size has changed, allocate needed objects + if d.rgbaFrame == nil || d.rgbaFrame.width != d.yuv420Frame.width || d.rgbaFrame.height != d.yuv420Frame.height { + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + d.rgbaFrame = C.av_frame_alloc() + if d.rgbaFrame == nil { + return nil, fmt.Errorf("av_frame_alloc() failed") + } + + d.rgbaFrame.format = C.AV_PIX_FMT_RGBA + d.rgbaFrame.width = d.yuv420Frame.width + d.rgbaFrame.height = d.yuv420Frame.height + d.rgbaFrame.color_range = C.AVCOL_RANGE_JPEG + + res = C.av_frame_get_buffer(d.rgbaFrame, 1) + if res < 0 { + return nil, fmt.Errorf("av_frame_get_buffer() failed") + } + + d.swsCtx = C.sws_getContext(d.yuv420Frame.width, d.yuv420Frame.height, int32(d.yuv420Frame.format), + d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format), C.SWS_BILINEAR, nil, nil, nil) + if d.swsCtx == nil { + return nil, fmt.Errorf("sws_getContext() failed") + } + + rgbaFrameSize := C.av_image_get_buffer_size((int32)(d.rgbaFrame.format), d.rgbaFrame.width, d.rgbaFrame.height, 1) + d.rgbaFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.rgbaFrame.data[0]))[:rgbaFrameSize:rgbaFrameSize] + } + + // convert color space from YUV420 to RGBA + res = C.sws_scale(d.swsCtx, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame), + 0, d.yuv420Frame.height, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame)) + if res < 0 { + return nil, fmt.Errorf("sws_scale() failed") + } + + // embed frame into an image.RGBA + return &image.RGBA{ + Pix: d.rgbaFramePtr, + Stride: 4 * (int)(d.rgbaFrame.width), + Rect: image.Rectangle{ + Max: image.Point{(int)(d.rgbaFrame.width), (int)(d.rgbaFrame.height)}, + }, + }, nil +} diff --git a/examples/client-play-format-av1/main.go b/examples/client-play-format-av1/main.go index aed1192b..b7958e38 100644 --- a/examples/client-play-format-av1/main.go +++ b/examples/client-play-format-av1/main.go @@ -1,3 +1,5 @@ +//go:build cgo + package main import ( @@ -6,14 +8,18 @@ import ( 
"github.com/bluenviron/gortsplib/v4" "github.com/bluenviron/gortsplib/v4/pkg/base" "github.com/bluenviron/gortsplib/v4/pkg/format" - "github.com/bluenviron/gortsplib/v4/pkg/format/rtpvp9" + "github.com/bluenviron/gortsplib/v4/pkg/format/rtpav1" + "github.com/bluenviron/mediacommon/v2/pkg/codecs/av1" "github.com/pion/rtp" ) // This example shows how to // 1. connect to a RTSP server // 2. check if there's a AV1 format -// 3. get access units of that format +// 3. decode the AV1 stream into RGBA frames + +// This example requires the FFmpeg libraries, that can be installed with this command: +// apt install -y libavformat-dev libswscale-dev gcc pkg-config func main() { c := gortsplib.Client{} @@ -44,18 +50,28 @@ func main() { panic("media not found") } - // create decoder + // setup RTP -> AV1 decoder rtpDec, err := forma.CreateDecoder() if err != nil { panic(err) } + // setup AV1 -> RGBA decoder + av1Dec := &av1Decoder{} + err = av1Dec.initialize() + if err != nil { + panic(err) + } + defer av1Dec.close() + // setup a single media _, err = c.Setup(desc.BaseURL, medi, 0, 0) if err != nil { panic(err) } + firstRandomReceived := false + // called when a RTP packet arrives c.OnPacketRTP(medi, forma, func(pkt *rtp.Packet) { // decode timestamp @@ -68,13 +84,31 @@ func main() { // extract AV1 temporal units from RTP packets tu, err := rtpDec.Decode(pkt) if err != nil { - if err != rtpvp9.ErrNonStartingPacketAndNoPrevious && err != rtpvp9.ErrMorePacketsNeeded { + if err != rtpav1.ErrNonStartingPacketAndNoPrevious && err != rtpav1.ErrMorePacketsNeeded { log.Printf("ERR: %v", err) } return } - log.Printf("received temporal unit with PTS %v and size %d\n", pts, len(tu)) + // wait for a random access unit + if !firstRandomReceived && !av1.IsRandomAccess2(tu) { + log.Printf("waiting for a random access unit") + return + } + firstRandomReceived = true + + // convert AV1 temporal units into RGBA frames + img, err := av1Dec.decode(tu) + if err != nil { + panic(err) + } + + // wait for a frame + if img == nil { + return + } + + log.Printf("decoded frame with PTS %v and size %v", pts, img.Bounds().Max) }) // start playing diff --git a/examples/client-play-format-h264-convert-to-jpeg/h264_decoder.go b/examples/client-play-format-h264-convert-to-jpeg/h264_decoder.go deleted file mode 100644 index af026cc1..00000000 --- a/examples/client-play-format-h264-convert-to-jpeg/h264_decoder.go +++ /dev/null @@ -1,137 +0,0 @@ -package main - -import ( - "fmt" - "image" - "unsafe" -) - -// #cgo pkg-config: libavcodec libavutil libswscale -// #include -// #include -// #include -import "C" - -func frameData(frame *C.AVFrame) **C.uint8_t { - return (**C.uint8_t)(unsafe.Pointer(&frame.data[0])) -} - -func frameLineSize(frame *C.AVFrame) *C.int { - return (*C.int)(unsafe.Pointer(&frame.linesize[0])) -} - -// h264Decoder is a wrapper around FFmpeg's H264 decoder. -type h264Decoder struct { - codecCtx *C.AVCodecContext - srcFrame *C.AVFrame - swsCtx *C.struct_SwsContext - dstFrame *C.AVFrame - dstFramePtr []uint8 -} - -// initialize initializes a h264Decoder. 
-func (d *h264Decoder) initialize() error { - codec := C.avcodec_find_decoder(C.AV_CODEC_ID_H264) - if codec == nil { - return fmt.Errorf("avcodec_find_decoder() failed") - } - - d.codecCtx = C.avcodec_alloc_context3(codec) - if d.codecCtx == nil { - return fmt.Errorf("avcodec_alloc_context3() failed") - } - - res := C.avcodec_open2(d.codecCtx, codec, nil) - if res < 0 { - C.avcodec_close(d.codecCtx) - return fmt.Errorf("avcodec_open2() failed") - } - - d.srcFrame = C.av_frame_alloc() - if d.srcFrame == nil { - C.avcodec_close(d.codecCtx) - return fmt.Errorf("av_frame_alloc() failed") - } - - return nil -} - -// close closes the decoder. -func (d *h264Decoder) close() { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - - if d.swsCtx != nil { - C.sws_freeContext(d.swsCtx) - } - - C.av_frame_free(&d.srcFrame) - C.avcodec_close(d.codecCtx) -} - -func (d *h264Decoder) decode(nalu []byte) (image.Image, error) { - nalu = append([]uint8{0x00, 0x00, 0x00, 0x01}, []uint8(nalu)...) - - // send NALU to decoder - var avPacket C.AVPacket - avPacket.data = (*C.uint8_t)(C.CBytes(nalu)) - defer C.free(unsafe.Pointer(avPacket.data)) - avPacket.size = C.int(len(nalu)) - res := C.avcodec_send_packet(d.codecCtx, &avPacket) - if res < 0 { - return nil, nil - } - - // receive frame if available - res = C.avcodec_receive_frame(d.codecCtx, d.srcFrame) - if res < 0 { - return nil, nil - } - - // if frame size has changed, allocate needed objects - if d.dstFrame == nil || d.dstFrame.width != d.srcFrame.width || d.dstFrame.height != d.srcFrame.height { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - - if d.swsCtx != nil { - C.sws_freeContext(d.swsCtx) - } - - d.dstFrame = C.av_frame_alloc() - d.dstFrame.format = C.AV_PIX_FMT_RGBA - d.dstFrame.width = d.srcFrame.width - d.dstFrame.height = d.srcFrame.height - d.dstFrame.color_range = C.AVCOL_RANGE_JPEG - res = C.av_frame_get_buffer(d.dstFrame, 1) - if res < 0 { - return nil, fmt.Errorf("av_frame_get_buffer() failed") - } - - d.swsCtx = C.sws_getContext(d.srcFrame.width, d.srcFrame.height, C.AV_PIX_FMT_YUV420P, - d.dstFrame.width, d.dstFrame.height, (int32)(d.dstFrame.format), C.SWS_BILINEAR, nil, nil, nil) - if d.swsCtx == nil { - return nil, fmt.Errorf("sws_getContext() failed") - } - - dstFrameSize := C.av_image_get_buffer_size((int32)(d.dstFrame.format), d.dstFrame.width, d.dstFrame.height, 1) - d.dstFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.dstFrame.data[0]))[:dstFrameSize:dstFrameSize] - } - - // convert color space from YUV420 to RGBA - res = C.sws_scale(d.swsCtx, frameData(d.srcFrame), frameLineSize(d.srcFrame), - 0, d.srcFrame.height, frameData(d.dstFrame), frameLineSize(d.dstFrame)) - if res < 0 { - return nil, fmt.Errorf("sws_scale() failed") - } - - // embed frame into an image.Image - return &image.RGBA{ - Pix: d.dstFramePtr, - Stride: 4 * (int)(d.dstFrame.width), - Rect: image.Rectangle{ - Max: image.Point{(int)(d.dstFrame.width), (int)(d.dstFrame.height)}, - }, - }, nil -} diff --git a/examples/client-play-format-h264-to-jpeg/h264_decoder.go b/examples/client-play-format-h264-to-jpeg/h264_decoder.go new file mode 100644 index 00000000..0e760574 --- /dev/null +++ b/examples/client-play-format-h264-to-jpeg/h264_decoder.go @@ -0,0 +1,153 @@ +package main + +import ( + "fmt" + "image" + "runtime" + "unsafe" + + "github.com/bluenviron/mediacommon/v2/pkg/codecs/h264" +) + +// #cgo pkg-config: libavcodec libavutil libswscale +// #include +// #include +// #include +import "C" + +func frameData(frame *C.AVFrame) **C.uint8_t { + 
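// AVFrame.data is a fixed-size array of plane pointers; taking the address of its first element yields the **uint8_t layout that sws_scale() expects +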
return (**C.uint8_t)(unsafe.Pointer(&frame.data[0])) +} + +func frameLineSize(frame *C.AVFrame) *C.int { + return (*C.int)(unsafe.Pointer(&frame.linesize[0])) +} + +// h264Decoder is a wrapper around FFmpeg's H264 decoder. +type h264Decoder struct { + codecCtx *C.AVCodecContext + yuv420Frame *C.AVFrame + rgbaFrame *C.AVFrame + rgbaFramePtr []uint8 + swsCtx *C.struct_SwsContext +} + +// initialize initializes a h264Decoder. +func (d *h264Decoder) initialize() error { + codec := C.avcodec_find_decoder(C.AV_CODEC_ID_H264) + if codec == nil { + return fmt.Errorf("avcodec_find_decoder() failed") + } + + d.codecCtx = C.avcodec_alloc_context3(codec) + if d.codecCtx == nil { + return fmt.Errorf("avcodec_alloc_context3() failed") + } + + res := C.avcodec_open2(d.codecCtx, codec, nil) + if res < 0 { + C.avcodec_close(d.codecCtx) + return fmt.Errorf("avcodec_open2() failed") + } + + d.yuv420Frame = C.av_frame_alloc() + if d.yuv420Frame == nil { + C.avcodec_close(d.codecCtx) + return fmt.Errorf("av_frame_alloc() failed") + } + + return nil +} + +// close closes the decoder. +func (d *h264Decoder) close() { + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + C.av_frame_free(&d.yuv420Frame) + C.avcodec_close(d.codecCtx) +} + +// decode decodes a RGBA image from H264. +func (d *h264Decoder) decode(au [][]byte) (*image.RGBA, error) { + // encode access unit into Annex-B + annexb, err := h264.AnnexB(au).Marshal() + if err != nil { + panic(err) + } + + // send access unit to decoder + var pkt C.AVPacket + ptr := &annexb[0] + var p runtime.Pinner + p.Pin(ptr) + pkt.data = (*C.uint8_t)(ptr) + pkt.size = (C.int)(len(annexb)) + res := C.avcodec_send_packet(d.codecCtx, &pkt) + p.Unpin() + if res < 0 { + return nil, nil + } + + // receive frame if available + res = C.avcodec_receive_frame(d.codecCtx, d.yuv420Frame) + if res < 0 { + return nil, nil + } + + // if frame size has changed, allocate needed objects + if d.rgbaFrame == nil || d.rgbaFrame.width != d.yuv420Frame.width || d.rgbaFrame.height != d.yuv420Frame.height { + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + d.rgbaFrame = C.av_frame_alloc() + if d.rgbaFrame == nil { + return nil, fmt.Errorf("av_frame_alloc() failed") + } + + d.rgbaFrame.format = C.AV_PIX_FMT_RGBA + d.rgbaFrame.width = d.yuv420Frame.width + d.rgbaFrame.height = d.yuv420Frame.height + d.rgbaFrame.color_range = C.AVCOL_RANGE_JPEG + + res = C.av_frame_get_buffer(d.rgbaFrame, 1) + if res < 0 { + return nil, fmt.Errorf("av_frame_get_buffer() failed") + } + + d.swsCtx = C.sws_getContext(d.yuv420Frame.width, d.yuv420Frame.height, int32(d.yuv420Frame.format), + d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format), C.SWS_BILINEAR, nil, nil, nil) + if d.swsCtx == nil { + return nil, fmt.Errorf("sws_getContext() failed") + } + + rgbaFrameSize := C.av_image_get_buffer_size((int32)(d.rgbaFrame.format), d.rgbaFrame.width, d.rgbaFrame.height, 1) + d.rgbaFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.rgbaFrame.data[0]))[:rgbaFrameSize:rgbaFrameSize] + } + + // convert color space from YUV420 to RGBA + res = C.sws_scale(d.swsCtx, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame), + 0, d.yuv420Frame.height, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame)) + if res < 0 { + return nil, fmt.Errorf("sws_scale() failed") + } + + // embed frame into an image.RGBA + return &image.RGBA{ + Pix: d.rgbaFramePtr, + Stride: 4 * 
(int)(d.rgbaFrame.width), + Rect: image.Rectangle{ + Max: image.Point{(int)(d.rgbaFrame.width), (int)(d.rgbaFrame.height)}, + }, + }, nil +} diff --git a/examples/client-play-format-h264-convert-to-jpeg/main.go b/examples/client-play-format-h264-to-jpeg/main.go similarity index 72% rename from examples/client-play-format-h264-convert-to-jpeg/main.go rename to examples/client-play-format-h264-to-jpeg/main.go index abb72d73..cfc679b0 100644 --- a/examples/client-play-format-h264-convert-to-jpeg/main.go +++ b/examples/client-play-format-h264-to-jpeg/main.go @@ -21,8 +21,8 @@ import ( // This example shows how to // 1. connect to a RTSP server // 2. check if there's a H264 format -// 3. decode the H264 format into RGBA frames -// 4. convert frames to JPEG images and save them on disk +// 3. decode the H264 stream into RGBA frames +// 4. convert RGBA frames to JPEG images and save them on disk // This example requires the FFmpeg libraries, that can be installed with this command: // apt install -y libavformat-dev libswscale-dev gcc pkg-config @@ -79,20 +79,20 @@ func main() { panic(err) } - // setup H264 -> raw frames decoder - frameDec := &h264Decoder{} - err = frameDec.initialize() + // setup H264 -> RGBA decoder + h264Dec := &h264Decoder{} + err = h264Dec.initialize() if err != nil { panic(err) } - defer frameDec.close() + defer h264Dec.close() // if SPS and PPS are present into the SDP, send them to the decoder if forma.SPS != nil { - frameDec.decode(forma.SPS) + h264Dec.decode([][]byte{forma.SPS}) } if forma.PPS != nil { - frameDec.decode(forma.PPS) + h264Dec.decode([][]byte{forma.PPS}) } // setup a single media @@ -101,7 +101,7 @@ func main() { panic(err) } - iframeReceived := false + firstRandomAccess := false saveCount := 0 // called when a RTP packet arrives @@ -115,38 +115,34 @@ func main() { return } - // wait for an I-frame - if !iframeReceived { - if !h264.IsRandomAccess(au) { - log.Printf("waiting for an I-frame") - return - } - iframeReceived = true + // wait for a random access unit + if !firstRandomAccess && !h264.IsRandomAccess(au) { + log.Printf("waiting for a random access unit") + return + } + firstRandomAccess = true + + // convert H264 access units into RGBA frames + img, err := h264Dec.decode(au) + if err != nil { + panic(err) } - for _, nalu := range au { - // convert NALUs into RGBA frames - img, err := frameDec.decode(nalu) - if err != nil { - panic(err) - } + // wait for a frame + if img == nil { + return + } - // wait for a frame - if img == nil { - continue - } + // convert frame to JPEG and save to file + err = saveToFile(img) + if err != nil { + panic(err) + } - // convert frame to JPEG and save to file - err = saveToFile(img) - if err != nil { - panic(err) - } - - saveCount++ - if saveCount == 5 { - log.Printf("saved 5 images, exiting") - os.Exit(1) - } + saveCount++ + if saveCount == 5 { + log.Printf("saved 5 images, exiting") + os.Exit(1) } }) diff --git a/examples/client-play-format-h264/h264_decoder.go b/examples/client-play-format-h264/h264_decoder.go index af026cc1..0e760574 100644 --- a/examples/client-play-format-h264/h264_decoder.go +++ b/examples/client-play-format-h264/h264_decoder.go @@ -3,7 +3,10 @@ package main import ( "fmt" "image" + "runtime" "unsafe" + + "github.com/bluenviron/mediacommon/v2/pkg/codecs/h264" ) // #cgo pkg-config: libavcodec libavutil libswscale @@ -22,11 +25,11 @@ func frameLineSize(frame *C.AVFrame) *C.int { // h264Decoder is a wrapper around FFmpeg's H264 decoder. 
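// Each call to decode() reuses the same RGBA buffer: the returned image is valid only until the next call.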
type h264Decoder struct { - codecCtx *C.AVCodecContext - srcFrame *C.AVFrame - swsCtx *C.struct_SwsContext - dstFrame *C.AVFrame - dstFramePtr []uint8 + codecCtx *C.AVCodecContext + yuv420Frame *C.AVFrame + rgbaFrame *C.AVFrame + rgbaFramePtr []uint8 + swsCtx *C.struct_SwsContext } // initialize initializes a h264Decoder. @@ -47,8 +50,8 @@ func (d *h264Decoder) initialize() error { return fmt.Errorf("avcodec_open2() failed") } - d.srcFrame = C.av_frame_alloc() - if d.srcFrame == nil { + d.yuv420Frame = C.av_frame_alloc() + if d.yuv420Frame == nil { C.avcodec_close(d.codecCtx) return fmt.Errorf("av_frame_alloc() failed") } @@ -58,80 +61,93 @@ func (d *h264Decoder) initialize() error { // close closes the decoder. func (d *h264Decoder) close() { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - if d.swsCtx != nil { C.sws_freeContext(d.swsCtx) } - C.av_frame_free(&d.srcFrame) + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + C.av_frame_free(&d.yuv420Frame) C.avcodec_close(d.codecCtx) } -func (d *h264Decoder) decode(nalu []byte) (image.Image, error) { - nalu = append([]uint8{0x00, 0x00, 0x00, 0x01}, []uint8(nalu)...) +// decode decodes a RGBA image from H264. +func (d *h264Decoder) decode(au [][]byte) (*image.RGBA, error) { + // encode access unit into Annex-B + annexb, err := h264.AnnexB(au).Marshal() + if err != nil { + panic(err) + } - // send NALU to decoder - var avPacket C.AVPacket - avPacket.data = (*C.uint8_t)(C.CBytes(nalu)) - defer C.free(unsafe.Pointer(avPacket.data)) - avPacket.size = C.int(len(nalu)) - res := C.avcodec_send_packet(d.codecCtx, &avPacket) + // send access unit to decoder + var pkt C.AVPacket + ptr := &annexb[0] + var p runtime.Pinner + p.Pin(ptr) + pkt.data = (*C.uint8_t)(ptr) + pkt.size = (C.int)(len(annexb)) + res := C.avcodec_send_packet(d.codecCtx, &pkt) + p.Unpin() if res < 0 { return nil, nil } // receive frame if available - res = C.avcodec_receive_frame(d.codecCtx, d.srcFrame) + res = C.avcodec_receive_frame(d.codecCtx, d.yuv420Frame) if res < 0 { return nil, nil } // if frame size has changed, allocate needed objects - if d.dstFrame == nil || d.dstFrame.width != d.srcFrame.width || d.dstFrame.height != d.srcFrame.height { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - + if d.rgbaFrame == nil || d.rgbaFrame.width != d.yuv420Frame.width || d.rgbaFrame.height != d.yuv420Frame.height { if d.swsCtx != nil { C.sws_freeContext(d.swsCtx) } - d.dstFrame = C.av_frame_alloc() - d.dstFrame.format = C.AV_PIX_FMT_RGBA - d.dstFrame.width = d.srcFrame.width - d.dstFrame.height = d.srcFrame.height - d.dstFrame.color_range = C.AVCOL_RANGE_JPEG - res = C.av_frame_get_buffer(d.dstFrame, 1) + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + d.rgbaFrame = C.av_frame_alloc() + if d.rgbaFrame == nil { + return nil, fmt.Errorf("av_frame_alloc() failed") + } + + d.rgbaFrame.format = C.AV_PIX_FMT_RGBA + d.rgbaFrame.width = d.yuv420Frame.width + d.rgbaFrame.height = d.yuv420Frame.height + d.rgbaFrame.color_range = C.AVCOL_RANGE_JPEG + + res = C.av_frame_get_buffer(d.rgbaFrame, 1) if res < 0 { return nil, fmt.Errorf("av_frame_get_buffer() failed") } - d.swsCtx = C.sws_getContext(d.srcFrame.width, d.srcFrame.height, C.AV_PIX_FMT_YUV420P, - d.dstFrame.width, d.dstFrame.height, (int32)(d.dstFrame.format), C.SWS_BILINEAR, nil, nil, nil) + d.swsCtx = C.sws_getContext(d.yuv420Frame.width, d.yuv420Frame.height, int32(d.yuv420Frame.format), + d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format), C.SWS_BILINEAR, nil, nil, 
nil) if d.swsCtx == nil { return nil, fmt.Errorf("sws_getContext() failed") } - dstFrameSize := C.av_image_get_buffer_size((int32)(d.dstFrame.format), d.dstFrame.width, d.dstFrame.height, 1) - d.dstFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.dstFrame.data[0]))[:dstFrameSize:dstFrameSize] + rgbaFrameSize := C.av_image_get_buffer_size((int32)(d.rgbaFrame.format), d.rgbaFrame.width, d.rgbaFrame.height, 1) + d.rgbaFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.rgbaFrame.data[0]))[:rgbaFrameSize:rgbaFrameSize] } // convert color space from YUV420 to RGBA - res = C.sws_scale(d.swsCtx, frameData(d.srcFrame), frameLineSize(d.srcFrame), - 0, d.srcFrame.height, frameData(d.dstFrame), frameLineSize(d.dstFrame)) + res = C.sws_scale(d.swsCtx, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame), + 0, d.yuv420Frame.height, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame)) if res < 0 { return nil, fmt.Errorf("sws_scale() failed") } - // embed frame into an image.Image + // embed frame into an image.RGBA return &image.RGBA{ - Pix: d.dstFramePtr, - Stride: 4 * (int)(d.dstFrame.width), + Pix: d.rgbaFramePtr, + Stride: 4 * (int)(d.rgbaFrame.width), Rect: image.Rectangle{ - Max: image.Point{(int)(d.dstFrame.width), (int)(d.dstFrame.height)}, + Max: image.Point{(int)(d.rgbaFrame.width), (int)(d.rgbaFrame.height)}, }, }, nil } diff --git a/examples/client-play-format-h264/main.go b/examples/client-play-format-h264/main.go index f99dfcc1..c97126d0 100644 --- a/examples/client-play-format-h264/main.go +++ b/examples/client-play-format-h264/main.go @@ -9,13 +9,14 @@ import ( "github.com/bluenviron/gortsplib/v4/pkg/base" "github.com/bluenviron/gortsplib/v4/pkg/format" "github.com/bluenviron/gortsplib/v4/pkg/format/rtph264" + "github.com/bluenviron/mediacommon/v2/pkg/codecs/h264" "github.com/pion/rtp" ) // This example shows how to // 1. connect to a RTSP server // 2. check if there's an H264 format -// 3. decode the H264 format into RGBA frames +// 3. 
decode the H264 stream into RGBA frames // This example requires the FFmpeg libraries, that can be installed with this command: // apt install -y libavformat-dev libswscale-dev gcc pkg-config @@ -55,20 +56,20 @@ func main() { panic(err) } - // setup H264 -> raw frames decoder - frameDec := &h264Decoder{} - err = frameDec.initialize() + // setup H264 -> RGBA decoder + h264Dec := &h264Decoder{} + err = h264Dec.initialize() if err != nil { panic(err) } - defer frameDec.close() + defer h264Dec.close() // if SPS and PPS are present into the SDP, send them to the decoder if forma.SPS != nil { - frameDec.decode(forma.SPS) + h264Dec.decode([][]byte{forma.SPS}) } if forma.PPS != nil { - frameDec.decode(forma.PPS) + h264Dec.decode([][]byte{forma.PPS}) } // setup a single media @@ -77,6 +78,8 @@ func main() { panic(err) } + firstRandomAccess := false + // called when a RTP packet arrives c.OnPacketRTP(medi, forma, func(pkt *rtp.Packet) { // decode timestamp @@ -95,20 +98,25 @@ func main() { return } - for _, nalu := range au { - // convert NALUs into RGBA frames - img, err := frameDec.decode(nalu) - if err != nil { - panic(err) - } - - // wait for a frame - if img == nil { - continue - } - - log.Printf("decoded frame with PTS %v and size %v", pts, img.Bounds().Max) + // wait for a random access unit + if !firstRandomAccess && !h264.IsRandomAccess(au) { + log.Printf("waiting for a random access unit") + return } + firstRandomAccess = true + + // convert H264 access units into RGBA frames + img, err := h264Dec.decode(au) + if err != nil { + panic(err) + } + + // wait for a frame + if img == nil { + return + } + + log.Printf("decoded frame with PTS %v and size %v", pts, img.Bounds().Max) }) // start playing diff --git a/examples/client-play-format-h265-convert-to-jpeg/h265_decoder.go b/examples/client-play-format-h265-convert-to-jpeg/h265_decoder.go deleted file mode 100644 index 9bcbaa50..00000000 --- a/examples/client-play-format-h265-convert-to-jpeg/h265_decoder.go +++ /dev/null @@ -1,137 +0,0 @@ -package main - -import ( - "fmt" - "image" - "unsafe" -) - -// #cgo pkg-config: libavcodec libavutil libswscale -// #include -// #include -// #include -import "C" - -func frameData(frame *C.AVFrame) **C.uint8_t { - return (**C.uint8_t)(unsafe.Pointer(&frame.data[0])) -} - -func frameLineSize(frame *C.AVFrame) *C.int { - return (*C.int)(unsafe.Pointer(&frame.linesize[0])) -} - -// h265Decoder is a wrapper around FFmpeg's H265 decoder. -type h265Decoder struct { - codecCtx *C.AVCodecContext - srcFrame *C.AVFrame - swsCtx *C.struct_SwsContext - dstFrame *C.AVFrame - dstFramePtr []uint8 -} - -// initialize initializes a h265Decoder. -func (d *h265Decoder) initialize() error { - codec := C.avcodec_find_decoder(C.AV_CODEC_ID_H265) - if codec == nil { - return fmt.Errorf("avcodec_find_decoder() failed") - } - - d.codecCtx = C.avcodec_alloc_context3(codec) - if d.codecCtx == nil { - return fmt.Errorf("avcodec_alloc_context3() failed") - } - - res := C.avcodec_open2(d.codecCtx, codec, nil) - if res < 0 { - C.avcodec_close(d.codecCtx) - return fmt.Errorf("avcodec_open2() failed") - } - - d.srcFrame = C.av_frame_alloc() - if d.srcFrame == nil { - C.avcodec_close(d.codecCtx) - return fmt.Errorf("av_frame_alloc() failed") - } - - return nil -} - -// close closes the decoder. 
-func (d *h265Decoder) close() { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - - if d.swsCtx != nil { - C.sws_freeContext(d.swsCtx) - } - - C.av_frame_free(&d.srcFrame) - C.avcodec_close(d.codecCtx) -} - -func (d *h265Decoder) decode(nalu []byte) (image.Image, error) { - nalu = append([]uint8{0x00, 0x00, 0x00, 0x01}, []uint8(nalu)...) - - // send NALU to decoder - var avPacket C.AVPacket - avPacket.data = (*C.uint8_t)(C.CBytes(nalu)) - defer C.free(unsafe.Pointer(avPacket.data)) - avPacket.size = C.int(len(nalu)) - res := C.avcodec_send_packet(d.codecCtx, &avPacket) - if res < 0 { - return nil, nil - } - - // receive frame if available - res = C.avcodec_receive_frame(d.codecCtx, d.srcFrame) - if res < 0 { - return nil, nil - } - - // if frame size has changed, allocate needed objects - if d.dstFrame == nil || d.dstFrame.width != d.srcFrame.width || d.dstFrame.height != d.srcFrame.height { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - - if d.swsCtx != nil { - C.sws_freeContext(d.swsCtx) - } - - d.dstFrame = C.av_frame_alloc() - d.dstFrame.format = C.AV_PIX_FMT_RGBA - d.dstFrame.width = d.srcFrame.width - d.dstFrame.height = d.srcFrame.height - d.dstFrame.color_range = C.AVCOL_RANGE_JPEG - res = C.av_frame_get_buffer(d.dstFrame, 1) - if res < 0 { - return nil, fmt.Errorf("av_frame_get_buffer() failed") - } - - d.swsCtx = C.sws_getContext(d.srcFrame.width, d.srcFrame.height, C.AV_PIX_FMT_YUV420P, - d.dstFrame.width, d.dstFrame.height, (int32)(d.dstFrame.format), C.SWS_BILINEAR, nil, nil, nil) - if d.swsCtx == nil { - return nil, fmt.Errorf("sws_getContext() failed") - } - - dstFrameSize := C.av_image_get_buffer_size((int32)(d.dstFrame.format), d.dstFrame.width, d.dstFrame.height, 1) - d.dstFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.dstFrame.data[0]))[:dstFrameSize:dstFrameSize] - } - - // convert color space from YUV420 to RGBA - res = C.sws_scale(d.swsCtx, frameData(d.srcFrame), frameLineSize(d.srcFrame), - 0, d.srcFrame.height, frameData(d.dstFrame), frameLineSize(d.dstFrame)) - if res < 0 { - return nil, fmt.Errorf("sws_scale() failed") - } - - // embed frame into an image.Image - return &image.RGBA{ - Pix: d.dstFramePtr, - Stride: 4 * (int)(d.dstFrame.width), - Rect: image.Rectangle{ - Max: image.Point{(int)(d.dstFrame.width), (int)(d.dstFrame.height)}, - }, - }, nil -} diff --git a/examples/client-play-format-h265-to-jpeg/h265_decoder.go b/examples/client-play-format-h265-to-jpeg/h265_decoder.go new file mode 100644 index 00000000..3b046cd8 --- /dev/null +++ b/examples/client-play-format-h265-to-jpeg/h265_decoder.go @@ -0,0 +1,153 @@ +package main + +import ( + "fmt" + "image" + "runtime" + "unsafe" + + "github.com/bluenviron/mediacommon/v2/pkg/codecs/h264" +) + +// #cgo pkg-config: libavcodec libavutil libswscale +// #include +// #include +// #include +import "C" + +func frameData(frame *C.AVFrame) **C.uint8_t { + return (**C.uint8_t)(unsafe.Pointer(&frame.data[0])) +} + +func frameLineSize(frame *C.AVFrame) *C.int { + return (*C.int)(unsafe.Pointer(&frame.linesize[0])) +} + +// h265Decoder is a wrapper around FFmpeg's H265 decoder. +type h265Decoder struct { + codecCtx *C.AVCodecContext + yuv420Frame *C.AVFrame + rgbaFrame *C.AVFrame + rgbaFramePtr []uint8 + swsCtx *C.struct_SwsContext +} + +// initialize initializes a h265Decoder. 
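+// It must be called once before any call to decode().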
+func (d *h265Decoder) initialize() error { + codec := C.avcodec_find_decoder(C.AV_CODEC_ID_H265) + if codec == nil { + return fmt.Errorf("avcodec_find_decoder() failed") + } + + d.codecCtx = C.avcodec_alloc_context3(codec) + if d.codecCtx == nil { + return fmt.Errorf("avcodec_alloc_context3() failed") + } + + res := C.avcodec_open2(d.codecCtx, codec, nil) + if res < 0 { + C.avcodec_close(d.codecCtx) + return fmt.Errorf("avcodec_open2() failed") + } + + d.yuv420Frame = C.av_frame_alloc() + if d.yuv420Frame == nil { + C.avcodec_close(d.codecCtx) + return fmt.Errorf("av_frame_alloc() failed") + } + + return nil +} + +// close closes the decoder. +func (d *h265Decoder) close() { + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + C.av_frame_free(&d.yuv420Frame) + C.avcodec_close(d.codecCtx) +} + +// decode decodes a RGBA image from H265. +func (d *h265Decoder) decode(au [][]byte) (*image.RGBA, error) { + // encode access unit into Annex-B + annexb, err := h264.AnnexB(au).Marshal() + if err != nil { + panic(err) + } + + // send access unit to decoder + var pkt C.AVPacket + ptr := &annexb[0] + var p runtime.Pinner + p.Pin(ptr) + pkt.data = (*C.uint8_t)(ptr) + pkt.size = (C.int)(len(annexb)) + res := C.avcodec_send_packet(d.codecCtx, &pkt) + p.Unpin() + if res < 0 { + return nil, nil + } + + // receive frame if available + res = C.avcodec_receive_frame(d.codecCtx, d.yuv420Frame) + if res < 0 { + return nil, nil + } + + // if frame size has changed, allocate needed objects + if d.rgbaFrame == nil || d.rgbaFrame.width != d.yuv420Frame.width || d.rgbaFrame.height != d.yuv420Frame.height { + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + d.rgbaFrame = C.av_frame_alloc() + if d.rgbaFrame == nil { + return nil, fmt.Errorf("av_frame_alloc() failed") + } + + d.rgbaFrame.format = C.AV_PIX_FMT_RGBA + d.rgbaFrame.width = d.yuv420Frame.width + d.rgbaFrame.height = d.yuv420Frame.height + d.rgbaFrame.color_range = C.AVCOL_RANGE_JPEG + + res = C.av_frame_get_buffer(d.rgbaFrame, 1) + if res < 0 { + return nil, fmt.Errorf("av_frame_get_buffer() failed") + } + + d.swsCtx = C.sws_getContext(d.yuv420Frame.width, d.yuv420Frame.height, int32(d.yuv420Frame.format), + d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format), C.SWS_BILINEAR, nil, nil, nil) + if d.swsCtx == nil { + return nil, fmt.Errorf("sws_getContext() failed") + } + + rgbaFrameSize := C.av_image_get_buffer_size((int32)(d.rgbaFrame.format), d.rgbaFrame.width, d.rgbaFrame.height, 1) + d.rgbaFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.rgbaFrame.data[0]))[:rgbaFrameSize:rgbaFrameSize] + } + + // convert color space from YUV420 to RGBA + res = C.sws_scale(d.swsCtx, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame), + 0, d.yuv420Frame.height, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame)) + if res < 0 { + return nil, fmt.Errorf("sws_scale() failed") + } + + // embed frame into an image.RGBA + return &image.RGBA{ + Pix: d.rgbaFramePtr, + Stride: 4 * (int)(d.rgbaFrame.width), + Rect: image.Rectangle{ + Max: image.Point{(int)(d.rgbaFrame.width), (int)(d.rgbaFrame.height)}, + }, + }, nil +} diff --git a/examples/client-play-format-h265-convert-to-jpeg/main.go b/examples/client-play-format-h265-to-jpeg/main.go similarity index 72% rename from examples/client-play-format-h265-convert-to-jpeg/main.go rename to examples/client-play-format-h265-to-jpeg/main.go index 97c4243e..983239c4 100644 --- 
a/examples/client-play-format-h265-convert-to-jpeg/main.go +++ b/examples/client-play-format-h265-to-jpeg/main.go @@ -21,8 +21,8 @@ import ( // This example shows how to // 1. connect to a RTSP server // 2. check if there's a H265 format -// 3. decode the H265 format into RGBA frames -// 4. convert frames to JPEG images and save them on disk +// 3. decode the H265 stream into RGBA frames +// 4. convert RGBA frames to JPEG images and save them on disk // This example requires the FFmpeg libraries, that can be installed with this command: // apt install -y libavformat-dev libswscale-dev gcc pkg-config @@ -79,23 +79,23 @@ func main() { panic(err) } - // setup H265 -> raw frames decoder - frameDec := &h265Decoder{} - err = frameDec.initialize() + // setup H265 -> RGBA decoder + h265Dec := &h265Decoder{} + err = h265Dec.initialize() if err != nil { panic(err) } - defer frameDec.close() + defer h265Dec.close() // if VPS, SPS and PPS are present into the SDP, send them to the decoder if forma.VPS != nil { - frameDec.decode(forma.VPS) + h265Dec.decode([][]byte{forma.VPS}) } if forma.SPS != nil { - frameDec.decode(forma.SPS) + h265Dec.decode([][]byte{forma.SPS}) } if forma.PPS != nil { - frameDec.decode(forma.PPS) + h265Dec.decode([][]byte{forma.PPS}) } // setup a single media @@ -104,7 +104,7 @@ func main() { panic(err) } - iframeReceived := false + firstRandomAccess := false saveCount := 0 // called when a RTP packet arrives @@ -118,38 +118,34 @@ func main() { return } - // wait for an I-frame - if !iframeReceived { - if !h265.IsRandomAccess(au) { - log.Printf("waiting for an I-frame") - return - } - iframeReceived = true + // wait for a random access unit + if !firstRandomAccess && !h265.IsRandomAccess(au) { + log.Printf("waiting for a random access unit") + return + } + firstRandomAccess = true + + // convert H265 access units into RGBA frames + img, err := h265Dec.decode(au) + if err != nil { + panic(err) } - for _, nalu := range au { - // convert NALUs into RGBA frames - img, err := frameDec.decode(nalu) - if err != nil { - panic(err) - } + // wait for a frame + if img == nil { + return + } - // wait for a frame - if img == nil { - continue - } + // convert frame to JPEG and save to file + err = saveToFile(img) + if err != nil { + panic(err) + } - // convert frame to JPEG and save to file - err = saveToFile(img) - if err != nil { - panic(err) - } - - saveCount++ - if saveCount == 5 { - log.Printf("saved 5 images, exiting") - os.Exit(1) - } + saveCount++ + if saveCount == 5 { + log.Printf("saved 5 images, exiting") + os.Exit(1) } }) diff --git a/examples/client-play-format-h265/h265_decoder.go b/examples/client-play-format-h265/h265_decoder.go index 9bcbaa50..3b046cd8 100644 --- a/examples/client-play-format-h265/h265_decoder.go +++ b/examples/client-play-format-h265/h265_decoder.go @@ -3,7 +3,10 @@ package main import ( "fmt" "image" + "runtime" "unsafe" + + "github.com/bluenviron/mediacommon/v2/pkg/codecs/h264" ) // #cgo pkg-config: libavcodec libavutil libswscale @@ -22,11 +25,11 @@ func frameLineSize(frame *C.AVFrame) *C.int { // h265Decoder is a wrapper around FFmpeg's H265 decoder. type h265Decoder struct { - codecCtx *C.AVCodecContext - srcFrame *C.AVFrame - swsCtx *C.struct_SwsContext - dstFrame *C.AVFrame - dstFramePtr []uint8 + codecCtx *C.AVCodecContext + yuv420Frame *C.AVFrame + rgbaFrame *C.AVFrame + rgbaFramePtr []uint8 + swsCtx *C.struct_SwsContext } // initialize initializes a h265Decoder. 
@@ -47,8 +50,8 @@ func (d *h265Decoder) initialize() error { return fmt.Errorf("avcodec_open2() failed") } - d.srcFrame = C.av_frame_alloc() - if d.srcFrame == nil { + d.yuv420Frame = C.av_frame_alloc() + if d.yuv420Frame == nil { C.avcodec_close(d.codecCtx) return fmt.Errorf("av_frame_alloc() failed") } @@ -58,80 +61,93 @@ func (d *h265Decoder) initialize() error { // close closes the decoder. func (d *h265Decoder) close() { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - if d.swsCtx != nil { C.sws_freeContext(d.swsCtx) } - C.av_frame_free(&d.srcFrame) + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + C.av_frame_free(&d.yuv420Frame) C.avcodec_close(d.codecCtx) } -func (d *h265Decoder) decode(nalu []byte) (image.Image, error) { - nalu = append([]uint8{0x00, 0x00, 0x00, 0x01}, []uint8(nalu)...) +// decode decodes a RGBA image from H265. +func (d *h265Decoder) decode(au [][]byte) (*image.RGBA, error) { + // encode access unit into Annex-B + annexb, err := h264.AnnexB(au).Marshal() + if err != nil { + panic(err) + } - // send NALU to decoder - var avPacket C.AVPacket - avPacket.data = (*C.uint8_t)(C.CBytes(nalu)) - defer C.free(unsafe.Pointer(avPacket.data)) - avPacket.size = C.int(len(nalu)) - res := C.avcodec_send_packet(d.codecCtx, &avPacket) + // send access unit to decoder + var pkt C.AVPacket + ptr := &annexb[0] + var p runtime.Pinner + p.Pin(ptr) + pkt.data = (*C.uint8_t)(ptr) + pkt.size = (C.int)(len(annexb)) + res := C.avcodec_send_packet(d.codecCtx, &pkt) + p.Unpin() if res < 0 { return nil, nil } // receive frame if available - res = C.avcodec_receive_frame(d.codecCtx, d.srcFrame) + res = C.avcodec_receive_frame(d.codecCtx, d.yuv420Frame) if res < 0 { return nil, nil } // if frame size has changed, allocate needed objects - if d.dstFrame == nil || d.dstFrame.width != d.srcFrame.width || d.dstFrame.height != d.srcFrame.height { - if d.dstFrame != nil { - C.av_frame_free(&d.dstFrame) - } - + if d.rgbaFrame == nil || d.rgbaFrame.width != d.yuv420Frame.width || d.rgbaFrame.height != d.yuv420Frame.height { if d.swsCtx != nil { C.sws_freeContext(d.swsCtx) } - d.dstFrame = C.av_frame_alloc() - d.dstFrame.format = C.AV_PIX_FMT_RGBA - d.dstFrame.width = d.srcFrame.width - d.dstFrame.height = d.srcFrame.height - d.dstFrame.color_range = C.AVCOL_RANGE_JPEG - res = C.av_frame_get_buffer(d.dstFrame, 1) + if d.rgbaFrame != nil { + C.av_frame_free(&d.rgbaFrame) + } + + d.rgbaFrame = C.av_frame_alloc() + if d.rgbaFrame == nil { + return nil, fmt.Errorf("av_frame_alloc() failed") + } + + d.rgbaFrame.format = C.AV_PIX_FMT_RGBA + d.rgbaFrame.width = d.yuv420Frame.width + d.rgbaFrame.height = d.yuv420Frame.height + d.rgbaFrame.color_range = C.AVCOL_RANGE_JPEG + + res = C.av_frame_get_buffer(d.rgbaFrame, 1) if res < 0 { return nil, fmt.Errorf("av_frame_get_buffer() failed") } - d.swsCtx = C.sws_getContext(d.srcFrame.width, d.srcFrame.height, C.AV_PIX_FMT_YUV420P, - d.dstFrame.width, d.dstFrame.height, (int32)(d.dstFrame.format), C.SWS_BILINEAR, nil, nil, nil) + d.swsCtx = C.sws_getContext(d.yuv420Frame.width, d.yuv420Frame.height, int32(d.yuv420Frame.format), + d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format), C.SWS_BILINEAR, nil, nil, nil) if d.swsCtx == nil { return nil, fmt.Errorf("sws_getContext() failed") } - dstFrameSize := C.av_image_get_buffer_size((int32)(d.dstFrame.format), d.dstFrame.width, d.dstFrame.height, 1) - d.dstFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.dstFrame.data[0]))[:dstFrameSize:dstFrameSize] + rgbaFrameSize := 
C.av_image_get_buffer_size((int32)(d.rgbaFrame.format), d.rgbaFrame.width, d.rgbaFrame.height, 1) + d.rgbaFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.rgbaFrame.data[0]))[:rgbaFrameSize:rgbaFrameSize] } // convert color space from YUV420 to RGBA - res = C.sws_scale(d.swsCtx, frameData(d.srcFrame), frameLineSize(d.srcFrame), - 0, d.srcFrame.height, frameData(d.dstFrame), frameLineSize(d.dstFrame)) + res = C.sws_scale(d.swsCtx, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame), + 0, d.yuv420Frame.height, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame)) if res < 0 { return nil, fmt.Errorf("sws_scale() failed") } - // embed frame into an image.Image + // embed frame into an image.RGBA return &image.RGBA{ - Pix: d.dstFramePtr, - Stride: 4 * (int)(d.dstFrame.width), + Pix: d.rgbaFramePtr, + Stride: 4 * (int)(d.rgbaFrame.width), Rect: image.Rectangle{ - Max: image.Point{(int)(d.dstFrame.width), (int)(d.dstFrame.height)}, + Max: image.Point{(int)(d.rgbaFrame.width), (int)(d.rgbaFrame.height)}, }, }, nil } diff --git a/examples/client-play-format-h265/main.go b/examples/client-play-format-h265/main.go index e5888339..48423585 100644 --- a/examples/client-play-format-h265/main.go +++ b/examples/client-play-format-h265/main.go @@ -9,13 +9,14 @@ import ( "github.com/bluenviron/gortsplib/v4/pkg/base" "github.com/bluenviron/gortsplib/v4/pkg/format" "github.com/bluenviron/gortsplib/v4/pkg/format/rtph265" + "github.com/bluenviron/mediacommon/v2/pkg/codecs/h265" "github.com/pion/rtp" ) // This example shows how to // 1. connect to a RTSP server // 2. check if there's an H265 format -// 3. decode the H265 format into RGBA frames +// 3. decode the H265 stream into RGBA frames // This example requires the FFmpeg libraries, that can be installed with this command: // apt install -y libavformat-dev libswscale-dev gcc pkg-config @@ -55,23 +56,23 @@ func main() { panic(err) } - // setup H265 -> raw frames decoder - frameDec := &h265Decoder{} - err = frameDec.initialize() + // setup H265 -> RGBA decoder + h265Dec := &h265Decoder{} + err = h265Dec.initialize() if err != nil { panic(err) } - defer frameDec.close() + defer h265Dec.close() // if VPS, SPS and PPS are present into the SDP, send them to the decoder if forma.VPS != nil { - frameDec.decode(forma.VPS) + h265Dec.decode([][]byte{forma.VPS}) } if forma.SPS != nil { - frameDec.decode(forma.SPS) + h265Dec.decode([][]byte{forma.SPS}) } if forma.PPS != nil { - frameDec.decode(forma.PPS) + h265Dec.decode([][]byte{forma.PPS}) } // setup a single media @@ -80,6 +81,8 @@ func main() { panic(err) } + firstRandomAccess := false + // called when a RTP packet arrives c.OnPacketRTP(medi, forma, func(pkt *rtp.Packet) { // decode timestamp @@ -98,20 +101,25 @@ func main() { return } - for _, nalu := range au { - // convert NALUs into RGBA frames - img, err := frameDec.decode(nalu) - if err != nil { - panic(err) - } - - // wait for a frame - if img == nil { - continue - } - - log.Printf("decoded frame with PTS %v and size %v", pts, img.Bounds().Max) + // wait for a random access unit + if !firstRandomAccess && !h265.IsRandomAccess(au) { + log.Printf("waiting for a random access unit") + return } + firstRandomAccess = true + + // convert H265 access units into RGBA frames + img, err := h265Dec.decode(au) + if err != nil { + panic(err) + } + + // wait for a frame + if img == nil { + return + } + + log.Printf("decoded frame with PTS %v and size %v", pts, img.Bounds().Max) }) // start playing diff --git a/examples/client-record-format-av1/av1_encoder.go 
b/examples/client-record-format-av1/av1_encoder.go
new file mode 100644
index 00000000..3b5f5dcb
--- /dev/null
+++ b/examples/client-record-format-av1/av1_encoder.go
@@ -0,0 +1,186 @@
+package main
+
+import (
+	"fmt"
+	"image"
+	"log"
+	"unsafe"
+
+	"github.com/bluenviron/mediacommon/v2/pkg/codecs/av1"
+)
+
+// #cgo pkg-config: libavcodec libavutil libswscale
+// #include <libavcodec/avcodec.h>
+// #include <libavutil/opt.h>
+// #include <libswscale/swscale.h>
+import "C"
+
+func frameData(frame *C.AVFrame) **C.uint8_t {
+	return (**C.uint8_t)(unsafe.Pointer(&frame.data[0]))
+}
+
+func frameLineSize(frame *C.AVFrame) *C.int {
+	return (*C.int)(unsafe.Pointer(&frame.linesize[0]))
+}
+
+// av1Encoder is a wrapper around FFmpeg's AV1 encoder.
+type av1Encoder struct {
+	Width  int
+	Height int
+	FPS    int
+
+	codecCtx    *C.AVCodecContext
+	rgbaFrame   *C.AVFrame
+	yuv420Frame *C.AVFrame
+	swsCtx      *C.struct_SwsContext
+	pkt         *C.AVPacket
+}
+
+// initialize initializes an av1Encoder.
+func (d *av1Encoder) initialize() error {
+	// prefer svtav1 over libaom-av1
+	var codec *C.AVCodec
+	for _, lib := range []string{"libsvtav1", "libaom-av1"} {
+		str := C.CString(lib)
+		defer C.free(unsafe.Pointer(str))
+		codec = C.avcodec_find_encoder_by_name(str)
+		if codec != nil {
+			if lib == "libaom-av1" {
+				log.Println("WARNING: using libaom-av1 - encoding will be very slow. Compile FFmpeg against libsvtav1 to speed things up.")
+			}
+			break
+		}
+	}
+	if codec == nil {
+		return fmt.Errorf("avcodec_find_encoder_by_name() failed")
+	}
+
+	d.codecCtx = C.avcodec_alloc_context3(codec)
+	if d.codecCtx == nil {
+		return fmt.Errorf("avcodec_alloc_context3() failed")
+	}
+
+	key := C.CString("preset")
+	defer C.free(unsafe.Pointer(key))
+	val := C.CString("8")
+	defer C.free(unsafe.Pointer(val))
+	C.av_opt_set(d.codecCtx.priv_data, key, val, 0)
+
+	d.codecCtx.pix_fmt = C.AV_PIX_FMT_YUV420P
+	d.codecCtx.width = (C.int)(d.Width)
+	d.codecCtx.height = (C.int)(d.Height)
+	d.codecCtx.time_base.num = 1
+	d.codecCtx.time_base.den = (C.int)(d.FPS)
+	d.codecCtx.gop_size = 10
+	d.codecCtx.max_b_frames = 0
+	d.codecCtx.bit_rate = 600000
+
+	res := C.avcodec_open2(d.codecCtx, codec, nil)
+	if res < 0 {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("avcodec_open2() failed")
+	}
+
+	d.rgbaFrame = C.av_frame_alloc()
+	if d.rgbaFrame == nil {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	d.rgbaFrame.format = C.AV_PIX_FMT_RGBA
+	d.rgbaFrame.width = d.codecCtx.width
+	d.rgbaFrame.height = d.codecCtx.height
+
+	res = C.av_frame_get_buffer(d.rgbaFrame, 0)
+	if res < 0 {
+		return fmt.Errorf("av_frame_get_buffer() failed")
+	}
+
+	d.yuv420Frame = C.av_frame_alloc()
+	if d.yuv420Frame == nil {
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	d.yuv420Frame.format = C.AV_PIX_FMT_YUV420P
+	d.yuv420Frame.width = d.codecCtx.width
+	d.yuv420Frame.height = d.codecCtx.height
+
+	res = C.av_frame_get_buffer(d.yuv420Frame, 0)
+	if res < 0 {
+		return fmt.Errorf("av_frame_get_buffer() failed")
+	}
+
+	d.swsCtx = C.sws_getContext(d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format),
+		d.yuv420Frame.width, d.yuv420Frame.height, (int32)(d.yuv420Frame.format), C.SWS_BILINEAR, nil, nil, nil)
+	if d.swsCtx == nil {
+		C.av_frame_free(&d.yuv420Frame)
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("sws_getContext() failed")
+	}
+
+	d.pkt = C.av_packet_alloc()
+	if d.pkt == nil {
+		C.av_packet_free(&d.pkt)
+		C.av_frame_free(&d.yuv420Frame)
+		C.av_frame_free(&d.rgbaFrame)
+		
C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_packet_alloc() failed")
+	}
+
+	return nil
+}
+
+// close closes the encoder.
+func (d *av1Encoder) close() {
+	C.av_packet_free(&d.pkt)
+	C.sws_freeContext(d.swsCtx)
+	C.av_frame_free(&d.yuv420Frame)
+	C.av_frame_free(&d.rgbaFrame)
+	C.avcodec_close(d.codecCtx)
+}
+
+// encode encodes an RGBA image into AV1.
+func (d *av1Encoder) encode(img *image.RGBA, pts int64) ([][]byte, int64, error) {
+	// pass image pointer to frame
+	d.rgbaFrame.data[0] = (*C.uint8_t)(&img.Pix[0])
+
+	// convert color space from RGBA to YUV420
+	res := C.sws_scale(d.swsCtx, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame),
+		0, d.rgbaFrame.height, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame))
+	if res < 0 {
+		return nil, 0, fmt.Errorf("sws_scale() failed")
+	}
+
+	// send frame to the encoder
+	d.yuv420Frame.pts = (C.int64_t)(pts)
+	res = C.avcodec_send_frame(d.codecCtx, d.yuv420Frame)
+	if res < 0 {
+		return nil, 0, fmt.Errorf("avcodec_send_frame() failed")
+	}
+
+	// wait for result
+	res = C.avcodec_receive_packet(d.codecCtx, d.pkt)
+	if res == -C.EAGAIN {
+		return nil, 0, nil
+	}
+	if res < 0 {
+		return nil, 0, fmt.Errorf("avcodec_receive_packet() failed")
+	}
+
+	// perform a deep copy of the data before unreferencing the packet
+	data := C.GoBytes(unsafe.Pointer(d.pkt.data), d.pkt.size)
+	pts = (int64)(d.pkt.pts)
+	C.av_packet_unref(d.pkt)
+
+	// split the bitstream into OBUs
+	var bs av1.Bitstream
+	err := bs.Unmarshal(data)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	return bs, pts, nil
+}
diff --git a/examples/client-record-format-mjpeg-from-image/main.go b/examples/client-record-format-av1/main.go
similarity index 58%
rename from examples/client-record-format-mjpeg-from-image/main.go
rename to examples/client-record-format-av1/main.go
index 236ef013..7017eb26 100644
--- a/examples/client-record-format-mjpeg-from-image/main.go
+++ b/examples/client-record-format-av1/main.go
@@ -1,11 +1,12 @@
+//go:build cgo
+
 package main
 
 import (
-	"bytes"
 	"crypto/rand"
 	"image"
 	"image/color"
-	"image/jpeg"
+	"log"
 	"time"
 
 	"github.com/bluenviron/gortsplib/v4"
@@ -14,11 +15,14 @@ import (
 )
 
 // This example shows how to
-// 1. connect to a RTSP server, announce a M-JPEG format
-// 2. generate an image
-// 3. encode the image with JPEG
-// 4. generate RTP packets from the JPEG image
-// 5. write packets to the server
+// 1. connect to a RTSP server, announce an AV1 format
+// 2. generate dummy RGBA images
+// 3. encode images with AV1
+// 4. generate RTP packets from AV1
+// 5. 
write RTP packets to the server
+
+// This example requires the FFmpeg libraries, which can be installed with this command:
+// apt install -y libavformat-dev libswscale-dev gcc pkg-config
 
 func multiplyAndDivide(v, m, d int64) int64 {
 	secs := v / d
@@ -35,7 +39,7 @@ func randUint32() (uint32, error) {
 	return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]), nil
 }
 
-func createRandomImage(i int) *image.RGBA {
+func createDummyImage(i int) *image.RGBA {
 	img := image.NewRGBA(image.Rect(0, 0, 640, 480))
 
 	var cl color.RGBA
@@ -58,8 +62,10 @@
 }
 
 func main() {
-	// create a description that contains a M-JPEG format
-	forma := &format.MJPEG{}
+	// create a stream description that contains an AV1 format
+	forma := &format.AV1{
+		PayloadTyp: 96,
+	}
 	desc := &description.Session{
 		Medias: []*description.Media{{
 			Type: description.MediaTypeVideo,
@@ -75,7 +81,19 @@ func main() {
 	}
 	defer c.Close()
 
-	// setup JPEG -> RTP encoder
+	// setup RGBA -> AV1 encoder
+	av1enc := &av1Encoder{
+		Width:  640,
+		Height: 480,
+		FPS:    5,
+	}
+	err = av1enc.initialize()
+	if err != nil {
+		panic(err)
+	}
+	defer av1enc.close()
+
+	// setup AV1 -> RTP encoder
 	rtpEnc, err := forma.CreateEncoder()
 	if err != nil {
 		panic(err)
@@ -95,29 +113,35 @@ func main() {
 	i := 0
 
 	for range ticker.C {
-		// create a random image
-		img := createRandomImage(i)
+		// create a dummy image
+		img := createDummyImage(i)
 		i = (i + 1) % 3
 
-		// encode the image with JPEG
-		var buf bytes.Buffer
-		err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 80})
-		if err != nil {
-			panic(err)
-		}
-
-		// generate RTP packets from the JPEG image
-		pkts, err := rtpEnc.Encode(buf.Bytes())
-		if err != nil {
-			panic(err)
-		}
-
 		// get current timestamp
-		pts := uint32(multiplyAndDivide(int64(time.Since(start)), int64(forma.ClockRate()), int64(time.Second)))
+		pts := multiplyAndDivide(int64(time.Since(start)), int64(forma.ClockRate()), int64(time.Second))
 
-		// write packets to the server
+		// encode the image with AV1
+		au, pts, err := av1enc.encode(img, pts)
+		if err != nil {
+			panic(err)
+		}
+
+		// wait for an AV1 access unit
+		if au == nil {
+			continue
+		}
+
+		// generate RTP packets from the AV1 access unit
+		pkts, err := rtpEnc.Encode(au)
+		if err != nil {
+			panic(err)
+		}
+
+		log.Printf("writing RTP packets with PTS=%d, au=%d, pkts=%d", pts, len(au), len(pkts))
+
+		// write RTP packets to the server
 		for _, pkt := range pkts {
-			pkt.Timestamp = randomStart + pts
+			pkt.Timestamp = uint32(int64(randomStart) + pts)
 
 			err = c.WritePacketRTP(desc.Medias[0], pkt)
 			if err != nil {
diff --git a/examples/client-record-format-h264-from-disk/main.go b/examples/client-record-format-h264-from-disk/main.go
index 6f30d488..e196ac82 100644
--- a/examples/client-record-format-h264-from-disk/main.go
+++ b/examples/client-record-format-h264-from-disk/main.go
@@ -17,7 +17,7 @@ import (
 // 1. read H264 frames from a video file in MPEG-TS format
 // 2. connect to a RTSP server, announce a H264 format
 // 3. wrap frames into RTP packets
-// 4. write packets to the server
+// 4. 
write RTP packets to the server
 
 func findTrack(r *mpegts.Reader) (*mpegts.Track, error) {
 	for _, track := range r.Tracks() {
@@ -46,7 +46,8 @@ func main() {
 	defer f.Close()
 
 	// setup MPEG-TS parser
-	r, err := mpegts.NewReader(f)
+	r := &mpegts.Reader{R: f}
+	err = r.Initialize()
 	if err != nil {
 		panic(err)
 	}
@@ -88,7 +89,9 @@ func main() {
 		panic(err)
 	}
 
-	timeDecoder := mpegts.NewTimeDecoder()
+	timeDecoder := mpegts.TimeDecoder{}
+	timeDecoder.Initialize()
+
 	var firstDTS *int64
 	var startTime time.Time
 
@@ -120,10 +123,10 @@ func main() {
 		// we don't have to perform any conversion
 		// since H264 clock rate is the same in both MPEG-TS and RTSP
 		for _, packet := range packets {
-			packet.Timestamp = randomStart + uint32(pts)
+			packet.Timestamp = uint32(int64(randomStart) + pts)
 		}
 
-		// write packets to the server
+		// write RTP packets to the server
 		for _, packet := range packets {
 			err := c.WritePacketRTP(desc.Medias[0], packet)
 			if err != nil {
diff --git a/examples/client-record-format-h264/h264_encoder.go b/examples/client-record-format-h264/h264_encoder.go
new file mode 100644
index 00000000..6d563d70
--- /dev/null
+++ b/examples/client-record-format-h264/h264_encoder.go
@@ -0,0 +1,179 @@
+package main
+
+import (
+	"fmt"
+	"image"
+	"unsafe"
+
+	"github.com/bluenviron/mediacommon/v2/pkg/codecs/h264"
+)
+
+// #cgo pkg-config: libavcodec libavutil libswscale
+// #include <libavcodec/avcodec.h>
+// #include <libavutil/opt.h>
+// #include <libswscale/swscale.h>
+import "C"
+
+func frameData(frame *C.AVFrame) **C.uint8_t {
+	return (**C.uint8_t)(unsafe.Pointer(&frame.data[0]))
+}
+
+func frameLineSize(frame *C.AVFrame) *C.int {
+	return (*C.int)(unsafe.Pointer(&frame.linesize[0]))
+}
+
+// h264Encoder is a wrapper around FFmpeg's H264 encoder.
+type h264Encoder struct {
+	Width  int
+	Height int
+	FPS    int
+
+	codecCtx    *C.AVCodecContext
+	rgbaFrame   *C.AVFrame
+	yuv420Frame *C.AVFrame
+	swsCtx      *C.struct_SwsContext
+	pkt         *C.AVPacket
+}
+
+// initialize initializes an h264Encoder.
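+// It sets up, in order: the codec context (with the zerolatency and ultrafast
+// private options), a reusable RGBA input frame, a reusable YUV420P frame,
+// a swscale context for the RGBA -> YUV420P conversion, and the output packet.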
+func (d *h264Encoder) initialize() error {
+	codec := C.avcodec_find_encoder(C.AV_CODEC_ID_H264)
+	if codec == nil {
+		return fmt.Errorf("avcodec_find_encoder() failed")
+	}
+
+	d.codecCtx = C.avcodec_alloc_context3(codec)
+	if d.codecCtx == nil {
+		return fmt.Errorf("avcodec_alloc_context3() failed")
+	}
+
+	key := C.CString("tune")
+	defer C.free(unsafe.Pointer(key))
+	val := C.CString("zerolatency")
+	defer C.free(unsafe.Pointer(val))
+	C.av_opt_set(d.codecCtx.priv_data, key, val, 0)
+
+	key = C.CString("preset")
+	defer C.free(unsafe.Pointer(key))
+	val = C.CString("ultrafast")
+	defer C.free(unsafe.Pointer(val))
+	C.av_opt_set(d.codecCtx.priv_data, key, val, 0)
+
+	d.codecCtx.pix_fmt = C.AV_PIX_FMT_YUV420P
+	d.codecCtx.width = (C.int)(d.Width)
+	d.codecCtx.height = (C.int)(d.Height)
+	d.codecCtx.time_base.num = 1
+	d.codecCtx.time_base.den = (C.int)(d.FPS)
+	d.codecCtx.gop_size = 10
+	d.codecCtx.max_b_frames = 0
+	d.codecCtx.bit_rate = 600000
+
+	res := C.avcodec_open2(d.codecCtx, codec, nil)
+	if res < 0 {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("avcodec_open2() failed")
+	}
+
+	d.rgbaFrame = C.av_frame_alloc()
+	if d.rgbaFrame == nil {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	d.rgbaFrame.format = C.AV_PIX_FMT_RGBA
+	d.rgbaFrame.width = d.codecCtx.width
+	d.rgbaFrame.height = d.codecCtx.height
+
+	res = C.av_frame_get_buffer(d.rgbaFrame, 0)
+	if res < 0 {
+		return fmt.Errorf("av_frame_get_buffer() failed")
+	}
+
+	d.yuv420Frame = C.av_frame_alloc()
+	if d.yuv420Frame == nil {
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	d.yuv420Frame.format = C.AV_PIX_FMT_YUV420P
+	d.yuv420Frame.width = d.codecCtx.width
+	d.yuv420Frame.height = d.codecCtx.height
+
+	res = C.av_frame_get_buffer(d.yuv420Frame, 0)
+	if res < 0 {
+		return fmt.Errorf("av_frame_get_buffer() failed")
+	}
+
+	d.swsCtx = C.sws_getContext(d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format),
+		d.yuv420Frame.width, d.yuv420Frame.height, (int32)(d.yuv420Frame.format), C.SWS_BILINEAR, nil, nil, nil)
+	if d.swsCtx == nil {
+		C.av_frame_free(&d.yuv420Frame)
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("sws_getContext() failed")
+	}
+
+	d.pkt = C.av_packet_alloc()
+	if d.pkt == nil {
+		C.av_packet_free(&d.pkt)
+		C.av_frame_free(&d.yuv420Frame)
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_packet_alloc() failed")
+	}
+
+	return nil
+}
+
+// close closes the encoder.
+func (d *h264Encoder) close() {
+	C.av_packet_free(&d.pkt)
+	C.sws_freeContext(d.swsCtx)
+	C.av_frame_free(&d.yuv420Frame)
+	C.av_frame_free(&d.rgbaFrame)
+	C.avcodec_close(d.codecCtx)
+}
+
+// encode encodes an RGBA image into H264.
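+// It returns the access unit as a slice of NALUs, together with the PTS of
+// the encoded packet; the access unit is nil while the encoder is still
+// buffering input and has no output ready yet (EAGAIN).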
+func (d *h264Encoder) encode(img *image.RGBA, pts int64) ([][]byte, int64, error) {
+	// pass image pointer to frame
+	d.rgbaFrame.data[0] = (*C.uint8_t)(&img.Pix[0])
+
+	// convert color space from RGBA to YUV420
+	res := C.sws_scale(d.swsCtx, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame),
+		0, d.rgbaFrame.height, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame))
+	if res < 0 {
+		return nil, 0, fmt.Errorf("sws_scale() failed")
+	}
+
+	// send frame to the encoder
+	d.yuv420Frame.pts = (C.int64_t)(pts)
+	res = C.avcodec_send_frame(d.codecCtx, d.yuv420Frame)
+	if res < 0 {
+		return nil, 0, fmt.Errorf("avcodec_send_frame() failed")
+	}
+
+	// wait for result
+	res = C.avcodec_receive_packet(d.codecCtx, d.pkt)
+	if res == -C.EAGAIN {
+		return nil, 0, nil
+	}
+	if res < 0 {
+		return nil, 0, fmt.Errorf("avcodec_receive_packet() failed")
+	}
+
+	// perform a deep copy of the data before unreferencing the packet
+	data := C.GoBytes(unsafe.Pointer(d.pkt.data), d.pkt.size)
+	pts = (int64)(d.pkt.pts)
+	C.av_packet_unref(d.pkt)
+
+	// parse the Annex-B stream into NALUs
+	var au h264.AnnexB
+	err := au.Unmarshal(data)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	return au, pts, nil
+}
diff --git a/examples/client-record-format-h264/main.go b/examples/client-record-format-h264/main.go
index 37fed695..9166d72a 100644
--- a/examples/client-record-format-h264/main.go
+++ b/examples/client-record-format-h264/main.go
@@ -1,79 +1,153 @@
+//go:build cgo
+
 package main
 
 import (
+	"crypto/rand"
+	"image"
+	"image/color"
 	"log"
-	"net"
+	"time"
 
 	"github.com/bluenviron/gortsplib/v4"
 	"github.com/bluenviron/gortsplib/v4/pkg/description"
 	"github.com/bluenviron/gortsplib/v4/pkg/format"
-	"github.com/pion/rtp"
 )
 
 // This example shows how to
-// 1. generate a H264 stream and RTP packets with GStreamer
-// 2. connect to a RTSP server, announce an H264 format
-// 3. route the packets from GStreamer to the server
+// 1. connect to a RTSP server, announce an H264 format
+// 2. generate dummy RGBA images
+// 3. encode images with H264
+// 4. generate RTP packets from H264
+// 5. write RTP packets to the server
+
+// This example requires the FFmpeg libraries, which can be installed with this command:
+// apt install -y libavformat-dev libswscale-dev gcc pkg-config
+
+func multiplyAndDivide(v, m, d int64) int64 {
+	secs := v / d
+	dec := v % d
+	return (secs*m + dec*m/d)
+}
+
+func randUint32() (uint32, error) {
+	var b [4]byte
+	_, err := rand.Read(b[:])
+	if err != nil {
+		return 0, err
+	}
+	return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]), nil
+}
+
+func createDummyImage(i int) *image.RGBA {
+	img := image.NewRGBA(image.Rect(0, 0, 640, 480))
+
+	var cl color.RGBA
+	switch i {
+	case 0:
+		cl = color.RGBA{255, 0, 0, 0}
+	case 1:
+		cl = color.RGBA{0, 255, 0, 0}
+	case 2:
+		cl = color.RGBA{0, 0, 255, 0}
+	}
+
+	for y := 0; y < img.Rect.Dy(); y++ {
+		for x := 0; x < img.Rect.Dx(); x++ {
+			img.SetRGBA(x, y, cl)
+		}
+	}
+
+	return img
+}
 
 func main() {
-	// open a listener to receive RTP/H264 packets
-	pc, err := net.ListenPacket("udp", "localhost:9000")
-	if err != nil {
-		panic(err)
-	}
-	defer pc.Close()
-
-	log.Println("Waiting for a RTP/H264 stream on UDP port 9000 - you can send one with GStreamer:\n" +
-		"gst-launch-1.0 videotestsrc ! video/x-raw,width=1920,height=1080" +
-		" ! x264enc speed-preset=ultrafast bitrate=3000 key-int-max=60" +
-		" ! video/x-h264,profile=high" +
-		" ! rtph264pay ! 
udpsink host=127.0.0.1 port=9000")
-
-	// wait for first packet
-	buf := make([]byte, 2048)
-	n, _, err := pc.ReadFrom(buf)
-	if err != nil {
-		panic(err)
-	}
-	log.Println("stream connected")
-
 	// create a stream description that contains a H264 format
+	forma := &format.H264{
+		PayloadTyp:        96,
+		PacketizationMode: 1,
+	}
 	desc := &description.Session{
 		Medias: []*description.Media{{
-			Type: description.MediaTypeVideo,
-			Formats: []format.Format{&format.H264{
-				PayloadTyp:        96,
-				PacketizationMode: 1,
-			}},
+			Type:    description.MediaTypeVideo,
+			Formats: []format.Format{forma},
 		}},
 	}
 
-	// connect to the server and start recording
+	// connect to the server, announce the format and start recording
 	c := gortsplib.Client{}
 
-	err = c.StartRecording("rtsp://myuser:mypass@localhost:8554/mystream", desc)
+	err := c.StartRecording("rtsp://myuser:mypass@localhost:8554/mystream", desc)
 	if err != nil {
 		panic(err)
 	}
 	defer c.Close()
 
-	var pkt rtp.Packet
-	for {
-		// parse RTP packet
-		err = pkt.Unmarshal(buf[:n])
+	// setup RGBA -> H264 encoder
+	h264enc := &h264Encoder{
+		Width:  640,
+		Height: 480,
+		FPS:    5,
+	}
+	err = h264enc.initialize()
+	if err != nil {
+		panic(err)
+	}
+	defer h264enc.close()
+
+	// setup H264 -> RTP encoder
+	rtpEnc, err := forma.CreateEncoder()
+	if err != nil {
+		panic(err)
+	}
+
+	start := time.Now()
+
+	randomStart, err := randUint32()
+	if err != nil {
+		panic(err)
+	}
+
+	// setup a ticker to sleep between frames
+	ticker := time.NewTicker(200 * time.Millisecond)
+	defer ticker.Stop()
+
+	i := 0
+
+	for range ticker.C {
+		// create a dummy image
+		img := createDummyImage(i)
+		i = (i + 1) % 3
+
+		// get current timestamp
+		pts := multiplyAndDivide(int64(time.Since(start)), int64(forma.ClockRate()), int64(time.Second))
+
+		// encode the image with H264
+		au, pts, err := h264enc.encode(img, pts)
 		if err != nil {
 			panic(err)
 		}
 
-		// route RTP packet to the server
-		err = c.WritePacketRTP(desc.Medias[0], &pkt)
+		// wait for an H264 access unit
+		if au == nil {
+			continue
+		}
+
+		// generate RTP packets from the H264 access unit
+		pkts, err := rtpEnc.Encode(au)
 		if err != nil {
 			panic(err)
 		}
 
-		// read another RTP packet from source
-		n, _, err = pc.ReadFrom(buf)
-		if err != nil {
-			panic(err)
+		log.Printf("writing RTP packets with PTS=%d, au=%d, pkts=%d", pts, len(au), len(pkts))
+
+		// write RTP packets to the server
+		for _, pkt := range pkts {
+			pkt.Timestamp = uint32(int64(randomStart) + pts)
+
+			err = c.WritePacketRTP(desc.Medias[0], pkt)
+			if err != nil {
+				panic(err)
+			}
 		}
 	}
 }
diff --git a/examples/client-record-format-h265/h265_encoder.go b/examples/client-record-format-h265/h265_encoder.go
new file mode 100644
index 00000000..f90f48cb
--- /dev/null
+++ b/examples/client-record-format-h265/h265_encoder.go
@@ -0,0 +1,179 @@
+package main
+
+import (
+	"fmt"
+	"image"
+	"unsafe"
+
+	"github.com/bluenviron/mediacommon/v2/pkg/codecs/h264"
+)
+
+// #cgo pkg-config: libavcodec libavutil libswscale
+// #include <libavcodec/avcodec.h>
+// #include <libavutil/opt.h>
+// #include <libswscale/swscale.h>
+import "C"
+
+func frameData(frame *C.AVFrame) **C.uint8_t {
+	return (**C.uint8_t)(unsafe.Pointer(&frame.data[0]))
+}
+
+func frameLineSize(frame *C.AVFrame) *C.int {
+	return (*C.int)(unsafe.Pointer(&frame.linesize[0]))
+}
+
+// h265Encoder is a wrapper around FFmpeg's H265 encoder.
+type h265Encoder struct {
+	Width  int
+	Height int
+	FPS    int
+
+	codecCtx    *C.AVCodecContext
+	rgbaFrame   *C.AVFrame
+	yuv420Frame *C.AVFrame
+	swsCtx      *C.struct_SwsContext
+	pkt         *C.AVPacket
+}
+
+// initialize initializes an h265Encoder.
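+// The setup mirrors h264Encoder.initialize(), but opens FFmpeg's default
+// HEVC encoder (typically libx265) through AV_CODEC_ID_H265.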
+func (d *h265Encoder) initialize() error {
+	codec := C.avcodec_find_encoder(C.AV_CODEC_ID_H265)
+	if codec == nil {
+		return fmt.Errorf("avcodec_find_encoder() failed")
+	}
+
+	d.codecCtx = C.avcodec_alloc_context3(codec)
+	if d.codecCtx == nil {
+		return fmt.Errorf("avcodec_alloc_context3() failed")
+	}
+
+	key := C.CString("tune")
+	defer C.free(unsafe.Pointer(key))
+	val := C.CString("zerolatency")
+	defer C.free(unsafe.Pointer(val))
+	C.av_opt_set(d.codecCtx.priv_data, key, val, 0)
+
+	key = C.CString("preset")
+	defer C.free(unsafe.Pointer(key))
+	val = C.CString("ultrafast")
+	defer C.free(unsafe.Pointer(val))
+	C.av_opt_set(d.codecCtx.priv_data, key, val, 0)
+
+	d.codecCtx.pix_fmt = C.AV_PIX_FMT_YUV420P
+	d.codecCtx.width = (C.int)(d.Width)
+	d.codecCtx.height = (C.int)(d.Height)
+	d.codecCtx.time_base.num = 1
+	d.codecCtx.time_base.den = (C.int)(d.FPS)
+	d.codecCtx.gop_size = 10
+	d.codecCtx.max_b_frames = 0
+	d.codecCtx.bit_rate = 600000
+
+	res := C.avcodec_open2(d.codecCtx, codec, nil)
+	if res < 0 {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("avcodec_open2() failed")
+	}
+
+	d.rgbaFrame = C.av_frame_alloc()
+	if d.rgbaFrame == nil {
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	d.rgbaFrame.format = C.AV_PIX_FMT_RGBA
+	d.rgbaFrame.width = d.codecCtx.width
+	d.rgbaFrame.height = d.codecCtx.height
+
+	res = C.av_frame_get_buffer(d.rgbaFrame, 0)
+	if res < 0 {
+		return fmt.Errorf("av_frame_get_buffer() failed")
+	}
+
+	d.yuv420Frame = C.av_frame_alloc()
+	if d.yuv420Frame == nil {
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_frame_alloc() failed")
+	}
+
+	d.yuv420Frame.format = C.AV_PIX_FMT_YUV420P
+	d.yuv420Frame.width = d.codecCtx.width
+	d.yuv420Frame.height = d.codecCtx.height
+
+	res = C.av_frame_get_buffer(d.yuv420Frame, 0)
+	if res < 0 {
+		return fmt.Errorf("av_frame_get_buffer() failed")
+	}
+
+	d.swsCtx = C.sws_getContext(d.rgbaFrame.width, d.rgbaFrame.height, (int32)(d.rgbaFrame.format),
+		d.yuv420Frame.width, d.yuv420Frame.height, (int32)(d.yuv420Frame.format), C.SWS_BILINEAR, nil, nil, nil)
+	if d.swsCtx == nil {
+		C.av_frame_free(&d.yuv420Frame)
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("sws_getContext() failed")
+	}
+
+	d.pkt = C.av_packet_alloc()
+	if d.pkt == nil {
+		C.av_packet_free(&d.pkt)
+		C.av_frame_free(&d.yuv420Frame)
+		C.av_frame_free(&d.rgbaFrame)
+		C.avcodec_close(d.codecCtx)
+		return fmt.Errorf("av_packet_alloc() failed")
+	}
+
+	return nil
+}
+
+// close closes the encoder.
+func (d *h265Encoder) close() {
+	C.av_packet_free(&d.pkt)
+	C.sws_freeContext(d.swsCtx)
+	C.av_frame_free(&d.yuv420Frame)
+	C.av_frame_free(&d.rgbaFrame)
+	C.avcodec_close(d.codecCtx)
+}
+
+// encode encodes an RGBA image into H265.
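+// Annex-B framing is codec-agnostic, so the output is parsed with h264.AnnexB
+// even though it contains H265 NALUs; a nil access unit means the encoder has
+// no output ready yet (EAGAIN).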
+func (d *h265Encoder) encode(img *image.RGBA, pts int64) ([][]byte, int64, error) {
+	// pass image pointer to frame
+	d.rgbaFrame.data[0] = (*C.uint8_t)(&img.Pix[0])
+
+	// convert color space from RGBA to YUV420
+	res := C.sws_scale(d.swsCtx, frameData(d.rgbaFrame), frameLineSize(d.rgbaFrame),
+		0, d.rgbaFrame.height, frameData(d.yuv420Frame), frameLineSize(d.yuv420Frame))
+	if res < 0 {
+		return nil, 0, fmt.Errorf("sws_scale() failed")
+	}
+
+	// send frame to the encoder
+	d.yuv420Frame.pts = (C.int64_t)(pts)
+	res = C.avcodec_send_frame(d.codecCtx, d.yuv420Frame)
+	if res < 0 {
+		return nil, 0, fmt.Errorf("avcodec_send_frame() failed")
+	}
+
+	// wait for result
+	res = C.avcodec_receive_packet(d.codecCtx, d.pkt)
+	if res == -C.EAGAIN {
+		return nil, 0, nil
+	}
+	if res < 0 {
+		return nil, 0, fmt.Errorf("avcodec_receive_packet() failed")
+	}
+
+	// perform a deep copy of the data before unreferencing the packet
+	data := C.GoBytes(unsafe.Pointer(d.pkt.data), d.pkt.size)
+	pts = (int64)(d.pkt.pts)
+	C.av_packet_unref(d.pkt)
+
+	// parse the Annex-B stream into NALUs
+	var au h264.AnnexB
+	err := au.Unmarshal(data)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	return au, pts, nil
+}
diff --git a/examples/client-record-format-h265/main.go b/examples/client-record-format-h265/main.go
index e7eb31ed..a72496a3 100644
--- a/examples/client-record-format-h265/main.go
+++ b/examples/client-record-format-h265/main.go
@@ -1,77 +1,152 @@
+//go:build cgo
+
 package main
 
 import (
+	"crypto/rand"
+	"image"
+	"image/color"
 	"log"
-	"net"
+	"time"
 
 	"github.com/bluenviron/gortsplib/v4"
 	"github.com/bluenviron/gortsplib/v4/pkg/description"
 	"github.com/bluenviron/gortsplib/v4/pkg/format"
-	"github.com/pion/rtp"
 )
 
 // This example shows how to
-// 1. generate a H265 stream and RTP packets with GStreamer
-// 2. connect to a RTSP server, announce an H265 format
-// 3. route the packets from GStreamer to the server
+// 1. connect to a RTSP server, announce an H265 format
+// 2. generate dummy RGBA images
+// 3. encode images with H265
+// 4. generate RTP packets from H265
+// 5. write RTP packets to the server
+
+// This example requires the FFmpeg libraries, which can be installed with this command:
+// apt install -y libavformat-dev libswscale-dev gcc pkg-config
+
+func multiplyAndDivide(v, m, d int64) int64 {
+	secs := v / d
+	dec := v % d
+	return (secs*m + dec*m/d)
+}
+
+func randUint32() (uint32, error) {
+	var b [4]byte
+	_, err := rand.Read(b[:])
+	if err != nil {
+		return 0, err
+	}
+	return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]), nil
+}
+
+func createDummyImage(i int) *image.RGBA {
+	img := image.NewRGBA(image.Rect(0, 0, 640, 480))
+
+	var cl color.RGBA
+	switch i {
+	case 0:
+		cl = color.RGBA{255, 0, 0, 0}
+	case 1:
+		cl = color.RGBA{0, 255, 0, 0}
+	case 2:
+		cl = color.RGBA{0, 0, 255, 0}
+	}
+
+	for y := 0; y < img.Rect.Dy(); y++ {
+		for x := 0; x < img.Rect.Dx(); x++ {
+			img.SetRGBA(x, y, cl)
+		}
+	}
+
+	return img
+}
 
 func main() {
-	// open a listener to receive RTP/H265 packets
-	pc, err := net.ListenPacket("udp", "localhost:9000")
-	if err != nil {
-		panic(err)
+	// create a stream description that contains an H265 format
+	forma := &format.H265{
+		PayloadTyp: 96,
 	}
-	defer pc.Close()
-
-	log.Println("Waiting for a RTP/H265 stream on UDP port 9000 - you can send one with GStreamer:\n" +
-		"gst-launch-1.0 videotestsrc ! video/x-raw,width=1920,height=1080" +
-		" ! x265enc speed-preset=ultrafast tune=zerolatency bitrate=3000" +
-		" ! rtph265pay config-interval=1 ! 
udpsink host=127.0.0.1 port=9000")
-
-	// wait for first packet
-	buf := make([]byte, 2048)
-	n, _, err := pc.ReadFrom(buf)
-	if err != nil {
-		panic(err)
-	}
-	log.Println("stream connected")
-
-	// create a description that contains a H265 format
 	desc := &description.Session{
 		Medias: []*description.Media{{
-			Type: description.MediaTypeVideo,
-			Formats: []format.Format{&format.H265{
-				PayloadTyp: 96,
-			}},
+			Type:    description.MediaTypeVideo,
+			Formats: []format.Format{forma},
 		}},
 	}
 
-	// connect to the server and start recording
+	// connect to the server, announce the format and start recording
 	c := gortsplib.Client{}
 
-	err = c.StartRecording("rtsp://myuser:mypass@localhost:8554/mystream", desc)
+	err := c.StartRecording("rtsp://myuser:mypass@localhost:8554/mystream", desc)
 	if err != nil {
 		panic(err)
 	}
	defer c.Close()
 
-	var pkt rtp.Packet
-	for {
-		// parse RTP packet
-		err = pkt.Unmarshal(buf[:n])
+	// setup RGBA -> H265 encoder
+	h265enc := &h265Encoder{
+		Width:  640,
+		Height: 480,
+		FPS:    5,
+	}
+	err = h265enc.initialize()
+	if err != nil {
+		panic(err)
+	}
+	defer h265enc.close()
+
+	// setup H265 -> RTP encoder
+	rtpEnc, err := forma.CreateEncoder()
+	if err != nil {
+		panic(err)
+	}
+
+	start := time.Now()
+
+	randomStart, err := randUint32()
+	if err != nil {
+		panic(err)
+	}
+
+	// setup a ticker to sleep between frames
+	ticker := time.NewTicker(200 * time.Millisecond)
+	defer ticker.Stop()
+
+	i := 0
+
+	for range ticker.C {
+		// create a dummy image
+		img := createDummyImage(i)
+		i = (i + 1) % 3
+
+		// get current timestamp
+		pts := multiplyAndDivide(int64(time.Since(start)), int64(forma.ClockRate()), int64(time.Second))
+
+		// encode the image with H265
+		au, pts, err := h265enc.encode(img, pts)
 		if err != nil {
 			panic(err)
 		}
 
-		// route RTP packet to the server
-		err = c.WritePacketRTP(desc.Medias[0], &pkt)
+		// wait for an H265 access unit
+		if au == nil {
+			continue
+		}
+
+		// generate RTP packets from the H265 access unit
+		pkts, err := rtpEnc.Encode(au)
 		if err != nil {
 			panic(err)
 		}
 
-		// read another RTP packet from source
-		n, _, err = pc.ReadFrom(buf)
-		if err != nil {
-			panic(err)
+		log.Printf("writing RTP packets with PTS=%d, au=%d, pkts=%d", pts, len(au), len(pkts))
+
+		// write RTP packets to the server
+		for _, pkt := range pkts {
+			pkt.Timestamp = uint32(int64(randomStart) + pts)
+
+			err = c.WritePacketRTP(desc.Medias[0], pkt)
+			if err != nil {
+				panic(err)
+			}
 		}
 	}
 }
diff --git a/examples/client-record-format-mjpeg/main.go b/examples/client-record-format-mjpeg/main.go
index d30a0173..d2c27f10 100644
--- a/examples/client-record-format-mjpeg/main.go
+++ b/examples/client-record-format-mjpeg/main.go
@@ -1,74 +1,128 @@
 package main
 
 import (
-	"log"
-	"net"
+	"bytes"
+	"crypto/rand"
+	"image"
+	"image/color"
+	"image/jpeg"
+	"time"
 
 	"github.com/bluenviron/gortsplib/v4"
 	"github.com/bluenviron/gortsplib/v4/pkg/description"
 	"github.com/bluenviron/gortsplib/v4/pkg/format"
-	"github.com/pion/rtp"
 )
 
 // This example shows how to
-// 1. generate a M-JPEG stream and RTP packets with GStreamer
-// 2. connect to a RTSP server, announce a M-JPEG format
-// 3. route the packets from GStreamer to the server
+// 1. connect to a RTSP server, announce an M-JPEG format
+// 2. generate dummy RGBA images
+// 3. encode images with JPEG
+// 4. generate RTP packets from JPEG
+// 5. 
write RTP packets to the server + +func multiplyAndDivide(v, m, d int64) int64 { + secs := v / d + dec := v % d + return (secs*m + dec*m/d) +} + +func randUint32() (uint32, error) { + var b [4]byte + _, err := rand.Read(b[:]) + if err != nil { + return 0, err + } + return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]), nil +} + +func createDummyImage(i int) *image.RGBA { + img := image.NewRGBA(image.Rect(0, 0, 640, 480)) + + var cl color.RGBA + switch i { + case 0: + cl = color.RGBA{255, 0, 0, 0} + case 1: + cl = color.RGBA{0, 255, 0, 0} + case 2: + cl = color.RGBA{0, 0, 255, 0} + } + + for y := 0; y < img.Rect.Dy(); y++ { + for x := 0; x < img.Rect.Dx(); x++ { + img.SetRGBA(x, y, cl) + } + } + + return img +} func main() { - // open a listener to receive RTP/M-JPEG packets - pc, err := net.ListenPacket("udp", "localhost:9000") - if err != nil { - panic(err) - } - defer pc.Close() - - log.Println("Waiting for a RTP/M-JPEG stream on UDP port 9000 - you can send one with GStreamer:\n" + - "gst-launch-1.0 videotestsrc ! video/x-raw,width=1920,height=1080,format=I420" + - " ! jpegenc ! rtpjpegpay ! udpsink host=127.0.0.1 port=9000") - - // wait for first packet - buf := make([]byte, 2048) - n, _, err := pc.ReadFrom(buf) - if err != nil { - panic(err) - } - log.Println("stream connected") - // create a description that contains a M-JPEG format + forma := &format.MJPEG{} desc := &description.Session{ Medias: []*description.Media{{ Type: description.MediaTypeVideo, - Formats: []format.Format{&format.MJPEG{}}, + Formats: []format.Format{forma}, }}, } - // connect to the server and start recording + // connect to the server, announce the format and start recording c := gortsplib.Client{} - err = c.StartRecording("rtsp://myuser:mypass@localhost:8554/mystream", desc) + err := c.StartRecording("rtsp://myuser:mypass@localhost:8554/mystream", desc) if err != nil { panic(err) } defer c.Close() - var pkt rtp.Packet - for { - // parse RTP packet - err = pkt.Unmarshal(buf[:n]) + // setup JPEG -> RTP encoder + rtpEnc, err := forma.CreateEncoder() + if err != nil { + panic(err) + } + + start := time.Now() + + randomStart, err := randUint32() + if err != nil { + panic(err) + } + + // setup a ticker to sleep between frames + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + i := 0 + + for range ticker.C { + // create a dummy image + img := createDummyImage(i) + i = (i + 1) % 3 + + // encode the image with JPEG + var buf bytes.Buffer + err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 80}) if err != nil { panic(err) } - // route RTP packet to the server - err = c.WritePacketRTP(desc.Medias[0], &pkt) + // generate RTP packets from the JPEG image + pkts, err := rtpEnc.Encode(buf.Bytes()) if err != nil { panic(err) } - // read another RTP packet from source - n, _, err = pc.ReadFrom(buf) - if err != nil { - panic(err) + // get current timestamp + pts := multiplyAndDivide(int64(time.Since(start)), int64(forma.ClockRate()), int64(time.Second)) + + // write RTP packets to the server + for _, pkt := range pkts { + pkt.Timestamp = uint32(int64(randomStart) + pts) + + err = c.WritePacketRTP(desc.Medias[0], pkt) + if err != nil { + panic(err) + } } } } diff --git a/go.mod b/go.mod index fc886aef..03fc1760 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/bluenviron/gortsplib/v4 go 1.21.0 require ( - github.com/bluenviron/mediacommon/v2 v2.0.0 + github.com/bluenviron/mediacommon/v2 v2.0.1-0.20250219181023-5dae4feddd9c github.com/google/uuid v1.6.0 
github.com/pion/rtcp v1.2.15 github.com/pion/rtp v1.8.11 diff --git a/go.sum b/go.sum index 5022817f..a546a3d0 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,8 @@ github.com/asticode/go-astikit v0.30.0 h1:DkBkRQRIxYcknlaU7W7ksNfn4gMFsB0tqMJflx github.com/asticode/go-astikit v0.30.0/go.mod h1:h4ly7idim1tNhaVkdVBeXQZEE3L0xblP7fCWbgwipF0= github.com/asticode/go-astits v1.13.0 h1:XOgkaadfZODnyZRR5Y0/DWkA9vrkLLPLeeOvDwfKZ1c= github.com/asticode/go-astits v1.13.0/go.mod h1:QSHmknZ51pf6KJdHKZHJTLlMegIrhega3LPWz3ND/iI= -github.com/bluenviron/mediacommon/v2 v2.0.0 h1:JinZ9v2x6QeAOzA0cDA6aFe8vQuCrU8OyWEhG2iNzwY= -github.com/bluenviron/mediacommon/v2 v2.0.0/go.mod h1:iHEz1SFIet6zBwAQoh1a92vTQ3dV3LpVFbom6/SLz3k= +github.com/bluenviron/mediacommon/v2 v2.0.1-0.20250219181023-5dae4feddd9c h1:Piva4HXk7CRxCqsGpb+SfkZX0M45UeMsNHlikgjn2Ug= +github.com/bluenviron/mediacommon/v2 v2.0.1-0.20250219181023-5dae4feddd9c/go.mod h1:iHEz1SFIet6zBwAQoh1a92vTQ3dV3LpVFbom6/SLz3k= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/pkg/format/rtpav1/encoder.go b/pkg/format/rtpav1/encoder.go index 148990ad..1eda76d8 100644 --- a/pkg/format/rtpav1/encoder.go +++ b/pkg/format/rtpav1/encoder.go @@ -69,11 +69,6 @@ func (e *Encoder) Init() error { // Encode encodes OBUs into RTP packets. func (e *Encoder) Encode(obus [][]byte) ([]*rtp.Packet, error) { - isKeyFrame, err := av1.IsRandomAccess(obus) - if err != nil { - return nil, err - } - var curPacket *rtp.Packet var packets []*rtp.Packet curPayloadLen := 0 @@ -138,7 +133,7 @@ func (e *Encoder) Encode(obus [][]byte) ([]*rtp.Packet, error) { finalizeCurPacket(false) - if isKeyFrame { + if av1.IsRandomAccess2(obus) { packets[0].Payload[0] |= 1 << 3 }
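
A minimal sketch of how the patched Encode() is driven, mirroring
examples/client-record-format-av1/main.go; the OBU bytes below are
placeholders rather than a valid AV1 temporal unit:

	package main

	import (
		"log"

		"github.com/bluenviron/gortsplib/v4/pkg/format"
	)

	func main() {
		// create an AV1 format, as in the example above
		forma := &format.AV1{PayloadTyp: 96}

		// create the OBU -> RTP encoder whose Encode() is modified by this patch
		rtpEnc, err := forma.CreateEncoder()
		if err != nil {
			panic(err)
		}

		// placeholder OBUs; real ones come from av1.Bitstream.Unmarshal().
		// a leading sequence header marks the unit as a random access point.
		obus := [][]byte{{0x08, 0x00}}

		// generate RTP packets; the first packet of a random access unit
		// gets the N bit set in its AV1 aggregation header
		pkts, err := rtpEnc.Encode(obus)
		if err != nil {
			panic(err)
		}

		log.Printf("generated %d RTP packets", len(pkts))
	}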