From 3eda3e45e67deffae5dbd9214c2af17f56e4899d Mon Sep 17 00:00:00 2001 From: aler9 <46489434+aler9@users.noreply.github.com> Date: Mon, 31 Jan 2022 18:47:09 +0100 Subject: [PATCH] replace client-read-h264 example with client-read-h264-decode --- README.md | 2 +- .../client-read-h264-convert-to-jpeg/main.go | 6 +- .../client-read-h264-decode/h264decoder.go | 144 ++++++++++++++++++ .../main.go | 31 +++- 4 files changed, 173 insertions(+), 10 deletions(-) create mode 100644 examples/client-read-h264-decode/h264decoder.go rename examples/{client-read-h264 => client-read-h264-decode}/main.go (68%) diff --git a/README.md b/README.md index 429856e0..a8bef9b7 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Features: * [client-read-partial](examples/client-read-partial/main.go) * [client-read-options](examples/client-read-options/main.go) * [client-read-pause](examples/client-read-pause/main.go) -* [client-read-h264](examples/client-read-h264/main.go) +* [client-read-h264-decode](examples/client-read-h264-decode/main.go) * [client-read-h264-convert-to-jpeg](examples/client-read-h264-convert-to-jpeg/main.go) * [client-read-h264-save-to-disk](examples/client-read-h264-save-to-disk/main.go) * [client-read-aac](examples/client-read-aac/main.go) diff --git a/examples/client-read-h264-convert-to-jpeg/main.go b/examples/client-read-h264-convert-to-jpeg/main.go index 95dd2bdb..77b43a73 100644 --- a/examples/client-read-h264-convert-to-jpeg/main.go +++ b/examples/client-read-h264-convert-to-jpeg/main.go @@ -17,7 +17,7 @@ import ( // This example shows how to // 1. connect to a RTSP server and read all tracks on a path // 2. check whether there's a H264 track -// 3. decode the H264 track to raw frames +// 3. decode H264 NALUs of that track into raw frames // 4. 
encode the frames into JPEG images and save them on disk // This example requires the ffmpeg libraries, that can be installed in this way: // apt install -y libavformat-dev libswscale-dev gcc pkg-config @@ -81,7 +81,7 @@ func main() { } // setup RTP->H264 decoder - dec := rtph264.NewDecoder() + rtpDec := rtph264.NewDecoder() // setup H264->raw frames decoder h264dec, err := newH264Decoder() @@ -105,7 +105,7 @@ func main() { } // decode H264 NALUs from the RTP packet - nalus, _, err := dec.Decode(&pkt) + nalus, _, err := rtpDec.Decode(&pkt) if err != nil { return } diff --git a/examples/client-read-h264-decode/h264decoder.go b/examples/client-read-h264-decode/h264decoder.go new file mode 100644 index 00000000..f55bc7ee --- /dev/null +++ b/examples/client-read-h264-decode/h264decoder.go @@ -0,0 +1,144 @@ +package main + +import ( + "fmt" + "image" + "unsafe" +) + +// #cgo pkg-config: libavcodec libavutil libswscale +// #include <libavcodec/avcodec.h> +// #include <libavutil/imgutils.h> +// #include <libswscale/swscale.h> +import "C" + +func frameData(frame *C.AVFrame) **C.uint8_t { + return (**C.uint8_t)(unsafe.Pointer(&frame.data[0])) +} + +func frameLineSize(frame *C.AVFrame) *C.int { + return (*C.int)(unsafe.Pointer(&frame.linesize[0])) +} + +// h264Decoder is a wrapper around ffmpeg's H264 decoder. +type h264Decoder struct { + codecCtx *C.AVCodecContext + avPacket C.AVPacket + srcFrame *C.AVFrame + swsCtx *C.struct_SwsContext + dstFrame *C.AVFrame + dstFramePtr []uint8 +} + +// newH264Decoder allocates a new h264Decoder.
+func newH264Decoder() (*h264Decoder, error) { + codec := C.avcodec_find_decoder(C.AV_CODEC_ID_H264) + if codec == nil { + return nil, fmt.Errorf("avcodec_find_decoder() failed") + } + + codecCtx := C.avcodec_alloc_context3(codec) + if codecCtx == nil { + return nil, fmt.Errorf("avcodec_alloc_context3() failed") + } + + res := C.avcodec_open2(codecCtx, codec, nil) + if res < 0 { + C.avcodec_close(codecCtx) + return nil, fmt.Errorf("avcodec_open2() failed") + } + + srcFrame := C.av_frame_alloc() + if srcFrame == nil { + C.avcodec_close(codecCtx) + return nil, fmt.Errorf("av_frame_alloc() failed") + } + + avPacket := C.AVPacket{} + C.av_init_packet(&avPacket) + + return &h264Decoder{ + codecCtx: codecCtx, + srcFrame: srcFrame, + avPacket: avPacket, + }, nil +} + +// close closes the decoder. +func (d *h264Decoder) close() { + if d.dstFrame != nil { + C.av_frame_free(&d.dstFrame) + } + + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + C.av_frame_free(&d.srcFrame) + C.avcodec_close(d.codecCtx) +} + +func (d *h264Decoder) decode(nalu []byte) (image.Image, error) { + nalu = append([]uint8{0x00, 0x00, 0x00, 0x01}, []uint8(nalu)...) 
+ + // send frame to decoder + d.avPacket.data = (*C.uint8_t)(C.CBytes(nalu)) + defer C.free(unsafe.Pointer(d.avPacket.data)) + d.avPacket.size = C.int(len(nalu)) + res := C.avcodec_send_packet(d.codecCtx, &d.avPacket) + if res < 0 { + return nil, nil + } + + // receive frame if available + res = C.avcodec_receive_frame(d.codecCtx, d.srcFrame) + if res < 0 { + return nil, nil + } + + // if frame size has changed, allocate needed objects + if d.dstFrame == nil || d.dstFrame.width != d.srcFrame.width || d.dstFrame.height != d.srcFrame.height { + if d.dstFrame != nil { + C.av_frame_free(&d.dstFrame) + } + + if d.swsCtx != nil { + C.sws_freeContext(d.swsCtx) + } + + d.swsCtx = C.sws_getContext(d.srcFrame.width, d.srcFrame.height, C.AV_PIX_FMT_YUV420P, // d.codecCtx.pix_fmt, + d.srcFrame.width, d.srcFrame.height, C.AV_PIX_FMT_RGBA, C.SWS_BILINEAR, nil, nil, nil) + if d.swsCtx == nil { + return nil, fmt.Errorf("sws_getContext() err") + } + + d.dstFrame = C.av_frame_alloc() + d.dstFrame.format = C.AV_PIX_FMT_RGBA + d.dstFrame.width = d.srcFrame.width + d.dstFrame.height = d.srcFrame.height + d.dstFrame.color_range = C.AVCOL_RANGE_JPEG + res = C.av_frame_get_buffer(d.dstFrame, 32) + if res < 0 { + return nil, fmt.Errorf("av_frame_get_buffer() err") + } + + dstFrameSize := C.av_image_get_buffer_size((int32)(d.dstFrame.format), d.dstFrame.width, d.dstFrame.height, 1) + d.dstFramePtr = (*[1 << 30]uint8)(unsafe.Pointer(d.dstFrame.data[0]))[:dstFrameSize:dstFrameSize] + } + + // convert frame from YUV420 to RGB + res = C.sws_scale(d.swsCtx, frameData(d.srcFrame), frameLineSize(d.srcFrame), + 0, d.codecCtx.height, frameData(d.dstFrame), frameLineSize(d.dstFrame)) + if res < 0 { + return nil, fmt.Errorf("sws_scale() err") + } + + // embed frame into an image.Image + return &image.RGBA{ + Pix: d.dstFramePtr, + Stride: 4 * (int)(d.dstFrame.width), + Rect: image.Rectangle{ + Max: image.Point{(int)(d.dstFrame.width), (int)(d.dstFrame.height)}, + }, + }, nil +} diff --git 
a/examples/client-read-h264/main.go b/examples/client-read-h264-decode/main.go similarity index 68% rename from examples/client-read-h264/main.go rename to examples/client-read-h264-decode/main.go index e89576c6..546eb722 100644 --- a/examples/client-read-h264/main.go +++ b/examples/client-read-h264-decode/main.go @@ -12,7 +12,9 @@ import ( // This example shows how to // 1. connect to a RTSP server and read all tracks on a path // 2. check whether there's an H264 track -// 3. get H264 NALUs of that track +// 3. decode H264 NALUs of that track into raw frames +// This example requires the ffmpeg libraries, that can be installed in this way: +// apt install -y libavformat-dev libswscale-dev gcc pkg-config func main() { c := gortsplib.Client{} @@ -55,8 +57,15 @@ func main() { panic("H264 track not found") } - // setup decoder - dec := rtph264.NewDecoder() + // setup RTP->H264 decoder + rtpDec := rtph264.NewDecoder() + + // setup H264->raw frames decoder + h264dec, err := newH264Decoder() + if err != nil { + panic(err) + } + defer h264dec.close() // called when a RTP packet arrives c.OnPacketRTP = func(trackID int, payload []byte) { @@ -72,14 +81,24 @@ func main() { } // decode H264 NALUs from the RTP packet - nalus, _, err := dec.Decode(&pkt) + nalus, _, err := rtpDec.Decode(&pkt) if err != nil { return } - // print NALUs for _, nalu := range nalus { - log.Printf("received H264 NALU of size %d\n", len(nalu)) + // decode raw frames from H264 NALUs + img, err := h264dec.decode(nalu) + if err != nil { + panic(err) + } + + // wait for a frame + if img == nil { + continue + } + + log.Printf("decoded frame with size %v", img.Bounds().Max) } }