Compare commits

...

9 Commits

Author SHA1 Message Date
Atsushi Watanabe
bc7653cbc3 Fix reading multiple decoded frames 2025-09-16 13:33:45 +09:00
Lei Kang
bb3a7120ef fix the test 2025-09-12 16:32:50 -07:00
Lei Kang
d5c98cb970 add codec decoder interface 2025-09-12 15:35:07 -07:00
Lei Kang
dd145ac720 add return error code 2025-09-05 16:15:34 -07:00
Lei Kang
8cd08c4280 add null pointer from C 2025-09-04 15:42:13 -07:00
Lei Kang
9218f8bf7c wrap vpx_image into a struct 2025-09-04 15:00:14 -07:00
Lei Kang
d30d98198a add vpx decoder 2025-09-04 14:35:05 -07:00
Leo (Lei) Kang
6047a32ea0 [VPX] vpx dynamic encoding (#647)
* Add vp8 decoder and dynamic vp8 decoding

* Add QPController

* change parameters into const

* move decoder into another PR

* use explicit parameter name
2025-09-04 14:33:07 -07:00
Leo (Lei) Kang
60bf158757 [CODEC] Add encoder bitrate tracker (#646)
add encoder bitrate tracker
2025-09-03 15:55:37 -07:00
6 changed files with 426 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
package codec
import (
"time"
)
// BitrateTracker estimates the bitrate of a stream of frames over a sliding
// time window. It is not safe for concurrent use.
type BitrateTracker struct {
	// windowSize is how far back, relative to the newest frame, samples are kept.
	windowSize time.Duration
	// buffer holds the size in bytes of every retained frame.
	buffer []int
	// times holds the timestamp of every retained frame; times[i] belongs to buffer[i].
	times []time.Time
}

// NewBitrateTracker returns an empty tracker that keeps frames newer than
// windowSize relative to the most recently added frame.
func NewBitrateTracker(windowSize time.Duration) *BitrateTracker {
	bt := new(BitrateTracker)
	bt.windowSize = windowSize
	return bt
}

// AddFrame records a frame of sizeBytes observed at timestamp and then drops
// the leading samples whose timestamp is not strictly after
// timestamp - windowSize.
func (bt *BitrateTracker) AddFrame(sizeBytes int, timestamp time.Time) {
	bt.buffer = append(bt.buffer, sizeBytes)
	bt.times = append(bt.times, timestamp)

	// Find the first sample that is still inside the window; when none is,
	// everything (including the sample just added) is discarded.
	oldest := timestamp.Add(-bt.windowSize)
	keepFrom := len(bt.times)
	for idx, ts := range bt.times {
		if ts.After(oldest) {
			keepFrom = idx
			break
		}
	}
	bt.buffer = bt.buffer[keepFrom:]
	bt.times = bt.times[keepFrom:]
}

// GetBitrate returns the bitrate in bits per second: the bytes of all retained
// frames, times eight, divided by the time between the first and the last
// retained frame. It returns 0 when fewer than two frames are retained or when
// that time span is not positive.
func (bt *BitrateTracker) GetBitrate() float64 {
	n := len(bt.times)
	if n < 2 {
		return 0
	}
	span := bt.times[n-1].Sub(bt.times[0]).Seconds()
	if span <= 0 {
		return 0
	}
	var sum int
	for idx := range bt.buffer {
		sum += bt.buffer[idx]
	}
	return float64(sum*8) / span // bits per second
}

View File

@@ -0,0 +1,19 @@
package codec
import (
"math"
"testing"
"time"
)
// TestBitrateTracker feeds three equally sized frames spread over 999ms into a
// one-second window and expects roughly three frames' worth of bits per second.
func TestBitrateTracker(t *testing.T) {
	const packetSize = 1000

	// Derive every timestamp from one base instant so the spacing between the
	// frames is exact and does not depend on how long the calls take.
	start := time.Now()

	bt := NewBitrateTracker(time.Second)
	bt.AddFrame(packetSize, start)
	bt.AddFrame(packetSize, start.Add(100*time.Millisecond))
	bt.AddFrame(packetSize, start.Add(999*time.Millisecond))

	// The measured span is 999ms rather than a full second, so allow 10% of
	// one frame's bits as tolerance.
	eps := float64(packetSize*8) / 10
	if got, want := bt.GetBitrate(), float64(packetSize*8)*3; math.Abs(got-want) > eps {
		t.Fatalf("GetBitrate() = %v, want %v (|diff| <= %v)", got, want, eps)
	}
}

View File

@@ -1,6 +1,8 @@
package codec
import (
"image"
"io"
"time"
"github.com/pion/mediadevices/pkg/io/audio"
@@ -153,6 +155,15 @@ type ReadCloser interface {
Controllable
}
// VideoDecoderBuilder is implemented by types that can construct a VideoDecoder.
type VideoDecoderBuilder interface {
// BuildVideoDecoder creates a VideoDecoder from the reader r and the media
// properties p, or returns an error.
// NOTE(review): r is presumably the source of the encoded bitstream — confirm
// against the implementations.
BuildVideoDecoder(r io.Reader, p prop.Media) (VideoDecoder, error)
}
// VideoDecoder produces decoded video frames as images.
type VideoDecoder interface {
// Read returns the next decoded image, a func() and an error.
// NOTE(review): the func() is presumably a release callback to be invoked
// once the caller is done with the image — confirm with the implementations.
Read() (image.Image, func(), error)
// Close shuts the decoder down.
Close() error
}
// EncoderController is the interface allowing to control the encoder behaviour after its initialisation.
// It will possibly have common control method in the future.
// A controller can have optional methods represented by *Controller interfaces
@@ -179,6 +190,12 @@ type BitRateController interface {
SetBitRate(int) error
}
// QPController is an optional EncoderController extension for encoders whose
// quantization parameter (QP) can be steered while encoding.
type QPController interface {
EncoderController
// DynamicQPControl adjusts the QP of the encoder based on the current and target bitrate
// NOTE(review): both bitrates are presumably expected in the same unit — confirm with callers.
DynamicQPControl(currentBitrate int, targetBitrate int) error
}
// BaseParams represents a codec's encoding properties
type BaseParams struct {
// Target bitrate in bps.

View File

@@ -54,6 +54,7 @@ import (
"fmt"
"image"
"io"
"math"
"sync"
"time"
"unsafe"
@@ -81,6 +82,12 @@ type encoder struct {
closed bool
}
// Tuning constants used by DynamicQPControl.
const (
// kRateControlThreshold is the dead band, expressed as a fraction of the
// current bitrate: a current/target difference within it leaves the
// quantizer unchanged.
kRateControlThreshold = 0.15
// kMinQuantizer is the lowest quantizer value DynamicQPControl steps down to.
kMinQuantizer = 20
// kMaxQuantizer is the highest quantizer value DynamicQPControl steps up to.
kMaxQuantizer = 63
)
// VP8Params is codec specific parameters
type VP8Params struct {
Params
@@ -254,6 +261,10 @@ func (e *encoder) Read() ([]byte, func(), error) {
e.raw.d_w, e.raw.d_h = C.uint(width), C.uint(height)
}
if ec := C.vpx_codec_enc_config_set(e.codec, e.cfg); ec != 0 {
return nil, func() {}, fmt.Errorf("vpx_codec_enc_config_set failed (%d)", ec)
}
duration := t.Sub(e.tLastFrame).Microseconds()
// VPX doesn't allow 0 duration. If 0 is given, vpx_codec_encode will fail with VPX_CODEC_INVALID_PARAM.
// 0 duration is possible because mediadevices first gets the frame meta data by reading from the source,
@@ -322,6 +333,24 @@ func (e *encoder) SetBitRate(bitrate int) error {
return nil
}
// DynamicQPControl moves the encoder's quantizer one step towards the target
// bitrate.
//
// When the absolute difference between currentBitrate and targetBitrate is
// within kRateControlThreshold of currentBitrate nothing changes. Otherwise
// the quantizer is raised by one (capped at kMaxQuantizer) if the target is
// below the current bitrate, or lowered by one (floored at kMinQuantizer) if
// it is not. Both rc_min_quantizer and rc_max_quantizer are set to the new
// value, pinning the encoder to a single quantizer. Only the in-memory
// configuration is modified here; Read pushes it to libvpx through
// vpx_codec_enc_config_set.
//
// It always returns nil.
func (e *encoder) DynamicQPControl(currentBitrate int, targetBitrate int) error {
	e.mu.Lock()
	defer e.mu.Unlock()

	bitrateDiff := math.Abs(float64(currentBitrate - targetBitrate))
	if bitrateDiff <= float64(currentBitrate)*kRateControlThreshold {
		return nil
	}

	// The quantizer fields are unsigned in libvpx, so step with explicit
	// saturation: a plain q-1 would wrap around at zero and then survive the
	// max() clamp as a huge, invalid value.
	q := e.cfg.rc_max_quantizer
	if targetBitrate < currentBitrate {
		if q < kMaxQuantizer {
			q++
		} else {
			q = kMaxQuantizer
		}
	} else {
		if q > kMinQuantizer {
			q--
		} else {
			q = kMinQuantizer
		}
	}
	e.cfg.rc_max_quantizer = q
	e.cfg.rc_min_quantizer = q
	return nil
}
// Controller returns the encoder itself as its codec.EncoderController; the
// optional controller interfaces are reached by type-asserting the result.
func (e *encoder) Controller() codec.EncoderController {
return e
}

View File

@@ -0,0 +1,155 @@
package vpx
/*
#cgo pkg-config: vpx
#include <stdlib.h>
#include <stdint.h>
#include <vpx/vpx_decoder.h>
#include <vpx/vpx_codec.h>
#include <vpx/vpx_image.h>
#include <vpx/vp8dx.h>
// Returns the libvpx VP8 decoder interface
vpx_codec_iface_t *ifaceVP8Decoder() {
return vpx_codec_vp8_dx();
}
// Returns the libvpx VP9 decoder interface
vpx_codec_iface_t *ifaceVP9Decoder() {
return vpx_codec_vp9_dx();
}
// Allocates a new decoder context with malloc. The memory is not initialized
// and the result is NULL when the allocation fails.
vpx_codec_ctx_t* newDecoderCtx() {
return (vpx_codec_ctx_t*)malloc(sizeof(vpx_codec_ctx_t));
}
// Initializes the decoder for the given interface, passing NULL and 0 for the
// remaining vpx_codec_dec_init_ver arguments and the ABI version this file was
// compiled against
vpx_codec_err_t decoderInit(vpx_codec_ctx_t* ctx, vpx_codec_iface_t* iface) {
return vpx_codec_dec_init_ver(ctx, iface, NULL, 0, VPX_DECODER_ABI_VERSION);
}
// Decodes an encoded frame of data_sz bytes starting at data, passing NULL and
// 0 for the trailing vpx_codec_decode arguments
vpx_codec_err_t decodeFrame(vpx_codec_ctx_t* ctx, const uint8_t* data, unsigned int data_sz) {
return vpx_codec_decode(ctx, data, data_sz, NULL, 0);
}
// Creates an iterator: allocates storage for one vpx_codec_iter_t with malloc.
// The storage is not initialized.
vpx_codec_iter_t* newIter() {
return (vpx_codec_iter_t*)malloc(sizeof(vpx_codec_iter_t));
}
// Returns the next decoded frame, advancing iter. The Go caller treats a NULL
// result as "no frame available".
vpx_image_t* getFrame(vpx_codec_ctx_t* ctx, vpx_codec_iter_t* iter) {
return vpx_codec_get_frame(ctx, iter);
}
// Frees a decoded frame
// NOTE(review): libvpx documents images returned by vpx_codec_get_frame as
// owned by the decoder - confirm that calling vpx_img_free on them is intended.
void freeFrame(vpx_image_t* f) {
vpx_img_free(f);
}
// Frees a decoder context: destroys the codec instance, then releases the
// struct itself with free
void freeDecoderCtx(vpx_codec_ctx_t* ctx) {
vpx_codec_destroy(ctx);
free(ctx);
}
*/
import "C"
import (
"fmt"
"image"
"io"
"sync"
"time"
"unsafe"
"github.com/pion/mediadevices/pkg/codec"
"github.com/pion/mediadevices/pkg/prop"
)
// decoder wraps a libvpx decoder context and turns the encoded chunks it reads
// from reader into images.
type decoder struct {
// codec is the C-allocated libvpx context created in NewDecoder and released in Close.
codec *C.vpx_codec_ctx_t
// raw is not referenced by the decoder methods in this file.
raw *C.vpx_image_t
// cfg holds the thread count and frame size filled in by NewDecoder.
// decoderInit is called without it (it passes NULL to libvpx).
cfg *C.vpx_codec_dec_cfg_t
// iter is the libvpx frame iterator state handed to getFrame; nil starts a new iteration.
iter C.vpx_codec_iter_t
// frameIndex, tStart and tLastFrame are not referenced by the decoder methods in this file.
frameIndex int
tStart time.Time
tLastFrame time.Time
// reader supplies the encoded data.
reader io.Reader
// buf is the scratch buffer each chunk of encoded data is read into (1 MiB, allocated in NewDecoder).
buf []byte
// mu is presumably meant to guard the decoder state — confirm against the methods.
mu sync.Mutex
// closed is set once Close has been called.
closed bool
}
// BuildVideoDecoder creates a video decoder that reads from r. It simply
// delegates to NewDecoder with the same arguments.
func BuildVideoDecoder(r io.Reader, property prop.Media) (codec.VideoDecoder, error) {
return NewDecoder(r, property)
}
// NewDecoder creates a VP8 decoder that reads encoded data from r.
//
// The width and height from p are recorded in the decoder's configuration
// struct together with a thread count of 1. Note that the libvpx context
// itself is initialized without that configuration (decoderInit passes NULL).
//
// The returned decoder owns C memory and must be released with Close. An error
// is returned when the context cannot be allocated or libvpx rejects the
// initialization; nothing is leaked in either case.
func NewDecoder(r io.Reader, p prop.Media) (codec.VideoDecoder, error) {
	cfg := &C.vpx_codec_dec_cfg_t{}
	cfg.threads = 1
	cfg.w = C.uint(p.Width)
	cfg.h = C.uint(p.Height)

	ctx := C.newDecoderCtx()
	if ctx == nil {
		return nil, fmt.Errorf("vpx decoder context allocation failed")
	}
	if ec := C.decoderInit(ctx, C.ifaceVP8Decoder()); ec != C.VPX_CODEC_OK {
		// Only the malloc'ed struct needs releasing here: it never became a
		// usable codec instance, so it must not go through freeDecoderCtx.
		C.free(unsafe.Pointer(ctx))
		return nil, fmt.Errorf("vpx_codec_dec_init failed (%d)", ec)
	}

	return &decoder{
		codec:  ctx,
		cfg:    cfg,
		iter:   nil, // initialize to NULL to start iteration
		reader: r,
		buf:    make([]byte, 1024*1024),
	}, nil
}
// Read returns the next decoded frame as a freshly allocated *image.YCbCr with
// 4:2:0 subsampling.
//
// Frames still pending inside libvpx are returned first. When none is pending,
// one chunk is read from the underlying reader and handed to libvpx as a
// single unit, so the reader is expected to deliver one complete encoded frame
// per Read call (at most len(d.buf) bytes).
//
// The returned func() is a no-op and never nil, because the pixel data is
// copied out of the decoder-owned buffer. Once the decoder has been closed
// Read returns io.EOF. Errors from the reader are returned as-is; a libvpx
// failure is reported as "decode failed".
//
// The decoder's mutex is held for the whole call, including while blocked on
// the reader, so Close waits for an in-flight Read.
func (d *decoder) Read() (image.Image, func(), error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	if d.closed {
		// The C context is gone; touching it would be a use-after-free.
		return nil, func() {}, io.EOF
	}

	var input *C.vpx_image_t
	for {
		input = C.getFrame(d.codec, &d.iter)
		if input != nil {
			break
		}
		// The iterator is exhausted; reset it for the next decoded chunk.
		d.iter = nil

		// Read if there are no remaining frames in the decoder.
		n, err := d.reader.Read(d.buf)
		// Per the io.Reader contract, consume the bytes before looking at the
		// error. An empty read is skipped because libvpx rejects a non-NULL
		// buffer of size 0.
		if n > 0 {
			status := C.decodeFrame(d.codec, (*C.uint8_t)(&d.buf[0]), C.uint(n))
			if status != C.VPX_CODEC_OK {
				return nil, func() {}, fmt.Errorf("decode failed: %v", status)
			}
		}
		if err != nil {
			if n > 0 {
				// Data arrived together with the error (typically io.EOF):
				// deliver its frame now; the reader is expected to report the
				// error again on the next call.
				if input = C.getFrame(d.codec, &d.iter); input != nil {
					break
				}
				d.iter = nil
			}
			return nil, func() {}, err
		}
	}

	w, h := int(input.d_w), int(input.d_h)
	// 4:2:0 chroma planes are half the luma size rounded up, which matters for
	// odd widths and heights.
	cw, ch := (w+1)/2, (h+1)/2
	yStride := int(input.stride[0])
	uStride := int(input.stride[1])
	vStride := int(input.stride[2])

	ySrc := unsafe.Slice((*byte)(unsafe.Pointer(input.planes[0])), yStride*h)
	uSrc := unsafe.Slice((*byte)(unsafe.Pointer(input.planes[1])), uStride*ch)
	vSrc := unsafe.Slice((*byte)(unsafe.Pointer(input.planes[2])), vStride*ch)

	dst := image.NewYCbCr(image.Rect(0, 0, w, h), image.YCbCrSubsampleRatio420)

	// Copy row by row: the source stride may be wider than the visible width.
	for r := 0; r < h; r++ {
		copy(dst.Y[r*dst.YStride:r*dst.YStride+w], ySrc[r*yStride:r*yStride+w])
	}
	for r := 0; r < ch; r++ {
		copy(dst.Cb[r*dst.CStride:r*dst.CStride+cw], uSrc[r*uStride:r*uStride+cw])
		copy(dst.Cr[r*dst.CStride:r*dst.CStride+cw], vSrc[r*vStride:r*vStride+cw])
	}

	// The frame returned by vpx_codec_get_frame belongs to the decoder and is
	// reused by it, so it is deliberately not freed here.
	return dst, func() {}, nil
}
// Close destroys the libvpx context and frees its memory. It is safe to call
// more than once; only the first call releases anything. It always returns nil.
func (d *decoder) Close() error {
	d.mu.Lock()
	defer d.mu.Unlock()

	if d.closed {
		// Already released; freeing the context again would be a double free.
		return nil
	}
	d.closed = true

	C.freeDecoderCtx(d.codec)
	// Drop the dangling pointer so later misuse hits libvpx's NULL checks
	// instead of freed memory.
	d.codec = nil
	return nil
}

View File

@@ -4,6 +4,9 @@ import (
"context"
"image"
"io"
"math"
"math/rand"
"sync"
"sync/atomic"
"testing"
"time"
@@ -13,6 +16,7 @@ import (
"github.com/pion/mediadevices/pkg/frame"
"github.com/pion/mediadevices/pkg/io/video"
"github.com/pion/mediadevices/pkg/prop"
"github.com/stretchr/testify/assert"
)
func TestEncoder(t *testing.T) {
@@ -360,3 +364,157 @@ func TestEncoderFrameMonotonic(t *testing.T) {
}
}
}
// TestVP8DynamicQPControl encodes frames of random noise while calling
// DynamicQPControl before every frame, and checks that the measured bitrate
// ends up closer to the target than to the initial value.
func TestVP8DynamicQPControl(t *testing.T) {
	t.Run("VP8", func(t *testing.T) {
		p, err := NewVP8Params()
		if err != nil {
			t.Fatal(err)
		}
		p.LagInFrames = 0 // Disable frame lag buffering for real-time encoding
		p.RateControlEndUsage = RateControlCBR

		totalFrames := 100
		frameRate := 10
		initialWidth, initialHeight := 800, 600

		var cnt uint32
		r, err := p.BuildVideoEncoder(
			video.ReaderFunc(func() (image.Image, func(), error) {
				if atomic.AddUint32(&cnt, 1) == uint32(totalFrames+1) {
					return nil, nil, io.EOF
				}
				// Random noise keeps the encoded size sensitive to the quantizer.
				img := image.NewYCbCr(image.Rect(0, 0, initialWidth, initialHeight), image.YCbCrSubsampleRatio420)
				rng := rand.New(rand.NewSource(time.Now().UnixNano()))
				for px := range img.Y {
					img.Y[px] = uint8(rng.Intn(256))
				}
				for px := range img.Cb {
					img.Cb[px] = uint8(rng.Intn(256))
				}
				for px := range img.Cr {
					img.Cr[px] = uint8(rng.Intn(256))
				}
				return img, func() {}, nil
			}),
			prop.Media{
				Video: prop.Video{
					Width:       initialWidth,
					Height:      initialHeight,
					FrameRate:   float32(frameRate),
					FrameFormat: frame.FormatI420,
				},
			},
		)
		if err != nil {
			t.Fatal(err)
		}
		defer r.Close()

		// Resolve the optional controllers once, failing with a clear message
		// instead of panicking on a bad type assertion.
		ctrl := r.Controller()
		keyFrameCtrl, ok := ctrl.(codec.KeyFrameController)
		if !ok {
			t.Fatal("encoder controller does not implement codec.KeyFrameController")
		}
		qpCtrl, ok := ctrl.(codec.QPController)
		if !ok {
			t.Fatal("encoder controller does not implement codec.QPController")
		}

		initialBitrate := 100
		currentBitrate := initialBitrate
		targetBitrate := 300
		for i := 0; i < totalFrames; i++ {
			if err := keyFrameCtrl.ForceKeyFrame(); err != nil {
				t.Fatal(err)
			}
			if err := qpCtrl.DynamicQPControl(currentBitrate, targetBitrate); err != nil {
				t.Fatal(err)
			}
			data, rel, err := r.Read()
			if err != nil {
				t.Fatal(err)
			}
			// Take the size before releasing the buffer.
			encodedSize := len(data)
			rel()
			// NOTE(review): this divides by the frame rate, so the value is not
			// a conventional kbps figure; the target above is tuned to it.
			currentBitrate = encodedSize * 8 / 1000 / frameRate
		}
		assert.Less(t, math.Abs(float64(targetBitrate-currentBitrate)), math.Abs(float64(initialBitrate-currentBitrate)))
	})
}
// TestVP8EncodeDecode pipes the output of a VP8 encoder into the VP8 decoder
// and checks that every encoded frame comes back with the original dimensions.
func TestVP8EncodeDecode(t *testing.T) {
	t.Run("VP8", func(t *testing.T) {
		initialWidth, initialHeight := 800, 600
		reader, writer := io.Pipe()

		decoder, err := BuildVideoDecoder(reader, prop.Media{
			Video: prop.Video{
				Width:       initialWidth,
				Height:      initialHeight,
				FrameFormat: frame.FormatI420,
			},
		})
		if err != nil {
			t.Fatalf("Error creating VP8 decoder: %v", err)
		}
		defer decoder.Close()

		p, err := NewVP8Params()
		if err != nil {
			t.Fatal(err)
		}
		p.LagInFrames = 0 // Disable frame lag buffering for real-time encoding
		p.RateControlEndUsage = RateControlCBR

		totalFrames := 10
		var cnt uint32
		r, err := p.BuildVideoEncoder(
			video.ReaderFunc(func() (image.Image, func(), error) {
				if atomic.AddUint32(&cnt, 1) == uint32(totalFrames+1) {
					return nil, nil, io.EOF
				}
				img := image.NewYCbCr(image.Rect(0, 0, initialWidth, initialHeight), image.YCbCrSubsampleRatio420)
				return img, func() {}, nil
			}),
			prop.Media{
				Video: prop.Video{
					Width:       initialWidth,
					Height:      initialHeight,
					FrameRate:   30,
					FrameFormat: frame.FormatI420,
				},
			},
		)
		if err != nil {
			t.Fatal(err)
		}
		defer r.Close()

		var wg sync.WaitGroup
		wg.Add(1)
		counter := 0
		go func() {
			defer wg.Done()
			// Closing the read side makes a pending or later writer.Write fail
			// instead of blocking forever if decoding stops early.
			defer reader.Close()
			for {
				img, rel, err := decoder.Read()
				if err == io.EOF {
					return
				}
				if err != nil {
					t.Errorf("decoder read error: %v", err)
					return
				}
				assert.Equal(t, initialWidth, img.Bounds().Dx())
				assert.Equal(t, initialHeight, img.Bounds().Dy())
				rel()
				counter++
			}
		}()
		// Deferred calls run last-in-first-out. On every exit path, including
		// t.Fatalf, the pipe is closed first, then the decoder goroutine is
		// awaited, and only then are the encoder and decoder closed.
		defer wg.Wait()
		defer writer.Close()

		// Feed encoded frames to the decoder, one pipe write per frame.
		for {
			data, rel, err := r.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatalf("encoder error: %v", err)
			}
			_, werr := writer.Write(data)
			rel()
			if werr != nil {
				t.Fatalf("writer error: %v", werr)
			}
		}
		writer.Close()

		// Wait until the decoder goroutine is done before reading counter.
		wg.Wait()
		assert.Equal(t, totalFrames, counter)
	})
}