mirror of
https://github.com/pion/mediadevices.git
synced 2025-09-26 20:41:46 +08:00
Compare commits
9 Commits
21d2f4618c
...
vpx-decode
Author | SHA1 | Date | |
---|---|---|---|
![]() |
bc7653cbc3 | ||
![]() |
bb3a7120ef | ||
![]() |
d5c98cb970 | ||
![]() |
dd145ac720 | ||
![]() |
8cd08c4280 | ||
![]() |
9218f8bf7c | ||
![]() |
d30d98198a | ||
![]() |
6047a32ea0 | ||
![]() |
60bf158757 |
48
pkg/codec/bitrate_tracker.go
Normal file
48
pkg/codec/bitrate_tracker.go
Normal file
@@ -0,0 +1,48 @@
|
||||
package codec
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// BitrateTracker estimates the bitrate of a stream from the sizes and
// timestamps of the frames observed inside a sliding time window.
//
// The tracker performs no locking; callers that share one instance between
// goroutines must synchronise access themselves.
type BitrateTracker struct {
	windowSize time.Duration
	buffer     []int       // frame sizes in bytes, parallel to times
	times      []time.Time // frame timestamps, in insertion order
}

// NewBitrateTracker returns a tracker that keeps only the frames whose
// timestamp is strictly newer than (latest timestamp - windowSize).
func NewBitrateTracker(windowSize time.Duration) *BitrateTracker {
	return &BitrateTracker{
		windowSize: windowSize,
	}
}

// AddFrame records a frame of sizeBytes bytes observed at timestamp and then
// evicts the leading frames that are no longer inside the window ending at
// timestamp. A frame lying exactly on the window boundary is evicted.
func (bt *BitrateTracker) AddFrame(sizeBytes int, timestamp time.Time) {
	bt.buffer = append(bt.buffer, sizeBytes)
	bt.times = append(bt.times, timestamp)

	// Count the leading entries that fell out of the window.
	cutoff := timestamp.Add(-bt.windowSize)
	stale := 0
	for stale < len(bt.times) && !bt.times[stale].After(cutoff) {
		stale++
	}
	if stale == 0 {
		return
	}

	// Compact in place instead of re-slicing forward: the backing arrays are
	// reused, so a steady stream of frames does not force append to
	// reallocate over and over.
	n := copy(bt.buffer, bt.buffer[stale:])
	bt.buffer = bt.buffer[:n]
	n = copy(bt.times, bt.times[stale:])
	bt.times = bt.times[:n]
}

// GetBitrate returns the bitrate, in bits per second, of the frames currently
// inside the window: the sum of all retained frame sizes divided by the time
// between the first and the last retained timestamp. It returns 0 when fewer
// than two frames are retained or when that time span is not positive.
func (bt *BitrateTracker) GetBitrate() float64 {
	if len(bt.times) < 2 {
		return 0
	}
	duration := bt.times[len(bt.times)-1].Sub(bt.times[0]).Seconds()
	if duration <= 0 {
		return 0
	}

	// Accumulate in float64 so that neither the sum nor the bytes-to-bits
	// conversion can overflow the platform int.
	var totalBytes float64
	for _, b := range bt.buffer {
		totalBytes += float64(b)
	}
	return totalBytes * 8 / duration // bits per second
}
|
19
pkg/codec/bitrate_tracker_test.go
Normal file
19
pkg/codec/bitrate_tracker_test.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package codec
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestBitrateTracker(t *testing.T) {
|
||||
packetSize := 1000
|
||||
bt := NewBitrateTracker(time.Second)
|
||||
bt.AddFrame(packetSize, time.Now())
|
||||
bt.AddFrame(packetSize, time.Now().Add(time.Millisecond*100))
|
||||
bt.AddFrame(packetSize, time.Now().Add(time.Millisecond*999))
|
||||
eps := float64(packetSize*8) / 10
|
||||
if got, want := bt.GetBitrate(), float64(packetSize*8)*3; math.Abs(got-want) > eps {
|
||||
t.Fatalf("GetBitrate() = %v, want %v (|diff| <= %v)", got, want, eps)
|
||||
}
|
||||
}
|
@@ -1,6 +1,8 @@
|
||||
package codec
|
||||
|
||||
import (
|
||||
"image"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/pion/mediadevices/pkg/io/audio"
|
||||
@@ -153,6 +155,15 @@ type ReadCloser interface {
|
||||
Controllable
|
||||
}
|
||||
|
||||
// VideoDecoderBuilder is implemented by codecs that can construct a
// VideoDecoder which takes its encoded input from r.
// NOTE(review): p presumably describes the expected stream (width, height,
// frame format) — confirm against the implementations.
type VideoDecoderBuilder interface {
|
||||
BuildVideoDecoder(r io.Reader, p prop.Media) (VideoDecoder, error)
|
||||
}
|
||||
|
||||
// VideoDecoder produces decoded video frames.
type VideoDecoder interface {
|
||||
// Read returns the next decoded frame together with a func() value.
// NOTE(review): the func() is presumably a release callback to invoke once
// the caller is done with the frame — confirm.
Read() (image.Image, func(), error)
|
||||
// Close shuts the decoder down.
Close() error
|
||||
}
|
||||
|
||||
// EncoderController is the interface allowing to control the encoder behaviour after it's initialisation.
|
||||
// It will possibly have common control method in the future.
|
||||
// A controller can have optional methods represented by *Controller interfaces
|
||||
@@ -179,6 +190,12 @@ type BitRateController interface {
|
||||
SetBitRate(int) error
|
||||
}
|
||||
|
||||
// QPController is an EncoderController that can additionally adjust the
// encoder's quantizer (QP) from a measured and a target bitrate.
type QPController interface {
|
||||
EncoderController
|
||||
// DynamicQPControl adjusts the QP of the encoder based on the current and target bitrate
|
||||
DynamicQPControl(currentBitrate int, targetBitrate int) error
|
||||
}
|
||||
|
||||
// BaseParams represents an codec's encoding properties
|
||||
type BaseParams struct {
|
||||
// Target bitrate in bps.
|
||||
|
@@ -54,6 +54,7 @@ import (
|
||||
"fmt"
|
||||
"image"
|
||||
"io"
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
@@ -81,6 +82,12 @@ type encoder struct {
|
||||
closed bool
|
||||
}
|
||||
|
||||
// Tuning constants for DynamicQPControl.
const (
|
||||
// kRateControlThreshold is the dead band: no adjustment is made while the
// gap between current and target bitrate is at most this fraction of the
// current bitrate.
kRateControlThreshold = 0.15
|
||||
// kMinQuantizer is the lower bound applied when the max quantizer is decreased.
kMinQuantizer = 20
|
||||
// kMaxQuantizer is the upper bound applied when the max quantizer is increased.
kMaxQuantizer = 63
|
||||
)
|
||||
|
||||
// VP8Params is codec specific paramaters
|
||||
type VP8Params struct {
|
||||
Params
|
||||
@@ -254,6 +261,10 @@ func (e *encoder) Read() ([]byte, func(), error) {
|
||||
e.raw.d_w, e.raw.d_h = C.uint(width), C.uint(height)
|
||||
}
|
||||
|
||||
if ec := C.vpx_codec_enc_config_set(e.codec, e.cfg); ec != 0 {
|
||||
return nil, func() {}, fmt.Errorf("vpx_codec_enc_config_set failed (%d)", ec)
|
||||
}
|
||||
|
||||
duration := t.Sub(e.tLastFrame).Microseconds()
|
||||
// VPX doesn't allow 0 duration. If 0 is given, vpx_codec_encode will fail with VPX_CODEC_INVALID_PARAM.
|
||||
// 0 duration is possible because mediadevices first gets the frame meta data by reading from the source,
|
||||
@@ -322,6 +333,24 @@ func (e *encoder) SetBitRate(bitrate int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *encoder) DynamicQPControl(currentBitrate int, targetBitrate int) error {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
bitrateDiff := math.Abs(float64(currentBitrate - targetBitrate))
|
||||
if bitrateDiff <= float64(currentBitrate)*kRateControlThreshold {
|
||||
return nil
|
||||
}
|
||||
currentMax := e.cfg.rc_max_quantizer
|
||||
|
||||
if targetBitrate < currentBitrate {
|
||||
e.cfg.rc_max_quantizer = min(currentMax+1, kMaxQuantizer)
|
||||
} else {
|
||||
e.cfg.rc_max_quantizer = max(currentMax-1, kMinQuantizer)
|
||||
}
|
||||
e.cfg.rc_min_quantizer = e.cfg.rc_max_quantizer
|
||||
return nil
|
||||
}
|
||||
|
||||
// Controller returns the encoder itself as its codec.EncoderController;
// callers type-assert the result to the optional *Controller interfaces.
func (e *encoder) Controller() codec.EncoderController {
|
||||
return e
|
||||
}
|
||||
|
155
pkg/codec/vpx/vpx_decoder.go
Normal file
155
pkg/codec/vpx/vpx_decoder.go
Normal file
@@ -0,0 +1,155 @@
|
||||
package vpx
|
||||
|
||||
/*
#cgo pkg-config: vpx
#include <stdlib.h>
#include <stdint.h>
#include <vpx/vpx_decoder.h>
#include <vpx/vpx_codec.h>
#include <vpx/vpx_image.h>
#include <vpx/vp8dx.h>

// Returns the libvpx VP8 decoder interface.
vpx_codec_iface_t *ifaceVP8Decoder() {
  return vpx_codec_vp8_dx();
}

// Returns the libvpx VP9 decoder interface.
vpx_codec_iface_t *ifaceVP9Decoder() {
  return vpx_codec_vp9_dx();
}

// Allocates a new, zero-initialised decoder context (NULL when out of memory).
// Zeroing keeps the context in a defined state even if it is never
// successfully initialised.
vpx_codec_ctx_t* newDecoderCtx() {
  return (vpx_codec_ctx_t*)calloc(1, sizeof(vpx_codec_ctx_t));
}

// Initializes the decoder with the default configuration (no
// vpx_codec_dec_cfg_t is passed) and no flags.
vpx_codec_err_t decoderInit(vpx_codec_ctx_t* ctx, vpx_codec_iface_t* iface) {
  return vpx_codec_dec_init_ver(ctx, iface, NULL, 0, VPX_DECODER_ABI_VERSION);
}

// Decodes an encoded frame
vpx_codec_err_t decodeFrame(vpx_codec_ctx_t* ctx, const uint8_t* data, unsigned int data_sz) {
  return vpx_codec_decode(ctx, data, data_sz, NULL, 0);
}

// Creates an iterator. The caller owns the returned memory and must free() it.
vpx_codec_iter_t* newIter() {
  return (vpx_codec_iter_t*)malloc(sizeof(vpx_codec_iter_t));
}

// Returns the next decoded frame, or NULL when none is pending
vpx_image_t* getFrame(vpx_codec_ctx_t* ctx, vpx_codec_iter_t* iter) {
  return vpx_codec_get_frame(ctx, iter);
}

// Frees a decoded frame.
// NOTE(review): images returned by vpx_codec_get_frame belong to the decoder;
// confirm that callers only pass images they allocated themselves.
void freeFrame(vpx_image_t* f) {
  vpx_img_free(f);
}

// Destroys and frees a decoder context. NULL is accepted and ignored.
void freeDecoderCtx(vpx_codec_ctx_t* ctx) {
  if (ctx == NULL) {
    return;
  }
  vpx_codec_destroy(ctx);
  free(ctx);
}

*/
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"image"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/pion/mediadevices/pkg/codec"
|
||||
"github.com/pion/mediadevices/pkg/prop"
|
||||
)
|
||||
|
||||
// decoder is a VP8 video decoder backed by libvpx. It pulls encoded data from
// reader and hands out decoded frames through Read.
type decoder struct {
|
||||
// codec is the libvpx decoder context; allocated in NewDecoder, freed in Close.
codec *C.vpx_codec_ctx_t
|
||||
// NOTE(review): raw is not referenced by the decoder code in this file.
raw *C.vpx_image_t
|
||||
// cfg is filled in by NewDecoder (threads, width, height).
// NOTE(review): it is stored but not passed to decoderInit — confirm intent.
cfg *C.vpx_codec_dec_cfg_t
|
||||
// iter is the libvpx frame iterator; nil restarts iteration.
iter C.vpx_codec_iter_t
|
||||
// NOTE(review): frameIndex, tStart and tLastFrame are not referenced by the
// decoder code in this file.
frameIndex int
|
||||
tStart time.Time
|
||||
tLastFrame time.Time
|
||||
// reader supplies the encoded data.
reader io.Reader
|
||||
// buf is the scratch buffer each reader.Read fills (1 MiB, see NewDecoder).
buf []byte
|
||||
|
||||
mu sync.Mutex
|
||||
// closed is set by Close.
closed bool
|
||||
}
|
||||
|
||||
// BuildVideoDecoder creates a VP8 decoder that reads encoded data from r.
// It simply forwards to NewDecoder.
func BuildVideoDecoder(r io.Reader, property prop.Media) (codec.VideoDecoder, error) {
|
||||
return NewDecoder(r, property)
|
||||
}
|
||||
|
||||
func NewDecoder(r io.Reader, p prop.Media) (codec.VideoDecoder, error) {
|
||||
cfg := &C.vpx_codec_dec_cfg_t{}
|
||||
cfg.threads = 1
|
||||
cfg.w = C.uint(p.Width)
|
||||
cfg.h = C.uint(p.Height)
|
||||
|
||||
codec := C.newDecoderCtx()
|
||||
if C.decoderInit(codec, C.ifaceVP8Decoder()) != C.VPX_CODEC_OK {
|
||||
return nil, fmt.Errorf("vpx_codec_dec_init failed")
|
||||
}
|
||||
|
||||
return &decoder{
|
||||
codec: codec,
|
||||
cfg: cfg,
|
||||
iter: nil, // initialize to NULL to start iteration
|
||||
reader: r,
|
||||
buf: make([]byte, 1024*1024),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *decoder) Read() (image.Image, func(), error) {
|
||||
var input *C.vpx_image_t
|
||||
for {
|
||||
input = C.getFrame(d.codec, &d.iter)
|
||||
if input != nil {
|
||||
break
|
||||
}
|
||||
d.iter = nil
|
||||
// Read if there are no remained frames in the decoder
|
||||
n, err := d.reader.Read(d.buf)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
status := C.decodeFrame(d.codec, (*C.uint8_t)(&d.buf[0]), C.uint(n))
|
||||
if status != C.VPX_CODEC_OK {
|
||||
return nil, nil, fmt.Errorf("decode failed: %v", status)
|
||||
}
|
||||
}
|
||||
w := int(input.d_w)
|
||||
h := int(input.d_h)
|
||||
yStride := int(input.stride[0])
|
||||
uStride := int(input.stride[1])
|
||||
vStride := int(input.stride[2])
|
||||
|
||||
ySrc := unsafe.Slice((*byte)(unsafe.Pointer(input.planes[0])), yStride*h)
|
||||
uSrc := unsafe.Slice((*byte)(unsafe.Pointer(input.planes[1])), uStride*h/2)
|
||||
vSrc := unsafe.Slice((*byte)(unsafe.Pointer(input.planes[2])), vStride*h/2)
|
||||
|
||||
dst := image.NewYCbCr(image.Rect(0, 0, w, h), image.YCbCrSubsampleRatio420)
|
||||
|
||||
// copy luma
|
||||
for r := 0; r < h; r++ {
|
||||
copy(dst.Y[r*dst.YStride:r*dst.YStride+w], ySrc[r*yStride:r*yStride+w])
|
||||
}
|
||||
// copy chroma
|
||||
for r := 0; r < h/2; r++ {
|
||||
copy(dst.Cb[r*dst.CStride:r*dst.CStride+w/2], uSrc[r*uStride:r*uStride+w/2])
|
||||
copy(dst.Cr[r*dst.CStride:r*dst.CStride+w/2], vSrc[r*vStride:r*vStride+w/2])
|
||||
}
|
||||
C.freeFrame(input)
|
||||
return dst, func() {}, nil
|
||||
}
|
||||
|
||||
func (d *decoder) Close() error {
|
||||
C.freeDecoderCtx(d.codec)
|
||||
d.closed = true
|
||||
return nil
|
||||
}
|
@@ -4,6 +4,9 @@ import (
|
||||
"context"
|
||||
"image"
|
||||
"io"
|
||||
"math"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -13,6 +16,7 @@ import (
|
||||
"github.com/pion/mediadevices/pkg/frame"
|
||||
"github.com/pion/mediadevices/pkg/io/video"
|
||||
"github.com/pion/mediadevices/pkg/prop"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestEncoder(t *testing.T) {
|
||||
@@ -360,3 +364,157 @@ func TestEncoderFrameMonotonic(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVP8DynamicQPControl(t *testing.T) {
|
||||
t.Run("VP8", func(t *testing.T) {
|
||||
p, err := NewVP8Params()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
p.LagInFrames = 0 // Disable frame lag buffering for real-time encoding
|
||||
p.RateControlEndUsage = RateControlCBR
|
||||
totalFrames := 100
|
||||
frameRate := 10
|
||||
initialWidth, initialHeight := 800, 600
|
||||
var cnt uint32
|
||||
|
||||
r, err := p.BuildVideoEncoder(
|
||||
video.ReaderFunc(func() (image.Image, func(), error) {
|
||||
i := atomic.AddUint32(&cnt, 1)
|
||||
if i == uint32(totalFrames+1) {
|
||||
return nil, nil, io.EOF
|
||||
}
|
||||
img := image.NewYCbCr(image.Rect(0, 0, initialWidth, initialHeight), image.YCbCrSubsampleRatio420)
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
for i := range img.Y {
|
||||
img.Y[i] = uint8(r.Intn(256))
|
||||
}
|
||||
for i := range img.Cb {
|
||||
img.Cb[i] = uint8(r.Intn(256))
|
||||
}
|
||||
for i := range img.Cr {
|
||||
img.Cr[i] = uint8(r.Intn(256))
|
||||
}
|
||||
return img, func() {}, nil
|
||||
}),
|
||||
prop.Media{
|
||||
Video: prop.Video{
|
||||
Width: initialWidth,
|
||||
Height: initialHeight,
|
||||
FrameRate: float32(frameRate),
|
||||
FrameFormat: frame.FormatI420,
|
||||
},
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
initialBitrate := 100
|
||||
currentBitrate := initialBitrate
|
||||
targetBitrate := 300
|
||||
for i := 0; i < totalFrames; i++ {
|
||||
r.Controller().(codec.KeyFrameController).ForceKeyFrame()
|
||||
r.Controller().(codec.QPController).DynamicQPControl(currentBitrate, targetBitrate)
|
||||
data, rel, err := r.Read()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
rel()
|
||||
encodedSize := len(data)
|
||||
currentBitrate = encodedSize * 8 / 1000 / frameRate
|
||||
}
|
||||
assert.Less(t, math.Abs(float64(targetBitrate-currentBitrate)), math.Abs(float64(initialBitrate-currentBitrate)))
|
||||
})
|
||||
}
|
||||
|
||||
func TestVP8EncodeDecode(t *testing.T) {
|
||||
t.Run("VP8", func(t *testing.T) {
|
||||
initialWidth, initialHeight := 800, 600
|
||||
reader, writer := io.Pipe()
|
||||
decoder, err := BuildVideoDecoder(reader, prop.Media{
|
||||
Video: prop.Video{
|
||||
Width: initialWidth,
|
||||
Height: initialHeight,
|
||||
FrameFormat: frame.FormatI420,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Error creating VP8 decoder: %v", err)
|
||||
}
|
||||
defer decoder.Close()
|
||||
|
||||
// [... encoder setup code ...]
|
||||
p, err := NewVP8Params()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
p.LagInFrames = 0 // Disable frame lag buffering for real-time encoding
|
||||
p.RateControlEndUsage = RateControlCBR
|
||||
totalFrames := 10
|
||||
var cnt uint32
|
||||
r, err := p.BuildVideoEncoder(
|
||||
video.ReaderFunc(func() (image.Image, func(), error) {
|
||||
i := atomic.AddUint32(&cnt, 1)
|
||||
if i == uint32(totalFrames+1) {
|
||||
return nil, nil, io.EOF
|
||||
}
|
||||
img := image.NewYCbCr(image.Rect(0, 0, initialWidth, initialHeight), image.YCbCrSubsampleRatio420)
|
||||
return img, func() {}, nil
|
||||
}),
|
||||
prop.Media{
|
||||
Video: prop.Video{
|
||||
Width: initialWidth,
|
||||
Height: initialHeight,
|
||||
FrameRate: 30,
|
||||
FrameFormat: frame.FormatI420,
|
||||
},
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
|
||||
counter := 0
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
img, rel, err := decoder.Read()
|
||||
if err == io.EOF {
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("decoder read error: %v", err)
|
||||
return
|
||||
}
|
||||
assert.Equal(t, initialWidth, img.Bounds().Dx())
|
||||
assert.Equal(t, initialHeight, img.Bounds().Dy())
|
||||
rel()
|
||||
counter++
|
||||
}
|
||||
}()
|
||||
|
||||
// --- feed encoded frames to writer
|
||||
for {
|
||||
data, rel, err := r.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("encoder error: %v", err)
|
||||
}
|
||||
_, werr := writer.Write(data)
|
||||
rel()
|
||||
if werr != nil {
|
||||
t.Fatalf("writer error: %v", werr)
|
||||
}
|
||||
}
|
||||
writer.Close()
|
||||
|
||||
// ✅ wait until decoder goroutine is done
|
||||
wg.Wait()
|
||||
assert.Equal(t, counter, totalFrames)
|
||||
})
|
||||
}
|
||||
|
Reference in New Issue
Block a user