Fix audio codec latency handling (#317)

To avoid buffering audio data multiple times, remove the buffer from the malgo audio driver and pass the expected codec latency as a codec parameter.
2025-10-27 02:20:24 +08:00 · 2021-03-26 06:09:57 +09:00
parent 8c2c8a9b27
commit d71b72c64d
6 changed files with 102 additions and 27 deletions
--- a/pkg/codec/codec.go
+++ b/pkg/codec/codec.go
@@ -1,6 +1,8 @@
 package codec
 import (
 	"time"
 	"github.com/pion/mediadevices/pkg/io/audio"
 	"github.com/pion/mediadevices/pkg/io/video"
 	"github.com/pion/mediadevices/pkg/prop"
@@ -13,6 +15,9 @@ import (
 type RTPCodec struct {
 	webrtc.RTPCodecParameters
 	rtp.Payloader
 	// Latency is the frame duration of a codec with a static frame size.
 	// NOTE(review): presumably left zero for codecs without a fixed frame size — confirm.
 	Latency time.Duration
 }
 // NewRTPH264Codec is a helper to create an H264 codec
--- a/pkg/codec/opus/opus.go
+++ b/pkg/codec/opus/opus.go
@@ -3,7 +3,6 @@ package opus
 import (
 	"errors"
 	"fmt"
 	"math"
 	"github.com/pion/mediadevices/pkg/codec"
 	"github.com/pion/mediadevices/pkg/io/audio"
@@ -28,8 +27,6 @@ type encoder struct {
 	engine *C.OpusEncoder
 }
 var latencies = []float64{5, 10, 20, 40, 60}
 func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
 	var cerror C.int
@@ -37,10 +34,6 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
 		return nil, fmt.Errorf("opus: inProp.SampleRate is required")
 	}
 	if p.Latency == 0 {
 		p.Latency = 20
 	}
 	if params.BitRate == 0 {
 		params.BitRate = 32000
 	}
@@ -49,19 +42,8 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
 		params.ChannelMixer = &mixer.MonoMixer{}
 	}
-	// Select the nearest supported latency
+	if !params.Latency.Validate() {
-	var targetLatency float64
+		return nil, fmt.Errorf("opus: unsupported latency %v", params.Latency)
 	// TODO: use p.Latency.Milliseconds() after Go 1.12 EOL
 	latencyInMS := float64(p.Latency.Nanoseconds() / 1000000)
 	nearestDist := math.Inf(+1)
 	for _, latency := range latencies {
 		dist := math.Abs(latency - latencyInMS)
 		if dist >= nearestDist {
 			break
 		}
 		nearestDist = dist
 		targetLatency = latency
 	}
 	channels := p.ChannelCount
@@ -77,7 +59,7 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
 	}
 	rMix := audio.NewChannelMixer(channels, params.ChannelMixer)
-	rBuf := audio.NewBuffer(int(targetLatency * float64(p.SampleRate) / 1000))
+	rBuf := audio.NewBuffer(params.Latency.samples(p.SampleRate))
 	e := encoder{
 		engine: engine,
 		reader: rMix(rBuf(r)),
--- a/pkg/codec/opus/params.go
+++ b/pkg/codec/opus/params.go
@@ -1,27 +1,69 @@
 package opus
 import (
 	"time"
 	"github.com/pion/mediadevices/pkg/codec"
 	"github.com/pion/mediadevices/pkg/io/audio"
 	"github.com/pion/mediadevices/pkg/prop"
 	"github.com/pion/mediadevices/pkg/wave/mixer"
 )
 // Latency is a type of OPUS codec frame duration.
 // It shares its representation with time.Duration.
 type Latency time.Duration
 // Latency values available in OPUS codec.
 const (
 	Latency2500us Latency = Latency(2500 * time.Microsecond)
 	Latency5ms    Latency = Latency(5 * time.Millisecond)
 	Latency10ms   Latency = Latency(10 * time.Millisecond)
 	Latency20ms   Latency = Latency(20 * time.Millisecond)
 	Latency40ms   Latency = Latency(40 * time.Millisecond)
 	Latency60ms   Latency = Latency(60 * time.Millisecond)
 )
 // Validate reports whether the Latency is one of the frame durations
 // allowed in OPUS, i.e. one of the Latency* constants.
 func (l Latency) Validate() bool {
 	switch l {
 	case Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms:
 		return true
 	default:
 		return false
 	}
 }
 // Duration returns latency in time.Duration.
 func (l Latency) Duration() time.Duration {
 	return time.Duration(l)
 }
 // String formats the latency the same way as time.Duration (e.g. "20ms"),
 // so that %v in error and test messages is readable instead of showing a
 // raw nanosecond count.
 func (l Latency) String() string {
 	return l.Duration().String()
 }
 // samples returns number of samples for given sample rate.
 // The result is truncated toward zero when the latency does not span a
 // whole number of samples at sampleRate.
 func (l Latency) samples(sampleRate int) int {
 	return int(l.Duration() * time.Duration(sampleRate) / time.Second)
 }
 // Params stores opus specific encoding parameters.
 type Params struct {
 	codec.BaseParams
 	// ChannelMixer is a mixer to be used if number of given and expected channels differ.
 	ChannelMixer mixer.ChannelMixer
 	// Latency is the expected latency (frame duration) of the codec.
 	// It must satisfy Latency.Validate; newEncoder returns an error otherwise.
 	Latency Latency
 }
 // NewParams returns default opus codec specific parameters.
 // The default Latency is Latency20ms.
 func NewParams() (Params, error) {
-	return Params{}, nil
+	return Params{
 		Latency: Latency20ms,
 	}, nil
 }
 // RTPCodec represents the codec metadata.
 // It builds the codec with codec.NewRTPOpusCodec(48000) and copies
 // p.Latency into the codec's Latency field.
 func (p *Params) RTPCodec() *codec.RTPCodec {
-	return codec.NewRTPOpusCodec(48000)
+	c := codec.NewRTPOpusCodec(48000)
 	c.Latency = time.Duration(p.Latency)
 	return c
 }
 // BuildAudioEncoder builds opus encoder with given params
--- a/pkg/codec/opus/params_test.go
+++ b/pkg/codec/opus/params_test.go
@@ -0,0 +1,49 @@
 package opus
 import (
 	"fmt"
 	"testing"
 	"time"
 )
 // TestLatency_Validate checks that every defined Latency constant passes
 // Validate and that other values (zero, one second) are rejected.
 func TestLatency_Validate(t *testing.T) {
 	t.Run("Valid", func(t *testing.T) {
 		for _, l := range []Latency{
 			Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms,
 		} {
 			if !l.Validate() {
 				t.Errorf("Defined Latency(%v) must be valid", l)
 			}
 		}
 	})
 	t.Run("Invalid", func(t *testing.T) {
 		for _, l := range []Latency{
 			0, Latency(time.Second),
 		} {
 			// Reaching the error means Validate accepted a value it should reject.
 			if l.Validate() {
 				t.Errorf("Latency(%v) must be invalid", l)
 			}
 		}
 	})
 }
 // TestLatency_samples verifies the latency-to-sample-count conversion for
 // a few latency / sample rate pairs.
 func TestLatency_samples(t *testing.T) {
 	type sampleCase struct {
 		latency    Latency
 		sampleRate int
 		samples    int
 	}
 	cases := []sampleCase{
 		{latency: Latency5ms, sampleRate: 48000, samples: 240},
 		{latency: Latency20ms, sampleRate: 16000, samples: 320},
 		{latency: Latency20ms, sampleRate: 48000, samples: 960},
 	}
 	for _, tc := range cases {
 		tc := tc // per-iteration copy for the subtest closure
 		name := fmt.Sprintf("%v_%d", time.Duration(tc.latency), tc.sampleRate)
 		t.Run(name, func(t *testing.T) {
 			if got := tc.latency.samples(tc.sampleRate); got != tc.samples {
 				t.Errorf("Expected samples: %d, got: %d", tc.samples, got)
 			}
 		})
 	}
 }
--- a/pkg/driver/microphone/microphone.go
+++ b/pkg/driver/microphone/microphone.go
@@ -155,9 +155,6 @@ func (m *microphone) AudioRecord(inputProp prop.Media) (audio.Reader, error) {
 		return decodedChunk, func() {}, err
 	})
 	// FIXME: The current audio detection and audio encoder can only work with a static latency. Since the latency from the driver
 	//        can fluctuate, we need to stabilize it. Maybe there's a better way for doing this?
 	reader = audio.NewBuffer(int(inputProp.Latency.Seconds() * float64(inputProp.SampleRate)))(reader)
 	return reader, nil
 }
--- a/track.go
+++ b/track.go
@@ -425,7 +425,7 @@ func (track *AudioTrack) newEncodedReader(codecNames ...string) (EncodedReadClos
 		return nil, nil, err
 	}
-	sample := newAudioSampler(selectedCodec.ClockRate, inputProp.Latency)
+	sample := newAudioSampler(selectedCodec.ClockRate, selectedCodec.Latency)
 	return &encodedReadCloserImpl{
 		readFn: func() (EncodedBuffer, func(), error) {