mirror of
https://github.com/pion/mediadevices.git
synced 2025-10-26 18:10:23 +08:00
Fix audio codec latency handling (#317)
To avoid buffering audio data multiple times, remove the buffer from the malgo audio driver and pass the expected codec latency as a codec parameter.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
package codec
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/pion/mediadevices/pkg/io/audio"
|
||||
"github.com/pion/mediadevices/pkg/io/video"
|
||||
"github.com/pion/mediadevices/pkg/prop"
|
||||
@@ -13,6 +15,9 @@ import (
|
||||
type RTPCodec struct {
|
||||
webrtc.RTPCodecParameters
|
||||
rtp.Payloader
|
||||
|
||||
// Latency of static frame size codec.
|
||||
Latency time.Duration
|
||||
}
|
||||
|
||||
// NewRTPH264Codec is a helper to create an H264 codec
|
||||
|
||||
@@ -3,7 +3,6 @@ package opus
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/pion/mediadevices/pkg/codec"
|
||||
"github.com/pion/mediadevices/pkg/io/audio"
|
||||
@@ -28,8 +27,6 @@ type encoder struct {
|
||||
engine *C.OpusEncoder
|
||||
}
|
||||
|
||||
var latencies = []float64{5, 10, 20, 40, 60}
|
||||
|
||||
func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
|
||||
var cerror C.int
|
||||
|
||||
@@ -37,10 +34,6 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
|
||||
return nil, fmt.Errorf("opus: inProp.SampleRate is required")
|
||||
}
|
||||
|
||||
if p.Latency == 0 {
|
||||
p.Latency = 20
|
||||
}
|
||||
|
||||
if params.BitRate == 0 {
|
||||
params.BitRate = 32000
|
||||
}
|
||||
@@ -49,19 +42,8 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
|
||||
params.ChannelMixer = &mixer.MonoMixer{}
|
||||
}
|
||||
|
||||
// Select the nearest supported latency
|
||||
var targetLatency float64
|
||||
// TODO: use p.Latency.Milliseconds() after Go 1.12 EOL
|
||||
latencyInMS := float64(p.Latency.Nanoseconds() / 1000000)
|
||||
nearestDist := math.Inf(+1)
|
||||
for _, latency := range latencies {
|
||||
dist := math.Abs(latency - latencyInMS)
|
||||
if dist >= nearestDist {
|
||||
break
|
||||
}
|
||||
|
||||
nearestDist = dist
|
||||
targetLatency = latency
|
||||
if !params.Latency.Validate() {
|
||||
return nil, fmt.Errorf("opus: unsupported latency %v", params.Latency)
|
||||
}
|
||||
|
||||
channels := p.ChannelCount
|
||||
@@ -77,7 +59,7 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
|
||||
}
|
||||
|
||||
rMix := audio.NewChannelMixer(channels, params.ChannelMixer)
|
||||
rBuf := audio.NewBuffer(int(targetLatency * float64(p.SampleRate) / 1000))
|
||||
rBuf := audio.NewBuffer(params.Latency.samples(p.SampleRate))
|
||||
e := encoder{
|
||||
engine: engine,
|
||||
reader: rMix(rBuf(r)),
|
||||
|
||||
@@ -1,27 +1,69 @@
|
||||
package opus
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/pion/mediadevices/pkg/codec"
|
||||
"github.com/pion/mediadevices/pkg/io/audio"
|
||||
"github.com/pion/mediadevices/pkg/prop"
|
||||
"github.com/pion/mediadevices/pkg/wave/mixer"
|
||||
)
|
||||
|
||||
// Latency is the frame duration used by the OPUS codec, stored with
// time.Duration (nanosecond) resolution.
type Latency time.Duration

// Frame durations accepted by the OPUS codec.
const (
	Latency2500us = Latency(2500 * time.Microsecond)
	Latency5ms    = Latency(5 * time.Millisecond)
	Latency10ms   = Latency(10 * time.Millisecond)
	Latency20ms   = Latency(20 * time.Millisecond)
	Latency40ms   = Latency(40 * time.Millisecond)
	Latency60ms   = Latency(60 * time.Millisecond)
)

// Validate reports whether l is one of the frame durations declared above.
// Every other value, including zero, is rejected.
func (l Latency) Validate() bool {
	for _, allowed := range [...]Latency{
		Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms,
	} {
		if l == allowed {
			return true
		}
	}
	return false
}

// Duration converts the latency to a plain time.Duration.
func (l Latency) Duration() time.Duration {
	return time.Duration(l)
}

// samples returns how many samples fit into one frame of this latency at the
// given sample rate (in samples per second). The division is an integer
// division, so any fractional sample is truncated.
func (l Latency) samples(sampleRate int) int {
	scaled := time.Duration(l) * time.Duration(sampleRate)
	return int(scaled / time.Second)
}
|
||||
|
||||
// Params stores opus specific encoding parameters.
|
||||
type Params struct {
|
||||
codec.BaseParams
|
||||
// ChannelMixer is a mixer to be used if number of given and expected channels differ.
|
||||
ChannelMixer mixer.ChannelMixer
|
||||
|
||||
// Expected latency of the codec.
|
||||
Latency Latency
|
||||
}
|
||||
|
||||
// NewParams returns default opus codec specific parameters.
|
||||
func NewParams() (Params, error) {
|
||||
return Params{}, nil
|
||||
return Params{
|
||||
Latency: Latency20ms,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// RTPCodec represents the codec metadata
|
||||
func (p *Params) RTPCodec() *codec.RTPCodec {
|
||||
return codec.NewRTPOpusCodec(48000)
|
||||
c := codec.NewRTPOpusCodec(48000)
|
||||
c.Latency = time.Duration(p.Latency)
|
||||
return c
|
||||
}
|
||||
|
||||
// BuildAudioEncoder builds opus encoder with given params
|
||||
|
||||
49
pkg/codec/opus/params_test.go
Normal file
49
pkg/codec/opus/params_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package opus
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLatency_Validate(t *testing.T) {
|
||||
t.Run("Valid", func(t *testing.T) {
|
||||
for _, l := range []Latency{
|
||||
Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms,
|
||||
} {
|
||||
if !l.Validate() {
|
||||
t.Errorf("Defined Latency(%v) must be valid", l)
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("Invalid", func(t *testing.T) {
|
||||
for _, l := range []Latency{
|
||||
0, Latency(time.Second),
|
||||
} {
|
||||
if l.Validate() {
|
||||
t.Errorf("Latency(%v) must be valid", l)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestLatency_samples(t *testing.T) {
|
||||
testCases := []struct {
|
||||
latency Latency
|
||||
sampleRate int
|
||||
samples int
|
||||
}{
|
||||
{Latency5ms, 48000, 240},
|
||||
{Latency20ms, 16000, 320},
|
||||
{Latency20ms, 48000, 960},
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
testCase := testCase
|
||||
t.Run(fmt.Sprintf("%v_%d", time.Duration(testCase.latency), testCase.sampleRate), func(t *testing.T) {
|
||||
samples := testCase.latency.samples(testCase.sampleRate)
|
||||
if samples != testCase.samples {
|
||||
t.Errorf("Expected samples: %d, got: %d", testCase.samples, samples)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -155,9 +155,6 @@ func (m *microphone) AudioRecord(inputProp prop.Media) (audio.Reader, error) {
|
||||
return decodedChunk, func() {}, err
|
||||
})
|
||||
|
||||
// FIXME: The current audio detection and audio encoder can only work with a static latency. Since the latency from the driver
|
||||
// can fluctuate, we need to stabilize it. Maybe there's a better way for doing this?
|
||||
reader = audio.NewBuffer(int(inputProp.Latency.Seconds() * float64(inputProp.SampleRate)))(reader)
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
|
||||
2
track.go
2
track.go
@@ -425,7 +425,7 @@ func (track *AudioTrack) newEncodedReader(codecNames ...string) (EncodedReadClos
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
sample := newAudioSampler(selectedCodec.ClockRate, inputProp.Latency)
|
||||
sample := newAudioSampler(selectedCodec.ClockRate, selectedCodec.Latency)
|
||||
|
||||
return &encodedReadCloserImpl{
|
||||
readFn: func() (EncodedBuffer, func(), error) {
|
||||
|
||||
Reference in New Issue
Block a user