mirror of
https://github.com/pion/mediadevices.git
synced 2025-10-26 18:10:23 +08:00
Fix audio codec latency handling (#317)
To avoid buffering audio data multiple times, remove the buffer from the malgo audio driver and pass the expected codec latency as a codec parameter.
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
package codec
|
package codec
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pion/mediadevices/pkg/io/audio"
|
"github.com/pion/mediadevices/pkg/io/audio"
|
||||||
"github.com/pion/mediadevices/pkg/io/video"
|
"github.com/pion/mediadevices/pkg/io/video"
|
||||||
"github.com/pion/mediadevices/pkg/prop"
|
"github.com/pion/mediadevices/pkg/prop"
|
||||||
@@ -13,6 +15,9 @@ import (
|
|||||||
type RTPCodec struct {
|
type RTPCodec struct {
|
||||||
webrtc.RTPCodecParameters
|
webrtc.RTPCodecParameters
|
||||||
rtp.Payloader
|
rtp.Payloader
|
||||||
|
|
||||||
|
// Latency is the latency of a codec with a static frame size.
|
||||||
|
Latency time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewRTPH264Codec is a helper to create an H264 codec
|
// NewRTPH264Codec is a helper to create an H264 codec
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package opus
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/pion/mediadevices/pkg/codec"
|
"github.com/pion/mediadevices/pkg/codec"
|
||||||
"github.com/pion/mediadevices/pkg/io/audio"
|
"github.com/pion/mediadevices/pkg/io/audio"
|
||||||
@@ -28,8 +27,6 @@ type encoder struct {
|
|||||||
engine *C.OpusEncoder
|
engine *C.OpusEncoder
|
||||||
}
|
}
|
||||||
|
|
||||||
var latencies = []float64{5, 10, 20, 40, 60}
|
|
||||||
|
|
||||||
func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
|
func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
|
||||||
var cerror C.int
|
var cerror C.int
|
||||||
|
|
||||||
@@ -37,10 +34,6 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
|
|||||||
return nil, fmt.Errorf("opus: inProp.SampleRate is required")
|
return nil, fmt.Errorf("opus: inProp.SampleRate is required")
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.Latency == 0 {
|
|
||||||
p.Latency = 20
|
|
||||||
}
|
|
||||||
|
|
||||||
if params.BitRate == 0 {
|
if params.BitRate == 0 {
|
||||||
params.BitRate = 32000
|
params.BitRate = 32000
|
||||||
}
|
}
|
||||||
@@ -49,19 +42,8 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
|
|||||||
params.ChannelMixer = &mixer.MonoMixer{}
|
params.ChannelMixer = &mixer.MonoMixer{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select the nearest supported latency
|
if !params.Latency.Validate() {
|
||||||
var targetLatency float64
|
return nil, fmt.Errorf("opus: unsupported latency %v", params.Latency)
|
||||||
// TODO: use p.Latency.Milliseconds() after Go 1.12 EOL
|
|
||||||
latencyInMS := float64(p.Latency.Nanoseconds() / 1000000)
|
|
||||||
nearestDist := math.Inf(+1)
|
|
||||||
for _, latency := range latencies {
|
|
||||||
dist := math.Abs(latency - latencyInMS)
|
|
||||||
if dist >= nearestDist {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
nearestDist = dist
|
|
||||||
targetLatency = latency
|
|
||||||
}
|
}
|
||||||
|
|
||||||
channels := p.ChannelCount
|
channels := p.ChannelCount
|
||||||
@@ -77,7 +59,7 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
|
|||||||
}
|
}
|
||||||
|
|
||||||
rMix := audio.NewChannelMixer(channels, params.ChannelMixer)
|
rMix := audio.NewChannelMixer(channels, params.ChannelMixer)
|
||||||
rBuf := audio.NewBuffer(int(targetLatency * float64(p.SampleRate) / 1000))
|
rBuf := audio.NewBuffer(params.Latency.samples(p.SampleRate))
|
||||||
e := encoder{
|
e := encoder{
|
||||||
engine: engine,
|
engine: engine,
|
||||||
reader: rMix(rBuf(r)),
|
reader: rMix(rBuf(r)),
|
||||||
|
|||||||
@@ -1,27 +1,69 @@
|
|||||||
package opus
|
package opus
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pion/mediadevices/pkg/codec"
|
"github.com/pion/mediadevices/pkg/codec"
|
||||||
"github.com/pion/mediadevices/pkg/io/audio"
|
"github.com/pion/mediadevices/pkg/io/audio"
|
||||||
"github.com/pion/mediadevices/pkg/prop"
|
"github.com/pion/mediadevices/pkg/prop"
|
||||||
"github.com/pion/mediadevices/pkg/wave/mixer"
|
"github.com/pion/mediadevices/pkg/wave/mixer"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Latency is the type used for OPUS codec frame durations.
|
||||||
|
type Latency time.Duration
|
||||||
|
|
||||||
|
// Latency values available in OPUS codec.
|
||||||
|
const (
|
||||||
|
Latency2500us Latency = Latency(2500 * time.Microsecond)
|
||||||
|
Latency5ms Latency = Latency(5 * time.Millisecond)
|
||||||
|
Latency10ms Latency = Latency(10 * time.Millisecond)
|
||||||
|
Latency20ms Latency = Latency(20 * time.Millisecond)
|
||||||
|
Latency40ms Latency = Latency(40 * time.Millisecond)
|
||||||
|
Latency60ms Latency = Latency(60 * time.Millisecond)
|
||||||
|
)
|
||||||
|
|
||||||
|
// Validate reports whether the Latency is one of the values allowed in OPUS.
|
||||||
|
func (l Latency) Validate() bool {
|
||||||
|
switch l {
|
||||||
|
case Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Duration returns latency in time.Duration.
|
||||||
|
func (l Latency) Duration() time.Duration {
|
||||||
|
return time.Duration(l)
|
||||||
|
}
|
||||||
|
|
||||||
|
// samples returns the number of samples spanning this latency at the given sample rate.
|
||||||
|
func (l Latency) samples(sampleRate int) int {
|
||||||
|
return int(l.Duration() * time.Duration(sampleRate) / time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
// Params stores opus specific encoding parameters.
|
// Params stores opus specific encoding parameters.
|
||||||
type Params struct {
|
type Params struct {
|
||||||
codec.BaseParams
|
codec.BaseParams
|
||||||
// ChannelMixer is a mixer to be used if number of given and expected channels differ.
|
// ChannelMixer is a mixer to be used if number of given and expected channels differ.
|
||||||
ChannelMixer mixer.ChannelMixer
|
ChannelMixer mixer.ChannelMixer
|
||||||
|
|
||||||
|
// Expected latency of the codec.
|
||||||
|
Latency Latency
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewParams returns default opus codec specific parameters.
|
// NewParams returns default opus codec specific parameters.
|
||||||
func NewParams() (Params, error) {
|
func NewParams() (Params, error) {
|
||||||
return Params{}, nil
|
return Params{
|
||||||
|
Latency: Latency20ms,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RTPCodec represents the codec metadata
|
// RTPCodec represents the codec metadata
|
||||||
func (p *Params) RTPCodec() *codec.RTPCodec {
|
func (p *Params) RTPCodec() *codec.RTPCodec {
|
||||||
return codec.NewRTPOpusCodec(48000)
|
c := codec.NewRTPOpusCodec(48000)
|
||||||
|
c.Latency = time.Duration(p.Latency)
|
||||||
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
// BuildAudioEncoder builds opus encoder with given params
|
// BuildAudioEncoder builds opus encoder with given params
|
||||||
|
|||||||
49
pkg/codec/opus/params_test.go
Normal file
49
pkg/codec/opus/params_test.go
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
package opus
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLatency_Validate(t *testing.T) {
|
||||||
|
t.Run("Valid", func(t *testing.T) {
|
||||||
|
for _, l := range []Latency{
|
||||||
|
Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms,
|
||||||
|
} {
|
||||||
|
if !l.Validate() {
|
||||||
|
t.Errorf("Defined Latency(%v) must be valid", l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
t.Run("Invalid", func(t *testing.T) {
|
||||||
|
for _, l := range []Latency{
|
||||||
|
0, Latency(time.Second),
|
||||||
|
} {
|
||||||
|
if l.Validate() {
|
||||||
|
t.Errorf("Latency(%v) must be valid", l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatency_samples(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
latency Latency
|
||||||
|
sampleRate int
|
||||||
|
samples int
|
||||||
|
}{
|
||||||
|
{Latency5ms, 48000, 240},
|
||||||
|
{Latency20ms, 16000, 320},
|
||||||
|
{Latency20ms, 48000, 960},
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
testCase := testCase
|
||||||
|
t.Run(fmt.Sprintf("%v_%d", time.Duration(testCase.latency), testCase.sampleRate), func(t *testing.T) {
|
||||||
|
samples := testCase.latency.samples(testCase.sampleRate)
|
||||||
|
if samples != testCase.samples {
|
||||||
|
t.Errorf("Expected samples: %d, got: %d", testCase.samples, samples)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -155,9 +155,6 @@ func (m *microphone) AudioRecord(inputProp prop.Media) (audio.Reader, error) {
|
|||||||
return decodedChunk, func() {}, err
|
return decodedChunk, func() {}, err
|
||||||
})
|
})
|
||||||
|
|
||||||
// FIXME: The current audio detection and audio encoder can only work with a static latency. Since the latency from the driver
|
|
||||||
// can fluctuate, we need to stabilize it. Maybe there's a better way for doing this?
|
|
||||||
reader = audio.NewBuffer(int(inputProp.Latency.Seconds() * float64(inputProp.SampleRate)))(reader)
|
|
||||||
return reader, nil
|
return reader, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
2
track.go
2
track.go
@@ -425,7 +425,7 @@ func (track *AudioTrack) newEncodedReader(codecNames ...string) (EncodedReadClos
|
|||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
sample := newAudioSampler(selectedCodec.ClockRate, inputProp.Latency)
|
sample := newAudioSampler(selectedCodec.ClockRate, selectedCodec.Latency)
|
||||||
|
|
||||||
return &encodedReadCloserImpl{
|
return &encodedReadCloserImpl{
|
||||||
readFn: func() (EncodedBuffer, func(), error) {
|
readFn: func() (EncodedBuffer, func(), error) {
|
||||||
|
|||||||
Reference in New Issue
Block a user