Fix audio codec latency handling (#317)

To avoid buffering audio data multiple times, remove the buffer from
the malgo audio driver and pass the expected codec latency as a codec
parameter.
This commit is contained in:
Atsushi Watanabe
2021-03-26 06:09:57 +09:00
committed by GitHub
parent 8c2c8a9b27
commit d71b72c64d
6 changed files with 102 additions and 27 deletions

View File

@@ -1,6 +1,8 @@
package codec package codec
import ( import (
"time"
"github.com/pion/mediadevices/pkg/io/audio" "github.com/pion/mediadevices/pkg/io/audio"
"github.com/pion/mediadevices/pkg/io/video" "github.com/pion/mediadevices/pkg/io/video"
"github.com/pion/mediadevices/pkg/prop" "github.com/pion/mediadevices/pkg/prop"
@@ -13,6 +15,9 @@ import (
type RTPCodec struct { type RTPCodec struct {
webrtc.RTPCodecParameters webrtc.RTPCodecParameters
rtp.Payloader rtp.Payloader
// Latency of static frame size codec.
Latency time.Duration
} }
// NewRTPH264Codec is a helper to create an H264 codec // NewRTPH264Codec is a helper to create an H264 codec

View File

@@ -3,7 +3,6 @@ package opus
import ( import (
"errors" "errors"
"fmt" "fmt"
"math"
"github.com/pion/mediadevices/pkg/codec" "github.com/pion/mediadevices/pkg/codec"
"github.com/pion/mediadevices/pkg/io/audio" "github.com/pion/mediadevices/pkg/io/audio"
@@ -28,8 +27,6 @@ type encoder struct {
engine *C.OpusEncoder engine *C.OpusEncoder
} }
var latencies = []float64{5, 10, 20, 40, 60}
func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) { func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
var cerror C.int var cerror C.int
@@ -37,10 +34,6 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
return nil, fmt.Errorf("opus: inProp.SampleRate is required") return nil, fmt.Errorf("opus: inProp.SampleRate is required")
} }
if p.Latency == 0 {
p.Latency = 20
}
if params.BitRate == 0 { if params.BitRate == 0 {
params.BitRate = 32000 params.BitRate = 32000
} }
@@ -49,19 +42,8 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
params.ChannelMixer = &mixer.MonoMixer{} params.ChannelMixer = &mixer.MonoMixer{}
} }
// Select the nearest supported latency if !params.Latency.Validate() {
var targetLatency float64 return nil, fmt.Errorf("opus: unsupported latency %v", params.Latency)
// TODO: use p.Latency.Milliseconds() after Go 1.12 EOL
latencyInMS := float64(p.Latency.Nanoseconds() / 1000000)
nearestDist := math.Inf(+1)
for _, latency := range latencies {
dist := math.Abs(latency - latencyInMS)
if dist >= nearestDist {
break
}
nearestDist = dist
targetLatency = latency
} }
channels := p.ChannelCount channels := p.ChannelCount
@@ -77,7 +59,7 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
} }
rMix := audio.NewChannelMixer(channels, params.ChannelMixer) rMix := audio.NewChannelMixer(channels, params.ChannelMixer)
rBuf := audio.NewBuffer(int(targetLatency * float64(p.SampleRate) / 1000)) rBuf := audio.NewBuffer(params.Latency.samples(p.SampleRate))
e := encoder{ e := encoder{
engine: engine, engine: engine,
reader: rMix(rBuf(r)), reader: rMix(rBuf(r)),

View File

@@ -1,27 +1,69 @@
package opus package opus
import ( import (
"time"
"github.com/pion/mediadevices/pkg/codec" "github.com/pion/mediadevices/pkg/codec"
"github.com/pion/mediadevices/pkg/io/audio" "github.com/pion/mediadevices/pkg/io/audio"
"github.com/pion/mediadevices/pkg/prop" "github.com/pion/mediadevices/pkg/prop"
"github.com/pion/mediadevices/pkg/wave/mixer" "github.com/pion/mediadevices/pkg/wave/mixer"
) )
// Latency expresses an OPUS frame duration. It has the same underlying
// representation as time.Duration (a nanosecond count).
type Latency time.Duration

// Frame durations accepted by Latency.Validate.
const (
	Latency2500us = Latency(2500 * time.Microsecond)
	Latency5ms    = Latency(5 * time.Millisecond)
	Latency10ms   = Latency(10 * time.Millisecond)
	Latency20ms   = Latency(20 * time.Millisecond)
	Latency40ms   = Latency(40 * time.Millisecond)
	Latency60ms   = Latency(60 * time.Millisecond)
)

// Validate reports whether l is exactly one of the Latency constants
// declared above. Any other value, including zero, is rejected.
func (l Latency) Validate() bool {
	for _, allowed := range [...]Latency{
		Latency2500us,
		Latency5ms,
		Latency10ms,
		Latency20ms,
		Latency40ms,
		Latency60ms,
	} {
		if l == allowed {
			return true
		}
	}
	return false
}

// Duration converts the latency to a time.Duration.
func (l Latency) Duration() time.Duration {
	return time.Duration(l)
}

// samples returns how many samples one frame of this latency spans at the
// given sample rate (in Hz). The division is an integer division, so any
// fractional sample is truncated.
func (l Latency) samples(sampleRate int) int {
	// Multiply before dividing so sub-second latencies do not collapse to zero.
	scaled := time.Duration(l) * time.Duration(sampleRate)
	return int(scaled / time.Second)
}
// Params stores opus specific encoding parameters. // Params stores opus specific encoding parameters.
type Params struct { type Params struct {
codec.BaseParams codec.BaseParams
// ChannelMixer is a mixer to be used if number of given and expected channels differ. // ChannelMixer is a mixer to be used if number of given and expected channels differ.
ChannelMixer mixer.ChannelMixer ChannelMixer mixer.ChannelMixer
// Expected latency of the codec.
Latency Latency
} }
// NewParams returns default opus codec specific parameters. // NewParams returns default opus codec specific parameters.
func NewParams() (Params, error) { func NewParams() (Params, error) {
return Params{}, nil return Params{
Latency: Latency20ms,
}, nil
} }
// RTPCodec represents the codec metadata // RTPCodec represents the codec metadata
func (p *Params) RTPCodec() *codec.RTPCodec { func (p *Params) RTPCodec() *codec.RTPCodec {
return codec.NewRTPOpusCodec(48000) c := codec.NewRTPOpusCodec(48000)
c.Latency = time.Duration(p.Latency)
return c
} }
// BuildAudioEncoder builds opus encoder with given params // BuildAudioEncoder builds opus encoder with given params

View File

@@ -0,0 +1,49 @@
package opus
import (
"fmt"
"testing"
"time"
)
// TestLatency_Validate checks that every declared Latency constant passes
// Validate and that values outside that set (zero and one second) are
// rejected.
func TestLatency_Validate(t *testing.T) {
	t.Run("Valid", func(t *testing.T) {
		for _, l := range []Latency{
			Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms,
		} {
			if !l.Validate() {
				t.Errorf("Defined Latency(%v) must be valid", l)
			}
		}
	})
	t.Run("Invalid", func(t *testing.T) {
		for _, l := range []Latency{
			0, Latency(time.Second),
		} {
			// Validate returning true here means an unsupported value was
			// accepted, so the failure message must say "invalid".
			if l.Validate() {
				t.Errorf("Latency(%v) must be invalid", l)
			}
		}
	})
}
// TestLatency_samples verifies the frame size in samples computed for a few
// latency / sample-rate combinations. Each combination runs as its own
// subtest named "<duration>_<sampleRate>".
func TestLatency_samples(t *testing.T) {
	type samplesCase struct {
		latency    Latency
		sampleRate int
		samples    int
	}
	table := []samplesCase{
		{latency: Latency5ms, sampleRate: 48000, samples: 240},
		{latency: Latency20ms, sampleRate: 16000, samples: 320},
		{latency: Latency20ms, sampleRate: 48000, samples: 960},
	}
	for _, tc := range table {
		tc := tc // per-iteration copy for the closure below
		name := fmt.Sprintf("%v_%d", time.Duration(tc.latency), tc.sampleRate)
		t.Run(name, func(t *testing.T) {
			if got := tc.latency.samples(tc.sampleRate); got != tc.samples {
				t.Errorf("Expected samples: %d, got: %d", tc.samples, got)
			}
		})
	}
}

View File

@@ -155,9 +155,6 @@ func (m *microphone) AudioRecord(inputProp prop.Media) (audio.Reader, error) {
return decodedChunk, func() {}, err return decodedChunk, func() {}, err
}) })
// FIXME: The current audio detection and audio encoder can only work with a static latency. Since the latency from the driver
// can fluctuate, we need to stabilize it. Maybe there's a better way for doing this?
reader = audio.NewBuffer(int(inputProp.Latency.Seconds() * float64(inputProp.SampleRate)))(reader)
return reader, nil return reader, nil
} }

View File

@@ -425,7 +425,7 @@ func (track *AudioTrack) newEncodedReader(codecNames ...string) (EncodedReadClos
return nil, nil, err return nil, nil, err
} }
sample := newAudioSampler(selectedCodec.ClockRate, inputProp.Latency) sample := newAudioSampler(selectedCodec.ClockRate, selectedCodec.Latency)
return &encodedReadCloserImpl{ return &encodedReadCloserImpl{
readFn: func() (EncodedBuffer, func(), error) { readFn: func() (EncodedBuffer, func(), error) {