Fix audio codec latency handling (#317)

To avoid buffering audio data multiple times, remove the buffer from
the malgo audio driver and pass the expected codec latency as a codec
parameter.
This commit is contained in:
Atsushi Watanabe
2021-03-26 06:09:57 +09:00
committed by GitHub
parent 8c2c8a9b27
commit d71b72c64d
6 changed files with 102 additions and 27 deletions

View File

@@ -1,6 +1,8 @@
package codec
import (
"time"
"github.com/pion/mediadevices/pkg/io/audio"
"github.com/pion/mediadevices/pkg/io/video"
"github.com/pion/mediadevices/pkg/prop"
@@ -13,6 +15,9 @@ import (
type RTPCodec struct {
webrtc.RTPCodecParameters
rtp.Payloader
// Latency is the frame duration of a codec with a static frame size.
// The opus package fills it from its Params.Latency, and the audio track
// passes it to newAudioSampler together with the codec clock rate.
// NOTE(review): presumably left at zero by codecs without a fixed frame
// size — confirm how the sampler treats a zero value.
Latency time.Duration
}
// NewRTPH264Codec is a helper to create an H264 codec

View File

@@ -3,7 +3,6 @@ package opus
import (
"errors"
"fmt"
"math"
"github.com/pion/mediadevices/pkg/codec"
"github.com/pion/mediadevices/pkg/io/audio"
@@ -28,8 +27,6 @@ type encoder struct {
engine *C.OpusEncoder
}
var latencies = []float64{5, 10, 20, 40, 60}
func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) {
var cerror C.int
@@ -37,10 +34,6 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
return nil, fmt.Errorf("opus: inProp.SampleRate is required")
}
if p.Latency == 0 {
p.Latency = 20
}
if params.BitRate == 0 {
params.BitRate = 32000
}
@@ -49,19 +42,8 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
params.ChannelMixer = &mixer.MonoMixer{}
}
// Select the nearest supported latency
var targetLatency float64
// TODO: use p.Latency.Milliseconds() after Go 1.12 EOL
latencyInMS := float64(p.Latency.Nanoseconds() / 1000000)
nearestDist := math.Inf(+1)
for _, latency := range latencies {
dist := math.Abs(latency - latencyInMS)
if dist >= nearestDist {
break
}
nearestDist = dist
targetLatency = latency
if !params.Latency.Validate() {
return nil, fmt.Errorf("opus: unsupported latency %v", params.Latency)
}
channels := p.ChannelCount
@@ -77,7 +59,7 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser,
}
rMix := audio.NewChannelMixer(channels, params.ChannelMixer)
rBuf := audio.NewBuffer(int(targetLatency * float64(p.SampleRate) / 1000))
rBuf := audio.NewBuffer(params.Latency.samples(p.SampleRate))
e := encoder{
engine: engine,
reader: rMix(rBuf(r)),

View File

@@ -1,27 +1,69 @@
package opus
import (
"time"
"github.com/pion/mediadevices/pkg/codec"
"github.com/pion/mediadevices/pkg/io/audio"
"github.com/pion/mediadevices/pkg/prop"
"github.com/pion/mediadevices/pkg/wave/mixer"
)
// Latency is a type of OPUS codec frame duration.
// It has the same representation as time.Duration (nanoseconds).
type Latency time.Duration

// Latency values available in OPUS codec.
const (
	Latency2500us Latency = Latency(2500 * time.Microsecond)
	Latency5ms    Latency = Latency(5 * time.Millisecond)
	Latency10ms   Latency = Latency(10 * time.Millisecond)
	Latency20ms   Latency = Latency(20 * time.Millisecond)
	Latency40ms   Latency = Latency(40 * time.Millisecond)
	Latency60ms   Latency = Latency(60 * time.Millisecond)
)

// Validate reports whether the Latency is one of the frame durations
// allowed in OPUS. Any other value, including zero, is rejected.
func (l Latency) Validate() bool {
	switch l {
	case Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms:
		return true
	default:
		return false
	}
}

// Duration returns latency in time.Duration.
func (l Latency) Duration() time.Duration {
	return time.Duration(l)
}

// String formats the latency the same way time.Duration does (e.g. "20ms"),
// so that %v and %s in error and log messages show a readable duration
// instead of a raw nanosecond count.
func (l Latency) String() string {
	return l.Duration().String()
}

// samples returns number of samples (per channel) that span the latency at
// the given sample rate in samples per second. The division is an integer
// division, so a fractional sample count is truncated toward zero.
func (l Latency) samples(sampleRate int) int {
	return int(l.Duration() * time.Duration(sampleRate) / time.Second)
}
// Params stores opus specific encoding parameters.
type Params struct {
codec.BaseParams
// ChannelMixer is a mixer to be used if number of given and expected channels differ.
ChannelMixer mixer.ChannelMixer
// Latency is the expected latency of the codec, i.e. the OPUS frame duration.
// It must be one of the values accepted by Latency.Validate; the zero value
// is not, so newEncoder returns an error for a Params literal that leaves
// this field unset. NewParams sets it to Latency20ms.
Latency Latency
}
// NewParams returns default opus codec specific parameters.
func NewParams() (Params, error) {
return Params{}, nil
return Params{
Latency: Latency20ms,
}, nil
}
// RTPCodec represents the codec metadata
func (p *Params) RTPCodec() *codec.RTPCodec {
return codec.NewRTPOpusCodec(48000)
c := codec.NewRTPOpusCodec(48000)
c.Latency = time.Duration(p.Latency)
return c
}
// BuildAudioEncoder builds opus encoder with given params

View File

@@ -0,0 +1,49 @@
package opus
import (
"fmt"
"testing"
"time"
)
// TestLatency_Validate checks that every Latency constant defined by the
// package passes Validate and that values outside that set (zero and one
// second) are rejected.
func TestLatency_Validate(t *testing.T) {
	t.Run("Valid", func(t *testing.T) {
		for _, l := range []Latency{
			Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms,
		} {
			if !l.Validate() {
				// Print as time.Duration so the failure shows "20ms" rather than nanoseconds.
				t.Errorf("Defined Latency(%v) must be valid", time.Duration(l))
			}
		}
	})
	t.Run("Invalid", func(t *testing.T) {
		for _, l := range []Latency{
			0, Latency(time.Second),
		} {
			if l.Validate() {
				t.Errorf("Latency(%v) must be invalid", time.Duration(l))
			}
		}
	})
}
// TestLatency_samples verifies the latency-to-sample-count conversion for a
// few latency and sample rate combinations, one subtest per combination.
func TestLatency_samples(t *testing.T) {
	type sampleCase struct {
		latency    Latency
		sampleRate int
		samples    int
	}
	cases := []sampleCase{
		{latency: Latency5ms, sampleRate: 48000, samples: 240},
		{latency: Latency20ms, sampleRate: 16000, samples: 320},
		{latency: Latency20ms, sampleRate: 48000, samples: 960},
	}

	for i := range cases {
		// Copy the entry so the closure below does not share the loop state.
		tc := cases[i]
		name := fmt.Sprintf("%v_%d", time.Duration(tc.latency), tc.sampleRate)
		t.Run(name, func(t *testing.T) {
			if got := tc.latency.samples(tc.sampleRate); got != tc.samples {
				t.Errorf("Expected samples: %d, got: %d", tc.samples, got)
			}
		})
	}
}

View File

@@ -155,9 +155,6 @@ func (m *microphone) AudioRecord(inputProp prop.Media) (audio.Reader, error) {
return decodedChunk, func() {}, err
})
// FIXME: The current audio detection and audio encoder can only work with a static latency. Since the latency from the driver
// can fluctuate, we need to stabilize it. Maybe there's a better way for doing this?
reader = audio.NewBuffer(int(inputProp.Latency.Seconds() * float64(inputProp.SampleRate)))(reader)
return reader, nil
}

View File

@@ -425,7 +425,7 @@ func (track *AudioTrack) newEncodedReader(codecNames ...string) (EncodedReadClos
return nil, nil, err
}
sample := newAudioSampler(selectedCodec.ClockRate, inputProp.Latency)
sample := newAudioSampler(selectedCodec.ClockRate, selectedCodec.Latency)
return &encodedReadCloserImpl{
readFn: func() (EncodedBuffer, func(), error) {