diff --git a/pkg/codec/codec.go b/pkg/codec/codec.go index fc64395..8b9367f 100644 --- a/pkg/codec/codec.go +++ b/pkg/codec/codec.go @@ -1,6 +1,8 @@ package codec import ( + "time" + "github.com/pion/mediadevices/pkg/io/audio" "github.com/pion/mediadevices/pkg/io/video" "github.com/pion/mediadevices/pkg/prop" @@ -13,6 +15,9 @@ import ( type RTPCodec struct { webrtc.RTPCodecParameters rtp.Payloader + + // Latency of static frame size codec. + Latency time.Duration } // NewRTPH264Codec is a helper to create an H264 codec diff --git a/pkg/codec/opus/opus.go b/pkg/codec/opus/opus.go index a41cd4b..a839503 100644 --- a/pkg/codec/opus/opus.go +++ b/pkg/codec/opus/opus.go @@ -3,7 +3,6 @@ package opus import ( "errors" "fmt" - "math" "github.com/pion/mediadevices/pkg/codec" "github.com/pion/mediadevices/pkg/io/audio" @@ -28,8 +27,6 @@ type encoder struct { engine *C.OpusEncoder } -var latencies = []float64{5, 10, 20, 40, 60} - func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, error) { var cerror C.int @@ -37,10 +34,6 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, return nil, fmt.Errorf("opus: inProp.SampleRate is required") } - if p.Latency == 0 { - p.Latency = 20 - } - if params.BitRate == 0 { params.BitRate = 32000 } @@ -49,19 +42,8 @@ func newEncoder(r audio.Reader, p prop.Media, params Params) (codec.ReadCloser, params.ChannelMixer = &mixer.MonoMixer{} } - // Select the nearest supported latency - var targetLatency float64 - // TODO: use p.Latency.Milliseconds() after Go 1.12 EOL - latencyInMS := float64(p.Latency.Nanoseconds() / 1000000) - nearestDist := math.Inf(+1) - for _, latency := range latencies { - dist := math.Abs(latency - latencyInMS) - if dist >= nearestDist { - break - } - - nearestDist = dist - targetLatency = latency + if !params.Latency.Validate() { + return nil, fmt.Errorf("opus: unsupported latency %v", params.Latency) } channels := p.ChannelCount @@ -77,7 +59,7 @@ func newEncoder(r 
audio.Reader, p prop.Media, params Params) (codec.ReadCloser, } rMix := audio.NewChannelMixer(channels, params.ChannelMixer) - rBuf := audio.NewBuffer(int(targetLatency * float64(p.SampleRate) / 1000)) + rBuf := audio.NewBuffer(params.Latency.samples(p.SampleRate)) e := encoder{ engine: engine, reader: rMix(rBuf(r)), diff --git a/pkg/codec/opus/params.go b/pkg/codec/opus/params.go index 9f29916..00f8988 100644 --- a/pkg/codec/opus/params.go +++ b/pkg/codec/opus/params.go @@ -1,27 +1,69 @@ package opus import ( + "time" + "github.com/pion/mediadevices/pkg/codec" "github.com/pion/mediadevices/pkg/io/audio" "github.com/pion/mediadevices/pkg/prop" "github.com/pion/mediadevices/pkg/wave/mixer" ) +// Latency is a type of OPUS codec frame duration. +type Latency time.Duration + +// Latency values available in OPUS codec. +const ( + Latency2500us Latency = Latency(2500 * time.Microsecond) + Latency5ms Latency = Latency(5 * time.Millisecond) + Latency10ms Latency = Latency(10 * time.Millisecond) + Latency20ms Latency = Latency(20 * time.Millisecond) + Latency40ms Latency = Latency(40 * time.Millisecond) + Latency60ms Latency = Latency(60 * time.Millisecond) +) + +// Validate reports whether the Latency is a frame duration allowed in OPUS. +func (l Latency) Validate() bool { + switch l { + case Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms: + return true + default: + return false + } +} + +// Duration returns the latency as a time.Duration. +func (l Latency) Duration() time.Duration { + return time.Duration(l) +} + +// samples returns the number of samples for the given sample rate. +func (l Latency) samples(sampleRate int) int { + return int(l.Duration() * time.Duration(sampleRate) / time.Second) +} + // Params stores opus specific encoding parameters. type Params struct { codec.BaseParams // ChannelMixer is a mixer to be used if number of given and expected channels differ. ChannelMixer mixer.ChannelMixer + + // Expected latency of the codec. 
+ Latency Latency } // NewParams returns default opus codec specific parameters. func NewParams() (Params, error) { - return Params{}, nil + return Params{ + Latency: Latency20ms, + }, nil } // RTPCodec represents the codec metadata func (p *Params) RTPCodec() *codec.RTPCodec { - return codec.NewRTPOpusCodec(48000) + c := codec.NewRTPOpusCodec(48000) + c.Latency = time.Duration(p.Latency) + return c } // BuildAudioEncoder builds opus encoder with given params diff --git a/pkg/codec/opus/params_test.go b/pkg/codec/opus/params_test.go new file mode 100644 index 0000000..b280fb9 --- /dev/null +++ b/pkg/codec/opus/params_test.go @@ -0,0 +1,49 @@ +package opus + +import ( + "fmt" + "testing" + "time" +) + +func TestLatency_Validate(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + for _, l := range []Latency{ + Latency2500us, Latency5ms, Latency10ms, Latency20ms, Latency40ms, Latency60ms, + } { + if !l.Validate() { + t.Errorf("Defined Latency(%v) must be valid", l) + } + } + }) + t.Run("Invalid", func(t *testing.T) { + for _, l := range []Latency{ + 0, Latency(time.Second), + } { + if l.Validate() { + t.Errorf("Latency(%v) must be invalid", l) + } + } + }) +} + +func TestLatency_samples(t *testing.T) { + testCases := []struct { + latency Latency + sampleRate int + samples int + }{ + {Latency5ms, 48000, 240}, + {Latency20ms, 16000, 320}, + {Latency20ms, 48000, 960}, + } + for _, testCase := range testCases { + testCase := testCase + t.Run(fmt.Sprintf("%v_%d", time.Duration(testCase.latency), testCase.sampleRate), func(t *testing.T) { + samples := testCase.latency.samples(testCase.sampleRate) + if samples != testCase.samples { + t.Errorf("Expected samples: %d, got: %d", testCase.samples, samples) + } + }) + } +} diff --git a/pkg/driver/microphone/microphone.go b/pkg/driver/microphone/microphone.go index 5e848d7..3b73a8b 100644 --- a/pkg/driver/microphone/microphone.go +++ b/pkg/driver/microphone/microphone.go @@ -155,9 +155,6 @@ func (m *microphone) 
AudioRecord(inputProp prop.Media) (audio.Reader, error) { return decodedChunk, func() {}, err }) - // FIXME: The current audio detection and audio encoder can only work with a static latency. Since the latency from the driver - // can fluctuate, we need to stabilize it. Maybe there's a better way for doing this? - reader = audio.NewBuffer(int(inputProp.Latency.Seconds() * float64(inputProp.SampleRate)))(reader) return reader, nil } diff --git a/track.go b/track.go index fd19b5f..eaa040b 100644 --- a/track.go +++ b/track.go @@ -425,7 +425,7 @@ func (track *AudioTrack) newEncodedReader(codecNames ...string) (EncodedReadClos return nil, nil, err } - sample := newAudioSampler(selectedCodec.ClockRate, inputProp.Latency) + sample := newAudioSampler(selectedCodec.ClockRate, selectedCodec.Latency) return &encodedReadCloserImpl{ readFn: func() (EncodedBuffer, func(), error) {