Fix matching codecs with different rate or channels

Currently codecs are matched regardless of the clock
rate and the channel count, which makes it impossible to fully support
codecs that might have a clock rate or channel count different from the
default one, in particular LPCM, PCMU, PCMA and multiopus (the last one
is a custom Opus variant present in the Chrome source code to support
multichannel Opus).

For instance, let's suppose a peer (receiver) wants to receive an audio
track encoded with LPCM, 48 kHz sample rate and 2 channels. This receiver
doesn't know the audio codec yet, therefore it advertises all supported
sample rates and channel counts in the SDP:

```
LPCM/44100
LPCM/48000
LPCM/44100/2
LPCM/48000/2
```

The other peer (sender) receives the SDP, but since the clock rate and
channel count are not taken into consideration when matching codecs, the
sender codec `LPCM/48000/2` is wrongly associated with the receiver
codec `LPCM/44100`. The result is that the audio track cannot be decoded
correctly on the receiver side.

This patch fixes the issue and has been running smoothly in MediaMTX for
almost a year.

Unfortunately, in lots of examples and tests, clock rate and/or channels
are not set (and in fact they produce horrible SDPs that
contain `VP8/0` instead of `VP8/90000` and are incompatible with lots of
servers), therefore this new check causes trouble in existing code. In
order to maintain compatibility, default clock rates and channels are
provided for most codecs.

In the future, it might be better to update the examples (I can do it in a
future patch) and remove the exception.
This commit is contained in:
Alessandro Ros
2025-02-16 03:16:47 +01:00
committed by GitHub
parent 70d06fd9f6
commit 969ab684e3
4 changed files with 330 additions and 32 deletions

View File

@@ -8,6 +8,32 @@ import (
"strings"
)
// defaultClockRate returns the clock rate assumed for mimeType when a codec
// was declared without one. The MIME type is compared case-insensitively.
// Opus maps to 48000, PCMU and PCMA map to 8000, and every other MIME type
// falls back to 90000.
func defaultClockRate(mimeType string) uint32 {
	// A switch keeps the lookup allocation-free; the previous implementation
	// built a fresh map on every call.
	switch strings.ToLower(mimeType) {
	case "audio/opus":
		return 48000
	case "audio/pcmu", "audio/pcma":
		return 8000
	default:
		return 90000
	}
}
// defaultChannels returns the channel count assumed for mimeType when a codec
// was declared without one. The MIME type is compared case-insensitively.
// Opus maps to 2; every other MIME type yields 0, meaning this function
// provides no default for it.
func defaultChannels(mimeType string) uint16 {
	// A switch keeps the lookup allocation-free; the previous implementation
	// built a fresh map on every call.
	switch strings.ToLower(mimeType) {
	case "audio/opus":
		return 2
	default:
		return 0
	}
}
func parseParameters(line string) map[string]string {
parameters := make(map[string]string)
@@ -24,6 +50,61 @@ func parseParameters(line string) map[string]string {
return parameters
}
// ClockRateEqual reports whether valA and valB describe the same clock rate
// for the given MIME type.
//
// A value of zero is treated as "not set" and is replaced by the default
// clock rate of mimeType before the comparison, because many callers declare
// formats without a clock rate. This leniency is a compatibility exception
// that would ideally be removed in a future major release.
func ClockRateEqual(mimeType string, valA, valB uint32) bool {
	// effective substitutes the codec default for an unset (zero) rate.
	effective := func(rate uint32) uint32 {
		if rate != 0 {
			return rate
		}

		return defaultClockRate(mimeType)
	}

	return effective(valA) == effective(valB)
}
// ChannelsEqual reports whether valA and valB describe the same channel count
// for the given MIME type.
//
// A value of zero is treated as "not set". It is first replaced by the
// default channel count of mimeType, because many callers declare formats
// without channels; this leniency is a compatibility exception that would
// ideally be removed in a future major release. If the count is still zero it
// is taken to be one, since RFC 8866 says the channel count "is OPTIONAL and
// may be omitted if the number of channels is one".
func ChannelsEqual(mimeType string, valA, valB uint16) bool {
	// effective resolves an unset (zero) count to the codec default and,
	// failing that, to a single channel.
	effective := func(count uint16) uint16 {
		if count == 0 {
			count = defaultChannels(mimeType)
		}
		if count == 0 {
			return 1
		}

		return count
	}

	return effective(valA) == effective(valB)
}
// paramsEqual reports whether two parameter sets are compatible: every key
// that is present in both maps must carry the same value, compared
// case-insensitively. Keys that appear in only one of the maps are ignored,
// so nil or empty maps are compatible with anything. Keys themselves are
// matched exactly.
func paramsEqual(valA, valB map[string]string) bool {
	// Only keys shared by both maps matter and strings.EqualFold is
	// symmetric, so a single pass over the smaller map covers every shared
	// key; a second pass in the opposite direction would repeat the checks.
	smaller, larger := valA, valB
	if len(smaller) > len(larger) {
		smaller, larger = larger, smaller
	}

	for key, val := range smaller {
		if other, ok := larger[key]; ok && !strings.EqualFold(other, val) {
			return false
		}
	}

	return true
}
// FMTP interface for implementing custom
// FMTP parsers based on MimeType.
type FMTP interface {
@@ -39,7 +120,7 @@ type FMTP interface {
}
// Parse parses an fmtp string based on the MimeType.
func Parse(mimeType, line string) FMTP {
func Parse(mimeType string, clockRate uint32, channels uint16, line string) FMTP {
var fmtp FMTP
parameters := parseParameters(line)
@@ -63,6 +144,8 @@ func Parse(mimeType, line string) FMTP {
default:
fmtp = &genericFMTP{
mimeType: mimeType,
clockRate: clockRate,
channels: channels,
parameters: parameters,
}
}
@@ -72,6 +155,8 @@ func Parse(mimeType, line string) FMTP {
// genericFMTP is the FMTP value that Parse builds in its default branch,
// i.e. for MIME types that have no dedicated parser there.
type genericFMTP struct {
// mimeType is the MIME type handed to Parse; Match compares it
// case-insensitively.
mimeType string
// clockRate is the clock rate handed to Parse; Match compares it through
// ClockRateEqual, which substitutes a default for zero.
clockRate uint32
// channels is the channel count handed to Parse; Match compares it through
// ChannelsEqual, which substitutes a default for zero.
channels uint16
// parameters holds the key/value pairs produced by parseParameters from the
// fmtp line.
parameters map[string]string
}
@@ -87,23 +172,10 @@ func (g *genericFMTP) Match(b FMTP) bool {
return false
}
if !strings.EqualFold(g.mimeType, fmtp.MimeType()) {
return false
}
for k, v := range g.parameters {
if vb, ok := fmtp.parameters[k]; ok && !strings.EqualFold(vb, v) {
return false
}
}
for k, v := range fmtp.parameters {
if va, ok := g.parameters[k]; ok && !strings.EqualFold(va, v) {
return false
}
}
return true
return strings.EqualFold(g.mimeType, fmtp.MimeType()) &&
ClockRateEqual(g.mimeType, g.clockRate, fmtp.clockRate) &&
ChannelsEqual(g.mimeType, g.channels, fmtp.channels) &&
paramsEqual(g.parameters, fmtp.parameters)
}
func (g *genericFMTP) Parameter(key string) (string, bool) {

View File

@@ -56,17 +56,23 @@ func TestParseParameters(t *testing.T) {
func TestParse(t *testing.T) {
for _, ca := range []struct {
name string
mimeType string
line string
expected FMTP
name string
mimeType string
clockRate uint32
channels uint16
line string
expected FMTP
}{
{
"generic",
"generic",
90000,
2,
"key-name=value",
&genericFMTP{
mimeType: "generic",
mimeType: "generic",
clockRate: 90000,
channels: 2,
parameters: map[string]string{
"key-name": "value",
},
@@ -75,9 +81,13 @@ func TestParse(t *testing.T) {
{
"generic case normalization",
"generic",
90000,
2,
"Key=value",
&genericFMTP{
mimeType: "generic",
mimeType: "generic",
clockRate: 90000,
channels: 2,
parameters: map[string]string{
"key": "value",
},
@@ -86,6 +96,8 @@ func TestParse(t *testing.T) {
{
"h264",
"video/h264",
90000,
0,
"key-name=value",
&h264FMTP{
parameters: map[string]string{
@@ -96,6 +108,8 @@ func TestParse(t *testing.T) {
{
"vp9",
"video/vp9",
90000,
0,
"key-name=value",
&vp9FMTP{
parameters: map[string]string{
@@ -106,6 +120,8 @@ func TestParse(t *testing.T) {
{
"av1",
"video/av1",
90000,
0,
"key-name=value",
&av1FMTP{
parameters: map[string]string{
@@ -115,7 +131,7 @@ func TestParse(t *testing.T) {
},
} {
t.Run(ca.name, func(t *testing.T) {
f := Parse(ca.mimeType, ca.line)
f := Parse(ca.mimeType, ca.clockRate, ca.channels, ca.line)
if !reflect.DeepEqual(ca.expected, f) {
t.Errorf("expected '%v', got '%v'", ca.expected, f)
}
@@ -177,6 +193,27 @@ func TestMatch(t *testing.T) { //nolint:maintidx
},
true,
},
{
"generic inferred channels",
&genericFMTP{
mimeType: "generic",
channels: 1,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "generic",
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
true,
},
{
"generic inconsistent different kind",
&genericFMTP{
@@ -210,6 +247,52 @@ func TestMatch(t *testing.T) { //nolint:maintidx
},
false,
},
{
"generic inconsistent different clock rate",
&genericFMTP{
mimeType: "generic",
clockRate: 90000,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "generic",
clockRate: 48000,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
false,
},
{
"generic inconsistent different channels",
&genericFMTP{
mimeType: "generic",
clockRate: 90000,
channels: 2,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "generic",
clockRate: 90000,
channels: 1,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
false,
},
{
"generic inconsistent different parameters",
&genericFMTP{
@@ -498,6 +581,126 @@ func TestMatch(t *testing.T) { //nolint:maintidx
},
false,
},
{
"pcmu channels",
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 8000,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 8000,
channels: 1,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
true,
},
{
"pcmu inconsistent channels",
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 8000,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 8000,
channels: 2,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
false,
},
{
"pcmu clockrate",
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 0,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 8000,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
true,
},
{
"pcmu inconsistent clockrate",
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 0,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "audio/pcmu",
clockRate: 16000,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
false,
},
{
"opus clockrate",
&genericFMTP{
mimeType: "audio/opus",
clockRate: 0,
channels: 0,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
&genericFMTP{
mimeType: "audio/opus",
clockRate: 48000,
channels: 2,
parameters: map[string]string{
"key1": "value1",
"key2": "value2",
"key3": "value3",
},
},
true,
},
} {
t.Run(ca.name, func(t *testing.T) {
c := ca.a.Match(ca.b)

View File

@@ -246,7 +246,10 @@ func (m *MediaEngine) RegisterDefaultCodecs() error {
// addCodec will append codec if it not exists.
func (m *MediaEngine) addCodec(codecs []RTPCodecParameters, codec RTPCodecParameters) []RTPCodecParameters {
for _, c := range codecs {
if c.MimeType == codec.MimeType && c.PayloadType == codec.PayloadType {
if c.MimeType == codec.MimeType &&
fmtp.ClockRateEqual(c.MimeType, c.ClockRate, codec.ClockRate) &&
fmtp.ChannelsEqual(c.MimeType, c.Channels, codec.Channels) &&
c.PayloadType == codec.PayloadType {
return codecs
}
}
@@ -459,7 +462,12 @@ func (m *MediaEngine) matchRemoteCodec(
codecs = m.audioCodecs
}
remoteFmtp := fmtp.Parse(remoteCodec.RTPCodecCapability.MimeType, remoteCodec.RTPCodecCapability.SDPFmtpLine)
remoteFmtp := fmtp.Parse(
remoteCodec.RTPCodecCapability.MimeType,
remoteCodec.RTPCodecCapability.ClockRate,
remoteCodec.RTPCodecCapability.Channels,
remoteCodec.RTPCodecCapability.SDPFmtpLine)
if apt, hasApt := remoteFmtp.Parameter("apt"); hasApt { //nolint:nestif
payloadType, err := strconv.ParseUint(apt, 10, 8)
if err != nil {

View File

@@ -108,19 +108,34 @@ func codecParametersFuzzySearch(
needle RTPCodecParameters,
haystack []RTPCodecParameters,
) (RTPCodecParameters, codecMatchType) {
needleFmtp := fmtp.Parse(needle.RTPCodecCapability.MimeType, needle.RTPCodecCapability.SDPFmtpLine)
needleFmtp := fmtp.Parse(
needle.RTPCodecCapability.MimeType,
needle.RTPCodecCapability.ClockRate,
needle.RTPCodecCapability.Channels,
needle.RTPCodecCapability.SDPFmtpLine)
// First attempt to match on MimeType + SDPFmtpLine
// First attempt to match on MimeType + ClockRate + Channels + SDPFmtpLine
for _, c := range haystack {
cfmtp := fmtp.Parse(c.RTPCodecCapability.MimeType, c.RTPCodecCapability.SDPFmtpLine)
cfmtp := fmtp.Parse(
c.RTPCodecCapability.MimeType,
c.RTPCodecCapability.ClockRate,
c.RTPCodecCapability.Channels,
c.RTPCodecCapability.SDPFmtpLine)
if needleFmtp.Match(cfmtp) {
return c, codecMatchExact
}
}
// Fallback to just MimeType
// Fallback to just MimeType + ClockRate + Channels
for _, c := range haystack {
if strings.EqualFold(c.RTPCodecCapability.MimeType, needle.RTPCodecCapability.MimeType) {
if strings.EqualFold(c.RTPCodecCapability.MimeType, needle.RTPCodecCapability.MimeType) &&
fmtp.ClockRateEqual(c.RTPCodecCapability.MimeType,
c.RTPCodecCapability.ClockRate,
needle.RTPCodecCapability.ClockRate) &&
fmtp.ChannelsEqual(c.RTPCodecCapability.MimeType,
c.RTPCodecCapability.Channels,
needle.RTPCodecCapability.Channels) {
return c, codecMatchPartial
}
}