Compare commits

...

8 Commits

Author SHA1 Message Date
Lukas Herman
dda8d2502f Use square instead 2020-11-05 21:03:22 -08:00
Lukas Herman
d593404e39 WIP 2020-11-04 22:34:17 -08:00
Lukas Herman
3ea35bebab Fix broken mediastream unit test 2020-11-02 23:05:59 -08:00
Lukas Herman
83c08e6c5f Recreate facedetection example with the new APIs 2020-11-02 23:04:58 -08:00
Lukas Herman
2f17017450 Rename rtp-send to rtp 2020-11-02 22:31:46 -08:00
Lukas Herman
7cbda134b0 Add NewRTPReader to Track interface 2020-11-02 22:28:01 -08:00
Lukas Herman
115be126ec Add documentation around Reader interfaces 2020-11-02 22:22:19 -08:00
Lukas Herman
79dcb4f1af Add video and audio RTP readers 2020-11-02 22:12:43 -08:00
11 changed files with 429 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1,191 @@
package main
import (
"fmt"
"image"
"io/ioutil"
"log"
"net"
"os"
pigo "github.com/esimov/pigo/core"
"github.com/pion/mediadevices"
"github.com/pion/mediadevices/pkg/codec/vpx" // This is required to use h264 video encoder
_ "github.com/pion/mediadevices/pkg/driver/camera" // This is required to register camera adapter
"github.com/pion/mediadevices/pkg/frame"
"github.com/pion/mediadevices/pkg/io/video"
"github.com/pion/mediadevices/pkg/prop"
)
const (
confidenceLevel = 9.5
mtu = 1000
thickness = 5
)
var (
cascade []byte
classifier *pigo.Pigo
)
func must(err error) {
if err != nil {
panic(err)
}
}
func detectFaces(frame *image.YCbCr) []pigo.Detection {
bounds := frame.Bounds()
cascadeParams := pigo.CascadeParams{
MinSize: 100,
MaxSize: 600,
ShiftFactor: 0.15,
ScaleFactor: 1.1,
ImageParams: pigo.ImageParams{
Pixels: frame.Y, // Y in YCbCr should be enough to detect faces
Rows: bounds.Dy(),
Cols: bounds.Dx(),
Dim: bounds.Dx(),
},
}
// Run the classifier over the obtained leaf nodes and return the detection results.
// The result contains quadruplets representing the row, column, scale and detection score.
dets := classifier.RunCascade(cascadeParams, 0.0)
// Calculate the intersection over union (IoU) of two clusters.
dets = classifier.ClusterDetections(dets, 0)
return dets
}
func drawRect(frame *image.YCbCr, x0, y0, size int) {
if x0 < 0 {
x0 = 0
}
if y0 < 0 {
y0 = 0
}
width := frame.Bounds().Dx()
height := frame.Bounds().Dy()
x1 := x0 + size
y1 := y0 + size
if x1 >= width {
x1 = width - 1
}
if y1 >= height {
y1 = height - 1
}
convert := func(x, y int) int {
return y*width + x
}
for x := x0; x < x1; x++ {
for t := 0; t < thickness; t++ {
frame.Y[convert(x, y0+t)] = 0
frame.Y[convert(x, y1-t)] = 0
}
}
for y := y0; y < y1; y++ {
for t := 0; t < thickness; t++ {
frame.Y[convert(x0+t, y)] = 0
frame.Y[convert(x1-t, y)] = 0
}
}
}
func detectFace(r video.Reader) video.Reader {
return video.ReaderFunc(func() (img image.Image, release func(), err error) {
img, release, err = r.Read()
if err != nil {
return
}
yuv := img.(*image.YCbCr)
dets := detectFaces(yuv)
for _, det := range dets {
if det.Q < confidenceLevel {
continue
}
drawRect(yuv, det.Col-det.Scale/2, det.Row-det.Scale/2, det.Scale)
}
return
})
}
func main() {
if len(os.Args) != 2 {
fmt.Printf("usage: %s host:port\n", os.Args[0])
return
}
dest := os.Args[1]
// prepare face detector
var err error
cascade, err = ioutil.ReadFile("facefinder")
if err != nil {
log.Fatalf("Error reading the cascade file: %s", err)
}
p := pigo.NewPigo()
// Unpack the binary file. This will return the number of cascade trees,
// the tree depth, the threshold and the prediction from tree's leaf nodes.
classifier, err = p.Unpack(cascade)
if err != nil {
log.Fatalf("Error unpacking the cascade file: %s", err)
}
vp8Params, err := vpx.NewVP8Params()
must(err)
vp8Params.BitRate = 1_000_000 // 100kbps
codecSelector := mediadevices.NewCodecSelector(
mediadevices.WithVideoEncoders(&vp8Params),
)
mediaStream, err := mediadevices.GetUserMedia(mediadevices.MediaStreamConstraints{
Video: func(c *mediadevices.MediaTrackConstraints) {
c.FrameFormat = prop.FrameFormatExact(frame.FormatUYVY)
c.Width = prop.Int(640)
c.Height = prop.Int(480)
},
Codec: codecSelector,
})
must(err)
// since we're trying to access the raw data, we need to cast Track to its real type, *mediadevices.VideoTrack
videoTrack := mediaStream.GetVideoTracks()[0].(*mediadevices.VideoTrack)
defer videoTrack.Close()
videoTrack.Transform(detectFace)
rtpReader, err := videoTrack.NewRTPReader(vp8Params.RTPCodec().Name, mtu)
must(err)
addr, err := net.ResolveUDPAddr("udp", dest)
must(err)
conn, err := net.DialUDP("udp", nil, addr)
must(err)
buff := make([]byte, mtu)
for {
pkts, release, err := rtpReader.Read()
must(err)
for _, pkt := range pkts {
n, err := pkt.MarshalTo(buff)
must(err)
_, err = conn.Write(buff[:n])
must(err)
}
release()
}
}

30
examples/rtp/README.md Normal file
View File

@@ -0,0 +1,30 @@
## Instructions
### Download rtpexample
```
go get github.com/pion/mediadevices/examples/rtp
```
### Listen RTP
Install GStreamer and run:
```
gst-launch-1.0 udpsrc port=5000 caps=application/x-rtp,encode-name=VP8 \
! rtpvp8depay ! vp8dec ! videoconvert ! autovideosink
```
Or run VLC media plyer:
```
vlc ./vp8.sdp
```
### Run rtp
Run `rtp localhost:5000`
A video should start playing in your GStreamer or VLC window.
It's not WebRTC, but pure RTP.
Congrats, you have used pion-MediaDevices! Now start building something cool

76
examples/rtp/main.go Normal file
View File

@@ -0,0 +1,76 @@
package main
import (
"fmt"
"net"
"os"
"github.com/pion/mediadevices"
"github.com/pion/mediadevices/pkg/codec/vpx" // This is required to use VP8/VP9 video encoder
_ "github.com/pion/mediadevices/pkg/driver/camera" // This is required to register camera adapter
"github.com/pion/mediadevices/pkg/frame"
"github.com/pion/mediadevices/pkg/prop"
)
const (
mtu = 1000
)
func must(err error) {
if err != nil {
panic(err)
}
}
func main() {
if len(os.Args) != 2 {
fmt.Printf("usage: %s host:port\n", os.Args[0])
return
}
dest := os.Args[1]
vp8Params, err := vpx.NewVP8Params()
must(err)
vp8Params.BitRate = 100000 // 100kbps
codecSelector := mediadevices.NewCodecSelector(
mediadevices.WithVideoEncoders(&vp8Params),
)
mediaStream, err := mediadevices.GetUserMedia(mediadevices.MediaStreamConstraints{
Video: func(c *mediadevices.MediaTrackConstraints) {
c.FrameFormat = prop.FrameFormat(frame.FormatYUY2)
c.Width = prop.Int(640)
c.Height = prop.Int(480)
},
Codec: codecSelector,
})
must(err)
videoTrack := mediaStream.GetVideoTracks()[0]
defer videoTrack.Close()
rtpReader, err := videoTrack.NewRTPReader(vp8Params.RTPCodec().Name, mtu)
must(err)
addr, err := net.ResolveUDPAddr("udp", dest)
must(err)
conn, err := net.DialUDP("udp", nil, addr)
must(err)
buff := make([]byte, mtu)
for {
pkts, release, err := rtpReader.Read()
must(err)
for _, pkt := range pkts {
n, err := pkt.MarshalTo(buff)
must(err)
_, err = conn.Write(buff[:n])
must(err)
}
release()
}
}

9
examples/rtp/vp8.sdp Normal file
View File

@@ -0,0 +1,9 @@
v=0
o=- 1234567890 1234567890 IN IP4 0.0.0.0
s=RTP-Send Example
i=Example
c=IN IP4 0.0.0.0
t=0 0
a=recvonly
m=video 5000 RTP/AVP 100
a=rtpmap:100 VP8/90000

View File

@@ -33,6 +33,10 @@ func (track *mockMediaStreamTrack) Unbind(pc *webrtc.PeerConnection) error {
return nil
}
func (track *mockMediaStreamTrack) NewRTPReader(codecName string, mtu int) (RTPReadCloser, error) {
return nil, nil
}
func TestMediaStreamFilters(t *testing.T) {
audioTracks := []Track{
&mockMediaStreamTrack{AudioInput},

View File

@@ -5,6 +5,14 @@ import (
)
type Reader interface {
// Read reads data from the source. The caller is responsible to release the memory that's associated
// with data by calling the given release function. When err is not nil, the caller MUST NOT call release
// as data is going to be nil (no memory was given). Otherwise, the caller SHOULD call release after
// using the data. The caller is NOT REQUIRED to call release, as this is only a part of memory management
// optimization. If release is not called, the source is forced to allocate a new memory, which also means
// there will be new allocations during streaming, and old unused memory will become garbage. As a consequence,
// these garbage will put a lot of pressure to the garbage collector and makes it to run more often and finish
// slower as the heap memory usage increases and more garbage to collect.
Read() (chunk wave.Audio, release func(), err error)
}

View File

@@ -3,6 +3,14 @@ package io
// Reader is a generic data reader. In the future, interface{} should be replaced by a generic type
// to provide strong type.
type Reader interface {
// Read reads data from the source. The caller is responsible to release the memory that's associated
// with data by calling the given release function. When err is not nil, the caller MUST NOT call release
// as data is going to be nil (no memory was given). Otherwise, the caller SHOULD call release after
// using the data. The caller is NOT REQUIRED to call release, as this is only a part of memory management
// optimization. If release is not called, the source is forced to allocate a new memory, which also means
// there will be new allocations during streaming, and old unused memory will become garbage. As a consequence,
// these garbage will put a lot of pressure to the garbage collector and makes it to run more often and finish
// slower as the heap memory usage increases and more garbage to collect.
Read() (data interface{}, release func(), err error)
}

View File

@@ -5,6 +5,14 @@ import (
)
type Reader interface {
// Read reads data from the source. The caller is responsible to release the memory that's associated
// with data by calling the given release function. When err is not nil, the caller MUST NOT call release
// as data is going to be nil (no memory was given). Otherwise, the caller SHOULD call release after
// using the data. The caller is NOT REQUIRED to call release, as this is only a part of memory management
// optimization. If release is not called, the source is forced to allocate a new memory, which also means
// there will be new allocations during streaming, and old unused memory will become garbage. As a consequence,
// these garbage will put a lot of pressure to the garbage collector and makes it to run more often and finish
// slower as the heap memory usage increases and more garbage to collect.
Read() (img image.Image, release func(), err error)
}

21
rtpreader.go Normal file
View File

@@ -0,0 +1,21 @@
package mediadevices
import "github.com/pion/rtp"
type RTPReadCloser interface {
Read() (pkts []*rtp.Packet, release func(), err error)
Close() error
}
type rtpReadCloserImpl struct {
readFn func() ([]*rtp.Packet, func(), error)
closeFn func() error
}
func (r *rtpReadCloserImpl) Read() ([]*rtp.Packet, func(), error) {
return r.readFn()
}
func (r *rtpReadCloserImpl) Close() error {
return r.closeFn()
}

View File

@@ -11,6 +11,7 @@ import (
"github.com/pion/mediadevices/pkg/io/audio"
"github.com/pion/mediadevices/pkg/io/video"
"github.com/pion/mediadevices/pkg/wave"
"github.com/pion/rtp"
"github.com/pion/webrtc/v2"
"github.com/pion/webrtc/v2/pkg/media"
)
@@ -53,6 +54,9 @@ type Track interface {
// Unbind is the clean up operation that should be called after Bind. Similar to Bind, unbind will
// be called automatically in the future.
Unbind(*webrtc.PeerConnection) error
// NewRTPReader creates a new reader from the source. The reader will encode the source, and packetize
// the encoded data in RTP format with given mtu size.
NewRTPReader(codecName string, mtu int) (RTPReadCloser, error)
}
type baseTrack struct {
@@ -259,6 +263,41 @@ func (track *VideoTrack) Unbind(pc *webrtc.PeerConnection) error {
return track.unbind(pc)
}
func (track *VideoTrack) NewRTPReader(codecName string, mtu int) (RTPReadCloser, error) {
reader := track.NewReader(false)
inputProp, err := detectCurrentVideoProp(track.Broadcaster)
if err != nil {
return nil, err
}
encodedReader, selectedCodec, err := track.selector.selectVideoCodecByNames(reader, inputProp, codecName)
if err != nil {
return nil, err
}
sample := newVideoSampler(selectedCodec.ClockRate)
// FIXME: not sure the best way to get unique ssrc. We probably should have a global keeper that can generate a random ID and does book keeping?
packetizer := rtp.NewPacketizer(mtu, selectedCodec.PayloadType, rand.Uint32(), selectedCodec.Payloader, rtp.NewRandomSequencer(), selectedCodec.ClockRate)
return &rtpReadCloserImpl{
readFn: func() ([]*rtp.Packet, func(), error) {
encoded, release, err := encodedReader.Read()
if err != nil {
encodedReader.Close()
track.onError(err)
return nil, func() {}, err
}
defer release()
samples := sample()
pkts := packetizer.Packetize(encoded, samples)
return pkts, release, err
},
closeFn: encodedReader.Close,
}, nil
}
// AudioTrack is a specific track type that contains audio source which allows multiple readers to access, and
// manipulate.
type AudioTrack struct {
@@ -328,3 +367,38 @@ func (track *AudioTrack) Bind(pc *webrtc.PeerConnection) (*webrtc.Track, error)
func (track *AudioTrack) Unbind(pc *webrtc.PeerConnection) error {
return track.unbind(pc)
}
func (track *AudioTrack) NewRTPReader(codecName string, mtu int) (RTPReadCloser, error) {
reader := track.NewReader(false)
inputProp, err := detectCurrentAudioProp(track.Broadcaster)
if err != nil {
return nil, err
}
encodedReader, selectedCodec, err := track.selector.selectAudioCodecByNames(reader, inputProp, codecName)
if err != nil {
return nil, err
}
sample := newVideoSampler(selectedCodec.ClockRate)
// FIXME: not sure the best way to get unique ssrc. We probably should have a global keeper that can generate a random ID and does book keeping?
packetizer := rtp.NewPacketizer(mtu, selectedCodec.PayloadType, rand.Uint32(), selectedCodec.Payloader, rtp.NewRandomSequencer(), selectedCodec.ClockRate)
return &rtpReadCloserImpl{
readFn: func() ([]*rtp.Packet, func(), error) {
encoded, release, err := encodedReader.Read()
if err != nil {
encodedReader.Close()
track.onError(err)
return nil, func() {}, err
}
defer release()
samples := sample()
pkts := packetizer.Packetize(encoded, samples)
return pkts, release, err
},
closeFn: encodedReader.Close,
}, nil
}