Perform AR processing on Jetson

Kalit Inani
2024-11-07 10:57:31 -05:00
parent d9bb654e4e
commit b257e37207
4 changed files with 62 additions and 17 deletions

View File

@@ -3,17 +3,15 @@ import sys
import cv2
import itertools
import numpy as np
from time import time
import mediapipe as mp
import matplotlib.pyplot as plt
import socket
import struct
mp_face_mesh = mp.solutions.face_mesh
face_mesh_videos = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5, min_tracking_confidence=0.3)
eye = cv2.imread('/home/kalit/Desktop/GeorgiaTech/Fall_2024/CS_8903/WebRTC_research/ar-filters/filter_imgs/eye.jpg')
mouth = cv2.imread('/home/kalit/Desktop/GeorgiaTech/Fall_2024/CS_8903/WebRTC_research/ar-filters/filter_imgs/smile.png')
eye = cv2.imread('/home/epl/Desktop/WebRTC_research/ar-filters/filter_imgs/eye.jpg')
mouth = cv2.imread('/home/epl/Desktop/WebRTC_research/ar-filters/filter_imgs/smile.png')
def detectFacialLandmarks(image, face_mesh):
return face_mesh.process(image[:,:,::-1])
@@ -120,9 +118,10 @@ def main():
new_frame = frame
_, buffer = cv2.imencode('.jpg', new_frame)
output_file_path = 'output.jpg'
with open(output_file_path, 'wb') as f:
f.write(buffer)
# output_file_path = 'output.jpg'
# with open(output_file_path, 'wb') as f:
# f.write(buffer)
print("Sending back processed image")
conn.sendall(struct.pack('!I', len(buffer)) + buffer.tobytes())
conn.close()
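
With the disk write commented out, the processed frame now goes back only over the socket, framed as a 4-byte big-endian length (struct.pack('!I', ...)) followed by the JPEG bytes. Below is a minimal Go sketch of reading one such reply on the WebRTC side, assuming the same framing; readARReply is a hypothetical name, and in the commit this read presumably happens inside OverlayARFilter, whose body is not shown in this diff. The file is written as if dropped into the same main package as the Go code below.

package main

import (
	"encoding/binary"
	"fmt"
	"io"
	"net"
)

// readARReply reads one reply as framed by the Python server: a 4-byte
// big-endian length (struct.pack('!I', ...)) followed by that many JPEG bytes.
func readARReply(conn net.Conn) ([]byte, error) {
	var hdr [4]byte
	if _, err := io.ReadFull(conn, hdr[:]); err != nil {
		return nil, fmt.Errorf("read length prefix: %w", err)
	}
	payload := make([]byte, binary.BigEndian.Uint32(hdr[:]))
	if _, err := io.ReadFull(conn, payload); err != nil {
		return nil, fmt.Errorf("read JPEG payload: %w", err)
	}
	return payload, nil
}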

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"log"
"net"
"os/exec"
"time"
"github.com/asticode/go-astiav"
@@ -35,15 +36,34 @@ func openCameraFeed(peerConnection *webrtc.PeerConnection, videoTrack *webrtc.Tr
fmt.Println("Writing to tracks")
vp := NewVideoProcessor()
go vp.writeH264ToTrackFFmpegFilters(videoTrack)
go vp.writeH264ToTrackAR(videoTrack)
// go vp.writeH264ToTrackFFmpegFilters(videoTrack)
return nil
}
func establishSSHTunnel() (*exec.Cmd, error) {
cmd := exec.Command("ssh", "-L", "5005:127.0.0.1:5005", "-J", "fastvideo", "-i", "~/.ssh/picluster", "epl@10.100.1.165")
err := cmd.Start()
if err != nil {
return nil, fmt.Errorf("failed to start SSH tunnel: %w", err)
}
fmt.Println("SSH Tunnel established")
return cmd, nil
}
func closeSSHTunnel(cmd *exec.Cmd) error {
err := cmd.Wait()
if err != nil {
return fmt.Errorf("error waiting for SSH tunnel process: %w", err)
}
return nil
}
func (vp *VideoProcessor) writeH264ToTrackAR(track *webrtc.TrackLocalStaticSample) {
defer vp.freeVideoCoding()
conn, err := net.Dial("tcp", "localhost:5005")
conn, err := net.Dial("tcp", "127.0.0.1:5005")
if err != nil {
panic(err)
}
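
The hunk above adds establishSSHTunnel/closeSSHTunnel and switches the dial target to 127.0.0.1:5005 (the local end of the forwarded port), but it does not show where the helpers are called. A hedged sketch of one way to wire them around the AR goroutine, in the same package; startARPipeline is a hypothetical wrapper, not part of the commit.

// startARPipeline is a hypothetical wrapper (not in this commit): bring the
// tunnel up before writeH264ToTrackAR dials 127.0.0.1:5005, and return a
// shutdown func that tears the tunnel down.
func startARPipeline(vp *VideoProcessor, track *webrtc.TrackLocalStaticSample) (shutdown func(), err error) {
	tunnel, err := establishSSHTunnel()
	if err != nil {
		return nil, fmt.Errorf("ssh tunnel: %w", err)
	}
	go vp.writeH264ToTrackAR(track)
	return func() {
		// ssh runs until killed; closeSSHTunnel then reaps it via cmd.Wait()
		// (which will report the kill signal as an error).
		_ = tunnel.Process.Kill()
		_ = closeSSHTunnel(tunnel)
	}, nil
}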
@@ -51,6 +71,8 @@ func (vp *VideoProcessor) writeH264ToTrackAR(track *webrtc.TrackLocalStaticSampl
ticker := time.NewTicker(h264FrameDuration)
for ; true; <-ticker.C {
startTime := time.Now()
if err = vp.inputFormatContext.ReadFrame(vp.decodePacket); err != nil {
if errors.Is(err, astiav.ErrEof) {
break
@@ -66,7 +88,7 @@ func (vp *VideoProcessor) writeH264ToTrackAR(track *webrtc.TrackLocalStaticSampl
for {
if err = vp.decodeCodecContext.ReceiveFrame(vp.decodeFrame); err != nil {
if errors.Is(err, astiav.ErrEof) || errors.Is(err, astiav.ErrEagain) {
fmt.Println("Error while receiving decoded framed: ", err)
// fmt.Println("Error while receiving decoded framed: ", err)
break
}
panic(err)
@@ -79,11 +101,13 @@ func (vp *VideoProcessor) writeH264ToTrackAR(track *webrtc.TrackLocalStaticSampl
vp.pts++
vp.rgbaFrame.SetPts(vp.pts)
startTime2 := time.Now()
vp.arFilterFrame, err = OverlayARFilter(conn, vp.rgbaFrame)
if err != nil {
fmt.Println("Failed to add AR filter to frame: ", err)
}
elapsedTime2 := time.Since(startTime2)
fmt.Printf("Time taken for adding AR filter: %v\n", elapsedTime2)
if err = vp.convertToYUV420PContext.ScaleFrame(vp.arFilterFrame, vp.yuv420PFrame); err != nil {
panic(err)
@@ -112,6 +136,8 @@ func (vp *VideoProcessor) writeH264ToTrackAR(track *webrtc.TrackLocalStaticSampl
}
}
}
elapsedTime := time.Since(startTime)
fmt.Printf("Time taken from reading the packet, decoding, adding AR filter, encoding to writing in the WebRTC track: %v\n", elapsedTime)
}
}
@@ -123,6 +149,7 @@ func (vp *VideoProcessor) writeH264ToTrackFFmpegFilters(track *webrtc.TrackLocal
ticker := time.NewTicker(h264FrameDuration)
for ; true; <-ticker.C {
startTime := time.Now()
if err = vp.inputFormatContext.ReadFrame(vp.decodePacket); err != nil {
if errors.Is(err, astiav.ErrEof) {
break
@@ -182,5 +209,7 @@ func (vp *VideoProcessor) writeH264ToTrackFFmpegFilters(track *webrtc.TrackLocal
}
}
}
elapsedTime := time.Since(startTime)
fmt.Printf("Time taken from reading the packet, decoding, adding AR filter, encoding to writing in the WebRTC track: %v\n", elapsedTime)
}
}
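
OverlayARFilter(conn, vp.rgbaFrame) is the new per-frame hop to the Jetson, but its body is not part of this diff, so the request framing is only implied. A sketch of the sending half, assuming the request uses the same 4-byte big-endian length prefix as the reply (an assumption, not taken from the commit); sendFrameForAR is a hypothetical helper and uses the same encoding/binary, fmt, and net imports as the read sketch above.

// sendFrameForAR writes one encoded frame to the Jetson AR server.
// Assumed request framing: 4-byte big-endian length, then the raw payload,
// mirroring the reply framing used by the Python server.
func sendFrameForAR(conn net.Conn, frame []byte) error {
	var hdr [4]byte
	binary.BigEndian.PutUint32(hdr[:], uint32(len(frame)))
	if _, err := conn.Write(hdr[:]); err != nil {
		return fmt.Errorf("write length prefix: %w", err)
	}
	if _, err := conn.Write(frame); err != nil {
		return fmt.Errorf("write frame payload: %w", err)
	}
	return nil
}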

View File

@@ -48,8 +48,21 @@ monitoring tools for battery drains
TPUs = Tensor Processing Units
Tasks:
1. Start timing on local machine
2. Integrate AR filters with WebRTC video stream -> stuck with yuv422p to rgb conversion
3. WebRTC support for Raspberry Pi. -> Jetson
4. Devkit for VR -> hardware test platforms
5. Look for other people working on same problem, google scholar
1. Look for other people working on same problem, google scholar
2. explore AR Workload
3. Volumetric videos - format, can handle with regular RTP?
4. battery savings?
ffmpeg -i input.webm -f mpegts udp://224.0.0.251:5353
/home/kalit/Desktop/GeorgiaTech/Fall_2024/CS_8903/WebRTC_research/ar-filters/filter_imgs/eye.jpg
/home/epl/Desktop/WebRTC_research/ar-filters/filter_imgs/eye.jpg
/home/epl/Desktop/WebRTC_research/ar-filters/filter_imgs/smile.png
AR workloads:
https://cuhksz-inml.github.io/full_scene_volumetric_video_dataset/factsfigures.html
Forward streams sent to the local machine's localhost:5005 on to the Jetson machine's localhost:5005:
ssh -L 5005:localhost:5005 -J fastvideo -i ~/.ssh/picluster epl@10.100.1.165

requirements.txt (new file, 4 additions)
View File

@@ -0,0 +1,4 @@
matplotlib==3.5.1
mediapipe==0.10.11
opencv-contrib-python==4.9.0.80
opencv-python==4.9.0.80