mirror of
https://github.com/livepeer/lpms
synced 2025-09-26 19:51:36 +08:00
Revert "Remove scene detection code (#377)"
This reverts commit 98566e26c0.
This commit is contained in:
5
.github/runner/Dockerfile
vendored
5
.github/runner/Dockerfile
vendored
@@ -37,6 +37,11 @@ RUN cd /home/devops && mkdir actions-runner && cd actions-runner \
|
||||
&& curl -O -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \
|
||||
&& tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && chown -R devops ~devops
|
||||
|
||||
RUN LIBTENSORFLOW_VERSION=2.6.3 \
|
||||
&& curl -LO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-${LIBTENSORFLOW_VERSION}.tar.gz \
|
||||
&& sudo tar -C /usr/local -xzf libtensorflow-gpu-linux-x86_64-${LIBTENSORFLOW_VERSION}.tar.gz \
|
||||
&& sudo ldconfig
|
||||
|
||||
# Add mime type for ts
|
||||
RUN sudo echo '<?xml version="1.0" encoding="UTF-8"?><mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info"><mime-type type="video/mp2t"><comment>ts</comment><glob pattern="*.ts"/></mime-type></mime-info>'>>/usr/share/mime/packages/custom_mime_type.xml
|
||||
RUN sudo update-mime-database /usr/share/mime
|
||||
|
111
cmd/scenedetection/scenedetection.go
Normal file
111
cmd/scenedetection/scenedetection.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/livepeer/lpms/ffmpeg"
|
||||
)
|
||||
|
||||
func validRenditions() []string {
|
||||
valids := make([]string, len(ffmpeg.VideoProfileLookup))
|
||||
for p, _ := range ffmpeg.VideoProfileLookup {
|
||||
valids = append(valids, p)
|
||||
}
|
||||
return valids
|
||||
}
|
||||
|
||||
func main() {
|
||||
if len(os.Args) <= 4 {
|
||||
//0,1 input.mp4 P720p25fps16x9,P720p30fps4x3 nv 0
|
||||
panic("Usage:<dnn init deviceid> <input file> <output renditions, comma separated> <sw/nv>")
|
||||
}
|
||||
str2accel := func(inp string) (ffmpeg.Acceleration, string) {
|
||||
if inp == "nv" {
|
||||
return ffmpeg.Nvidia, "nv"
|
||||
}
|
||||
return ffmpeg.Software, "sw"
|
||||
}
|
||||
str2profs := func(inp string) []ffmpeg.VideoProfile {
|
||||
profs := []ffmpeg.VideoProfile{}
|
||||
strs := strings.Split(inp, ",")
|
||||
for _, k := range strs {
|
||||
p, ok := ffmpeg.VideoProfileLookup[k]
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("Invalid rendition %s. Valid renditions are:\n%s", k, validRenditions()))
|
||||
}
|
||||
profs = append(profs, p)
|
||||
}
|
||||
return profs
|
||||
}
|
||||
deviceid := os.Args[1]
|
||||
fname := os.Args[2]
|
||||
profiles := str2profs(os.Args[3])
|
||||
accel, lbl := str2accel(os.Args[4])
|
||||
|
||||
var dev string
|
||||
if accel == ffmpeg.Nvidia {
|
||||
if len(os.Args) <= 5 {
|
||||
panic("Expected device number")
|
||||
}
|
||||
dev = os.Args[5]
|
||||
}
|
||||
ffmpeg.InitFFmpeg()
|
||||
|
||||
t := time.Now()
|
||||
tc, err := ffmpeg.NewTranscoderWithDetector(&ffmpeg.DSceneAdultSoccer, deviceid)
|
||||
defer tc.StopTranscoder()
|
||||
end := time.Now()
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf("InitFFmpegWithDetectorProfile time %0.4v\n", end.Sub(t).Seconds())
|
||||
|
||||
profs2opts := func(profs []ffmpeg.VideoProfile) []ffmpeg.TranscodeOptions {
|
||||
opts := []ffmpeg.TranscodeOptions{}
|
||||
for i := range profs {
|
||||
o := ffmpeg.TranscodeOptions{
|
||||
Oname: fmt.Sprintf("out_%s_%d_out.mkv", lbl, i),
|
||||
Profile: profs[i],
|
||||
Accel: accel,
|
||||
}
|
||||
opts = append(opts, o)
|
||||
}
|
||||
//add detection profile
|
||||
detectorProfile := ffmpeg.DSceneAdultSoccer
|
||||
detectorProfile.SampleRate = 100
|
||||
o := ffmpeg.TranscodeOptions{
|
||||
Oname: fmt.Sprintf("out_dnn.mkv"),
|
||||
Profile: ffmpeg.P144p30fps16x9,
|
||||
Detector: &detectorProfile,
|
||||
Accel: accel,
|
||||
}
|
||||
opts = append(opts, o)
|
||||
return opts
|
||||
}
|
||||
options := profs2opts(profiles)
|
||||
|
||||
t = time.Now()
|
||||
fmt.Printf("Setting fname %s encoding %d renditions with %v\n", fname, len(options), lbl)
|
||||
res, err := tc.Transcode(&ffmpeg.TranscodeOptionsIn{
|
||||
Fname: fname,
|
||||
Accel: accel,
|
||||
Device: dev,
|
||||
}, options)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
end = time.Now()
|
||||
fmt.Printf("profile=input frames=%v pixels=%v\n", res.Decoded.Frames, res.Decoded.Pixels)
|
||||
for i, r := range res.Encoded {
|
||||
if r.DetectData != nil {
|
||||
fmt.Printf("profile=%v frames=%v pixels=%v detectdata= %v\n", options[i].Profile, r.Frames, r.Pixels, r.DetectData)
|
||||
} else {
|
||||
fmt.Printf("profile=%v frames=%v pixels=%v\n", options[i].Profile, r.Frames, r.Pixels)
|
||||
}
|
||||
}
|
||||
fmt.Printf("Transcoding time %0.4v\n", end.Sub(t).Seconds())
|
||||
}
|
@@ -64,7 +64,9 @@ func main() {
|
||||
o := ffmpeg.TranscodeOptions{
|
||||
Oname: fmt.Sprintf("out_%s_%d_out.mp4", lbl, i),
|
||||
Profile: profs[i],
|
||||
Accel: accel,
|
||||
// Uncomment the following to test scene classifier
|
||||
// Detector: &ffmpeg.DSceneAdultSoccer,
|
||||
Accel: accel,
|
||||
}
|
||||
o.From = *from
|
||||
o.To = *to
|
||||
@@ -97,7 +99,11 @@ func main() {
|
||||
end := time.Now()
|
||||
fmt.Printf("profile=input frames=%v pixels=%v\n", res.Decoded.Frames, res.Decoded.Pixels)
|
||||
for i, r := range res.Encoded {
|
||||
fmt.Printf("profile=%v frames=%v pixels=%v\n", profiles[i].Name, r.Frames, r.Pixels)
|
||||
if r.DetectData != nil {
|
||||
fmt.Printf("profile=%v frames=%v pixels=%v detectdata=%v\n", profiles[i].Name, r.Frames, r.Pixels, r.DetectData)
|
||||
} else {
|
||||
fmt.Printf("profile=%v frames=%v pixels=%v\n", profiles[i].Name, r.Frames, r.Pixels)
|
||||
}
|
||||
}
|
||||
fmt.Printf("Transcoding time %0.4v\n", end.Sub(t).Seconds())
|
||||
}
|
||||
|
@@ -1520,6 +1520,61 @@ func TestTranscoder_CompareVideo(t *testing.T) {
|
||||
compareVideo(t, Software)
|
||||
}
|
||||
|
||||
// detectionFreq segments a test clip into four .ts files, then transcodes
// each segment, attaching a scene-classification output only to the
// even-numbered segments, and checks that detection data is returned for
// exactly those segments. deviceid is forwarded to the detector backend
// ("-1" is used by the software-path test; "0" by the Nvidia test).
func detectionFreq(t *testing.T, accel Acceleration, deviceid string) {
	run, dir := setupTest(t)
	defer os.RemoveAll(dir)
	cmd := `
	# run segmenter and sanity check frame counts . Hardcode for now.
	ffmpeg -loglevel warning -i "$1"/../transcoder/test.ts -c:a copy -c:v copy -f hls test.m3u8
	ffprobe -loglevel warning -select_streams v -count_frames -show_streams test0.ts | grep nb_read_frames=120
	ffprobe -loglevel warning -select_streams v -count_frames -show_streams test1.ts | grep nb_read_frames=120
	ffprobe -loglevel warning -select_streams v -count_frames -show_streams test2.ts | grep nb_read_frames=120
	ffprobe -loglevel warning -select_streams v -count_frames -show_streams test3.ts | grep nb_read_frames=120
	`
	run(cmd)

	InitFFmpeg()
	tc, err := NewTranscoderWithDetector(&DSceneAdultSoccer, deviceid)
	// A nil transcoder here usually means the native model file was not
	// found/loadable; the log hint points at the native-side message.
	require.NotNil(t, tc, "look for `Failed to load native model` logs above")
	if err != nil {
		t.Error(err)
	} else {
		defer tc.StopTranscoder()
		// Test encoding with only seg0 and seg2 under detection
		prof := P144p30fps16x9
		for i := 0; i < 4; i++ {
			in := &TranscodeOptionsIn{
				Fname: fmt.Sprintf("%s/test%d.ts", dir, i),
				Accel: accel,
			}
			// Every segment gets one plain encode output...
			out := []TranscodeOptions{
				{
					Oname:   fmt.Sprintf("%s/out%d.ts", dir, i),
					Profile: prof,
					Accel:   accel,
				},
			}
			// ...and even segments additionally get a detector-only output.
			if i%2 == 0 {
				out = append(out, TranscodeOptions{
					Detector: &DSceneAdultSoccer,
					Accel:    accel,
				})
			}
			res, err := tc.Transcode(in, out)
			if err != nil {
				t.Error(err)
			}
			// Detection data must be present on the second output of the
			// even segments; its absence means the detector did not run.
			if i%2 == 0 && (len(res.Encoded) < 2 || res.Encoded[1].DetectData == nil) {
				t.Error("No detect data returned for detection profile")
			}
		}
	}
}
|
||||
|
||||
// TestTranscoder_DetectionFreq exercises detectionFreq on the software
// (CPU) path, passing device id "-1" — presumably "no GPU"; confirm
// against the detector backend's device handling.
func TestTranscoder_DetectionFreq(t *testing.T) {
	detectionFreq(t, Software, "-1")
}
|
||||
|
||||
func discontinuityAudioSegment(t *testing.T, accel Acceleration) {
|
||||
run, dir := setupTest(t)
|
||||
defer os.RemoveAll(dir)
|
||||
|
69
ffmpeg/detector.go
Normal file
69
ffmpeg/detector.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package ffmpeg
|
||||
|
||||
// DetectorType enumerates the kinds of content detectors LPMS supports.
type DetectorType int

const (
	// SceneClassification runs a DNN scene classifier over sampled frames.
	// Fix: the constant is now explicitly typed DetectorType (it was a
	// bare untyped iota), so Type() results compare type-safely.
	SceneClassification DetectorType = iota
	// Example for future:
	// ObjectDetection
)

// DetectorProfile describes the configuration of a content detector.
type DetectorProfile interface {
	Type() DetectorType
}

// DetectorClass identifies one class a detector can report.
type DetectorClass struct {
	ID   int    // unique ID within LPMS per class
	Name string // unique Name within LPMS per class
}

// SceneClassificationProfile configures a scene-classification model:
// where to load it, its input/output tensor names, how often to sample
// frames, and the classes it reports.
type SceneClassificationProfile struct {
	SampleRate uint   // classify every SampleRate-th frame
	ModelPath  string // path to the serialized model (.pb)
	Input      string // model input tensor name
	Output     string // model output tensor name
	Classes    []DetectorClass
}

// Type implements DetectorProfile.
func (p *SceneClassificationProfile) Type() DetectorType {
	return SceneClassification
}

// Built-in scene-classification profiles.
var (
	DSceneAdultSoccer = SceneClassificationProfile{
		SampleRate: 30,
		ModelPath:  "tasmodel.pb",
		Input:      "input_1",
		Output:     "Identity",
		Classes:    []DetectorClass{{ID: 0, Name: "adult"}, {ID: 1, Name: "soccer"}},
	}
	DSceneViolence = SceneClassificationProfile{
		SampleRate: 30,
		ModelPath:  "tviomodel.pb",
		Input:      "input_1",
		Output:     "reshape_3/Reshape",
		Classes:    []DetectorClass{{ID: 2, Name: "violence"}},
	}
)

// SceneClassificationProfileLookup maps a class name to the profile that
// reports that class.
var SceneClassificationProfileLookup = map[string]SceneClassificationProfile{
	"adult":    DSceneAdultSoccer,
	"soccer":   DSceneAdultSoccer,
	"violence": DSceneViolence,
}

// DetectorClassIDLookup maps a class name to its LPMS-wide class ID,
// matching the IDs declared in the profiles above.
var DetectorClassIDLookup = map[string]int{
	"adult":    0,
	"soccer":   1,
	"violence": 2,
}

// DetectData is the result payload produced by a detector.
type DetectData interface {
	Type() DetectorType
}

// SceneClassificationData maps a class ID to its (averaged) probability.
type SceneClassificationData map[int]float64

// Type implements DetectData.
func (scd SceneClassificationData) Type() DetectorType {
	return SceneClassification
}
|
@@ -222,6 +222,11 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
|
||||
|
||||
// add video encoder if a decoder exists and this output requires one
|
||||
if (ictx->vc && needs_decoder(octx->video->name)) {
|
||||
if (octx->dnn_filtergraph && !ictx->vc->hw_frames_ctx) {
|
||||
// swap filtergraph with the pre-initialized DNN filtergraph for SW
|
||||
// for HW we handle it later during filter re-init
|
||||
octx->vf.graph = *octx->dnn_filtergraph;
|
||||
}
|
||||
ret = init_video_filters(ictx, octx);
|
||||
if (ret < 0) LPMS_ERR(open_output_err, "Unable to open video filter");
|
||||
|
||||
@@ -430,6 +435,32 @@ int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost)
|
||||
return av_interleaved_write_frame(octx->oc, pkt);
|
||||
}
|
||||
|
||||
// Accumulate per-class probabilities from the lvpdnn filter metadata
// attached to frame inf into octx->res->probs, and count the frame.
// Returns -1 when inf is NULL (callers use this to detect flushing),
// 0 otherwise.
static int getmetadatainf(AVFrame *inf, struct output_ctx *octx)
{
  if(inf == NULL) return -1;
  char classinfo[128] = {0,};
  AVDictionaryEntry *element = NULL;
  AVDictionary *metadata = inf->metadata;

  if(metadata != NULL) {
    element = av_dict_get(metadata, LVPDNN_FILTER_META, element, 0);
    if(element != NULL && element->value != NULL) {
      // Fix: bound the copy. The metadata value comes from the filter and
      // its length is not under our control; the original strcpy could
      // overflow the 128-byte stack buffer.
      strncpy(classinfo, element->value, sizeof(classinfo) - 1);
      classinfo[sizeof(classinfo) - 1] = '\0';
      if(strlen(classinfo) > 0) {
        char * token = strtok(classinfo, ",");
        int cid = 0;
        // Fix: clamp cid so a malformed value with too many comma-separated
        // fields cannot write past probs[MAX_CLASSIFY_SIZE].
        while( token != NULL && cid < MAX_CLASSIFY_SIZE ) {
          octx->res->probs[cid] += atof(token);
          token = strtok(NULL, ",");
          cid++;
        }
        octx->res->frames++;
      }
    }
  }
  return 0;
}
|
||||
|
||||
static int calc_signature(AVFrame *inf, struct output_ctx *octx)
|
||||
{
|
||||
int ret = 0;
|
||||
@@ -521,11 +552,19 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
|
||||
octx->next_kf_pts = frame->pts + octx->gop_pts_len;
|
||||
}
|
||||
|
||||
if(octx->is_dnn_profile) {
|
||||
ret = getmetadatainf(frame, octx);
|
||||
if(ret == -1 && frame == NULL) {
|
||||
// Return EOF in case of flushing procedure
|
||||
ret = AVERROR_EOF;
|
||||
}
|
||||
} else {
|
||||
if(is_video && frame != NULL && octx->sfilters != NULL) {
|
||||
ret = calc_signature(frame, octx);
|
||||
if(ret < 0) LPMS_WARN("Could not calculate signature value for frame");
|
||||
}
|
||||
ret = encode(encoder, frame, octx, ost);
|
||||
}
|
||||
skip:
|
||||
av_frame_unref(frame);
|
||||
// For HW we keep the encoder open so will only get EAGAIN.
|
||||
|
@@ -39,6 +39,7 @@ var ErrTranscoderPrf = errors.New("TranscoderUnrecognizedProfile")
|
||||
var ErrTranscoderGOP = errors.New("TranscoderInvalidGOP")
|
||||
var ErrTranscoderDev = errors.New("TranscoderIncompatibleDevices")
|
||||
var ErrEmptyData = errors.New("EmptyData")
|
||||
var ErrDNNInitialize = errors.New("DetectorInitializationError")
|
||||
var ErrSignCompare = errors.New("InvalidSignData")
|
||||
var ErrTranscoderPixelformat = errors.New("TranscoderInvalidPixelformat")
|
||||
var ErrVideoCompare = errors.New("InvalidVideoData")
|
||||
@@ -102,6 +103,7 @@ type TranscodeOptionsIn struct {
|
||||
type TranscodeOptions struct {
|
||||
Oname string
|
||||
Profile VideoProfile
|
||||
Detector DetectorProfile
|
||||
Accel Acceleration
|
||||
Device string
|
||||
CalcSign bool
|
||||
@@ -114,8 +116,9 @@ type TranscodeOptions struct {
|
||||
}
|
||||
|
||||
type MediaInfo struct {
|
||||
Frames int
|
||||
Pixels int64
|
||||
Frames int
|
||||
Pixels int64
|
||||
DetectData DetectData
|
||||
}
|
||||
|
||||
type TranscodeResults struct {
|
||||
@@ -618,6 +621,14 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
|
||||
params := make([]C.output_params, len(ps))
|
||||
finalizer := func() { destroyCOutputParams(params) }
|
||||
for i, p := range ps {
|
||||
if p.Detector != nil {
|
||||
// We don't do any encoding for detector profiles
|
||||
// Adding placeholder values to pass checks for these everywhere
|
||||
p.Oname = "/dev/null"
|
||||
p.Profile = P144p30fps16x9
|
||||
p.Muxer = ComponentOptions{Name: "mpegts"}
|
||||
}
|
||||
|
||||
param := p.Profile
|
||||
w, h, err := VideoProfileResolution(param)
|
||||
if err != nil {
|
||||
@@ -667,7 +678,18 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
|
||||
filters += fmt.Sprintf(",fps=%d/%d", param.Framerate, param.FramerateDen)
|
||||
fps = C.AVRational{num: C.int(param.Framerate), den: C.int(param.FramerateDen)}
|
||||
}
|
||||
|
||||
// if has a detector profile, ignore all video options
|
||||
if p.Detector != nil {
|
||||
switch p.Detector.Type() {
|
||||
case SceneClassification:
|
||||
detectorProfile := p.Detector.(*SceneClassificationProfile)
|
||||
// Set samplerate using select filter to prevent unnecessary HW->SW copying
|
||||
filters = fmt.Sprintf("select='not(mod(n\\,%v))'", detectorProfile.SampleRate)
|
||||
if input.Accel != Software {
|
||||
filters += ",hwdownload,format=nv12"
|
||||
}
|
||||
}
|
||||
}
|
||||
// Set video encoder options
|
||||
// TODO understand how h264 profiles and GOP setting works for
|
||||
// NETINT encoder, and make sure we change relevant things here
|
||||
@@ -787,13 +809,17 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
|
||||
fromMs := int(p.From.Milliseconds())
|
||||
toMs := int(p.To.Milliseconds())
|
||||
vfilt := C.CString(filters)
|
||||
isDNN := C.int(0)
|
||||
if p.Detector != nil {
|
||||
isDNN = C.int(1)
|
||||
}
|
||||
oname := C.CString(p.Oname)
|
||||
xcoderOutParams := C.CString(xcoderOutParamsStr)
|
||||
params[i] = C.output_params{fname: oname, fps: fps,
|
||||
w: C.int(w), h: C.int(h), bitrate: C.int(bitrate),
|
||||
gop_time: C.int(gopMs), from: C.int(fromMs), to: C.int(toMs),
|
||||
muxer: muxOpts, audio: audioOpts, video: vidOpts,
|
||||
vfilters: vfilt, sfilters: nil, xcoderParams: xcoderOutParams}
|
||||
vfilters: vfilt, sfilters: nil, is_dnn: isDNN, xcoderParams: xcoderOutParams}
|
||||
if p.CalcSign {
|
||||
//signfilter string
|
||||
escapedOname := ffmpegStrEscape(p.Oname)
|
||||
@@ -996,6 +1022,18 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
|
||||
Frames: int(r.frames),
|
||||
Pixels: int64(r.pixels),
|
||||
}
|
||||
// add detect result
|
||||
if ps[i].Detector != nil {
|
||||
switch ps[i].Detector.Type() {
|
||||
case SceneClassification:
|
||||
detector := ps[i].Detector.(*SceneClassificationProfile)
|
||||
res := make(SceneClassificationData)
|
||||
for j, class := range detector.Classes {
|
||||
res[class.ID] = float64(r.probs[j])
|
||||
}
|
||||
tr[i].DetectData = res
|
||||
}
|
||||
}
|
||||
}
|
||||
dec := MediaInfo{
|
||||
Frames: int(decoded.frames),
|
||||
@@ -1050,6 +1088,32 @@ func InitFFmpeg() {
|
||||
InitFFmpegWithLogLevel(FFLogWarning)
|
||||
}
|
||||
|
||||
// NewTranscoderWithDetector allocates a Transcoder whose native DNN
// filtergraph is pre-built from the given detector profile, using the
// backend configuration for deviceid. It returns ErrDNNInitialize when
// the detector type is unsupported or the native side fails to create
// the transcode handle (e.g. the model cannot be loaded).
func NewTranscoderWithDetector(detector DetectorProfile, deviceid string) (*Transcoder, error) {
	switch detector.Type() {
	case SceneClassification:
		detectorProfile := detector.(*SceneClassificationProfile)
		backendConfigs := createBackendConfig(deviceid)
		// C strings are heap-allocated by CString and must be freed here;
		// NOTE(review): this assumes the native side copies what it needs
		// during lpms_transcode_new_with_dnn — confirm against
		// create_dnn_filtergraph.
		dnnOpt := &C.lvpdnn_opts{
			modelpath:       C.CString(detectorProfile.ModelPath),
			inputname:       C.CString(detectorProfile.Input),
			outputname:      C.CString(detectorProfile.Output),
			backend_configs: C.CString(backendConfigs),
		}
		defer C.free(unsafe.Pointer(dnnOpt.modelpath))
		defer C.free(unsafe.Pointer(dnnOpt.inputname))
		defer C.free(unsafe.Pointer(dnnOpt.outputname))
		defer C.free(unsafe.Pointer(dnnOpt.backend_configs))
		handle := C.lpms_transcode_new_with_dnn(dnnOpt)
		if handle != nil {
			return &Transcoder{
				handle: handle,
				mu:     &sync.Mutex{},
			}, nil
		}
	}
	// Reached when the detector type is unknown or the native handle is nil.
	return nil, ErrDNNInitialize
}
|
||||
|
||||
func createBackendConfig(deviceid string) string {
|
||||
configProto := &pb.ConfigProto{GpuOptions: &pb.GPUOptions{AllowGrowth: true}}
|
||||
bytes, err := proto.Marshal(configProto)
|
||||
|
@@ -106,6 +106,20 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
|
||||
ret = filtergraph_parser(vf, filters_descr, &inputs, &outputs);
|
||||
if (ret < 0) LPMS_ERR(vf_init_cleanup, "Unable to parse video filters desc");
|
||||
|
||||
if (octx->is_dnn_profile && vf->graph == *octx->dnn_filtergraph) {
|
||||
// Try to find DNN filter in the pre-initialized graph
|
||||
AVFilterContext *dnn_filter = avfilter_graph_get_filter(vf->graph, "livepeer_dnn");
|
||||
if (!dnn_filter) {
|
||||
ret = AVERROR_FILTER_NOT_FOUND;
|
||||
LPMS_ERR(vf_init_cleanup, "Unable to find DNN filter inside filtergraph");
|
||||
}
|
||||
// Place DNN filter in correct position, i.e. just before the sink
|
||||
assert(vf->sink_ctx->nb_inputs == 1);
|
||||
ret = avfilter_insert_filter(vf->sink_ctx->inputs[0], dnn_filter, 0, 0);
|
||||
// Take ownership of the filtergraph from the thread/output_ctx
|
||||
*octx->dnn_filtergraph = NULL;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_config(vf->graph, NULL);
|
||||
if (ret < 0) LPMS_ERR(vf_init_cleanup, "Unable configure video filtergraph");
|
||||
|
||||
@@ -279,6 +293,10 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
|
||||
if (is_video && inf && inf->hw_frames_ctx && filter->hwframes &&
|
||||
inf->hw_frames_ctx->data != filter->hwframes) {
|
||||
free_filter(&octx->vf); // XXX really should flush filter first
|
||||
if (octx->dnn_filtergraph) {
|
||||
// swap filtergraph with the pre-initialized DNN filtergraph
|
||||
octx->vf.graph = *octx->dnn_filtergraph;
|
||||
}
|
||||
ret = init_video_filters(ictx, octx);
|
||||
if (ret < 0) return lpms_ERR_FILTERS;
|
||||
}
|
||||
|
@@ -68,6 +68,9 @@ struct output_ctx {
|
||||
int64_t clip_from, clip_to, clip_from_pts, clip_to_pts, clip_started, clip_start_pts, clip_start_pts_found; // for clipping
|
||||
int64_t clip_audio_from_pts, clip_audio_to_pts, clip_audio_start_pts, clip_audio_start_pts_found; // for clipping
|
||||
|
||||
AVFilterGraph **dnn_filtergraph;
|
||||
int is_dnn_profile; //if not dnn profile: 0
|
||||
|
||||
output_results *res; // data to return for this output
|
||||
char *xcoderParams;
|
||||
};
|
||||
|
@@ -728,6 +728,10 @@ func TestNvidia_CompareVideo(t *testing.T) {
|
||||
compareVideo(t, Nvidia)
|
||||
}
|
||||
|
||||
// TestNvidia_DetectionFreq exercises detectionFreq on the Nvidia
// (GPU-accelerated) path, using GPU device "0".
func TestNvidia_DetectionFreq(t *testing.T) {
	detectionFreq(t, Nvidia, "0")
}
|
||||
|
||||
func portraitTest(t *testing.T, input string, checkResults bool, profiles []VideoProfile) error {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
@@ -76,6 +76,8 @@ struct transcode_thread {
|
||||
struct input_ctx ictx;
|
||||
struct output_ctx outputs[MAX_OUTPUT_SIZE];
|
||||
|
||||
AVFilterGraph *dnn_filtergraph;
|
||||
|
||||
int nb_outputs;
|
||||
};
|
||||
|
||||
@@ -213,6 +215,10 @@ int transcode_init(struct transcode_thread *h, input_params *inp,
|
||||
octx->vfilters = params[i].vfilters;
|
||||
octx->sfilters = params[i].sfilters;
|
||||
octx->xcoderParams = params[i].xcoderParams;
|
||||
if (params[i].is_dnn && h->dnn_filtergraph != NULL) {
|
||||
octx->is_dnn_profile = params[i].is_dnn;
|
||||
octx->dnn_filtergraph = &h->dnn_filtergraph;
|
||||
}
|
||||
if (params[i].bitrate) octx->bitrate = params[i].bitrate;
|
||||
if (params[i].fps.den) octx->fps = params[i].fps;
|
||||
if (params[i].gop_time) octx->gop_time = params[i].gop_time;
|
||||
@@ -569,10 +575,16 @@ int flush_all_outputs(struct transcode_thread *h)
|
||||
// just flush muxer, but do not write trailer and close
|
||||
av_interleaved_write_frame(h->outputs[i].oc, NULL);
|
||||
} else {
|
||||
if(h->outputs[i].is_dnn_profile == 0) {
|
||||
// this will flush video and audio streams, flush muxer, write trailer
|
||||
// and close
|
||||
ret = flush_outputs(ictx, h->outputs + i);
|
||||
if (ret < 0) LPMS_ERR_RETURN("Unable to fully flush outputs")
|
||||
} else if(h->outputs[i].is_dnn_profile && h->outputs[i].res->frames > 0) {
|
||||
for (int j = 0; j < MAX_CLASSIFY_SIZE; j++) {
|
||||
h->outputs[i].res->probs[j] = h->outputs[i].res->probs[j] / h->outputs[i].res->frames;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -863,8 +875,15 @@ whileloop_end:
|
||||
|
||||
// flush outputs
|
||||
for (int i = 0; i < nb_outputs; i++) {
|
||||
if(outputs[i].is_dnn_profile == 0/* && outputs[i].has_output > 0*/) {
|
||||
ret = flush_outputs(ictx, &outputs[i]);
|
||||
if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to fully flush outputs")
|
||||
}
|
||||
else if(outputs[i].is_dnn_profile && outputs[i].res->frames > 0) {
|
||||
for (int j = 0; j < MAX_CLASSIFY_SIZE; j++) {
|
||||
outputs[i].res->probs[j] = outputs[i].res->probs[j] / outputs[i].res->frames;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
transcode_cleanup:
|
||||
@@ -910,6 +929,7 @@ int lpms_transcode(input_params *inp, output_params *params,
|
||||
if (h->nb_outputs != nb_outputs) {
|
||||
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
bool only_detector_diff = true;
|
||||
// MA: we have a problem here. Consider first configuration with 1 output,
|
||||
// and second one with 2 outputs. When transcode_thread was created
|
||||
// (in lpms_transcode_new) all the outputs were cleared with zeros. Then,
|
||||
@@ -924,7 +944,15 @@ int lpms_transcode(input_params *inp, output_params *params,
|
||||
// approach doesn't work if the "new" configuration has more outputs than
|
||||
// old one, even if "added" outputs are actually dnn outputs.
|
||||
// make sure only detection related outputs are changed
|
||||
for (int i = MIN(nb_outputs, h->nb_outputs); i < MAX(nb_outputs, h->nb_outputs); i++) {
|
||||
if (!h->outputs[i].is_dnn_profile)
|
||||
only_detector_diff = false;
|
||||
}
|
||||
if (only_detector_diff) {
|
||||
h->nb_outputs = nb_outputs;
|
||||
} else {
|
||||
return lpms_ERR_OUTPUTS;
|
||||
}
|
||||
#undef MAX
|
||||
#undef MIN
|
||||
}
|
||||
@@ -975,9 +1003,66 @@ void lpms_transcode_stop(struct transcode_thread *handle) {
|
||||
free_output(&handle->outputs[i]);
|
||||
}
|
||||
|
||||
if (handle->dnn_filtergraph) avfilter_graph_free(&handle->dnn_filtergraph);
|
||||
|
||||
free(handle);
|
||||
}
|
||||
|
||||
// Build a standalone filtergraph containing a single "livepeer_dnn"
// filter configured from dnn_opts (model path, input/output tensor names,
// backend configs). Returns the configured graph on success, or NULL on
// failure; LPMS_ERR logs and jumps to create_dnn_error, which frees the
// partially-built graph (the filter context is owned by the graph and is
// freed with it).
static AVFilterGraph * create_dnn_filtergraph(lvpdnn_opts *dnn_opts)
{
  const AVFilter *filter = NULL;
  AVFilterContext *filter_ctx = NULL;
  AVFilterGraph *graph_ctx = NULL;
  int ret = 0;
  char errstr[1024];
  char *filter_name = "livepeer_dnn";
  char filter_args[512];
  snprintf(filter_args, sizeof filter_args, "model=%s:input=%s:output=%s:backend_configs=%s",
      dnn_opts->modelpath, dnn_opts->inputname, dnn_opts->outputname, dnn_opts->backend_configs);

  /* allocate graph */
  graph_ctx = avfilter_graph_alloc();
  if (!graph_ctx)
    LPMS_ERR(create_dnn_error, "Unable to open DNN filtergraph");

  /* get a corresponding filter and open it */
  if (!(filter = avfilter_get_by_name(filter_name))) {
    snprintf(errstr, sizeof errstr, "Unrecognized filter with name '%s'\n", filter_name);
    LPMS_ERR(create_dnn_error, errstr);
  }

  /* open filter and add it to the graph */
  if (!(filter_ctx = avfilter_graph_alloc_filter(graph_ctx, filter, filter_name))) {
    snprintf(errstr, sizeof errstr, "Impossible to open filter with name '%s'\n", filter_name);
    LPMS_ERR(create_dnn_error, errstr);
  }
  if (avfilter_init_str(filter_ctx, filter_args) < 0) {
    snprintf(errstr, sizeof errstr, "Impossible to init filter '%s' with arguments '%s'\n", filter_name, filter_args);
    LPMS_ERR(create_dnn_error, errstr);
  }

  return graph_ctx;

create_dnn_error:
  avfilter_graph_free(&graph_ctx);
  return NULL;
}
|
||||
|
||||
// Allocate a transcode_thread whose DNN filtergraph is pre-built from
// dnn_opts. Returns NULL when allocation or graph creation fails; on
// graph-creation failure the thread struct is released before returning.
struct transcode_thread* lpms_transcode_new_with_dnn(lvpdnn_opts *dnn_opts)
{
  struct transcode_thread *h = malloc(sizeof (struct transcode_thread));
  if (!h) return NULL;
  memset(h, 0, sizeof *h);

  AVFilterGraph *graph = create_dnn_filtergraph(dnn_opts);
  if (!graph) {
    free(h);
    return NULL;
  }
  h->dnn_filtergraph = graph;
  return h;
}
|
||||
|
||||
void lpms_transcode_discontinuity(struct transcode_thread *handle) {
|
||||
if (!handle)
|
||||
return;
|
||||
|
@@ -31,6 +31,7 @@ typedef struct {
|
||||
char *sfilters;
|
||||
int w, h, bitrate, gop_time, from, to;
|
||||
AVRational fps;
|
||||
int is_dnn;
|
||||
char *xcoderParams;
|
||||
component_opts muxer;
|
||||
component_opts audio;
|
||||
@@ -58,11 +59,22 @@ typedef struct {
|
||||
} input_params;
|
||||
|
||||
#define MAX_CLASSIFY_SIZE 10
|
||||
#define LVPDNN_FILTER_NAME "lvpdnn"
|
||||
#define LVPDNN_FILTER_META "lavfi.lvpdnn.text"
|
||||
#define MAX_OUTPUT_SIZE 10
|
||||
|
||||
typedef struct {
|
||||
char *modelpath;
|
||||
char *inputname;
|
||||
char *outputname;
|
||||
char *backend_configs;
|
||||
} lvpdnn_opts;
|
||||
|
||||
typedef struct {
|
||||
int frames;
|
||||
int64_t pixels;
|
||||
//for scene classification
|
||||
float probs[MAX_CLASSIFY_SIZE];//probability
|
||||
} output_results;
|
||||
|
||||
enum LPMSLogLevel {
|
||||
@@ -81,6 +93,7 @@ void lpms_init(enum LPMSLogLevel max_level);
|
||||
int lpms_transcode(input_params *inp, output_params *params, output_results *results, int nb_outputs, output_results *decoded_results, int use_new);
|
||||
int lpms_transcode_reopen_demux(input_params *inp);
|
||||
struct transcode_thread* lpms_transcode_new();
|
||||
struct transcode_thread* lpms_transcode_new_with_dnn(lvpdnn_opts *dnn_opts);
|
||||
void lpms_transcode_stop(struct transcode_thread* handle);
|
||||
void lpms_transcode_discontinuity(struct transcode_thread *handle);
|
||||
|
||||
|
Reference in New Issue
Block a user