Revert "Remove scene detection code (#377)"

This reverts commit 98566e26c0.
This commit is contained in:
Thom Shutt
2024-01-19 09:07:47 +00:00
parent 98566e26c0
commit 5ad522cf23
12 changed files with 478 additions and 6 deletions

View File

@@ -37,6 +37,11 @@ RUN cd /home/devops && mkdir actions-runner && cd actions-runner \
&& curl -O -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \
&& tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz && chown -R devops ~devops
RUN LIBTENSORFLOW_VERSION=2.6.3 \
&& curl -LO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-${LIBTENSORFLOW_VERSION}.tar.gz \
&& sudo tar -C /usr/local -xzf libtensorflow-gpu-linux-x86_64-${LIBTENSORFLOW_VERSION}.tar.gz \
&& sudo ldconfig
# Add mime type for ts
RUN sudo echo '<?xml version="1.0" encoding="UTF-8"?><mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info"><mime-type type="video/mp2t"><comment>ts</comment><glob pattern="*.ts"/></mime-type></mime-info>'>>/usr/share/mime/packages/custom_mime_type.xml
RUN sudo update-mime-database /usr/share/mime

View File

@@ -0,0 +1,111 @@
package main
import (
"fmt"
"os"
"strings"
"time"
"github.com/livepeer/lpms/ffmpeg"
)
func validRenditions() []string {
valids := make([]string, len(ffmpeg.VideoProfileLookup))
for p, _ := range ffmpeg.VideoProfileLookup {
valids = append(valids, p)
}
return valids
}
func main() {
if len(os.Args) <= 4 {
//0,1 input.mp4 P720p25fps16x9,P720p30fps4x3 nv 0
panic("Usage:<dnn init deviceid> <input file> <output renditions, comma separated> <sw/nv>")
}
str2accel := func(inp string) (ffmpeg.Acceleration, string) {
if inp == "nv" {
return ffmpeg.Nvidia, "nv"
}
return ffmpeg.Software, "sw"
}
str2profs := func(inp string) []ffmpeg.VideoProfile {
profs := []ffmpeg.VideoProfile{}
strs := strings.Split(inp, ",")
for _, k := range strs {
p, ok := ffmpeg.VideoProfileLookup[k]
if !ok {
panic(fmt.Sprintf("Invalid rendition %s. Valid renditions are:\n%s", k, validRenditions()))
}
profs = append(profs, p)
}
return profs
}
deviceid := os.Args[1]
fname := os.Args[2]
profiles := str2profs(os.Args[3])
accel, lbl := str2accel(os.Args[4])
var dev string
if accel == ffmpeg.Nvidia {
if len(os.Args) <= 5 {
panic("Expected device number")
}
dev = os.Args[5]
}
ffmpeg.InitFFmpeg()
t := time.Now()
tc, err := ffmpeg.NewTranscoderWithDetector(&ffmpeg.DSceneAdultSoccer, deviceid)
defer tc.StopTranscoder()
end := time.Now()
if err != nil {
panic(err)
}
fmt.Printf("InitFFmpegWithDetectorProfile time %0.4v\n", end.Sub(t).Seconds())
profs2opts := func(profs []ffmpeg.VideoProfile) []ffmpeg.TranscodeOptions {
opts := []ffmpeg.TranscodeOptions{}
for i := range profs {
o := ffmpeg.TranscodeOptions{
Oname: fmt.Sprintf("out_%s_%d_out.mkv", lbl, i),
Profile: profs[i],
Accel: accel,
}
opts = append(opts, o)
}
//add detection profile
detectorProfile := ffmpeg.DSceneAdultSoccer
detectorProfile.SampleRate = 100
o := ffmpeg.TranscodeOptions{
Oname: fmt.Sprintf("out_dnn.mkv"),
Profile: ffmpeg.P144p30fps16x9,
Detector: &detectorProfile,
Accel: accel,
}
opts = append(opts, o)
return opts
}
options := profs2opts(profiles)
t = time.Now()
fmt.Printf("Setting fname %s encoding %d renditions with %v\n", fname, len(options), lbl)
res, err := tc.Transcode(&ffmpeg.TranscodeOptionsIn{
Fname: fname,
Accel: accel,
Device: dev,
}, options)
if err != nil {
panic(err)
}
end = time.Now()
fmt.Printf("profile=input frames=%v pixels=%v\n", res.Decoded.Frames, res.Decoded.Pixels)
for i, r := range res.Encoded {
if r.DetectData != nil {
fmt.Printf("profile=%v frames=%v pixels=%v detectdata= %v\n", options[i].Profile, r.Frames, r.Pixels, r.DetectData)
} else {
fmt.Printf("profile=%v frames=%v pixels=%v\n", options[i].Profile, r.Frames, r.Pixels)
}
}
fmt.Printf("Transcoding time %0.4v\n", end.Sub(t).Seconds())
}

View File

@@ -64,7 +64,9 @@ func main() {
o := ffmpeg.TranscodeOptions{
Oname: fmt.Sprintf("out_%s_%d_out.mp4", lbl, i),
Profile: profs[i],
Accel: accel,
// Uncomment the following to test scene classifier
// Detector: &ffmpeg.DSceneAdultSoccer,
Accel: accel,
}
o.From = *from
o.To = *to
@@ -97,7 +99,11 @@ func main() {
end := time.Now()
fmt.Printf("profile=input frames=%v pixels=%v\n", res.Decoded.Frames, res.Decoded.Pixels)
for i, r := range res.Encoded {
fmt.Printf("profile=%v frames=%v pixels=%v\n", profiles[i].Name, r.Frames, r.Pixels)
if r.DetectData != nil {
fmt.Printf("profile=%v frames=%v pixels=%v detectdata=%v\n", profiles[i].Name, r.Frames, r.Pixels, r.DetectData)
} else {
fmt.Printf("profile=%v frames=%v pixels=%v\n", profiles[i].Name, r.Frames, r.Pixels)
}
}
fmt.Printf("Transcoding time %0.4v\n", end.Sub(t).Seconds())
}

View File

@@ -1520,6 +1520,61 @@ func TestTranscoder_CompareVideo(t *testing.T) {
compareVideo(t, Software)
}
func detectionFreq(t *testing.T, accel Acceleration, deviceid string) {
run, dir := setupTest(t)
defer os.RemoveAll(dir)
cmd := `
# run segmenter and sanity check frame counts . Hardcode for now.
ffmpeg -loglevel warning -i "$1"/../transcoder/test.ts -c:a copy -c:v copy -f hls test.m3u8
ffprobe -loglevel warning -select_streams v -count_frames -show_streams test0.ts | grep nb_read_frames=120
ffprobe -loglevel warning -select_streams v -count_frames -show_streams test1.ts | grep nb_read_frames=120
ffprobe -loglevel warning -select_streams v -count_frames -show_streams test2.ts | grep nb_read_frames=120
ffprobe -loglevel warning -select_streams v -count_frames -show_streams test3.ts | grep nb_read_frames=120
`
run(cmd)
InitFFmpeg()
tc, err := NewTranscoderWithDetector(&DSceneAdultSoccer, deviceid)
require.NotNil(t, tc, "look for `Failed to load native model` logs above")
if err != nil {
t.Error(err)
} else {
defer tc.StopTranscoder()
// Test encoding with only seg0 and seg2 under detection
prof := P144p30fps16x9
for i := 0; i < 4; i++ {
in := &TranscodeOptionsIn{
Fname: fmt.Sprintf("%s/test%d.ts", dir, i),
Accel: accel,
}
out := []TranscodeOptions{
{
Oname: fmt.Sprintf("%s/out%d.ts", dir, i),
Profile: prof,
Accel: accel,
},
}
if i%2 == 0 {
out = append(out, TranscodeOptions{
Detector: &DSceneAdultSoccer,
Accel: accel,
})
}
res, err := tc.Transcode(in, out)
if err != nil {
t.Error(err)
}
if i%2 == 0 && (len(res.Encoded) < 2 || res.Encoded[1].DetectData == nil) {
t.Error("No detect data returned for detection profile")
}
}
}
}
func TestTranscoder_DetectionFreq(t *testing.T) {
detectionFreq(t, Software, "-1")
}
func discontinuityAudioSegment(t *testing.T, accel Acceleration) {
run, dir := setupTest(t)
defer os.RemoveAll(dir)

69
ffmpeg/detector.go Normal file
View File

@@ -0,0 +1,69 @@
package ffmpeg
type DetectorType int
const (
SceneClassification = iota
// Example for future:
// ObjectDetection
)
type DetectorProfile interface {
Type() DetectorType
}
type DetectorClass struct {
ID int // unique ID within LPMS per class
Name string // unique Name within LPMS per class
}
type SceneClassificationProfile struct {
SampleRate uint
ModelPath string
Input string
Output string
Classes []DetectorClass
}
func (p *SceneClassificationProfile) Type() DetectorType {
return SceneClassification
}
var (
DSceneAdultSoccer = SceneClassificationProfile{
SampleRate: 30,
ModelPath: "tasmodel.pb",
Input: "input_1",
Output: "Identity",
Classes: []DetectorClass{{ID: 0, Name: "adult"}, {ID: 1, Name: "soccer"}},
}
DSceneViolence = SceneClassificationProfile{
SampleRate: 30,
ModelPath: "tviomodel.pb",
Input: "input_1",
Output: "reshape_3/Reshape",
Classes: []DetectorClass{{ID: 2, Name: "violence"}},
}
)
var SceneClassificationProfileLookup = map[string]SceneClassificationProfile{
"adult": DSceneAdultSoccer,
"soccer": DSceneAdultSoccer,
"violence": DSceneViolence,
}
var DetectorClassIDLookup = map[string]int{
"adult": 0,
"soccer": 1,
"violence": 2,
}
type DetectData interface {
Type() DetectorType
}
type SceneClassificationData map[int]float64
func (scd SceneClassificationData) Type() DetectorType {
return SceneClassification
}

View File

@@ -222,6 +222,11 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
// add video encoder if a decoder exists and this output requires one
if (ictx->vc && needs_decoder(octx->video->name)) {
if (octx->dnn_filtergraph && !ictx->vc->hw_frames_ctx) {
// swap filtergraph with the pre-initialized DNN filtergraph for SW
// for HW we handle it later during filter re-init
octx->vf.graph = *octx->dnn_filtergraph;
}
ret = init_video_filters(ictx, octx);
if (ret < 0) LPMS_ERR(open_output_err, "Unable to open video filter");
@@ -430,6 +435,32 @@ int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost)
return av_interleaved_write_frame(octx->oc, pkt);
}
static int getmetadatainf(AVFrame *inf, struct output_ctx *octx)
{
if(inf == NULL) return -1;
char classinfo[128] = {0,};
AVDictionaryEntry *element = NULL;
AVDictionary *metadata = inf->metadata;
if(metadata != NULL) {
element = av_dict_get(metadata, LVPDNN_FILTER_META, element, 0);
if(element != NULL) {
strcpy(classinfo, element->value);
if(strlen(classinfo) > 0) {
char * token = strtok(classinfo, ",");
int cid = 0;
while( token != NULL ) {
octx->res->probs[cid] += atof(token);
token = strtok(NULL, ",");
cid++;
}
octx->res->frames++;
}
}
}
return 0;
}
static int calc_signature(AVFrame *inf, struct output_ctx *octx)
{
int ret = 0;
@@ -521,11 +552,19 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
octx->next_kf_pts = frame->pts + octx->gop_pts_len;
}
if(octx->is_dnn_profile) {
ret = getmetadatainf(frame, octx);
if(ret == -1 && frame == NULL) {
// Return EOF in case of flushing procedure
ret = AVERROR_EOF;
}
} else {
if(is_video && frame != NULL && octx->sfilters != NULL) {
ret = calc_signature(frame, octx);
if(ret < 0) LPMS_WARN("Could not calculate signature value for frame");
}
ret = encode(encoder, frame, octx, ost);
}
skip:
av_frame_unref(frame);
// For HW we keep the encoder open so will only get EAGAIN.

View File

@@ -39,6 +39,7 @@ var ErrTranscoderPrf = errors.New("TranscoderUnrecognizedProfile")
var ErrTranscoderGOP = errors.New("TranscoderInvalidGOP")
var ErrTranscoderDev = errors.New("TranscoderIncompatibleDevices")
var ErrEmptyData = errors.New("EmptyData")
var ErrDNNInitialize = errors.New("DetectorInitializationError")
var ErrSignCompare = errors.New("InvalidSignData")
var ErrTranscoderPixelformat = errors.New("TranscoderInvalidPixelformat")
var ErrVideoCompare = errors.New("InvalidVideoData")
@@ -102,6 +103,7 @@ type TranscodeOptionsIn struct {
type TranscodeOptions struct {
Oname string
Profile VideoProfile
Detector DetectorProfile
Accel Acceleration
Device string
CalcSign bool
@@ -114,8 +116,9 @@ type TranscodeOptions struct {
}
type MediaInfo struct {
Frames int
Pixels int64
Frames int
Pixels int64
DetectData DetectData
}
type TranscodeResults struct {
@@ -618,6 +621,14 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
params := make([]C.output_params, len(ps))
finalizer := func() { destroyCOutputParams(params) }
for i, p := range ps {
if p.Detector != nil {
// We don't do any encoding for detector profiles
// Adding placeholder values to pass checks for these everywhere
p.Oname = "/dev/null"
p.Profile = P144p30fps16x9
p.Muxer = ComponentOptions{Name: "mpegts"}
}
param := p.Profile
w, h, err := VideoProfileResolution(param)
if err != nil {
@@ -667,7 +678,18 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
filters += fmt.Sprintf(",fps=%d/%d", param.Framerate, param.FramerateDen)
fps = C.AVRational{num: C.int(param.Framerate), den: C.int(param.FramerateDen)}
}
// if has a detector profile, ignore all video options
if p.Detector != nil {
switch p.Detector.Type() {
case SceneClassification:
detectorProfile := p.Detector.(*SceneClassificationProfile)
// Set samplerate using select filter to prevent unnecessary HW->SW copying
filters = fmt.Sprintf("select='not(mod(n\\,%v))'", detectorProfile.SampleRate)
if input.Accel != Software {
filters += ",hwdownload,format=nv12"
}
}
}
// Set video encoder options
// TODO understand how h264 profiles and GOP setting works for
// NETINT encoder, and make sure we change relevant things here
@@ -787,13 +809,17 @@ func createCOutputParams(input *TranscodeOptionsIn, ps []TranscodeOptions) ([]C.
fromMs := int(p.From.Milliseconds())
toMs := int(p.To.Milliseconds())
vfilt := C.CString(filters)
isDNN := C.int(0)
if p.Detector != nil {
isDNN = C.int(1)
}
oname := C.CString(p.Oname)
xcoderOutParams := C.CString(xcoderOutParamsStr)
params[i] = C.output_params{fname: oname, fps: fps,
w: C.int(w), h: C.int(h), bitrate: C.int(bitrate),
gop_time: C.int(gopMs), from: C.int(fromMs), to: C.int(toMs),
muxer: muxOpts, audio: audioOpts, video: vidOpts,
vfilters: vfilt, sfilters: nil, xcoderParams: xcoderOutParams}
vfilters: vfilt, sfilters: nil, is_dnn: isDNN, xcoderParams: xcoderOutParams}
if p.CalcSign {
//signfilter string
escapedOname := ffmpegStrEscape(p.Oname)
@@ -996,6 +1022,18 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
Frames: int(r.frames),
Pixels: int64(r.pixels),
}
// add detect result
if ps[i].Detector != nil {
switch ps[i].Detector.Type() {
case SceneClassification:
detector := ps[i].Detector.(*SceneClassificationProfile)
res := make(SceneClassificationData)
for j, class := range detector.Classes {
res[class.ID] = float64(r.probs[j])
}
tr[i].DetectData = res
}
}
}
dec := MediaInfo{
Frames: int(decoded.frames),
@@ -1050,6 +1088,32 @@ func InitFFmpeg() {
InitFFmpegWithLogLevel(FFLogWarning)
}
func NewTranscoderWithDetector(detector DetectorProfile, deviceid string) (*Transcoder, error) {
switch detector.Type() {
case SceneClassification:
detectorProfile := detector.(*SceneClassificationProfile)
backendConfigs := createBackendConfig(deviceid)
dnnOpt := &C.lvpdnn_opts{
modelpath: C.CString(detectorProfile.ModelPath),
inputname: C.CString(detectorProfile.Input),
outputname: C.CString(detectorProfile.Output),
backend_configs: C.CString(backendConfigs),
}
defer C.free(unsafe.Pointer(dnnOpt.modelpath))
defer C.free(unsafe.Pointer(dnnOpt.inputname))
defer C.free(unsafe.Pointer(dnnOpt.outputname))
defer C.free(unsafe.Pointer(dnnOpt.backend_configs))
handle := C.lpms_transcode_new_with_dnn(dnnOpt)
if handle != nil {
return &Transcoder{
handle: handle,
mu: &sync.Mutex{},
}, nil
}
}
return nil, ErrDNNInitialize
}
func createBackendConfig(deviceid string) string {
configProto := &pb.ConfigProto{GpuOptions: &pb.GPUOptions{AllowGrowth: true}}
bytes, err := proto.Marshal(configProto)

View File

@@ -106,6 +106,20 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
ret = filtergraph_parser(vf, filters_descr, &inputs, &outputs);
if (ret < 0) LPMS_ERR(vf_init_cleanup, "Unable to parse video filters desc");
if (octx->is_dnn_profile && vf->graph == *octx->dnn_filtergraph) {
// Try to find DNN filter in the pre-initialized graph
AVFilterContext *dnn_filter = avfilter_graph_get_filter(vf->graph, "livepeer_dnn");
if (!dnn_filter) {
ret = AVERROR_FILTER_NOT_FOUND;
LPMS_ERR(vf_init_cleanup, "Unable to find DNN filter inside filtergraph");
}
// Place DNN filter in correct position, i.e. just before the sink
assert(vf->sink_ctx->nb_inputs == 1);
ret = avfilter_insert_filter(vf->sink_ctx->inputs[0], dnn_filter, 0, 0);
// Take ownership of the filtergraph from the thread/output_ctx
*octx->dnn_filtergraph = NULL;
}
ret = avfilter_graph_config(vf->graph, NULL);
if (ret < 0) LPMS_ERR(vf_init_cleanup, "Unable configure video filtergraph");
@@ -279,6 +293,10 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
if (is_video && inf && inf->hw_frames_ctx && filter->hwframes &&
inf->hw_frames_ctx->data != filter->hwframes) {
free_filter(&octx->vf); // XXX really should flush filter first
if (octx->dnn_filtergraph) {
// swap filtergraph with the pre-initialized DNN filtergraph
octx->vf.graph = *octx->dnn_filtergraph;
}
ret = init_video_filters(ictx, octx);
if (ret < 0) return lpms_ERR_FILTERS;
}

View File

@@ -68,6 +68,9 @@ struct output_ctx {
int64_t clip_from, clip_to, clip_from_pts, clip_to_pts, clip_started, clip_start_pts, clip_start_pts_found; // for clipping
int64_t clip_audio_from_pts, clip_audio_to_pts, clip_audio_start_pts, clip_audio_start_pts_found; // for clipping
AVFilterGraph **dnn_filtergraph;
int is_dnn_profile; //if not dnn profile: 0
output_results *res; // data to return for this output
char *xcoderParams;
};

View File

@@ -728,6 +728,10 @@ func TestNvidia_CompareVideo(t *testing.T) {
compareVideo(t, Nvidia)
}
func TestNvidia_DetectionFreq(t *testing.T) {
detectionFreq(t, Nvidia, "0")
}
func portraitTest(t *testing.T, input string, checkResults bool, profiles []VideoProfile) error {
wd, err := os.Getwd()
require.NoError(t, err)

View File

@@ -76,6 +76,8 @@ struct transcode_thread {
struct input_ctx ictx;
struct output_ctx outputs[MAX_OUTPUT_SIZE];
AVFilterGraph *dnn_filtergraph;
int nb_outputs;
};
@@ -213,6 +215,10 @@ int transcode_init(struct transcode_thread *h, input_params *inp,
octx->vfilters = params[i].vfilters;
octx->sfilters = params[i].sfilters;
octx->xcoderParams = params[i].xcoderParams;
if (params[i].is_dnn && h->dnn_filtergraph != NULL) {
octx->is_dnn_profile = params[i].is_dnn;
octx->dnn_filtergraph = &h->dnn_filtergraph;
}
if (params[i].bitrate) octx->bitrate = params[i].bitrate;
if (params[i].fps.den) octx->fps = params[i].fps;
if (params[i].gop_time) octx->gop_time = params[i].gop_time;
@@ -569,10 +575,16 @@ int flush_all_outputs(struct transcode_thread *h)
// just flush muxer, but do not write trailer and close
av_interleaved_write_frame(h->outputs[i].oc, NULL);
} else {
if(h->outputs[i].is_dnn_profile == 0) {
// this will flush video and audio streams, flush muxer, write trailer
// and close
ret = flush_outputs(ictx, h->outputs + i);
if (ret < 0) LPMS_ERR_RETURN("Unable to fully flush outputs")
} else if(h->outputs[i].is_dnn_profile && h->outputs[i].res->frames > 0) {
for (int j = 0; j < MAX_CLASSIFY_SIZE; j++) {
h->outputs[i].res->probs[j] = h->outputs[i].res->probs[j] / h->outputs[i].res->frames;
}
}
}
}
@@ -863,8 +875,15 @@ whileloop_end:
// flush outputs
for (int i = 0; i < nb_outputs; i++) {
if(outputs[i].is_dnn_profile == 0/* && outputs[i].has_output > 0*/) {
ret = flush_outputs(ictx, &outputs[i]);
if (ret < 0) LPMS_ERR(transcode_cleanup, "Unable to fully flush outputs")
}
else if(outputs[i].is_dnn_profile && outputs[i].res->frames > 0) {
for (int j = 0; j < MAX_CLASSIFY_SIZE; j++) {
outputs[i].res->probs[j] = outputs[i].res->probs[j] / outputs[i].res->frames;
}
}
}
transcode_cleanup:
@@ -910,6 +929,7 @@ int lpms_transcode(input_params *inp, output_params *params,
if (h->nb_outputs != nb_outputs) {
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
bool only_detector_diff = true;
// MA: we have a problem here. Consider first configuration with 1 output,
// and second one with 2 outputs. When transcode_thread was created
// (in lpms_transcode_new) all the outputs were cleared with zeros. Then,
@@ -924,7 +944,15 @@ int lpms_transcode(input_params *inp, output_params *params,
// approach doesn't work if the "new" configuration has more outputs than
// old one, even if "added" outputs are actually dnn outputs.
// make sure only detection related outputs are changed
for (int i = MIN(nb_outputs, h->nb_outputs); i < MAX(nb_outputs, h->nb_outputs); i++) {
if (!h->outputs[i].is_dnn_profile)
only_detector_diff = false;
}
if (only_detector_diff) {
h->nb_outputs = nb_outputs;
} else {
return lpms_ERR_OUTPUTS;
}
#undef MAX
#undef MIN
}
@@ -975,9 +1003,66 @@ void lpms_transcode_stop(struct transcode_thread *handle) {
free_output(&handle->outputs[i]);
}
if (handle->dnn_filtergraph) avfilter_graph_free(&handle->dnn_filtergraph);
free(handle);
}
static AVFilterGraph * create_dnn_filtergraph(lvpdnn_opts *dnn_opts)
{
const AVFilter *filter = NULL;
AVFilterContext *filter_ctx = NULL;
AVFilterGraph *graph_ctx = NULL;
int ret = 0;
char errstr[1024];
char *filter_name = "livepeer_dnn";
char filter_args[512];
snprintf(filter_args, sizeof filter_args, "model=%s:input=%s:output=%s:backend_configs=%s",
dnn_opts->modelpath, dnn_opts->inputname, dnn_opts->outputname, dnn_opts->backend_configs);
/* allocate graph */
graph_ctx = avfilter_graph_alloc();
if (!graph_ctx)
LPMS_ERR(create_dnn_error, "Unable to open DNN filtergraph");
/* get a corresponding filter and open it */
if (!(filter = avfilter_get_by_name(filter_name))) {
snprintf(errstr, sizeof errstr, "Unrecognized filter with name '%s'\n", filter_name);
LPMS_ERR(create_dnn_error, errstr);
}
/* open filter and add it to the graph */
if (!(filter_ctx = avfilter_graph_alloc_filter(graph_ctx, filter, filter_name))) {
snprintf(errstr, sizeof errstr, "Impossible to open filter with name '%s'\n", filter_name);
LPMS_ERR(create_dnn_error, errstr);
}
if (avfilter_init_str(filter_ctx, filter_args) < 0) {
snprintf(errstr, sizeof errstr, "Impossible to init filter '%s' with arguments '%s'\n", filter_name, filter_args);
LPMS_ERR(create_dnn_error, errstr);
}
return graph_ctx;
create_dnn_error:
avfilter_graph_free(&graph_ctx);
return NULL;
}
struct transcode_thread* lpms_transcode_new_with_dnn(lvpdnn_opts *dnn_opts)
{
struct transcode_thread *h = malloc(sizeof (struct transcode_thread));
if (!h) return NULL;
memset(h, 0, sizeof *h);
AVFilterGraph *filtergraph = create_dnn_filtergraph(dnn_opts);
if (!filtergraph) {
free(h);
h = NULL;
} else {
h->dnn_filtergraph = filtergraph;
}
return h;
}
void lpms_transcode_discontinuity(struct transcode_thread *handle) {
if (!handle)
return;

View File

@@ -31,6 +31,7 @@ typedef struct {
char *sfilters;
int w, h, bitrate, gop_time, from, to;
AVRational fps;
int is_dnn;
char *xcoderParams;
component_opts muxer;
component_opts audio;
@@ -58,11 +59,22 @@ typedef struct {
} input_params;
#define MAX_CLASSIFY_SIZE 10
#define LVPDNN_FILTER_NAME "lvpdnn"
#define LVPDNN_FILTER_META "lavfi.lvpdnn.text"
#define MAX_OUTPUT_SIZE 10
typedef struct {
char *modelpath;
char *inputname;
char *outputname;
char *backend_configs;
} lvpdnn_opts;
typedef struct {
int frames;
int64_t pixels;
//for scene classification
float probs[MAX_CLASSIFY_SIZE];//probability
} output_results;
enum LPMSLogLevel {
@@ -81,6 +93,7 @@ void lpms_init(enum LPMSLogLevel max_level);
int lpms_transcode(input_params *inp, output_params *params, output_results *results, int nb_outputs, output_results *decoded_results, int use_new);
int lpms_transcode_reopen_demux(input_params *inp);
struct transcode_thread* lpms_transcode_new();
struct transcode_thread* lpms_transcode_new_with_dnn(lvpdnn_opts *dnn_opts);
void lpms_transcode_stop(struct transcode_thread* handle);
void lpms_transcode_discontinuity(struct transcode_thread *handle);