Fix CUVID crash on resolution change (#418)

Also adds a bunch of other changes necessary to better support
mid-stream resolution changes.

Unfortunately, with CUVID there still seems to be a brief flash of
green (it appears to last for the length of the decoder's internal frame
buffer), but we can tackle that separately. This PR simply makes the transcoder

1. not crash, and
2. correctly encode mid-stream rotations, including when transcoding on CPU
This commit is contained in:
Josh Allmann
2024-08-19 10:36:37 -07:00
committed by GitHub
8 changed files with 400 additions and 18 deletions

View File

@@ -22,12 +22,13 @@ static int lpms_receive_frame(struct input_ctx *ictx, AVCodecContext *dec, AVFra
return ret;
}
static int send_first_pkt(struct input_ctx *ictx)
static int send_flush_pkt(struct input_ctx *ictx)
{
if (ictx->flushed) return 0;
if (!ictx->first_pkt) return lpms_ERR_INPUT_NOKF;
if (!ictx->flush_pkt) return lpms_ERR_INPUT_NOKF;
int ret = avcodec_send_packet(ictx->vc, ictx->first_pkt);
int ret = avcodec_send_packet(ictx->vc, ictx->flush_pkt);
if (ret == AVERROR(EAGAIN)) return ret; // decoder is mid-reset
ictx->sentinel_count++;
if (ret < 0) {
LPMS_ERR(packet_cleanup, "Error sending flush packet");
@@ -68,13 +69,25 @@ int decode_in(struct input_ctx *ictx, AVPacket *pkt, AVFrame *frame, int *stream
return 0;
}
if (!ictx->first_pkt && pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
ictx->first_pkt = av_packet_clone(pkt);
ictx->first_pkt->pts = -1;
// Set up flush packet. Do this every keyframe in case the underlying frame changes
if (pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
if (!ictx->flush_pkt) ictx->flush_pkt = av_packet_clone(pkt);
else {
av_packet_unref(ictx->flush_pkt);
av_packet_ref(ictx->flush_pkt, pkt);
}
ictx->flush_pkt->pts = -1;
}
ret = lpms_send_packet(ictx, decoder, pkt);
if (ret < 0) {
if (ret == AVERROR(EAGAIN)) {
// Usually means the decoder needs to drain itself - block demuxing until then
// Seems to happen during mid-stream resolution changes
if (ictx->blocked_pkt) LPMS_ERR_RETURN("unexpectedly got multiple blocked packets");
ictx->blocked_pkt = av_packet_clone(pkt);
if (!ictx->blocked_pkt) LPMS_ERR_RETURN("could not clone packet for blocking");
// continue in an attempt to drain the decoder
} else if (ret < 0) {
LPMS_ERR_RETURN("Error sending packet to decoder");
}
ret = lpms_receive_frame(ictx, decoder, frame);
@@ -104,8 +117,10 @@ int flush_in(struct input_ctx *ictx, AVFrame *frame, int *stream_index)
// TODO this is unnecessary for SW decoding! SW process should match audio
if (ictx->vc && !ictx->flushed && ictx->pkt_diff > 0) {
ictx->flushing = 1;
ret = send_first_pkt(ictx);
if (ret < 0) {
ret = send_flush_pkt(ictx);
if (ret == AVERROR(EAGAIN)) {
// do nothing; decoder recently reset and needs to drain so let it
} else if (ret < 0) {
ictx->flushed = 1;
return ret;
}
@@ -137,7 +152,10 @@ int process_in(struct input_ctx *ictx, AVFrame *frame, AVPacket *pkt,
av_packet_unref(pkt);
// Demux next packet
ret = demux_in(ictx, pkt);
if (ictx->blocked_pkt) {
av_packet_move_ref(pkt, ictx->blocked_pkt);
av_packet_free(&ictx->blocked_pkt);
} else ret = demux_in(ictx, pkt);
// See if we got anything
if (ret == AVERROR_EOF) {
// no more packets, flush the decoder(s)
@@ -376,5 +394,6 @@ void free_input(struct input_ctx *inctx)
if (inctx->hw_device_ctx) av_buffer_unref(&inctx->hw_device_ctx);
if (inctx->last_frame_v) av_frame_free(&inctx->last_frame_v);
if (inctx->last_frame_a) av_frame_free(&inctx->last_frame_a);
if (inctx->blocked_pkt) av_packet_free(&inctx->blocked_pkt);
}

View File

@@ -20,7 +20,7 @@ struct input_ctx {
char *xcoderParams;
// Decoder flush
AVPacket *first_pkt;
AVPacket *flush_pkt;
int flushed;
int flushing;
// The diff of `packets sent - frames recv` serves as an estimate of
@@ -33,6 +33,9 @@ struct input_ctx {
#define SENTINEL_MAX 8
uint16_t sentinel_count;
// Packet held while decoder is blocked and needs to drain
AVPacket *blocked_pkt;
// Filter flush
AVFrame *last_frame_v, *last_frame_a;

View File

@@ -255,7 +255,11 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
if(strcmp(octx->xcoderParams,"")!=0){
av_opt_set(vc->priv_data, "xcoder-params", octx->xcoderParams, 0);
}
ret = avcodec_open2(vc, codec, &octx->video->opts);
// copy codec options and open encoder
AVDictionary *opts = NULL;
if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
ret = avcodec_open2(vc, codec, &opts);
if (opts) av_dict_free(&opts);
if (ret < 0) LPMS_ERR(open_output_err, "Error opening video encoder");
octx->hw_type = ictx->hw_type;
}
@@ -332,12 +336,81 @@ reopen_out_err:
return ret;
}
static int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
{
int ret = 0;
AVPacket *pkt = NULL;
if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type && frame) {
if (encoder->width != frame->width || encoder->height != frame->height) {
// Frame dimensions changed so need to re-init encoder
const AVCodec *codec = avcodec_find_encoder_by_name(octx->video->name);
if (!codec) LPMS_ERR(encode_cleanup, "Unable to find encoder");
AVCodecContext *vc = avcodec_alloc_context3(codec);
if (!vc) LPMS_ERR(encode_cleanup, "Unable to alloc video encoder");
// copy any additional params needed from AVCodecParameters
AVCodecParameters *codecpar = avcodec_parameters_alloc();
if (!codecpar) LPMS_ERR(encode_cleanup, "Unable to alloc codec params");
avcodec_parameters_from_context(codecpar, encoder);
avcodec_parameters_to_context(vc, codecpar);
avcodec_parameters_free(&codecpar);
// manually set some additional fields
vc->width = frame->width;
vc->height = frame->height;
vc->time_base = encoder->time_base;
vc->flags = encoder->flags;
vc->rc_min_rate = encoder->rc_min_rate;
vc->rc_max_rate = encoder->rc_max_rate;
vc->bit_rate = encoder->bit_rate;
vc->rc_buffer_size = encoder->rc_buffer_size;
if (encoder->hw_frames_ctx) {
if (octx->vf.active && av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx)) {
vc->hw_frames_ctx =
av_buffer_ref(av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx));
if (!vc->hw_frames_ctx) {
LPMS_ERR(encode_cleanup, "Unable to re-alloc encoder hwframes")
}
} else {
vc->hw_frames_ctx = av_buffer_ref(encoder->hw_frames_ctx);
}
}
// flush old encoder
AVPacket *pkt = av_packet_alloc();
if (!pkt) LPMS_ERR(encode_cleanup, "Unable to alloc flush packet");
avcodec_send_frame(encoder, NULL);
AVRational time_base = encoder->time_base;
while (!ret) {
av_packet_unref(pkt);
ret = avcodec_receive_packet(encoder, pkt);
// TODO error handling
if (!ret) {
if (!octx->fps.den && octx->vf.active) {
// adjust timestamps for filter passthrough
time_base = octx->vf.time_base;
int64_t pts_dts = pkt->pts - pkt->dts;
pkt->pts = (int64_t)pkt->opaque; // already in filter timebase
pkt->dts = pkt->pts - av_rescale_q(pts_dts, encoder->time_base, time_base);
}
mux(pkt, time_base, octx, ost);
} else if (AVERROR_EOF != ret) {
av_packet_free(&pkt);
LPMS_ERR(encode_cleanup, "did not get eof");
}
}
av_packet_free(&pkt);
avcodec_free_context(&octx->vc);
// copy codec options and open encoder
AVDictionary *opts = NULL;
if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
ret = avcodec_open2(vc, codec, &opts);
if (opts) av_dict_free(&opts);
if (ret < 0) LPMS_ERR(encode_cleanup, "Error opening video encoder");
if (octx->gop_pts_len) octx->next_kf_pts = frame->pts + octx->gop_pts_len;
octx->vc = vc;
encoder = vc;
}
if (!octx->res->frames) {
frame->pict_type = AV_PICTURE_TYPE_I;
}

View File

@@ -12,5 +12,6 @@ void free_output(struct output_ctx *octx);
int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext *encoder, AVStream *ost,
struct filter_ctx *filter, AVFrame *inf);
int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost);
int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost);
#endif // _LPMS_ENCODER_H_

View File

@@ -2121,3 +2121,248 @@ func TestDurationFPS_GetCodecInfo(t *testing.T) {
})
}
}
func TestTranscoder_Rotation(t *testing.T) {
runRotationTests(t, Software)
// TODO hevc
}
// runRotationTests checks that the transcoder handles inputs whose frame
// dimensions change mid-stream (a segment transposed from 100x56 to 56x100).
//
// It builds three 2-second segments with ffmpeg, transposes the middle one,
// and concatenates them into "double-rotated.ts" (landscape, portrait,
// landscape) and "single-rotated.ts" (landscape, portrait). Each input is
// transcoded with two renditions: the P144p30fps16x9 profile as-is and a
// copy with Framerate set to 0 (named profilePassthrough here). The test
// then verifies decoded/encoded frame counts, the per-frame output
// dimensions and, for software only, that packet timestamps match the input
// and that a single transcode matches separately transcoded segments.
//
// accel selects the acceleration used for both decoding and encoding; the
// expected dimension runs differ between Nvidia and the other path.
//
// run and dir come from setupTest (defined elsewhere); run is handed shell
// snippets to execute, presumably inside dir - confirm against setupTest.
func runRotationTests(t *testing.T, accel Acceleration) {
	run, dir := setupTest(t)
	defer os.RemoveAll(dir)

	// generate a sample that is rotated mid-stream
	cmd := `
ffmpeg -i "$1/../transcoder/test.ts" -an -c:v libx264 -g 120 -s 100x56 -f segment -t 6 test-%d.ts
ffmpeg -i test-1.ts -vf transpose -c:v libx264 -c:a copy -copyts -muxdelay 0 test-1-transposed.ts
ffprobe -select_streams v -show_entries format=start_time,duration:stream=width,height -of default=nw=1 test-1.ts > test-1.data
ffprobe -select_streams v -count_frames -show_entries format=start_time,duration:stream=width,height,nb_read_frames -of default=nw=1 test-1-transposed.ts > test-1-transposed.data
cat <<-EOF1 > test-1.expected
width=100
height=56
width=100
height=56
start_time=3.433333
duration=2.000000
EOF1
# transposed
cat <<-EOF2 > test-1-transposed.expected
width=56
height=100
nb_read_frames=120
width=56
height=100
nb_read_frames=120
start_time=3.433333
duration=2.000000
EOF2
diff -u test-1.expected test-1.data
diff -u test-1-transposed.expected test-1-transposed.data
cat test-0.ts test-1-transposed.ts test-2.ts > double-rotated.ts
cat test-0.ts test-1-transposed.ts > single-rotated.ts
`
	run(cmd)

	// Two renditions per transcode: fixed 30fps and framerate passthrough
	profile := P144p30fps16x9
	profilePassthrough := profile
	profilePassthrough.Framerate = 0

	res, err := Transcode3(
		&TranscodeOptionsIn{Fname: dir + "/double-rotated.ts", Accel: accel},
		[]TranscodeOptions{{
			Profile: profile,
			Oname:   dir + "/out-double-rotated-30fps.ts",
			Accel:   accel,
		}, {
			Profile: profilePassthrough,
			Oname:   dir + "/out-double-rotated.ts",
			Accel:   accel,
		}})
	require.NoError(t, err)
	assert.Equal(t, 360, res.Decoded.Frames)
	assert.Equal(t, 181, res.Encoded[0].Frames) // should be 180 ... ts rounding ?
	assert.Equal(t, 360, res.Encoded[1].Frames)

	// TODO test rollover of gop interval during flush

	// Collapse per-frame dimensions into "<run length> <width>,<height>" lines
	cmd = `
ffprobe -count_frames -show_streams out-double-rotated.ts | grep nb_read_frames=360
ffprobe -show_entries frame=height,width -of csv=p=0 out-double-rotated.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > out.dims
ffprobe -show_entries frame=height,width -of csv=p=0 out-double-rotated-30fps.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > out-30fps.dims
`

	// compare timestamps with input but software-only for now
	// nvidia timestamps differ by the first 2 and last 2 packets
	// TODO figure out why that is
	// TODO ideally check for this diff anyway w nvidia (so we know when / if it changes)
	if accel == Software {
		cmd = cmd + `
ffprobe -show_entries packet=dts -of csv=p=0 out-double-rotated.ts | sed 's/,$//g' > out.ptsdts
ffprobe -show_entries packet=dts -of csv=p=0 double-rotated.ts | sed 's/,$//g' > expected.ptsdts
diff -u expected.ptsdts out.ptsdts
`
	}

	// TODO figure out why cpu/gpu are different
	if accel == Nvidia {
		cmd = cmd + `
cat <<-EOF1 > expected.dims
115 256,144
120 146,260
125 256,144
EOF1
cat <<-EOF2 > expected-30fps.dims
58 256,144
60 146,260
63 256,144
EOF2
`
	} else {
		cmd = cmd + `
cat <<-EOF1 > expected.dims
120 256,144
120 146,260
120 256,144
EOF1
cat <<-EOF2 > expected-30fps.dims
60 256,144
60 146,260
61 256,144
EOF2
`
	}
	cmd = cmd + `
diff -u expected.dims out.dims
diff -u expected-30fps.dims out-30fps.dims
`
	run(cmd)

	// double check separate transcodes of portrait vs landscape
	_, err = Transcode3(
		&TranscodeOptionsIn{Fname: dir + "/test-1-transposed.ts", Accel: accel},
		[]TranscodeOptions{{
			Profile: profile,
			Oname:   dir + "/out-transposed-30fps.ts",
			Accel:   accel,
		}, {
			Profile: profilePassthrough,
			Oname:   dir + "/out-transposed.ts",
			Accel:   accel,
		}})
	require.NoError(t, err)

	// use the same transcoder instance for the landscape stuff
	tc := NewTranscoder()
	defer tc.StopTranscoder()
	_, err = tc.Transcode(&TranscodeOptionsIn{
		Fname: dir + "/test-0.ts", Accel: accel,
	}, []TranscodeOptions{{
		Profile: profile,
		Oname:   dir + "/out-test-0-30fps.ts",
		Accel:   accel,
	}, {
		Profile: profilePassthrough,
		Oname:   dir + "/out-test-0.ts",
		Accel:   accel,
	}})
	require.NoError(t, err)
	_, err = tc.Transcode(&TranscodeOptionsIn{
		Fname: dir + "/test-2.ts", Accel: accel,
	}, []TranscodeOptions{{
		Profile: profile,
		Oname:   dir + "/out-test-2-30fps.ts",
		Accel:   accel,
	}, {
		Profile: profilePassthrough,
		Oname:   dir + "/out-test-2.ts",
		Accel:   accel,
	}})
	require.NoError(t, err)

	// TODO figure out why nvidia is different; green screen?
	if accel == Software {
		// The passthrough output of the single double-rotated transcode must
		// match, frame for frame, the concatenation of the three separately
		// transcoded segments.
		cmd = `
cat out-test-0.ts out-transposed.ts out-test-2.ts > out-test-concat.ts
ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height -of csv out-test-concat.ts > out-test-concat.framedata
cat out-test-0-30fps.ts out-transposed-30fps.ts out-test-2-30fps.ts > out-test-concat-30fps.ts
ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height out-test-concat-30fps.ts -of csv > out-test-concat-30fps.framedata
ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height out-double-rotated.ts -of csv > out-double-rotated.framedata
ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height out-double-rotated-30fps.ts -of csv > out-double-rotated-30fps.framedata
diff -u out-test-concat.framedata out-double-rotated.framedata
# this does not line up
#diff -u out-test-concat-30fps.framedata out-double-rotated-30fps.framedata
`
		run(cmd)
	}

	// check single rotations
	res, err = Transcode3(
		&TranscodeOptionsIn{Fname: dir + "/single-rotated.ts", Accel: accel},
		[]TranscodeOptions{{
			Profile: profile,
			Oname:   dir + "/out-single-rotated-30fps.ts",
			Accel:   accel,
		}, {
			Profile: profilePassthrough,
			Oname:   dir + "/out-single-rotated.ts",
			Accel:   accel,
		}})
	require.NoError(t, err)
	assert.Equal(t, 240, res.Decoded.Frames)
	assert.Equal(t, 121, res.Encoded[0].Frames) // should be 120 ... ts rounding ?
	assert.Equal(t, 240, res.Encoded[1].Frames)

	// Grep for the full frame count (240, same as res.Encoded[1].Frames
	// above); a shorter pattern such as "=24" would also match other counts.
	cmd = `
ffprobe -count_frames -show_streams out-single-rotated.ts | grep nb_read_frames=240
ffprobe -show_entries frame=height,width -of csv=p=0 out-single-rotated.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > single-out.dims
ffprobe -show_entries frame=height,width -of csv=p=0 out-single-rotated-30fps.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > single-out-30fps.dims
`

	// TODO figure out why cpu/gpu are different
	if accel == Nvidia {
		cmd = cmd + `
cat <<-EOF1 > single-expected.dims
115 256,144
125 146,260
EOF1
cat <<-EOF2 > single-expected-30fps.dims
58 256,144
63 146,260
EOF2
`
	} else {
		cmd = cmd + `
cat <<-EOF1 > single-expected.dims
120 256,144
120 146,260
EOF1
cat <<-EOF2 > single-expected-30fps.dims
60 256,144
61 146,260
EOF2
`
	}
	cmd = cmd + `
diff -u single-expected.dims single-out.dims
diff -u single-expected-30fps.dims single-out-30fps.dims
`
	run(cmd)
}

View File

@@ -1,4 +1,5 @@
#include "filter.h"
#include "encoder.h"
#include "logging.h"
#include <libavfilter/buffersrc.h>
@@ -282,9 +283,45 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
// We have to reset the filter because we initially set the filter
// before the decoder is fully ready, and the decoder may change HW params
// XXX: Unclear if this path is hit on all devices
if (is_video && inf && inf->hw_frames_ctx && filter->hwframes &&
inf->hw_frames_ctx->data != filter->hwframes) {
free_filter(&octx->vf); // XXX really should flush filter first
if (is_video && inf && (
(inf->hw_frames_ctx && filter->hwframes &&
inf->hw_frames_ctx->data != filter->hwframes) ||
(filter->src_ctx->nb_outputs > 0 &&
filter->src_ctx->outputs[0]->w != inf->width &&
filter->src_ctx->outputs[0]->h != inf->height))) {
// flush video filter
ret = av_buffersrc_write_frame(filter->src_ctx, NULL);
if (ret < 0) LPMS_ERR(fg_write_cleanup, "Error closing filter for reinit");
while (!ret) {
ret = filtergraph_read(ictx, octx, filter, is_video);
if (AVERROR(EAGAIN) == ret || AVERROR_EOF == ret) break;
AVFrame *frame = filter->frame;
AVCodecContext *encoder = octx->vc;
// TODO does clipping need to be handled?
// TODO calculate signature?
// Set GOP interval if necessary
if (octx->gop_pts_len && frame && frame->pts >= octx->next_kf_pts) {
frame->pict_type = AV_PICTURE_TYPE_I;
octx->next_kf_pts = frame->pts + octx->gop_pts_len;
}
if (frame) {
// rescale pts to match encoder timebase if necessary (eg, fps passthrough)
AVRational filter_tb = av_buffersink_get_time_base(filter->sink_ctx);
if (av_cmp_q(filter_tb, encoder->time_base)) {
frame->pts = av_rescale_q(frame->pts, filter_tb, encoder->time_base);
// TODO does frame->duration needs to be rescaled too?
}
}
ret = encode(encoder, frame, octx, octx->oc->streams[octx->vi]);
if (!ret) LPMS_ERR(fg_write_cleanup, "Encoder error during filter reinit");
}
ret = 0;
free_filter(&octx->vf);
ret = init_video_filters(ictx, octx);
if (ret < 0) return lpms_ERR_FILTERS;
}

View File

@@ -792,3 +792,7 @@ func TestTranscoder_Portrait(t *testing.T) {
func TestNvidia_DiscontinuityAudioSegment(t *testing.T) {
discontinuityAudioSegment(t, Nvidia)
}
func TestNvidia_Rotation(t *testing.T) {
runRotationTests(t, Nvidia)
}

View File

@@ -43,7 +43,7 @@ const int lpms_ERR_UNRECOVERABLE = FFERRTAG('U', 'N', 'R', 'V');
// MOVED TO decoder.[ch]
// Decoder: For audio, we pay the price of closing and re-opening the decoder.
// For video, we cache the first packet we read (input_ctx.first_pkt).
// For video, we cache the last keyframe read (input_ctx.flush_pkt).
// The pts is set to a sentinel value and fed to the decoder. Once we
// receive all frames from the decoder OR have sent too many sentinel
// pkts without receiving anything, then we know the decoder has been
@@ -133,7 +133,7 @@ int transcode_shutdown(struct transcode_thread *h, int ret)
ictx->flushing = 0;
ictx->pkt_diff = 0;
ictx->sentinel_count = 0;
if (ictx->first_pkt) av_packet_free(&ictx->first_pkt);
if (ictx->flush_pkt) av_packet_free(&ictx->flush_pkt);
if (ictx->ac) avcodec_free_context(&ictx->ac);
if (ictx->vc && (AV_HWDEVICE_TYPE_NONE == ictx->hw_type)) avcodec_free_context(&ictx->vc);
for (int i = 0; i < nb_outputs; i++) {