Fix CUVID crash on resolution change (#418)

Also adds a bunch of other changes necessary to better support mid-stream resolution changes. Unfortunately with CUVID there still seems to be a brief flash of green (looks to be the length of the decoder's internal frame buffer) but we can tackle that separately. This PR simply makes the transcoder 1. not crash, and 2. correctly encode mid-stream rotations, including with CPUs
2025-09-26 19:51:36 +08:00 · 2024-08-19 10:36:37 -07:00
parent b5181eb92c 0e6fd2e7e2
commit 20131b673f
8 changed files with 400 additions and 18 deletions
--- a/ffmpeg/decoder.c
+++ b/ffmpeg/decoder.c
@@ -22,12 +22,13 @@ static int lpms_receive_frame(struct input_ctx *ictx, AVCodecContext *dec, AVFra
    return ret;
 }

-static int send_first_pkt(struct input_ctx *ictx)
+static int send_flush_pkt(struct input_ctx *ictx)
 {
  if (ictx->flushed) return 0;
-  if (!ictx->first_pkt) return lpms_ERR_INPUT_NOKF;
+  if (!ictx->flush_pkt) return lpms_ERR_INPUT_NOKF;

-  int ret = avcodec_send_packet(ictx->vc, ictx->first_pkt);
+  int ret = avcodec_send_packet(ictx->vc, ictx->flush_pkt);
+  if (ret == AVERROR(EAGAIN)) return ret; // decoder is mid-reset
  ictx->sentinel_count++;
  if (ret < 0) {
    LPMS_ERR(packet_cleanup, "Error sending flush packet");
@@ -68,13 +69,25 @@ int decode_in(struct input_ctx *ictx, AVPacket *pkt, AVFrame *frame, int *stream
    return 0;
  }

-  if (!ictx->first_pkt && pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
-    ictx->first_pkt = av_packet_clone(pkt);
-    ictx->first_pkt->pts = -1;
+  // Set up flush packet. Do this every keyframe in case the underlying frame changes
+  if (pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
+    if (!ictx->flush_pkt) ictx->flush_pkt = av_packet_clone(pkt);
+    else {
+      av_packet_unref(ictx->flush_pkt);
+      av_packet_ref(ictx->flush_pkt, pkt);
+    }
+    ictx->flush_pkt->pts = -1;
  }

  ret = lpms_send_packet(ictx, decoder, pkt);
-  if (ret < 0) {
+  if (ret == AVERROR(EAGAIN)) {
+    // Usually means the decoder needs to drain itself - block demuxing until then
+    // Seems to happen during mid-stream resolution changes
+    if (ictx->blocked_pkt) LPMS_ERR_RETURN("unexpectedly got multiple blocked packets");
+    ictx->blocked_pkt = av_packet_clone(pkt);
+    if (!ictx->blocked_pkt) LPMS_ERR_RETURN("could not clone packet for blocking");
+    // continue in an attempt to drain the decoder
+  } else if (ret < 0) {
    LPMS_ERR_RETURN("Error sending packet to decoder");
  }
  ret = lpms_receive_frame(ictx, decoder, frame);
@@ -104,8 +117,10 @@ int flush_in(struct input_ctx *ictx, AVFrame *frame, int *stream_index)
  // TODO this is unnecessary for SW decoding! SW process should match audio
  if (ictx->vc && !ictx->flushed && ictx->pkt_diff > 0) {
    ictx->flushing = 1;
-    ret = send_first_pkt(ictx);
-    if (ret < 0) {
+    ret = send_flush_pkt(ictx);
+    if (ret == AVERROR(EAGAIN)) {
+      // do nothing; decoder recently reset and needs to drain so let it
+    } else if (ret < 0) {
      ictx->flushed = 1;
      return ret;
    }
@@ -137,7 +152,10 @@ int process_in(struct input_ctx *ictx, AVFrame *frame, AVPacket *pkt,
  av_packet_unref(pkt);

  // Demux next packet
-  ret = demux_in(ictx, pkt);
+  if (ictx->blocked_pkt) {
+    av_packet_move_ref(pkt, ictx->blocked_pkt);
+    av_packet_free(&ictx->blocked_pkt);
+  } else ret = demux_in(ictx, pkt);
  // See if we got anything
  if (ret == AVERROR_EOF) {
    // no more packets, flush the decoder(s)
@@ -376,5 +394,6 @@ void free_input(struct input_ctx *inctx)
  if (inctx->hw_device_ctx) av_buffer_unref(&inctx->hw_device_ctx);
  if (inctx->last_frame_v) av_frame_free(&inctx->last_frame_v);
  if (inctx->last_frame_a) av_frame_free(&inctx->last_frame_a);
+  if (inctx->blocked_pkt) av_packet_free(&inctx->blocked_pkt);
 }

--- a/ffmpeg/decoder.h
+++ b/ffmpeg/decoder.h
@@ -20,7 +20,7 @@ struct input_ctx {
  char *xcoderParams;

  // Decoder flush
-  AVPacket *first_pkt;
+  AVPacket *flush_pkt;
  int flushed;
  int flushing;
  // The diff of `packets sent - frames recv` serves as an estimate of
@@ -33,6 +33,9 @@ struct input_ctx {
 #define SENTINEL_MAX 8
  uint16_t sentinel_count;

+  // Packet held while decoder is blocked and needs to drain
+  AVPacket *blocked_pkt;
+
  // Filter flush
  AVFrame *last_frame_v, *last_frame_a;

--- a/ffmpeg/encoder.c
+++ b/ffmpeg/encoder.c
@@ -255,7 +255,11 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
 	if(strcmp(octx->xcoderParams,"")!=0){
 	    av_opt_set(vc->priv_data, "xcoder-params", octx->xcoderParams, 0);
 	}
-    ret = avcodec_open2(vc, codec, &octx->video->opts);
+    // copy codec options and open encoder
+    AVDictionary *opts = NULL;
+    if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
+    ret = avcodec_open2(vc, codec, &opts);
+    if (opts) av_dict_free(&opts);
    if (ret < 0) LPMS_ERR(open_output_err, "Error opening video encoder");
    octx->hw_type = ictx->hw_type;
  }
@@ -332,12 +336,81 @@ reopen_out_err:
  return ret;
 }

-static int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
+int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
 {
  int ret = 0;
  AVPacket *pkt = NULL;

  if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type && frame) {
+    if (encoder->width != frame->width || encoder->height != frame->height) {
+      // Frame dimensions changed so need to re-init encoder
+      const AVCodec *codec = avcodec_find_encoder_by_name(octx->video->name);
+      if (!codec) LPMS_ERR(encode_cleanup, "Unable to find encoder");
+      AVCodecContext *vc = avcodec_alloc_context3(codec);
+      if (!vc) LPMS_ERR(encode_cleanup, "Unable to alloc video encoder");
+      // copy any additional params needed from AVCodecParameters
+      AVCodecParameters *codecpar = avcodec_parameters_alloc();
+      if (!codecpar) LPMS_ERR(encode_cleanup, "Unable to alloc codec params");
+      avcodec_parameters_from_context(codecpar, encoder);
+      avcodec_parameters_to_context(vc, codecpar);
+      avcodec_parameters_free(&codecpar);
+      // manually set some additional fields
+      vc->width = frame->width;
+      vc->height = frame->height;
+      vc->time_base = encoder->time_base;
+      vc->flags = encoder->flags;
+      vc->rc_min_rate = encoder->rc_min_rate;
+      vc->rc_max_rate = encoder->rc_max_rate;
+      vc->bit_rate = encoder->bit_rate;
+      vc->rc_buffer_size = encoder->rc_buffer_size;
+      if (encoder->hw_frames_ctx) {
+        if (octx->vf.active && av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx)) {
+          vc->hw_frames_ctx =
+            av_buffer_ref(av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx));
+          if (!vc->hw_frames_ctx) {
+            LPMS_ERR(encode_cleanup, "Unable to re-alloc encoder hwframes")
+          }
+        } else {
+          vc->hw_frames_ctx = av_buffer_ref(encoder->hw_frames_ctx);
+        }
+      }
+
+      // flush old encoder
+      AVPacket *pkt = av_packet_alloc();
+      if (!pkt) LPMS_ERR(encode_cleanup, "Unable to alloc flush packet");
+      avcodec_send_frame(encoder, NULL);
+      AVRational time_base = encoder->time_base;
+      while (!ret) {
+        av_packet_unref(pkt);
+        ret = avcodec_receive_packet(encoder, pkt);
+        // TODO error handling
+        if (!ret) {
+          if (!octx->fps.den && octx->vf.active) {
+            // adjust timestamps for filter passthrough
+            time_base = octx->vf.time_base;
+            int64_t pts_dts = pkt->pts - pkt->dts;
+            pkt->pts = (int64_t)pkt->opaque; // already in filter timebase
+            pkt->dts = pkt->pts - av_rescale_q(pts_dts, encoder->time_base, time_base);
+          }
+          mux(pkt, time_base, octx, ost);
+        } else if (AVERROR_EOF != ret) {
+          av_packet_free(&pkt);
+          LPMS_ERR(encode_cleanup, "did not get eof");
+        }
+      }
+      av_packet_free(&pkt);
+      avcodec_free_context(&octx->vc);
+
+      // copy codec options and open encoder
+      AVDictionary *opts = NULL;
+      if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
+      ret = avcodec_open2(vc, codec, &opts);
+      if (opts) av_dict_free(&opts);
+      if (ret < 0) LPMS_ERR(encode_cleanup, "Error opening video encoder");
+      if (octx->gop_pts_len) octx->next_kf_pts = frame->pts + octx->gop_pts_len;
+      octx->vc = vc;
+      encoder = vc;
+    }
    if (!octx->res->frames) {
      frame->pict_type = AV_PICTURE_TYPE_I;
    }
--- a/ffmpeg/encoder.h
+++ b/ffmpeg/encoder.h
@@ -12,5 +12,6 @@ void free_output(struct output_ctx *octx);
 int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext *encoder, AVStream *ost,
  struct filter_ctx *filter, AVFrame *inf);
 int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost);
+int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost);

 #endif // _LPMS_ENCODER_H_
--- a/ffmpeg/ffmpeg_test.go
+++ b/ffmpeg/ffmpeg_test.go
@@ -2121,3 +2121,248 @@ func TestDurationFPS_GetCodecInfo(t *testing.T) {
 		})
 	}
 }
+
+func TestTranscoder_Rotation(t *testing.T) {
+	runRotationTests(t, Software)
+	// TODO hevc
+}
+
+func runRotationTests(t *testing.T, accel Acceleration) {
+	run, dir := setupTest(t)
+	defer os.RemoveAll(dir)
+
+	// generate a sample that is rotated mid-stream
+	cmd := `
+		ffmpeg -i "$1/../transcoder/test.ts" -an -c:v libx264 -g 120 -s 100x56 -f segment -t 6 test-%d.ts
+		ffmpeg -i test-1.ts -vf transpose -c:v libx264 -c:a copy -copyts -muxdelay 0 test-1-transposed.ts
+		ffprobe -select_streams v -show_entries format=start_time,duration:stream=width,height -of default=nw=1 test-1.ts > test-1.data
+		ffprobe -select_streams v -count_frames -show_entries format=start_time,duration:stream=width,height,nb_read_frames -of default=nw=1 test-1-transposed.ts > test-1-transposed.data
+
+		cat <<-EOF1 > test-1.expected
+			width=100
+			height=56
+			width=100
+			height=56
+			start_time=3.433333
+			duration=2.000000
+		EOF1
+
+		# transposed
+		cat <<-EOF2 > test-1-transposed.expected
+			width=56
+			height=100
+			nb_read_frames=120
+			width=56
+			height=100
+			nb_read_frames=120
+			start_time=3.433333
+			duration=2.000000
+		EOF2
+
+		diff -u test-1.expected test-1.data
+		diff -u test-1-transposed.expected test-1-transposed.data
+
+		cat test-0.ts test-1-transposed.ts test-2.ts > double-rotated.ts
+		cat test-0.ts test-1-transposed.ts > single-rotated.ts
+	`
+	run(cmd)
+
+	profile := P144p30fps16x9
+	profilePassthrough := profile
+	profilePassthrough.Framerate = 0
+	res, err := Transcode3(
+		&TranscodeOptionsIn{Fname: dir + "/double-rotated.ts", Accel: accel},
+		[]TranscodeOptions{{
+			Profile: profile,
+			Oname:   dir + "/out-double-rotated-30fps.ts",
+			Accel:   accel,
+		}, {
+			Profile: profilePassthrough,
+			Oname:   dir + "/out-double-rotated.ts",
+			Accel:   accel,
+		}})
+	require.NoError(t, err)
+
+	assert.Equal(t, 360, res.Decoded.Frames)
+	assert.Equal(t, 181, res.Encoded[0].Frames) // should be 180 ... ts rounding ?
+	assert.Equal(t, 360, res.Encoded[1].Frames)
+
+	// TODO test rollover of gop interval during flush
+
+	cmd = `
+		ffprobe -count_frames -show_streams out-double-rotated.ts | grep nb_read_frames=360
+		ffprobe -show_entries frame=height,width -of csv=p=0 out-double-rotated.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > out.dims
+		ffprobe -show_entries frame=height,width -of csv=p=0 out-double-rotated-30fps.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > out-30fps.dims
+	`
+
+	// compare timestamps with input but software-only for now
+	// nvidia timestamps differ by the first 2 and last 2 packets
+	// TODO figure out why that is
+	// TODO ideally check for this diff anyway w nvidia (so we know when / if it changes)
+	if accel == Software {
+		cmd = cmd + `
+			ffprobe -show_entries packet=dts -of csv=p=0 out-double-rotated.ts | sed 's/,$//g' > out.ptsdts
+			ffprobe -show_entries packet=dts -of csv=p=0 double-rotated.ts | sed 's/,$//g' > expected.ptsdts
+			diff -u expected.ptsdts out.ptsdts
+		`
+	}
+
+	// TODO figure out why cpu/gpu are different
+	if accel == Nvidia {
+		cmd = cmd + `
+			cat <<-EOF1 > expected.dims
+				115 256,144
+				120 146,260
+				125 256,144
+			EOF1
+
+			cat <<-EOF2 > expected-30fps.dims
+				58 256,144
+				60 146,260
+				63 256,144
+			EOF2
+		`
+	} else {
+		cmd = cmd + `
+			cat <<-EOF1 > expected.dims
+				120 256,144
+				120 146,260
+				120 256,144
+			EOF1
+
+			cat <<-EOF2 > expected-30fps.dims
+				60 256,144
+				60 146,260
+				61 256,144
+			EOF2
+		`
+	}
+
+	cmd = cmd + `
+		diff -u expected.dims out.dims
+		diff -u expected-30fps.dims out-30fps.dims
+	`
+
+	run(cmd)
+
+	// double check separate transcodes of portrait vs landscape
+	_, err = Transcode3(
+		&TranscodeOptionsIn{Fname: dir + "/test-1-transposed.ts", Accel: accel},
+		[]TranscodeOptions{{
+			Profile: profile,
+			Oname:   dir + "/out-transposed-30fps.ts",
+			Accel:   accel,
+		}, {
+			Profile: profilePassthrough,
+			Oname:   dir + "/out-transposed.ts",
+			Accel:   accel,
+		}})
+	require.NoError(t, err)
+
+	// use the same transcoder instance for the landscape stuff
+	tc := NewTranscoder()
+	defer tc.StopTranscoder()
+	_, err = tc.Transcode(&TranscodeOptionsIn{
+		Fname: dir + "/test-0.ts", Accel: accel,
+	}, []TranscodeOptions{{
+		Profile: profile,
+		Oname:   dir + "/out-test-0-30fps.ts",
+		Accel:   accel,
+	}, {
+		Profile: profilePassthrough,
+		Oname:   dir + "/out-test-0.ts",
+		Accel:   accel,
+	}})
+	require.NoError(t, err)
+
+	_, err = tc.Transcode(&TranscodeOptionsIn{
+		Fname: dir + "/test-2.ts", Accel: accel,
+	}, []TranscodeOptions{{
+		Profile: profile,
+		Oname:   dir + "/out-test-2-30fps.ts",
+		Accel:   accel,
+	}, {
+		Profile: profilePassthrough,
+		Oname:   dir + "/out-test-2.ts",
+		Accel:   accel,
+	}})
+	require.NoError(t, err)
+
+	// TODO figure out why nvidia is different; green screen?
+	if accel == Software {
+		cmd = `
+		cat out-test-0.ts  out-transposed.ts out-test-2.ts > out-test-concat.ts
+		ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height -of csv out-test-concat.ts > out-test-concat.framedata
+
+		cat out-test-0-30fps.ts  out-transposed-30fps.ts out-test-2-30fps.ts > out-test-concat-30fps.ts
+		ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height out-test-concat-30fps.ts -of csv > out-test-concat-30fps.framedata
+
+		ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height out-double-rotated.ts -of csv > out-double-rotated.framedata
+
+		ffprobe -show_entries frame=pts,pkt_dts,duration,pict_type,width,height out-double-rotated-30fps.ts -of csv > out-double-rotated-30fps.framedata
+
+		diff -u out-test-concat.framedata out-double-rotated.framedata
+
+		# this does not line up
+		#diff -u out-test-concat-30fps.framedata out-double-rotated-30fps.framedata
+	`
+		run(cmd)
+	}
+
+	// check single rotations
+	res, err = Transcode3(
+		&TranscodeOptionsIn{Fname: dir + "/single-rotated.ts", Accel: accel},
+		[]TranscodeOptions{{
+			Profile: profile,
+			Oname:   dir + "/out-single-rotated-30fps.ts",
+			Accel:   accel,
+		}, {
+			Profile: profilePassthrough,
+			Oname:   dir + "/out-single-rotated.ts",
+			Accel:   accel,
+		}})
+	require.NoError(t, err)
+
+	assert.Equal(t, 240, res.Decoded.Frames)
+	assert.Equal(t, 121, res.Encoded[0].Frames) // should be 120 ... ts rounding ?
+	assert.Equal(t, 240, res.Encoded[1].Frames)
+
+	cmd = `
+		ffprobe -count_frames -show_streams out-single-rotated.ts | grep nb_read_frames=24
+		ffprobe -show_entries frame=height,width -of csv=p=0 out-single-rotated.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > single-out.dims
+		ffprobe -show_entries frame=height,width -of csv=p=0 out-single-rotated-30fps.ts | sed 's/,$//g' | uniq -c | sed 's/^ *//g' > single-out-30fps.dims
+	`
+
+	// TODO figure out why cpu/gpu are different
+	if accel == Nvidia {
+		cmd = cmd + `
+			cat <<-EOF1 > single-expected.dims
+				115 256,144
+				125 146,260
+			EOF1
+
+			cat <<-EOF2 > single-expected-30fps.dims
+				58 256,144
+				63 146,260
+			EOF2
+		`
+	} else {
+		cmd = cmd + `
+			cat <<-EOF1 > single-expected.dims
+				120 256,144
+				120 146,260
+			EOF1
+
+			cat <<-EOF2 > single-expected-30fps.dims
+				60 256,144
+				61 146,260
+			EOF2
+		`
+	}
+
+	cmd = cmd + `
+		diff -u single-expected.dims single-out.dims
+		diff -u single-expected-30fps.dims single-out-30fps.dims
+	`
+	run(cmd)
+}
--- a/ffmpeg/filter.c
+++ b/ffmpeg/filter.c
@@ -1,4 +1,5 @@
 #include "filter.h"
+#include "encoder.h"
 #include "logging.h"

 #include <libavfilter/buffersrc.h>
@@ -282,9 +283,45 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
  // We have to reset the filter because we initially set the filter
  // before the decoder is fully ready, and the decoder may change HW params
  // XXX: Unclear if this path is hit on all devices
-  if (is_video && inf && inf->hw_frames_ctx && filter->hwframes &&
-      inf->hw_frames_ctx->data != filter->hwframes) {
-    free_filter(&octx->vf); // XXX really should flush filter first
+  if (is_video && inf && (
+      (inf->hw_frames_ctx && filter->hwframes &&
+        inf->hw_frames_ctx->data != filter->hwframes) ||
+      (filter->src_ctx->nb_outputs > 0 &&
+        filter->src_ctx->outputs[0]->w != inf->width &&
+        filter->src_ctx->outputs[0]->h != inf->height))) {
+
+
+    // flush video filter
+    ret = av_buffersrc_write_frame(filter->src_ctx, NULL);
+    if (ret < 0) LPMS_ERR(fg_write_cleanup, "Error closing filter for reinit");
+    while (!ret) {
+      ret = filtergraph_read(ictx, octx, filter, is_video);
+      if (AVERROR(EAGAIN) == ret || AVERROR_EOF == ret) break;
+      AVFrame *frame = filter->frame;
+      AVCodecContext *encoder = octx->vc;
+
+      // TODO does clipping need to be handled?
+      // TODO calculate signature?
+
+      // Set GOP interval if necessary
+      if (octx->gop_pts_len && frame && frame->pts >= octx->next_kf_pts) {
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        octx->next_kf_pts = frame->pts + octx->gop_pts_len;
+      }
+      if (frame) {
+        // rescale pts to match encoder timebase if necessary (eg, fps passthrough)
+        AVRational filter_tb = av_buffersink_get_time_base(filter->sink_ctx);
+        if (av_cmp_q(filter_tb, encoder->time_base)) {
+          frame->pts = av_rescale_q(frame->pts, filter_tb, encoder->time_base);
+          // TODO does frame->duration needs to be rescaled too?
+        }
+      }
+      ret = encode(encoder, frame, octx, octx->oc->streams[octx->vi]);
+      if (!ret) LPMS_ERR(fg_write_cleanup, "Encoder error during filter reinit");
+    }
+    ret = 0;
+
+    free_filter(&octx->vf);
    ret = init_video_filters(ictx, octx);
    if (ret < 0) return lpms_ERR_FILTERS;
  }
--- a/ffmpeg/nvidia_test.go
+++ b/ffmpeg/nvidia_test.go
@@ -792,3 +792,7 @@ func TestTranscoder_Portrait(t *testing.T) {
 func TestNvidia_DiscontinuityAudioSegment(t *testing.T) {
 	discontinuityAudioSegment(t, Nvidia)
 }
+
+func TestNvidia_Rotation(t *testing.T) {
+	runRotationTests(t, Nvidia)
+}
--- a/ffmpeg/transcoder.c
+++ b/ffmpeg/transcoder.c
@@ -43,7 +43,7 @@ const int lpms_ERR_UNRECOVERABLE = FFERRTAG('U', 'N', 'R', 'V');

 // MOVED TO decoder.[ch]
 //  Decoder: For audio, we pay the price of closing and re-opening the decoder.
-//           For video, we cache the first packet we read (input_ctx.first_pkt).
+//           For video, we cache the last keyframe read  (input_ctx.flush_pkt).
 //           The pts is set to a sentinel value and fed to the decoder. Once we
 //           receive all frames from the decoder OR have sent too many sentinel
 //           pkts without receiving anything, then we know the decoder has been
@@ -133,7 +133,7 @@ int transcode_shutdown(struct transcode_thread *h, int ret)
  ictx->flushing = 0;
  ictx->pkt_diff = 0;
  ictx->sentinel_count = 0;
-  if (ictx->first_pkt) av_packet_free(&ictx->first_pkt);
+  if (ictx->flush_pkt) av_packet_free(&ictx->flush_pkt);
  if (ictx->ac) avcodec_free_context(&ictx->ac);
  if (ictx->vc && (AV_HWDEVICE_TYPE_NONE == ictx->hw_type)) avcodec_free_context(&ictx->vc);
  for (int i = 0; i < nb_outputs; i++) {