mirror of
https://github.com/horgh/videostreamer.git
synced 2025-09-26 20:41:31 +08:00

The options I am going for are these: "-rtsp_transport", "udp", "-reset_timestamps", "1", "-vsync", "1", "-flags", "global_header", "-bsf:v", "dump_extra", This commit includes, I believe, all except `-reset_timestamps 1` and `-vsync 1`. For the first, I believe it is not applicable as we aren't segmenting, although I'm not sure. For the second, I believe again it is not applicable as we have only one stream. This is to address lag issues raised in #2. However, I'm unable to reproduce the issues locally, so this is to try and see if any of the options help.
588 lines
15 KiB
C
588 lines
15 KiB
C
//
// This library provides remuxing from a video stream (such as an RTSP URL) to
// an MP4 container. It writes a fragmented MP4 so that it can be streamed to a
// pipe.
//
// There is no re-encoding. The stream is copied as is.
//
// The logic here is heavily based on remuxing.c by Stefano Sabatini.
//
|
|
|
|
#include <errno.h>
|
|
#include <libavdevice/avdevice.h>
|
|
#include <libavutil/timestamp.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "videostreamer.h"
|
|
|
|
static void
|
|
__vs_log_packet(const AVFormatContext * const,
|
|
const AVPacket * const, const char * const);
|
|
|
|
// One-time initialisation of the FFmpeg libraries used by this file. Call it
// before opening any input or output.
void
vs_setup(void)
{
	// Muxers, demuxers, and protocols.
	av_register_all();

	// Device formats.
	avdevice_register_all();

	// Network support for URL based inputs.
	avformat_network_init();
}
|
|
|
|
struct VSInput *
|
|
vs_open_input(const char * const input_format_name,
|
|
const char * const input_url, const bool verbose)
|
|
{
|
|
if (!input_format_name || strlen(input_format_name) == 0 ||
|
|
!input_url || strlen(input_url) == 0) {
|
|
printf("%s\n", strerror(EINVAL));
|
|
return NULL;
|
|
}
|
|
|
|
struct VSInput * const input = calloc(1, sizeof(struct VSInput));
|
|
if (!input) {
|
|
printf("%s\n", strerror(errno));
|
|
return NULL;
|
|
}
|
|
|
|
|
|
AVInputFormat * const input_format = av_find_input_format(input_format_name);
|
|
if (!input_format) {
|
|
printf("input format not found\n");
|
|
vs_destroy_input(input);
|
|
return NULL;
|
|
}
|
|
|
|
// Set rtsp_transport to udp. Whether this makes any real difference I'm not
|
|
// sure. I am debugging a lag/freeze issue.
|
|
AVDictionary * opts = NULL;
|
|
if (av_dict_set(&opts, "rtsp_transport", "udp", 0) < 0) {
|
|
printf("unable to set rtsp_transport opt\n");
|
|
vs_destroy_input(input);
|
|
return NULL;
|
|
}
|
|
|
|
if (avformat_open_input(&input->format_ctx, input_url, input_format,
|
|
&opts) != 0) {
|
|
printf("unable to open input\n");
|
|
vs_destroy_input(input);
|
|
av_dict_free(&opts);
|
|
return NULL;
|
|
}
|
|
|
|
// Check that all options were found. If one wasn't then it will be returned
|
|
// and in the options.
|
|
if (av_dict_count(opts) != 0) {
|
|
printf("some options not set\n");
|
|
vs_destroy_input(input);
|
|
av_dict_free(&opts);
|
|
return NULL;
|
|
}
|
|
|
|
av_dict_free(&opts);
|
|
|
|
if (avformat_find_stream_info(input->format_ctx, NULL) < 0) {
|
|
printf("failed to find stream info\n");
|
|
vs_destroy_input(input);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
if (verbose) {
|
|
av_dump_format(input->format_ctx, 0, input_url, 0);
|
|
}
|
|
|
|
|
|
// Find the first video stream.
|
|
|
|
input->video_stream_index = -1;
|
|
|
|
for (unsigned int i = 0; i < input->format_ctx->nb_streams; i++) {
|
|
AVStream * const in_stream = input->format_ctx->streams[i];
|
|
|
|
if (in_stream->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
|
|
if (verbose) {
|
|
printf("skip non-video stream %u\n", i);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
input->video_stream_index = (int) i;
|
|
break;
|
|
}
|
|
|
|
if (input->video_stream_index == -1) {
|
|
printf("no video stream found\n");
|
|
vs_destroy_input(input);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
return input;
|
|
}
|
|
|
|
void
|
|
vs_destroy_input(struct VSInput * const input)
|
|
{
|
|
if (!input) {
|
|
return;
|
|
}
|
|
|
|
if (input->format_ctx) {
|
|
avformat_close_input(&input->format_ctx);
|
|
avformat_free_context(input->format_ctx);
|
|
}
|
|
|
|
free(input);
|
|
}
|
|
|
|
struct VSOutput *
|
|
vs_open_output(const char * const output_format_name,
|
|
const char * const output_url, const struct VSInput * const input,
|
|
const bool verbose)
|
|
{
|
|
if (!output_format_name || strlen(output_format_name) == 0 ||
|
|
!output_url || strlen(output_url) == 0 ||
|
|
!input) {
|
|
printf("%s\n", strerror(EINVAL));
|
|
return NULL;
|
|
}
|
|
|
|
struct VSOutput * const output = calloc(1, sizeof(struct VSOutput));
|
|
if (!output) {
|
|
printf("%s\n", strerror(errno));
|
|
return NULL;
|
|
}
|
|
|
|
|
|
AVOutputFormat * const output_format = av_guess_format(output_format_name,
|
|
NULL, NULL);
|
|
if (!output_format) {
|
|
printf("output format not found\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
// Try setting this option, again for lag/freezing issues. Although I am
|
|
// primarily trying to set the AV_CODEC_FLAG_GLOBAL_HEADER, so I don't know
|
|
// if this is applicable.
|
|
output_format->flags |= AVFMT_GLOBALHEADER;
|
|
|
|
if (avformat_alloc_output_context2(&output->format_ctx, output_format,
|
|
NULL, NULL) < 0) {
|
|
printf("unable to create output context\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
// Copy the video stream.
|
|
|
|
AVStream * const out_stream = avformat_new_stream(output->format_ctx, NULL);
|
|
if (!out_stream) {
|
|
printf("unable to add stream\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
AVStream * const in_stream = input->format_ctx->streams[
|
|
input->video_stream_index];
|
|
|
|
if (avcodec_parameters_copy(out_stream->codecpar,
|
|
in_stream->codecpar) < 0) {
|
|
printf("unable to copy codec parameters\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
// I want to set a codec context flag. We can't do it directly to
|
|
// out_stream->codec as that is deprecated. Instead, allocate a codec context,
|
|
// copy the parameters, set the flag, then copy the codec context back to
|
|
// parameters. I'm not sure this is the correct way!
|
|
|
|
AVCodecContext * codec_ctx = avcodec_alloc_context3(NULL);
|
|
if (NULL == codec_ctx) {
|
|
printf("unable to allocate codec context\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
if (avcodec_parameters_to_context(codec_ctx, out_stream->codecpar) < 0) {
|
|
printf("unable to copy codec parameters to codec context");
|
|
vs_destroy_output(output);
|
|
avcodec_free_context(&codec_ctx);
|
|
return NULL;
|
|
}
|
|
|
|
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
|
|
|
if (avcodec_parameters_from_context(out_stream->codecpar, codec_ctx) < 0) {
|
|
printf("unable to copy codec context to codec parameters");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
avcodec_free_context(&codec_ctx);
|
|
|
|
if (verbose) {
|
|
av_dump_format(output->format_ctx, 0, output_url, 1);
|
|
}
|
|
|
|
|
|
// Open output file.
|
|
if (avio_open(&output->format_ctx->pb, output_url, AVIO_FLAG_WRITE) < 0) {
|
|
printf("unable to open output file\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
// Write file header.
|
|
|
|
AVDictionary * opts = NULL;
|
|
|
|
// -movflags frag_keyframe tells the mp4 muxer to fragment at each video
|
|
// keyframe. This is necessary for it to support output to a non-seekable
|
|
// file (e.g., pipe).
|
|
//
|
|
// -movflags isml+frag_keyframe is the same, except isml appears to be to
|
|
// make the output a live smooth streaming feed (as opposed to not live). I'm
|
|
// not sure the difference, but isml appears to be a microsoft
|
|
// format/protocol.
|
|
//
|
|
// To specify both, use isml+frag_keyframe as the value.
|
|
//
|
|
// I found that while Chrome had no trouble displaying the resulting mp4 with
|
|
// just frag_keyframe, Firefox would not until I also added empty_moov.
|
|
// empty_moov apparently writes some info at the start of the file.
|
|
if (av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0) < 0) {
|
|
printf("unable to set movflags opt\n");
|
|
vs_destroy_output(output);
|
|
return NULL;
|
|
}
|
|
|
|
if (av_dict_set_int(&opts, "flush_packets", 1, 0) < 0) {
|
|
printf("unable to set flush_packets opt\n");
|
|
vs_destroy_output(output);
|
|
av_dict_free(&opts);
|
|
return NULL;
|
|
}
|
|
|
|
if (avformat_write_header(output->format_ctx, &opts) < 0) {
|
|
printf("unable to write header\n");
|
|
vs_destroy_output(output);
|
|
av_dict_free(&opts);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
// Check any options that were not set. Because I'm not sure if all are
|
|
// appropriate to set through the avformat_write_header().
|
|
if (av_dict_count(opts) != 0) {
|
|
printf("some options not set\n");
|
|
vs_destroy_output(output);
|
|
av_dict_free(&opts);
|
|
return NULL;
|
|
}
|
|
|
|
av_dict_free(&opts);
|
|
|
|
|
|
output->last_dts = 0;
|
|
|
|
return output;
|
|
}
|
|
|
|
void
|
|
vs_destroy_output(struct VSOutput * const output)
|
|
{
|
|
if (!output) {
|
|
return;
|
|
}
|
|
|
|
if (output->format_ctx) {
|
|
if (av_write_trailer(output->format_ctx) != 0) {
|
|
printf("unable to write trailer\n");
|
|
}
|
|
|
|
if (avio_closep(&output->format_ctx->pb) != 0) {
|
|
printf("avio_closep failed\n");
|
|
}
|
|
|
|
avformat_free_context(output->format_ctx);
|
|
}
|
|
|
|
free(output);
|
|
}
|
|
|
|
// Read a compressed and encoded frame as a packet.
|
|
//
|
|
// Returns:
|
|
// -1 if error
|
|
// 0 if nothing useful read (e.g., non-video packet)
|
|
// 1 if read a packet
|
|
int
|
|
vs_read_packet(const struct VSInput * input, AVPacket * const pkt,
|
|
const bool verbose)
|
|
{
|
|
if (!input || !pkt) {
|
|
printf("%s\n", strerror(errno));
|
|
return -1;
|
|
}
|
|
|
|
memset(pkt, 0, sizeof(AVPacket));
|
|
|
|
|
|
// Read encoded frame (as a packet).
|
|
|
|
if (av_read_frame(input->format_ctx, pkt) != 0) {
|
|
printf("unable to read frame\n");
|
|
return -1;
|
|
}
|
|
|
|
|
|
// Ignore it if it's not our video stream.
|
|
|
|
if (pkt->stream_index != input->video_stream_index) {
|
|
if (verbose) {
|
|
printf("skipping packet from input stream %d, our video is from stream %d\n",
|
|
pkt->stream_index, input->video_stream_index);
|
|
}
|
|
|
|
av_packet_unref(pkt);
|
|
return 0;
|
|
}
|
|
|
|
|
|
if (verbose) {
|
|
__vs_log_packet(input->format_ctx, pkt, "in");
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
// We change the packet's pts, dts, duration, pos.
|
|
//
|
|
// We do not unref it.
|
|
//
|
|
// Returns:
|
|
// -1 if error
|
|
// 1 if we wrote the packet
|
|
int
|
|
vs_write_packet(const struct VSInput * const input,
|
|
struct VSOutput * const output, AVPacket * const pkt, const bool verbose)
|
|
{
|
|
if (!input || !output || !pkt) {
|
|
printf("%s\n", strerror(EINVAL));
|
|
return -1;
|
|
}
|
|
|
|
AVStream * const in_stream = input->format_ctx->streams[pkt->stream_index];
|
|
if (!in_stream) {
|
|
printf("input stream not found with stream index %d\n", pkt->stream_index);
|
|
return -1;
|
|
}
|
|
|
|
|
|
// If there are multiple input streams, then the stream index on the packet
|
|
// may not match the stream index in our output. We need to ensure the index
|
|
// matches. Note by this point we have checked that it is indeed a packet
|
|
// from the stream we want (we do this when reading the packet).
|
|
//
|
|
// As we only ever have a single output stream (one, video), the index will
|
|
// be 0.
|
|
if (pkt->stream_index != 0) {
|
|
if (verbose) {
|
|
printf("updating packet stream index to 0 (from %d)\n",
|
|
pkt->stream_index);
|
|
}
|
|
|
|
pkt->stream_index = 0;
|
|
}
|
|
|
|
|
|
AVStream * const out_stream = output->format_ctx->streams[pkt->stream_index];
|
|
if (!out_stream) {
|
|
printf("output stream not found with stream index %d\n", pkt->stream_index);
|
|
return -1;
|
|
}
|
|
|
|
// Try using -bsf dump_extra. This is again trying to address lag/halting
|
|
// issues.
|
|
|
|
// TODO: We could keep a single bsf context around the entire time.
|
|
const AVBitStreamFilter * const bsf = av_bsf_get_by_name("dump_extra");
|
|
if (NULL == bsf) {
|
|
printf("unable to find bitstreamfilter dump_extra\n");
|
|
return -1;
|
|
}
|
|
|
|
AVBSFContext * bsf_ctx = NULL;
|
|
if (av_bsf_alloc(bsf, &bsf_ctx) < 0) {
|
|
printf("unable to allocate bsf context\n");
|
|
return -1;
|
|
}
|
|
|
|
if (avcodec_parameters_copy(bsf_ctx->par_in, out_stream->codecpar) < 0) {
|
|
printf("unable to copy codec parameters (bsf input)\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
if (avcodec_parameters_copy(bsf_ctx->par_out, out_stream->codecpar) < 0) {
|
|
printf("unable to copy codec parameters (bsf output)\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
|
|
if (av_bsf_init(bsf_ctx) != 0) {
|
|
printf("unable to init bsf context\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
|
|
// The BSF takes ownership of the packet. Since we don't have ownership here
|
|
// ourselves, clone it first.
|
|
AVPacket * pkt2 = av_packet_clone(pkt);
|
|
if (NULL == pkt2) {
|
|
printf("unable to clone packet\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
|
|
if (av_bsf_send_packet(bsf_ctx, pkt2) != 0) {
|
|
printf("unable to send packet to bsf context\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
|
|
if (av_bsf_receive_packet(bsf_ctx, pkt2) != 0) {
|
|
printf("unable to retrieve packet from bsf context\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
|
|
// TODO: There may be multiple packets to retrieve. We should fully drain the
|
|
// bsf.
|
|
|
|
// Signal we won't send any more packets.
|
|
if (av_bsf_send_packet(bsf_ctx, NULL) != 0) {
|
|
printf("unable to tell bsf context that no more packets will be sent\n");
|
|
av_bsf_free(&bsf_ctx);
|
|
return -1;
|
|
}
|
|
|
|
// TODO: After signaling no more packets, we should ensure we drain the bsf.
|
|
|
|
av_bsf_free(&bsf_ctx);
|
|
|
|
// It is possible that the input is not well formed. Its dts (decompression
|
|
// timestamp) may fluctuate. av_write_frame() says that the dts must be
|
|
// strictly increasing.
|
|
//
|
|
// Packets from such inputs might look like:
|
|
//
|
|
// in: pts:18750 pts_time:0.208333 dts:18750 dts_time:0.208333 duration:3750 duration_time:0.0416667 stream_index:1
|
|
// in: pts:0 pts_time:0 dts:0 dts_time:0 duration:3750 duration_time:0.0416667 stream_index:1
|
|
//
|
|
// dts here is 18750 and then 0.
|
|
//
|
|
// If we try to write the second packet as is, we'll see this error:
|
|
// [mp4 @ 0x10f1ae0] Application provided invalid, non monotonically increasing dts to muxer in stream 1: 18750 >= 0
|
|
//
|
|
// This is apparently a fairly common problem. In ffmpeg.c (as of ffmpeg
|
|
// 3.2.4 at least) there is logic to rewrite the dts and warn if it happens.
|
|
// Let's do the same. Note my logic is a little different here.
|
|
if (pkt2->dts != AV_NOPTS_VALUE && output->last_dts != AV_NOPTS_VALUE &&
|
|
pkt2->dts < output->last_dts) {
|
|
int64_t const next_dts = output->last_dts+1;
|
|
|
|
if (verbose) {
|
|
printf("Warning: Non-monotonous DTS in input stream. Previous: %" PRId64 " current: %" PRId64 ". changing to %" PRId64 ".\n",
|
|
output->last_dts, pkt2->dts, next_dts);
|
|
}
|
|
|
|
// We also apparently (ffmpeg.c does this too) need to update the pts.
|
|
// Otherwise we see an error like:
|
|
//
|
|
// [mp4 @ 0x555e6825ea60] pts (3780) < dts (22531) in stream 0
|
|
|
|
if (pkt2->pts != AV_NOPTS_VALUE && pkt2->pts >= pkt2->dts) {
|
|
pkt2->pts = FFMAX(pkt2->pts, next_dts);
|
|
}
|
|
|
|
pkt2->dts = next_dts;
|
|
}
|
|
|
|
|
|
// Set pts/dts if not set. Otherwise we will receive warnings like
|
|
//
|
|
// [mp4 @ 0x55688397bc40] Timestamps are unset in a packet for stream 0. This
|
|
// is deprecated and will stop working in the future. Fix your code to set
|
|
// the timestamps properly
|
|
//
|
|
// [mp4 @ 0x55688397bc40] Encoder did not produce proper pts, making some up.
|
|
if (pkt2->pts == AV_NOPTS_VALUE) {
|
|
pkt2->pts = 0;
|
|
} else {
|
|
pkt2->pts = av_rescale_q_rnd(pkt2->pts, in_stream->time_base,
|
|
out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
|
|
}
|
|
|
|
if (pkt2->dts == AV_NOPTS_VALUE) {
|
|
pkt2->dts = 0;
|
|
} else {
|
|
pkt2->dts = av_rescale_q_rnd(pkt2->dts, in_stream->time_base,
|
|
out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
|
|
}
|
|
|
|
pkt2->duration = av_rescale_q(pkt2->duration, in_stream->time_base,
|
|
out_stream->time_base);
|
|
pkt2->pos = -1;
|
|
|
|
|
|
if (verbose) {
|
|
__vs_log_packet(output->format_ctx, pkt2, "out");
|
|
}
|
|
|
|
|
|
// Track last dts we see (see where we use it for why).
|
|
output->last_dts = pkt2->dts;
|
|
|
|
|
|
// Write encoded frame (as a packet).
|
|
|
|
// av_interleaved_write_frame() works too, but I don't think it is needed.
|
|
// Using av_write_frame() skips buffering.
|
|
const int write_res = av_write_frame(output->format_ctx, pkt2);
|
|
|
|
av_packet_unref(pkt2);
|
|
|
|
if (write_res != 0) {
|
|
char error_buf[256];
|
|
memset(error_buf, 0, 256);
|
|
av_strerror(write_res, error_buf, 256);
|
|
printf("unable to write frame: %s\n", error_buf);
|
|
return -1;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void
|
|
__vs_log_packet(const AVFormatContext * const format_ctx,
|
|
const AVPacket * const pkt, const char * const tag)
|
|
{
|
|
AVRational * const time_base = &format_ctx->streams[pkt->stream_index]->time_base;
|
|
|
|
printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
|
|
tag, av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
|
|
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
|
|
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
|
|
pkt->stream_index);
|
|
}
|