mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-30 20:16:42 +08:00 
			
		
		
		
	lavfi: add volumedetect filter.
This commit is contained in:
		| @@ -50,6 +50,7 @@ version next: | |||||||
| - edge detection filter | - edge detection filter | ||||||
| - framestep filter | - framestep filter | ||||||
| - ffmpeg -shortest option is now per-output file | - ffmpeg -shortest option is now per-output file | ||||||
|  | - volume measurement filter | ||||||
|  |  | ||||||
|  |  | ||||||
| version 0.11: | version 0.11: | ||||||
|   | |||||||
| @@ -690,6 +690,46 @@ volume=-12dB | |||||||
| @end example | @end example | ||||||
| @end itemize | @end itemize | ||||||
|  |  | ||||||
|  | @section volumedetect | ||||||
|  |  | ||||||
|  | Detect the volume of the input audio. | ||||||
|  |  | ||||||
|  | The filter has no parameters. The input is not modified. Statistics about | ||||||
|  | the volume will be printed in the log when the input stream end is reached. | ||||||
|  |  | ||||||
|  | In particular it will show the mean volume (root mean square), maximum | ||||||
|  | volume (on a per-sample basis), and the beginning of a histogram of the | ||||||
|  | registered volume values (from the maximum value to a cumulative 1/1000 of | ||||||
|  | the samples). | ||||||
|  |  | ||||||
|  | All volumes are in decibels relative to the maximum PCM value. | ||||||
|  |  | ||||||
|  | Here is an excerpt of the output: | ||||||
|  | @example | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6 | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62 | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286 | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042 | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551 | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609 | ||||||
|  | [Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409 | ||||||
|  | @end example | ||||||
|  |  | ||||||
|  | It means that: | ||||||
|  | @itemize | ||||||
|  | @item | ||||||
|  | The mean square energy is approximately -27 dB, or 10^-2.7. | ||||||
|  | @item | ||||||
|  | The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB. | ||||||
|  | @item | ||||||
|  | There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc. | ||||||
|  | @end itemize | ||||||
|  |  | ||||||
|  | In other words, raising the volume by +4 dB does not cause any clipping, | ||||||
|  | raising it by +5 dB causes clipping for 6 samples, etc. | ||||||
|  |  | ||||||
| @section asyncts | @section asyncts | ||||||
| Synchronize audio data with timestamps by squeezing/stretching it and/or | Synchronize audio data with timestamps by squeezing/stretching it and/or | ||||||
| dropping samples/adding silence when needed. | dropping samples/adding silence when needed. | ||||||
|   | |||||||
| @@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o | |||||||
| OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o | OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o | ||||||
| OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o | OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o | ||||||
| OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o | OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o | ||||||
|  | OBJS-$(CONFIG_VOLUMEDETECT_FILTER)           += af_volumedetect.o | ||||||
|  |  | ||||||
| OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o | OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o | ||||||
| OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o | OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o | ||||||
|   | |||||||
							
								
								
									
										159
									
								
								libavfilter/af_volumedetect.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								libavfilter/af_volumedetect.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,159 @@ | |||||||
|  | /* | ||||||
|  |  * Copyright (c) 2012 Nicolas George | ||||||
|  |  * | ||||||
|  |  * This file is part of FFmpeg. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public License | ||||||
|  |  * as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public License | ||||||
|  |  * along with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||||||
|  |  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #include "libavutil/audioconvert.h" | ||||||
|  | #include "libavutil/avassert.h" | ||||||
|  | #include "audio.h" | ||||||
|  | #include "avfilter.h" | ||||||
|  | #include "internal.h" | ||||||
|  |  | ||||||
/**
 * Private state of the volumedetect filter.
 */
typedef struct {
    /**
     * Occurrence counters, one per 16-bit PCM value: the counter for
     * value i lives at index 0x8000 + i.
     * A single slot beyond the 0x10000 reachable values is reserved so the
     * table is symmetric around index 0x8000.
     */
    uint64_t histogram[0x10000 + 1];
} VolDetectContext;
|  |  | ||||||
|  | static int query_formats(AVFilterContext *ctx) | ||||||
|  | { | ||||||
|  |     enum AVSampleFormat sample_fmts[] = { | ||||||
|  |         AV_SAMPLE_FMT_S16, | ||||||
|  |         AV_SAMPLE_FMT_S16P, | ||||||
|  |         AV_SAMPLE_FMT_NONE | ||||||
|  |     }; | ||||||
|  |     AVFilterFormats *formats; | ||||||
|  |  | ||||||
|  |     if (!(formats = ff_make_format_list(sample_fmts))) | ||||||
|  |         return AVERROR(ENOMEM); | ||||||
|  |     ff_set_common_formats(ctx, formats); | ||||||
|  |  | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples) | ||||||
|  | { | ||||||
|  |     AVFilterContext *ctx = inlink->dst; | ||||||
|  |     VolDetectContext *vd = ctx->priv; | ||||||
|  |     int64_t layout  = samples->audio->channel_layout; | ||||||
|  |     int nb_samples  = samples->audio->nb_samples; | ||||||
|  |     int nb_channels = av_get_channel_layout_nb_channels(layout); | ||||||
|  |     int nb_planes   = nb_planes; | ||||||
|  |     int plane, i; | ||||||
|  |     int16_t *pcm; | ||||||
|  |  | ||||||
|  |     if (!av_sample_fmt_is_planar(samples->format)) { | ||||||
|  |         nb_samples *= nb_channels; | ||||||
|  |         nb_planes = 1; | ||||||
|  |     } | ||||||
|  |     for (plane = 0; plane < nb_planes; plane++) { | ||||||
|  |         pcm = (int16_t *)samples->extended_data[plane]; | ||||||
|  |         for (i = 0; i < nb_samples; i++) | ||||||
|  |             vd->histogram[pcm[i] + 0x8000]++; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return ff_filter_samples(inlink->dst->outputs[0], samples); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #define MAX_DB 91 | ||||||
|  |  | ||||||
|  | static inline double logdb(uint64_t v) | ||||||
|  | { | ||||||
|  |     double d = v / (double)(0x8000 * 0x8000); | ||||||
|  |     if (!v) | ||||||
|  |         return MAX_DB; | ||||||
|  |     return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void print_stats(AVFilterContext *ctx) | ||||||
|  | { | ||||||
|  |     VolDetectContext *vd = ctx->priv; | ||||||
|  |     int i, max_volume, shift; | ||||||
|  |     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; | ||||||
|  |     uint64_t histdb[MAX_DB + 1] = { 0 }; | ||||||
|  |  | ||||||
|  |     for (i = 0; i < 0x10000; i++) | ||||||
|  |         nb_samples += vd->histogram[i]; | ||||||
|  |     av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); | ||||||
|  |     if (!nb_samples) | ||||||
|  |         return; | ||||||
|  |  | ||||||
|  |     /* If nb_samples > 1<<34, there is a risk of overflow in the | ||||||
|  |        multiplication or the sum: shift all histogram values to avoid that. | ||||||
|  |        The total number of samples must be recomputed to avoid rounding | ||||||
|  |        errors. */ | ||||||
|  |     shift = av_log2(nb_samples >> 33); | ||||||
|  |     for (i = 0; i < 0x10000; i++) { | ||||||
|  |         nb_samples_shift += vd->histogram[i] >> shift; | ||||||
|  |         power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); | ||||||
|  |     } | ||||||
|  |     if (!nb_samples_shift) | ||||||
|  |         return; | ||||||
|  |     power = (power + nb_samples_shift / 2) / nb_samples_shift; | ||||||
|  |     av_assert0(power <= 0x8000 * 0x8000); | ||||||
|  |     av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); | ||||||
|  |  | ||||||
|  |     max_volume = 0x8000; | ||||||
|  |     while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && | ||||||
|  |                              !vd->histogram[0x8000 - max_volume]) | ||||||
|  |         max_volume--; | ||||||
|  |     av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); | ||||||
|  |  | ||||||
|  |     for (i = 0; i < 0x10000; i++) | ||||||
|  |         histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; | ||||||
|  |     for (i = 0; i <= MAX_DB && !histdb[i]; i++); | ||||||
|  |     for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { | ||||||
|  |         av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); | ||||||
|  |         sum += histdb[i]; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static int request_frame(AVFilterLink *outlink) | ||||||
|  | { | ||||||
|  |     AVFilterContext *ctx = outlink->src; | ||||||
|  |     int ret = ff_request_frame(ctx->inputs[0]); | ||||||
|  |     if (ret == AVERROR_EOF) | ||||||
|  |         print_stats(ctx); | ||||||
|  |     return ret; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | AVFilter avfilter_af_volumedetect = { | ||||||
|  |     .name          = "volumedetect", | ||||||
|  |     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."), | ||||||
|  |  | ||||||
|  |     .priv_size     = sizeof(VolDetectContext), | ||||||
|  |     .query_formats = query_formats, | ||||||
|  |  | ||||||
|  |     .inputs    = (const AVFilterPad[]) { | ||||||
|  |         { .name             = "default", | ||||||
|  |           .type             = AVMEDIA_TYPE_AUDIO, | ||||||
|  |           .get_audio_buffer = ff_null_get_audio_buffer, | ||||||
|  |           .filter_samples   = filter_samples, | ||||||
|  |           .min_perms        = AV_PERM_READ, }, | ||||||
|  |         { .name = NULL } | ||||||
|  |     }, | ||||||
|  |     .outputs   = (const AVFilterPad[]) { | ||||||
|  |         { .name = "default", | ||||||
|  |           .type = AVMEDIA_TYPE_AUDIO, | ||||||
|  |           .request_frame = request_frame, }, | ||||||
|  |         { .name = NULL } | ||||||
|  |     }, | ||||||
|  | }; | ||||||
| @@ -57,6 +57,7 @@ void avfilter_register_all(void) | |||||||
|     REGISTER_FILTER (PAN,         pan,         af); |     REGISTER_FILTER (PAN,         pan,         af); | ||||||
|     REGISTER_FILTER (SILENCEDETECT, silencedetect, af); |     REGISTER_FILTER (SILENCEDETECT, silencedetect, af); | ||||||
|     REGISTER_FILTER (VOLUME,      volume,      af); |     REGISTER_FILTER (VOLUME,      volume,      af); | ||||||
|  |     REGISTER_FILTER (VOLUMEDETECT,volumedetect,af); | ||||||
|     REGISTER_FILTER (RESAMPLE,    resample,    af); |     REGISTER_FILTER (RESAMPLE,    resample,    af); | ||||||
|  |  | ||||||
|     REGISTER_FILTER (AEVALSRC,    aevalsrc,    asrc); |     REGISTER_FILTER (AEVALSRC,    aevalsrc,    asrc); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Nicolas George
					Nicolas George