package Audio::FindChunks; use 5.00503; use strict; use Data::Flow qw(0.09); BEGIN { require DynaLoader; use vars qw($VERSION @ISA); @ISA = qw(DynaLoader); $VERSION = '0.03'; bootstrap Audio::FindChunks $VERSION; } # Preloaded methods go here. sub default ($$$) {my ($o, $k, $v) = @_; $o->{$k} = $v unless defined $o->{$k}} my $le_short_size = length pack 'v', 0; my $short_size = length pack 's', 0; my $int_size = length pack 'i', 0; my $long_size = length pack 'l', 0; my $double_size = length pack 'd', 0; my $pointer_size = length pack 'p', 0; my $pointer_unpack = (($pointer_size == $int_size) ? 'I' : 'L'); my $long_min = unpack 'l', pack 'l', -1e100; my $long_max = -$long_min-1; sub le_short_sample_multichannel ($$$$$$) { my ($totstride, $stride, $channels, $out, $chunksize) = (shift,shift,shift,shift,shift); my $size = length $_[0]; my $bufaddr = unpack $pointer_unpack, pack 'p', $_[0]; die "Size of buffer not multiple of total stride" if $size % $totstride; # Do in multiples of 4K (to falicitate Level I cache) $chunksize = $totstride * int((1<<13)/$totstride) unless defined $chunksize; my $processed = 0; while ($size > 0) { $chunksize = $size if $chunksize > $size; $size -= $chunksize; my $samples = $chunksize / $totstride; $processed += $samples; for my $c (0..$channels-1) { # void le_short_sample_stats(char *buf, int stride, long samples, array_stats_t *stat) le_short_sample_stats($bufaddr + $stride * $c, $totstride, $samples, $out->[$c]); } $bufaddr += $chunksize; } return $processed; } sub rnd ($) {sprintf '%.0f', shift} my $wav_header = < $in} unless $read == $header_size; my %vals; @vals{@wav_fields} = unpack $wav_header, $in or return {buf => $in}; return {buf => $in} unless $vals{header} eq 'RIFF'; die "Unexpected RIFF format" unless $vals{type} eq 'WAVE' and $vals{type1} eq 'fmt ' and $vals{size1} == 0x10 and $vals{format} == 1 and $vals{bits_per_sample_channel} == 16 and $vals{format} == 1 and $vals{type2} eq 'data'; $vals{buf} = $in; return \%vals; } sub SOUND () {2} # Constants... Rarely promoted or demoted sub SIGNAL () {1} # May be promoted or demoted sub NOISE () {0} # Likewise sub SILENCE () {-1} # Rarely promoted or demoted sub merge_blocks ($) { # array ref: 0: type, 1: start, 2: len my $blocks = shift; my $c = 0; my @new; for my $b (@$blocks) { push(@new, [@$b]), next if not @new or $b->[0] != $new[-1][0]; $new[-1][2] += $b->[2]; } \@new } my %defaults = ( # For getting PCM flow (and if averaging data is read from cache) frequency => 44100, bytes_per_sample => 4, channels => 2, sizedata => MY_INF, out_fh => \*STDOUT, preprocess => {mp3 => [[qw(lame --silent --decode)], [], ['-']]}, # Second contains extra args to read stdin # For getting RMS info sec_per_chunk => 0.1, # RMS cache rms_extension => '.rms', # For threshold calculation threshold_in_sorted_min_rel => 0, threshold_in_sorted_min_sec => 1, threshold_in_sorted_max_rel => 0.5, threshold_in_sorted_max_sec => 0, threshold_ratio => 0.15, threshold_factor_min => 1, threshold_factor_max => 1, # Chunkification: smoothification above_thres_window => 11, above_thres_window_rel => 0.25, # Chunkification max_tracks => 9999, min_signal_sec => 5, min_silence_sec => 2, ignore_signal_sec => 1, # Final enlargement local_level_ignore_pre_sec => 0.3, local_level_ignore_post_sec => 0.3, local_level_ignore_pre_rel => 0.02, local_level_ignore_post_rel => 0.02, local_threshold_factor => 1.05, extend_track_end_sec => 0.5, extend_track_begin_sec => 0.3, min_boundary_silence_sec => 0.2, ); my %mirror_from = ( # May be set separately, otherwise are synonims min_actual_silence_sec => 'min_silence_sec', min_start_silence_sec => 'min_boundary_silence_sec', min_end_silence_sec => 'min_boundary_silence_sec', cache_rms_write => 'cache_rms', cache_rms_read => 'cache_rms', min_silence_chunks_merge => 'min_silence_chunks', ); my %chunk_times = map { (my $n = $_) =~ s/_sec/_chunks/; ($n => {filter => [sub {rnd(shift()/shift)}, $_, 'sec_per_chunk']}) } grep /_sec$/, keys %defaults, keys %mirror_from; my @recognized = # these default to undef, but accessing them is not fatal qw(filename stem_strip_extension filter raw_pcm rms_filename close_fh override_header_info cache_rms subchunk_size skip_medians); my %filters = ( # For getting RMS info filestem => [sub { my $f = shift; return 'filehandle' unless defined $f; $f =~ s/\.(\w+)$// if shift; $f }, 'filename', 'stem_strip_extension'], input_type => [sub { return unless defined (my $f = shift); return unless $f =~ /\.(\w+)$/; my $h = shift; return lc $1 if not $h->{$1} and $h->{lc $1}; $1 }, 'filename', 'preprocess'], preprocess_a => [sub {return unless defined $_[0]; $_[1]->{$_[0]} }, 'input_type', 'preprocess'], preprocess_input => [sub { my ($cmd, $f) = @_; return unless $cmd; return [@{$cmd->[0]}, $f, @{$cmd->[2]}] if defined $f; return [@{$cmd->[0]}, @{$cmd->[1]}, @{$cmd->[2]}]; }, 'preprocess_a', 'filename'], fh_bin => [sub { my $fh = shift; binmode $fh; $fh }, 'fh'], out_fh_bin => [sub { return unless shift; my $fh = shift; binmode $fh; $fh }, 'filter', 'out_fh'], rms_filename_default => [sub {shift() . shift}, 'filestem', 'rms_extension'], read_from_rms_file => [sub { return if shift; # Need output stream, not only RMS shift or defined shift }, 'filter', 'cache_rms_read', 'rms_filename'], write_to_rms_file => [sub {shift or defined shift}, 'cache_rms_write', 'rms_filename'], rms_filename_actual => [sub {my $f = shift; return $f if defined $f; shift}, 'rms_filename', 'rms_filename_default'], samples_per_chunk => [sub {rnd(shift()*shift)}, 'sec_per_chunk', 'frequency'], bytes_per_chunk => [sub {shift()*shift}, 'samples_per_chunk', 'bytes_per_sample'], rms_data_arr_f => [sub {return unless shift; local *RMS; open RMS, '< ' . shift or return; # No file is OK binmode *RMS; local $/; my @in; ($in[0] = ) =~ s/^GramoFile Binary RMS Data\n//i or die "Unknown format of RMS file"; push @in, unpack 'l2', substr $in[0], 0, 2*$long_size; substr($in[0], 0, 2*$long_size) = ''; die "Malformed length of RMS file" # sam/chunk, chunks unless $in[2] * $double_size == length $in[0]; my $sam = shift; die "Samples per chunk mismatch: RMSfile => $in[1], expected => $sam" # sam/chunk, chunks unless $in[1] == $sam; \@in }, 'read_from_rms_file', 'rms_filename_actual', 'samples_per_chunk'], # For threshold calculation medians => [sub { my $av = shift; my @r = $av->[0]; # Allocate the buffer double_median3($av->[0], $r[0], shift) unless shift; \@r }, 'rms_data', 'skip_medians', 'chunks'], sorted => [sub { my $av = shift; my @r = $av->[0]; # Allocate the buffer double_sort($av->[0], $r[0], shift); \@r }, 'medians', 'chunks'], map(("threshold_in_sorted_$_" => [sub { my ($c, $r) = shift; $r = $c*shift() + shift() - 1; $r = $c - 1 unless $r < $c - 1; $r = 0 unless $r > 0; $r }, 'chunks', "threshold_in_sorted_${_}_rel", "threshold_in_sorted_${_}_chunks"], "threshold_$_" => [sub { shift() * sqrt unpack 'd', substr shift->[0], $double_size * rnd(shift), $double_size }, "threshold_factor_$_", 'sorted', "threshold_in_sorted_$_"]), 'max', 'min'), threshold => [sub { my $min = shift; shift() * (shift()-$min) + $min }, 'threshold_min', 'threshold_ratio', 'threshold_max'], # Chunkification: smoothification above_thres => [sub { my $c = shift; my @r = 'x' x ($int_size * $c); # Reserve space double_find_above(shift->[0], $r[0], $c, shift()**2); \@r }, 'chunks', 'rms_data', 'threshold'], above_thres_in_window => [sub { my $a = shift; my @r = $a->[0]; # Reserve space int_sum_window($a->[0], $r[0], shift, shift); \@r}, 'above_thres', 'chunks', 'above_thres_window'], above_thres_window_abs => [sub {shift()*shift}, 'above_thres_window_rel', 'above_thres_window'], maybe_signal => [sub { my $a = shift; my @r = $a->[0]; # Reserve space int_find_above($a->[0], $r[0], shift, shift); \@r }, 'above_thres_in_window', 'chunks', 'above_thres_window_abs'], # Chunkification maybe_trk_pk => [sub { my $max = shift; my @r = 'x' x (3*$long_size*$max); # Reserve space my $c = bool_find_runs(shift->[0], $r[0], shift, $max); die "Max count $max of track candidates exceeded" unless $c >= 0; substr($r[0], 3*$long_size*$c) = ''; # Truncate \@r }, 'max_tracks', 'maybe_signal', 'chunks'], # Unpack b0 => [sub { my ($c, @b) = -1; my $tracks = shift->[0]; my $cnt = length($tracks)/(3*$long_size); my @bl = unpack 'l'.(3*$cnt), $tracks; while (++$c < $cnt) { # [SIGNAL/NOISE, start, len] push @b, [@bl[3*$c, 3*$c + 1, 3*$c + 2]]; } return [@b] }, 'maybe_trk_pk'], # "Force" long enough blocks b1 => [sub { my @b = map [@$_], @{shift()}; # Deep copy my ($min_sign, $min_sil) = (shift, shift); for my $t (@b) { $t->[0] = SOUND, next if $t->[0] == SIGNAL and $t->[2] >= $min_sign; $t->[0] = SILENCE, next if $t->[0] == NOISE and $t->[2] >= $min_sil; } # Force silence if it happens at boundary: $b[$_]->[0] == NOISE and $b[$_]->[0] = SILENCE for 0, -1; \@b }, 'b0', 'min_signal_chunks', 'min_silence_chunks'], # Ignore short bursts of signals (may be reversed later) b2 => [sub { my @b = map [@$_], @{shift()}; # Deep copy my ($c, $ign_sign) = (0, shift); while (++$c < @b - 1) { # XXXX What about those with SILENCE? $b[$c]->[0] = NOISE if $b[$c]->[0] == SIGNAL and $b[$c]->[2] <= $ign_sign and $b[$c-1]->[0] == NOISE and $b[$c+1]->[0] == NOISE } # After ignoring, need to merge similar blocks merge_blocks \@b }, 'b1', 'ignore_signal_chunks'], # Long enough silence block could appear after b1 ==> b2... b3 => [sub { my @b = map [@$_], @{shift()}; # Deep copy my $min_sil_mrg = shift; for my $t (@b) { $t->[0] = SILENCE, next if $t->[0] == NOISE and $t->[2] >= $min_sil_mrg; } # Need to merge similar blocks??? merge_blocks \@b }, 'b2', 'min_silence_chunks_merge'], # All undecided are signal unless between two silence intervals b4 => [sub { my @b = map [@$_], @{shift()}; # Deep copy my ($left, $c) = (SILENCE, -1); while (++$c < @b) { my $this = $b[$c][0]; $left = $this, next if $this == SILENCE or $this == SOUND; # Found undecided, force to SOUND unless between two SILENCE $b[$c][0] = SOUND, next if $left == SOUND; # $left is SILENCE, need to check the right one... my ($right, $cr) = (SILENCE, $c); while (++$cr < @b) { my $r = $b[$cr][0]; $right = $r, last if $r == SILENCE or $r == SOUND; } $b[$c++][0] = $right while $c < $cr; $left = $right; } # After ignoring, need to merge similar blocks merge_blocks \@b }, 'b3'], # Final enlargement of signal b => [sub { my @b = map [@$_], @{shift()}; # Deep copy my ($ign_pre, $ign_pre_rel, $ign_post, $ign_post_rel) = (shift, shift, shift, shift); my ($meds, $thres_factor) = (shift, shift); my ($ext_beg, $ext_end) = (shift, shift); my ($min_silence, $min_silence_s, $min_silence_e) = (shift, shift, shift); my $c = -1; for my $b (@b) { ++$c; next unless $b->[0] == SILENCE; my $pre = rnd($ign_pre + $ign_pre_rel * $b->[2]); my $post = rnd($ign_post + $ign_post_rel * $b->[2]); my $ilen = $pre + $post; next unless $b->[2] > $ilen; my $s = $b->[1]; my $av = double_sum( $meds->[0], $s + $pre, $b->[2] - $ilen ) / ($b->[2] - $ilen); $av *= $thres_factor*$thres_factor; my $e = $s + $b->[2]; if ($c) { # Not for the "leading gap" while ($s < $e) { my $lev = unpack 'd', substr $meds->[0], $s*$double_size, $double_size; last if $lev <= $av; $s++; } my $add = $e - $s; $add = $ext_end if $add > $ext_end; $s += $add; $b[$c-1]->[2] += $s - $b->[1]; $b->[2] -= $s - $b->[1]; $b->[1] += $s - $b->[1]; } if ($c != @b-1) { my $e_ini = $e; while ($s < $e) { my $lev = unpack 'd', substr $meds->[0], ($e-1)*$double_size, $double_size; last if $lev <= $av; $e--; } my $add = $e - $s; $add = $ext_beg if $add > $ext_beg; $e -= $add; $b[$c+1]->[2] += $e_ini - $e; $b[$c+1]->[1] -= $e_ini - $e; $b->[2] -= $e_ini - $e; } my $min_sil = ($c == 0 ? $min_silence_s : ($c == $#b ? $min_silence_e : $min_silence)); $b->[0] = SOUND if $b->[2] < $min_sil; } # After ignoring short silence, need to merge similar blocks merge_blocks \@b }, 'b4', 'local_level_ignore_pre_chunks', 'local_level_ignore_pre_rel', 'local_level_ignore_post_chunks', 'local_level_ignore_post_rel', 'medians', 'local_threshold_factor', 'extend_track_begin_chunks', 'extend_track_end_chunks', 'min_actual_silence_chunks', 'min_start_silence_chunks', 'min_end_silence_chunks'], ); my %recipes = ( map(($_ => {default => $defaults{$_}}), keys %defaults), map(($_ => {filter => [sub {shift}, $mirror_from{$_}]}), keys %mirror_from), %chunk_times, map( ($_ => {default => undef}), @recognized), map(($_ => {filter => $filters{$_}}), keys %filters), map(($_ => {prerequisites => ['rms_data']}), 'chunks', 'min', 'max'), fh => {self_filter => [sub { my ($self, $cmd) = (shift, shift); local *FH; if ($cmd) { $cmd = '"' . join('" "', @$cmd) . '"'; open FH, "$cmd |" or die "pipe open($cmd) error: $!"; } else { my $filename = shift; return \*STDIN unless defined $filename; open FH, "< $filename" or die "open($filename) error: $!"; } $self->set(close_fh => 1) unless $self->already_set('close_fh'); return *FH }, 'preprocess_input', 'filename']}, rms_data => { oo_output => sub { my $s = shift; my $d = $s->get('rms_data_arr_f'); if (defined $d) { $s->set(chunks => $d->[2]); return $d; } return read_averages($s); }}, ); sub read_averages ($) { my $self = shift; my $fh = $self->get('fh_bin'); my $vals = {}; $vals = wav_eat_header($fh) unless $self->get('raw_pcm'); if ($self->get('override_header_info')) { for my $k (keys %$vals) { $self->set($k => $vals->{$k}) unless $self->already_set($k) } } else { for my $k (keys %$vals) { $self->set($k => $vals->{$k}) } } my $out_fh = $self->get('out_fh_bin'); my $buf = $vals->{buf}; syswrite $out_fh, $buf or die "Error duping output: $!" if $out_fh and $vals->{header}; # in PCM mode we write later my $off = ($vals->{header} ? 0 : length $buf); my @stats = (pack 'd2 l2', 0, 0, $long_max, $long_min) x $self->get('channels'); my $read = $self->get('bytes_per_chunk') - $off; my $rem = $self->get('sizedata'); $rem = MY_INF if $rem == 0x7fffffff; # Lame puts this sometimes... defined (my $cnt = read $fh, $buf, $read, $off) or die "Error reading the first chunk: $!"; syswrite $out_fh, $buf or die "Error duping output: $!" if $out_fh; $rem -= $cnt; die "short read" unless $rem <= 0 or $rem == MY_INF or $cnt == $read; my @d = ''; my ($c, $b_p_s, $channels, $subchunk, $b_p_c) = (0, map $self->get($_), qw(bytes_per_sample channels subchunk_size bytes_per_chunk)); while (1) { my $p = le_short_sample_multichannel($b_p_s, 2, $channels, \@stats, $subchunk, $buf) or last; my $max_level = 0; for my $s (@stats) { # Take maximum per channel my $level = unpack 'd', $s; $max_level = $level if $max_level < $level; substr($s, 0, 2*$double_size) = pack 'd2', 0, 0; # Reset per-chunk sums } $d[0] .= pack 'd', $max_level / $p; $c++; #warn "avg = ", $sum_square / $p / @stats; last unless $rem; defined ($cnt = read $fh, $buf, $b_p_c) or die "Error reading: $!"; $rem -= $cnt; die "short read: rem=$rem, cnt=$cnt, b_p_c=$b_p_c" unless $rem <= 0 or $rem == MY_INF or $cnt == $b_p_c; syswrite $out_fh, $buf or die "Error duping output: $!" if $cnt and $out_fh; last unless $cnt; } close $fh or die "Error closing input: $!" if $self->get('close_fh'); $self->set(chunks => $c); $c = 0; my (@min, @max); for my $s (@stats) { # Take maximum per channel (undef, undef, my $min, my $max) = unpack 'd2 l2', $s; $min[$c] = $min; $max[$c++] = $max; } $self->set(min => \@min); $self->set(max => \@max); if ($self->get('write_to_rms_file')) { local *RMS; local $\ = ''; my $f = $self->get('rms_filename_actual'); open RMS, "> $f" or die "Can't open RMS file `$f' for write: $!"; binmode RMS; print RMS "GramoFile Binary RMS Data\n"; print RMS pack 'l2', map $self->get($_), qw(samples_per_chunk chunks); print RMS $d[0]; close RMS or die "closing RMS file `$f' for write: $!"; } #print "lev=$_" for map sqrt, unpack 'd*', $opts->{avgs}; push @d, $self->get('samples_per_chunk'), $c; \@d } sub format_hms ($) { my $t = shift; my $h = int($t/3600); my $m = int(($t - 3600*$h)/60); my $s = $t - 3600*$h - $m*60; $s = ($h || $m) ? (sprintf '%04.1f', $s) : sprintf '%3.1f', $s; $m = $h ? (sprintf '%02dm', $m) : ( $m ? "${m}m" : ''); $h = $h ? "${h}h" : ''; "$h$m$s" } my @represent = ('', ':', '>'); sub output_level ($$;$) { my ($n, $d, $l) = (shift, shift, shift); my $db = 10*log(($l * 2)/(1<<30))/log(10); # Max amplitude sine wave = 0db my $l2 = sqrt($l); $db = sprintf "%.0f", $db; my $s = '#' x (($db+96)/3) . $represent[$db % 3]; printf "%6d:%11s:%7.1f=%4.0fdb: %s\n", $n, format_hms($n*$d), sqrt($l), $db, $s; } sub output_levels ($;$) { my ($self, $what) = (shift, shift); $what ||= 'rms_data'; # 1-element array with a 'd'-packed elt my ($opts,$o) = {}; for $o ($what, qw(frequency bytes_per_sample channels sec_per_chunk bytes_per_chunk)) { $opts->{$o} = $self->get($o); } for $o (qw(min max)) { # Not available from RMS cache eval { $opts->{$o} = $self->get($o) }; } print <{frequency}. Stride: $opts->{bytes_per_sample}; $opts->{channels} channels. Chunk=$opts->{sec_per_chunk}sec=$opts->{bytes_per_chunk}bytes. EOP for my $c (0..$opts->{channels}-1) { next unless $opts->{min}; print "\t" if $c; my @l = map $opts->{$_}[$c], 'min', 'max'; my @db = map 20*log(abs($_)/(1<<15))/log(10), @l; printf "ch%d: %.1f .. %.1f (%.0fdb;%.0fdb).", $c, @l, @db; } print "\n"; my $n = 0; output_level($n++, $opts->{sec_per_chunk}, $_) for unpack 'd*', $opts->{$what}[0]; $self; } sub output_blocks ($;$) { my $self = shift; my $opts = shift; my $type = 'b'; if ($opts and not ref $opts) { $type = $opts; $opts = {}; } $opts ||= {}; my %opts = (format => 'long', %$opts); my $blocks = $self->get(shift || $type); my $l = $self->get('sec_per_chunk'); printf "# threshold: %s (in %s .. %s)\n", map $self->get($_), qw(threshold threshold_min threshold_max) if $opts{format} eq 'long'; my ($gap, $c, $b) = (0, 0); for $b (@$blocks) { $gap = $b->[2] * $l, next if $b->[0] < 0; printf("%s\t=%s\t# %s len=%s\n", $b->[1] * $l, ($b->[1] + $b->[2]) * $l, ++$c, $b->[2] * $l), next if $opts{format} eq 'short'; printf "%s\t=%s\t# n=%s duration %s; gap %s (%s .. %s; %s)\n", $b->[1] * $l, ($b->[1] + $b->[2]) * $l, ++$c, $b->[2] * $l, $gap, format_hms($b->[1] * $l), format_hms(($b->[1] + $b->[2]) * $l), format_hms($b->[2] * $l); } } my $splitter_loaded; sub split_file ($;$$) { my ($self, $opt) = (shift, shift); my $blocks = $self->get(shift || 'b'); my $t = $self->get('input_type'); die "Only MP3 split supported" unless $t and $t eq 'mp3'; my $l = $self->get('sec_per_chunk'); my @req = map [$_->[1] * $l, $_->[2] * $l], grep $_->[0] > 0, @$blocks or return; require MP3::Splitter; die "MP3::Splitter v0.02 required" if !$splitter_loaded++ and 0.02 > MP3::Splitter->VERSION; MP3::Splitter::mp3split($self->get('filename'), $opt || {}, @req); $self; } sub new { my $class = shift; my $s = new Data::Flow \%recipes; $s->set(@_); bless \$s, $class; } sub set ($$$) { ${$_[0]}->set($_[1],$_[2]); $_[0] } sub get ($$) { ${$_[0]}->get($_[1]) } my @exchange = qw(chunks rms_data medians sorted channels min max frequency bytes_per_sample sec_per_chunk bytes_per_chunk); sub get_rmsinfo ($) { my $i = ${$_[0]}; map $i->get($_), @exchange; } sub set_rmsinfo ($@) { my ($self, %h) = shift; @h{@exchange} = @_; map $$self->set($_, $h{$_}), @exchange; $self } 1; __END__ =head1 NAME Audio::FindChunks - breaks audio files into sound/silence parts. =head1 SYNOPSIS use Audio::FindChunks; # Duplicate input to output, caching RMS values to a file (as a side effect) Audio::FindChunks->new(rms_filename => 'x.rms', filter => 1)->get('rms_data'); # Output human-readable info, using RMS cache file 'xxx.rms' if present: Audio::FindChunks->new(cache_rms => 1, filename => 'xxx.mp3', stem_strip_extension => 1)->output_blocks(); # Remove start/end silence (if longer than 0.2sec): Audio::FindChunks->new(cache_rms => 1, filename => 'xxx.mp3', min_actual_silence_sec => 1e100)->split_file(); # Split a multiple-sides tape recording Audio::FindChunks->new(filename => 'xxx.mp3', min_actual_silence_sec => 11 )->split_file({verbose => 1}); =head1 DESCRIPTION Audio sequence is broken into parts which contain only noise ("gaps"), and parts with usable signal ("tracks"). The following configuration settings (and defaults) are supported: # For getting PCM flow (and if averaging data is read from cache) frequency => 44100, # If 'raw_pcm' or 'override_header_info' only bytes_per_sample => 4, # likewise channels => 2, # likewise sizedata => MY_INF, # likewise (how many bytes of PCM to read) out_fh => \*STDOUT, # mirror WAV/PCM to this FH if 'filter' # Process non-WAV data: preprocess => {mp3 => [[qw(lame --silent --decode)], [], ['-']]}, # Second contains extra args to read stdin # RMS cache (used if 'valid_rms') rms_extension => '.rms', # Appended to the 'filestem' # Averaging to RMS info sec_per_chunk => 0.1, # The window for taking mean square # thresholds picking from the list of sorted 3-medians of RMS data threshold_in_sorted_min_rel => 0, # relative position of 'threashold_min' threshold_in_sorted_min_sec => 1, # shifted by this amount in the list threshold_factor_min => 1, # the list elt is multiplied by this threshold_in_sorted_max_rel => 0.5, # likewise threshold_in_sorted_max_sec => 0, # likewise threshold_factor_max => 1, # likewise threshold_ratio => 0.15, # relative position between min/max # Chunkification: smoothification above_thres_window => 11, # in units of chunks above_thres_window_rel => 0.25, # fractions of chunks above threshold # in a window to make chunk signal # Splitting into runs of signal/noise max_tracks => 9999, # fail if more signal/noise runs min_signal_sec => 5, # such runs of signal are forced min_silence_sec => 2, # likewise ignore_signal_sec => 1, # short runs of signal are ignored min_silence_chunks_merge (see below) # and long resulting runs of silence # are forced # Calculate average signal in an interval "deeply inside" silence runs local_level_ignore_pre_sec => 0.3, # offset the start of this interval local_level_ignore_pre_rel => 0.02, # additional relative offset local_level_ignore_post_sec => 0.3, # likewise for end of the interval local_level_ignore_post_rel => 0.02, # likewise # Enlargement of signal runs: attach consequent chunks with signal this much # above this average over the neighbour silence run local_threshold_factor => 1.05, # Final enlargement of runs of signal extend_track_end_sec => 0.5, # Unconditional enlargement extend_track_begin_sec => 0.3, # likewise min_boundary_silence_sec => 0.2, # Ignore short silence at start/end Note that C is the only value specified directly in units of chunks; the other C<*_sec> may be optionally specified in units of chunks by setting the corresponding C<*_chunks> value. Note also that this window should better be decreased if minimal allowed silence length parameters are decreased. These values are mirrored from other values if not explicitly specified: min_actual_silence_sec << min_silence_sec # Ignore short gaps min_start_silence_sec << min_boundary_silence_sec # Same at start min_end_silence_sec << min_boundary_silence_sec # Same at end min_silence_chunks_merge << min_silence_chunks # See above cache_rms_write <<< cache_rms # Boolean: write RMS cache cache_rms_read <<< cache_rms # Boolean: read RMS cache (unless 'filter') The following values default to C: filename # if undef, read data from STDIN stem_strip_extension # Boolean: 'filestem' has no extension filter # If true, PCM data is mirrored to out_fh rms_filename # Specify cache file explicitly raw_pcm # The input has no WAV header override_header_info # The user specified values override WAV header cache_rms # Use cache file (see *_write, *_read above) skip_medians # Boolean: do not calculate 3-medians subchunk_size # Optimization of calculation of RMS; the # best value depends on the processor cache =head1 METHODS =over =item C value1, key2 =E value2, ....)> The arguments form a hash of configuration parameters. =item C value)> set a configuration parameter. =item C get a configuration parameter or a value which may be calculated basing on them. =item C prints a human-readable display of RMS (or similar) values. Defaults to C; additional possible values are C and C. =item C prints a human-readable display of obtained audio chunks. C defaults to C; additional possible values are C to C. Recognized options key is C; defaults to C, which results in windy output; the value C results in shorter output and no preamble. Preamble lines are all C<#>-commented; any output line is in the form START_SEC =END_SEC # COMMENT With C format there is no preamble, and (currently) C is of the form C. These formats are recognized, e.g., by MP3::Split::mp3split_read(). =item C Splits the file (only MP3 via L is supported now). The meaning of options is the same as for L. Defaults to blocks of type C; additional possible values are C to C. =item @vals = get_rmsinfo(); set_rmsinfo(@vals) Duplicate RMS info between two different C objects. The exchanged info is the following: chunks rms_data medians sorted channels min max frequency bytes_per_sample sec_per_chunk bytes_per_chunk set_rmsinfo() returns the object itself. =back =head1 set() and get() =head2 In and Out The functionality of the module is modelled on the architecture of L: the two principal methods are C value)> and C; the module knows how to calculate keys basing on values of other keys. The results of calculation are cached; in particular, if one needs to calculate some value for different values of a configuration parameter, one should create many copies of C object, as in my @info = Audio::FindChunks->new(filename => $f)->get_rmsinfo; for my $ratio (0..100) { Audio::FindChunks->new(threshold_ratio => $r/100) ->set_rmsinfo(@info)->print_blocks(); } The internally used format of intermediate data is designed for quick shallow copying even for enourmous audio files. =head2 Dependencies The current dependecies for values which are not explicitly set(): filestem <<< filename stem_strip_extension input_type <<< filename preprocess_a <<< input_type preprocess preprocess_input <<< preprocess_a filename fh AND close_fh <<< preprocess_input filename fh_bin <<< fh out_fh_bin <<< filter out_fh rms_filename_default <<< filestem rms_extension read_from_rms_file <<< filter cache_rms_read rms_filename write_to_rms_file <<< cache_rms_write rms_filename rms_filename_actual <<< rms_filename rms_filename_default samples_per_chunk <<< sec_per_chunk frequency bytes_per_chunk <<< samples_per_chunk bytes_per_sample rms_data_arr_f <<< read_from_rms_file rms_filename_actual samples_per_chunk rms_data AND chunks <<< rms_data_arr_f OR A LOT OF OTHER PARAMETERS medians <<< rms_data skip_medians chunks sorted <<< medians chunks, threshold_in_sorted_* <<< chunks threshold_in_sorted_*_* threshold_min/max <<< threshold_factor_* sorted threshold_in_sorted_min/max threshold <<< threshold_min threshold_ratio threshold_max above_thres <<< chunks rms_data threshold above_thres_in_window <<< above_thres chunks above_thres_window above_thres_window_abs<<< above_thres_window_rel above_thres_window maybe_signal <<< above_thres_in_window chunks above_thres_window_abs maybe_trk_pk <<< max_tracks maybe_signal chunks b0 <<< maybe_trk_pk b1 <<< b0 min_signal_chunks min_silence_chunks b2 <<< b1 ignore_signal_chunks b3 <<< b2 min_silence_chunks_merge b4 <<< b3 b <<< b4 local_level_ignore_* medians local_threshold_factor extend_track_begin_chunks extend_track_end_chunks min_actual_silence_chunks min_start_silence_chunks min_end_silence_chunks If C is not read from cached source, a lot of other fields may be also set from the WAV header (unless C). =head3 Formats Potentially large internally-cached values are stored as array references to decrease the overhead of shallow copying. The data which relates to the initial chunks (of size C) is stored as length 1 arrays with packed (either by C or C, depending on the semantic) data; this allows small memory footprint work with huge audio files, and allows an easy implemenation of most computationally intensive work in C. The blocks of audio/signal/noise/silence are stored as Perl arrays; each element is a reference to an array of length 3: type (-1 for silence, 0 for noise, 1 for signal, and 2 for audio), start chunks, duration in chunks. =head1 ALGORITHM The algorithm for finding boundaries of parts follows closely the algorithm used by GramoFile v1.7 (however, I version is I customizable, fully documented, and has some significant bugs fixed). The keywords in the discussion below refer to customization parameters; keywords of the form CEEkey> refer to Cable values. =over =item Smooth the input This is done in 2 distinct steps: Break the input into chunks of equal duration (governed by C); find the acoustic energy of each channel per chunk (no customization); energy is the quadratic average of signal level; calculate maximal energy among channels per chunk (no customization; CEErms_data>). Trim "extremal" chunks by replacing the energy level of each chunk by the median of it and its two neighbors (switched off if C; CEEmedians>). =item Calculate the signal/noise threshold basing on the distribution (CEEsorted>) of smoothed values. Governed by C parameters. CEEthreshold_min>, CEEthreshold_max>, CEEthreshold>. =item Smooth it again Separate into I and I chunks basing on the number of above-threshold chunks in a small window about the given chunk. Governed by C, C. CEEmaybe_signal>, CEEb0>. =item Find certain intervals of sound and silence Long enough runs of signal chunks are proclaimed carrying sound; likewise for noise chunks and silence. Governed by C, C, C. CEEb1>. Long enough "unproclaimed" runs of chunks with only short bursts of signal are proclaimed silence. Governed by C, CEEb2>; and C, CEEb3>. =item Merge undecided into sound/silence A run of chunks (signal or noise) "yet unproclaimed" to be sound or silence is proclaimed sound if it is adjacent to a run of sound on at least one side. The rest of unproclaimed runs are proclaimed silence. No customization. Runs of sound/silence are audio/gap candidates (no customization; CEEb4>). =item Calculate average signal level in each gap candidate ignoring short intervals near ends of gaps. Governed by C. =item Allow for slow attack/decay or fade in/out Extend runs of audio: join the consequent runs of chunks of adjacent gaps where the energy level remains significantly larger than the average level in this gap. Additionally, unconditionally extend the tracks by a small amount. Governed by C, C, C. =item Long enough gap candidates are gaps Gaps which became too short are considered audio and are merged into neighbors. Governed by C, C, C; CEEb>. =back =head2 Functions implemented in C long bool_find_runs(int *input, array_run_t *output, long cnt, long out_cnt) void double_find_above(double *input, int *output, long cnt, double threshold) void double_median3(double *rmsarray, double *medarray, long total_blocks) void double_sort(double *input, double *output, long cnt) void int_find_above(int *input, int *output, long cnt, int threshold) void int_sum_window(int *input, int *output, long cnt, int window_size) void le_short_sample_stats(char *buf, int stride, long samples, array_stats_t *stat) =head1 SEE ALSO C, C =head1 AUTHOR Ilya Zakharevich, Ecpan@ilyaz.org =head1 COPYRIGHT AND LICENSE Copyright (C) 2004 by Ilya Zakharevich This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.2 or, at your option, any later version of Perl 5 you may have available. =cut