spug_2008-08

Simple Perl

Using File::Find and MP3::Tag to search

through a junk drawer of mp3 files, finding

duplicates

File::Find
• Searches a directory tree
• Invokes your callback (\&wanted subroutine) for each thing
• Your callback subroutine does something with the thing

Using File::Find
• Create your callback subroutine
• Call find() with your callback and a list of directories as arguments

sub wanted {
    # do something neat ...
}

find( \&wanted, @directories );

# \&wanted
# Callback handed to find(): prints the three values available for each entry.
sub wanted {
    say $_;                   # name of the current entry
    say $File::Find::dir;     # directory that contains it
    say $File::Find::name;    # full path of the entry
}

# 01_find
#!/usr/local/bin/perl

use v5.10;
use strict;
use warnings;

use File::Find;

#==============================
# main program

# take any command line arguments as the names of directories to search
my @dirs_to_search = @ARGV;

# if no search dirs were specified, just use '.'
if ( ! @dirs_to_search ) {
    @dirs_to_search = ( '.' );
}

# process_file() is called once for every entry found under those directories
find( \&process_file, @dirs_to_search );

01_find (cont.)

# Callback for find(): prints each of the per-entry variables, labelled.
sub process_file {
    # $_ is set to the name of the current file
    # $File::Find::dir is the name of the containing directory
    # $File::Find::name is the full path

    say "\$_ <$_>";
    say "\$File::Find::dir <$File::Find::dir>";
    say "\$File::Find::name <$File::Find::name>";
    say '';    # blank line
}

# 02_find_type
# Callback for find(): reports whether the current entry ($_) is a plain
# file, a directory, or something else.
sub process_file {
    my $type = -f $_ ? 'normal file'
             : -d $_ ? 'directory'
             :         'other';

    say "file: <$_>";
    say "type: <$type>";
    say '';
}

# 03_find_mp3
# Callback for find(): prints only plain files whose name ends in ".mp3".
sub process_file {
    # skip anything that isn't a normal file
    if ( not -f $_ ) {
        return;
    }

    # skip any normal file that
    # doesn't have an .mp3 suffix
    if ( not /\.mp3$/ ) {
        return;
    }

    say "file <$_>";
}

# 04_find_mp3
# Callback for find(): asks file(1) for the MIME type of each plain file and
# reports the ones identified as audio/mpeg.
sub process_file {
    # skip anything that isn't a normal file
    if ( not -f $_ ) {
        return;
    }

    # NOTE(review): $_ is interpolated into a shell command line here. This is
    # the deliberately naive version; the slides that follow show how a
    # hostile file name exploits it.
    my $mime = qx{ /usr/bin/file -bi "$_" };

    chomp $mime;
    # "text/plain; charset=us-ascii"
    # ... get rid of charset or other extra info
    $mime =~ s/;.*//;

    # skip any non mp3 files
    if ( $mime ne 'audio/mpeg' ) {
        warn "skipping [wrong mimetype] file <$_> mime: <$mime>\n";
        return;
    }

    say " ** got an mp3 file: <$_>";
}

touch \ '"; echo "<$$> pwned orz" >> orz.log; echo"'

> ls "; echo "<$$> pwned orz" >> orz.log; echo"

# within process_file()
# $_ = q{"; echo "<$$> pwned orz" >> orz.log; echo"};
# ...
my $mime = qx{ /usr/bin/file -bi "$_" };

/usr/bin/file -bi ""; echo "<$$> pwned orz" >> orz.log; echo""

DANGERS

05_find_mp3_secure

#!/usr/local/bin/perl -T

BEGIN {
    # delete certain tainted environment variables
    # (under -T, running an external command dies while any of these is
    # still tainted, so all of them are removed, not only PATH and ENV)
    delete @ENV{ qw( PATH IFS CDPATH ENV BASH_ENV ) };
}

•Turn on Taint mode

05_find_mp3_secure (cont.)

# A name is shell-safe only if it consists entirely of word characters,
# '-', '@', '.', '/' and spaces. \A and \z anchor the whole string; with
# ^ and $ a name ending in a newline would still match, and the captured
# (untainted) copy would then differ from the real name.
my $shellsafe = qr{\A([-\@\w./ ]+)\z};

# Options for a taint-mode traversal: names are untainted with $shellsafe,
# and the remaining flags are passed to File::Find unchanged.
my %find_options = (
    wanted          => \&process_file,
    untaint         => 1,
    untaint_pattern => $shellsafe,
    untaint_skip    => 1,
    no_chdir        => 1,
);

find( \%find_options, @dirs_to_search );

05_find_mp3_secure (cont.)

# Callback for find(): accepts the current name only if it matches
# $shellsafe, and continues with the captured (untainted) copy.
sub process_file {
    my $file;
    if ( m/$shellsafe/ ) {
        # untaint the safe filename
        $file = $1;
    } else {
        warn "skipping [suspicious name] file: <$_> \n";
        return;
    }

    # now use $file instead of $_
    # ...

}

# MP3::Tag
use MP3::Tag;

my $mp3 = MP3::Tag->new( $filename );

# autoinfo() returns the tag values as a list, in this order
my ( $title, $track, $artist, $album, $comment, $year, $genre, ) = $mp3->autoinfo();

# or
my $info = {};    # hashref

# hash slice
@{ $info }{ qw(title track artist album comment year genre) } = $mp3->autoinfo();

# 06_mp3_info
# process_file() writes directly into this
my $mp3_database = { };

find( ... );

# use Data::Dumper;
# print Dumper( $mp3_database );

# use JSON;
# print to_json( $mp3_database );

use YAML;
print Dump( $mp3_database );

06_mp3_info (cont.)

# Callback for find(): records the tag values of $file in $mp3_database,
# as a hash of fields stored under the file's name.
sub process_file {

    # ...

    my $mp3 = MP3::Tag->new( $file );

    # field names, in the order autoinfo() returns their values
    my @fields = qw( title track artist album comment year genre );
    @{ $mp3_database->{ $file } }{ @fields } = $mp3->autoinfo();
}

# 07_find_mp3_dupes
# Callback for find(): reads the tags of $file and files the result in
# $mp3_database under a normalized "artist|title" key, so that entries
# for the same song end up in the same list.
sub process_file {
    # ...
    my $info = {};
    $info->{ file } = $file;

    my $mp3 = MP3::Tag->new( $file );
    @{ $info->{ mp3 } }{ qw( title track artist album comment year genre ) }
        = $mp3->autoinfo();

    # continued ...
    # 07_find_mp3_dupes (cont.)

    # Normalize artist and title: lower-case, fold accented letters to their
    # base letter, drop all whitespace. The work is done on a named copy:
    # "my $_" no longer compiles on Perl 5.24 and later, and a copy keeps the
    # stored tag values untouched.
    # NOTE(review): the accented literals below are only treated as single
    # characters if this file is compiled under "use utf8" and the tag values
    # are decoded character strings -- confirm both.
    my $song = join '|', map {
        my $key = lc( $_ // '' );    # a missing tag becomes '' without a warning
        $key =~ tr/àáâäãå/aaaaaa/;
        $key =~ tr/èéêë/eeee/;
        $key =~ tr/ìíîïĩ/iiiii/;
        $key =~ tr/òóôöõ/ooooo/;
        $key =~ tr/ùúûüũ/uuuuu/;
        $key =~ tr/ñýÿ/nyy/;
        $key =~ s/\s+//g;
        $key;
    } @{ $info->{ mp3 } }{ qw( artist title ) };

    push @{ $mp3_database->{ $song } }, $info;
}

07_find_mp3_dupes (cont.)

find( ... );

# print Dump( $mp3_database );

# keep every list in the database that holds more than one entry
my @dupes;
for my $entries ( values %$mp3_database ) {
    push @dupes, $entries if @$entries > 1;
}

# dump each group of duplicates under its own banner
for my $dupe ( @dupes ) {
    say "\n*** Duplicate Songs ***";
    print Dump( $dupe );
}

say "\n";

spug_2008-08

Technology

normal file

mp3 autoinfo

file writes

current file

mp3 suffix

mp3 sub process

wrong mimetype file

junk drawer of mp3 files