Top Banner
Simple Perl Using File::Find and MP3::Tag to search through a junk drawer of mp3 files, finding duplicates
19

spug_2008-08

Nov 21, 2014

Download

Technology

colinmeyer

Using File::Find and MP3::Tag to find duplicate mp3 files
Welcome message from author
This document is posted to help you gain knowledge. Please leave a comment to let me know what you think about it! Share it with your friends and learn new things together.
Transcript
Page 1: spug_2008-08

Simple Perl

Using File::Find and MP3::Tag to search

through a junk drawer of mp3 files, finding

duplicates

Page 2: spug_2008-08

File::Find
• Searches a directory tree
• Invokes your callback (\&wanted subroutine) for each thing
• Your callback subroutine does something with the thing

Page 3: spug_2008-08

Using File::Find
• Create your callback subroutine
• Call find() with your callback and a list of directories as arguments

sub wanted {
    # do something neat ...
}

find( \&wanted, @directories );

Page 4: spug_2008-08

# \&wanted
# Callback handed to File::Find's find(); prints the three variables
# File::Find makes available for the item currently being visited.
sub wanted {
    say "$_";
    say "$File::Find::dir";
    say "$File::Find::name";
}

Page 5: spug_2008-08

#!/usr/local/bin/perl
# 01_find

use v5.10;
use strict;
use warnings;

use File::Find;

#==============================
# main program

# take any command line arguments as the names of directories to search
my @dirs_to_search = @ARGV;

# if no search dirs were specified, just use '.'
if ( ! @dirs_to_search ) {
    @dirs_to_search = ( '.' );
}

# process_file() is called once for every item found under the search dirs
find( \&process_file, @dirs_to_search );

Page 6: spug_2008-08

01_find (cont.)

sub process_file {
    # $_ is set to the name of the current file
    # $File::Find::dir is the name of the containing directory
    # $File::Find::name is the full path

    say "\$_ <$_>";
    say "\$File::Find::dir <$File::Find::dir>";
    say "\$File::Find::name <$File::Find::name>";
    say '';    # blank line
}

Page 7: spug_2008-08

# 02_find_type
# find() callback: classify the current item ($_) with file-test
# operators and print its name and type.
sub process_file {
    my $type;
    if ( -f $_ ) {
        $type = 'normal file';
    }
    elsif ( -d $_ ) {
        $type = 'directory';
    }
    else {
        $type = 'other';
    }

    say "file: <$_>";
    say "type: <$type>";
    say '';
}

Page 8: spug_2008-08

# 03_find_mp3
# find() callback: print only normal files whose name ends in .mp3.
sub process_file {
    # skip anything that isn't a normal file
    if ( not -f $_ ) {
        return;
    }

    # skip any normal file that
    # doesn't have an .mp3 suffix
    if ( not /\.mp3$/ ) {
        return;
    }

    say "file <$_>";
}

Page 9: spug_2008-08

# 04_find_mp3
# find() callback: identify mp3 files by the MIME type reported by
# /usr/bin/file instead of by filename suffix.
sub process_file {
    # skip anything that isn't a normal file
    if ( not -f $_ ) {
        return;
    }

    # NOTE(review): $_ is interpolated into a shell command line unescaped,
    # so a crafted filename can run arbitrary commands. Left as presented.
    my $mime = qx{ /usr/bin/file -bi "$_" };

    chomp $mime;
    # "text/plain; charset=us-ascii"
    # ... get rid of charset or other extra info
    $mime =~ s/;.*//;

    # skip any non mp3 files
    if ( $mime ne 'audio/mpeg' ) {
        warn "skipping [wrong mimetype] file <$_> mime: <$mime>\n";
        return;
    }

    say " ** got an mp3 file: <$_>";
}

Page 10: spug_2008-08

touch \ '"; echo "<$$> pwned orz" >> orz.log; echo"'

> ls "; echo "<$$> pwned orz" >> orz.log; echo"

# within process_file()
# $_ = q{"; echo "<$$> pwned orz" >> orz.log; echo"};
# ...
my $mime = qx{ /usr/bin/file -bi "$_" };

/usr/bin/file -bi ""; echo "<$$> pwned orz" >> orz.log; echo""

DANGERS

Page 11: spug_2008-08

05_find_mp3_secure

#!/usr/local/bin/perl -T

BEGIN {
    # delete certain tainted environment variables
    delete @ENV{ qw( PATH ENV ) };
}

•Turn on Taint mode

Page 12: spug_2008-08

05_find_mp3_secure (cont.)

# Whitelist of characters considered safe to pass to the shell; the whole
# name is captured in $1 for untainting. Anchored with \A and \z rather than
# ^ and $ because $ also matches before a trailing newline, which would let
# "name\n" through and untaint it as a different filename ("name").
my $shellsafe = qr{\A([-\@\w./ ]+)\z};

# Options passed to find() alongside the callback; the untaint* keys and
# no_chdir are File::Find options, with $shellsafe as the untaint pattern.
my %find_options = (
    wanted          => \&process_file,
    untaint         => 1,
    untaint_pattern => $shellsafe,
    untaint_skip    => 1,
    no_chdir        => 1,
);

find( \%find_options, @dirs_to_search );

Page 13: spug_2008-08

05_find_mp3_secure (cont.)

# find() callback: accept the current name ($_) only if it matches
# $shellsafe, and continue with the untainted copy captured by the match.
sub process_file {
    my $file;
    if ( m/$shellsafe/ ) {
        # untaint the safe filename
        $file = $1;
    }
    else {
        warn "skipping [suspicious name] file: <$_> \n";
        return;
    }

    # now use $file instead of $_
    # ...

}

Page 14: spug_2008-08

# MP3::Tag
use MP3::Tag;

my $mp3 = MP3::Tag->new( $filename );

# autoinfo() is assigned in list context to these seven fields, in this order
my ( $title, $track, $artist, $album, $comment, $year, $genre, )
    = $mp3->autoinfo();

# or
my $info = {};    # hashref

# hash slice
@{ $info }{ qw(title track artist album comment year genre) }
    = $mp3->autoinfo();

Page 15: spug_2008-08

# 06_mp3_info
# process_file() writes directly into this
my $mp3_database = { };

find( ... );

# use Data::Dumper;
# print Dumper( $mp3_database );

# use JSON;
# print to_json( $mp3_database );

use YAML;
print Dump( $mp3_database );

Page 16: spug_2008-08

06_mp3_info (cont.)

sub process_file {

    # ...

    my @tag_fields = qw( title track artist album comment year genre );
    my $mp3        = MP3::Tag->new( $file );

    # store the tag values in $mp3_database, keyed by file name
    @{ $mp3_database->{ $file } }{ @tag_fields } = $mp3->autoinfo();
}

Page 17: spug_2008-08

# 07_find_mp3_dupes
sub process_file {
    # ...
    # one record per file: its path plus a hash of its tag values
    my $info = {};
    $info->{ file } = $file;

    my $mp3 = MP3::Tag->new( $file );
    @{ $info->{ mp3 } }{ qw( title track artist album comment year genre ) }
        = $mp3->autoinfo();

    # continued ...

Page 18: spug_2008-08

07_find_mp3_dupes (cont.)

# Build a normalized "artist|title" key so that differences in case, accents
# and whitespace do not hide duplicates.
    my $song = join '|', map {
        # Work on a named copy: map aliases $_ to the stored tag values, and
        # lexical "my $_" was removed in Perl 5.24.
        # NOTE(review): the accented tr/// lists presumably need "use utf8"
        # and decoded tag strings to map character-for-character -- confirm.
        my $key = lc $_;
        $key =~ tr/àáâäãå/aaaaaa/;
        $key =~ tr/èéêë/eeee/;
        $key =~ tr/ìíîïĩ/iiiii/;
        $key =~ tr/òóôöõ/ooooo/;
        $key =~ tr/ùúûüũ/uuuuu/;
        $key =~ tr/ñýÿ/nyy/;
        $key =~ s/\s+//g;
        $key;
    } @{ $info->{ mp3 } }{ qw( artist title ) };

    # files that share a key accumulate in the same list
    push @{ $mp3_database->{ $song } }, $info;
}

Page 19: spug_2008-08

07_find_mp3_dupes (cont.)

find( ... );

# print Dump( $mp3_database );

# keep only the database values (array refs) holding more than one entry
my @dupes = grep { scalar( @$_ ) > 1 } values %$mp3_database;

foreach my $dupe ( @dupes ) {
    say "\n*** Duplicate Songs ***";
    print Dump( $dupe );
}

say "\n";