Algorithm-SixDegrees
view release on metacpan or search on metacpan
samples/movie.pl view on Meta::CPAN
my $format = $th ? '%0.2f' : '%0d';
printf "%5d database hits in $format second%s\n",$dbhit,($end-$start),(sprintf($format,$end-$start)==1?'':'s');
exit(0);
# returns either int or floating time depending on if Time::HiRes is installed
sub time {
return $th ? Time::HiRes::time() : time;
}
# Connects to the db and prepares the SQL for quick execution
sub db_connect {
$dbh = DBI->connect('DBI:mysql:database=movact','movact','movact');
$actorsth = $dbh->prepare('SELECT movie FROM movact WHERE actor = ?');
$moviesth = $dbh->prepare('SELECT actor FROM movact WHERE movie = ?');
}
# Returns the actors in a given movie.
sub movie_actors {
$dbhit++;
$moviesth->execute($_[0]) or die 'Problem: ' . $moviesth->errstr . "\n";
my $results = $moviesth->fetchall_arrayref;
return map { $_->[0] } @{$results};
}
# Returns the movies a given actor has starred in.
sub actor_movies {
$dbhit++;
$actorsth->execute($_[0]) or die 'Problem: ' . $actorsth->errstr . "\n";
my $results = $actorsth->fetchall_arrayref;
return map { $_->[0] } @{$results};
}
# Tries looking for an actor if they're not in the database as given
sub suggest_actor {
my $actor = shift;
return $actor if (scalar(&actor_movies($actor)));
my $sth = $dbh->prepare('SELECT actor, min(year), max(year) FROM movact WHERE actor LIKE ? GROUP BY actor')
or die 'sql prepare error';
$sth->execute("$actor\%") or die 'sql execution error';
my $results = $sth->fetchall_arrayref;
$sth->finish;
if (scalar(@{$results}) < 1) {
print "No suggestions for '$actor'\n";
exit(0);
} elsif (scalar(@{$results}) == 1) {
my $new = $results->[0][0];
print "Using '$new' instead of '$actor'\n";
return $new;
}
print "No match for '$actor'. Did you mean:\n";
foreach my $result (@{$results}) {
print "\t'", $result->[0], "' (career: " . $result->[1];
print '-' . $result->[2] if ($result->[1] != $result->[2]);
print ")\n";
};
exit(0);
}
=head1 MAKING A DATA SOURCE
A sample data source is at L<ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/>
I grabbed the F<actors.list.gz> and the F<actresses.list.gz> files from there.
I created a MySQL database table and some indexes:
create database movact;
grant all privileges on movact.* to movact identified by 'movact';
use movact;
create table movact ( actor varchar(128), movie varchar(128), year int );
create index movact_actor on movact ( actor );
create index movact_movie on movact ( movie );
(You may want to make the indexes after the data load instead of before.)
I then trimmed the data source down to remove the header and footer,
followed by this Perl script on both data files to load them into the database:
#!/usr/bin/perl
use DBI;
my $dbh = DBI->connect('DBI:mysql:database=movact','movact','movact',{AutoCommit=>1});
my $sth = $dbh->prepare('INSERT INTO movact (movie, actor, year) VALUES (?,?,?)');
die unless $sth;
while (<>) {
chomp;
my ($a, $t) = split(/\t+/,$_,2);
$actor = $a if ($a !~ /^\s*$/ && $a ne $actor);
next unless $t;
next if $t =~ /\((TV|V|VG)\)/; # No TV movies / video-only movies / video games
next if $t =~ /^"/; # No TV series
$t =~ s/(\(((?:18|19|20)\d\d|\?\?\?\?)(?:\/(\w+))?\)).*/$1/;
$y = $2 || 1000; # Sets the year to 1000 if it's not present
$y = 1000 if $y !~ /^\d+$/; # Turns year ???? into year 1000
die $sth->errstr unless $sth->execute($t,$actor,$y);
}
$sth->finish;
$dbh->disconnect;
exit(0);
The database is thus prepared.
=cut
( run in 1.634 second using v1.01-cache-2.11-cpan-140bd7fdf52 )