App-Hashl
view release on metacpan or search on metacpan
#!/usr/bin/env perl
## Copyright © 2010 by Daniel Friesel <derf@finalrewind.org>
## License: WTFPL <http://sam.zoy.org/wtfpl>
## 0. You just DO WHAT THE FUCK YOU WANT TO.
use strict;
use warnings;
use 5.010;
no if $] >= 5.018, warnings => 'experimental::smartmatch';
use App::Hashl;
use Cwd;
use DateTime;
use File::Copy;
use File::Find;
use Getopt::Long;
use IO::Handle;
use List::MoreUtils qw(any);
use Time::Progress;
my $add_unignore = 0;
my $base = getcwd();
my $rel_paths = 1;
my $db_file = '.hashl.db';
my $total = 0;
my $cur = 0;
my $show_progress = 1;
my $xdev_fsno;
my @edb_files;
my $timer;
my $incoming_dir;
my $read_size;
my ( $find_ref, $find_db_write );
my $hashl;
my @ehashl;
our $VERSION = '1.01';
STDERR->autoflush(1);
GetOptions(
'd|database=s' => \$db_file,
'e|extra-db=s' => \@edb_files,
'f|force' => \$add_unignore,
'n|no-progress' => sub { $show_progress = 0 },
's|read-size=i' => sub { $read_size = $_[1] * 1024 },
'V|version' => sub { say "hashl version ${VERSION}"; exit 0 },
'x|one-file-system' => sub { $xdev_fsno = ( stat($base) )[0] },
) or usage();
if ( substr( $db_file, 0, 1 ) ne q{/} ) {
$db_file = "${base}/${db_file}";
}
my $action = shift;
sub usage {
die(<<"EOF");
Usage: $0 [options] <update|list|info|...> [args]
See 'perldoc -F $0' (or 'man hashl' if it is properly installed)
EOF
}
if ( not defined $action ) {
usage();
}
if ( -r $db_file ) {
$hashl = App::Hashl->new_from_file($db_file);
}
else {
$hashl = App::Hashl->new( read_size => $read_size );
}
@ehashl = ($hashl);
for my $file (@edb_files) {
if ( -r $file ) {
push( @ehashl, App::Hashl->new_from_file($file) );
}
else {
die("-e ${file}: database does not exist\n");
}
}
local $SIG{INT} = \&quit_save_db;
local $SIG{TERM} = \&quit_save_db;
sub quit_save_db {
$hashl->save($db_file);
exit 0;
}
sub get_total {
my $file = $File::Find::name;
if ( -f $file
and not -l $file
and $file ne $db_file
and ( not $xdev_fsno or ( stat($file) )[0] == $xdev_fsno ) )
{
$total++;
}
return;
}
sub drop_deleted {
for my $file ( $hashl->files ) {
if ( not -e $file ) {
$hashl->delete_file($file);
}
}
return;
}
sub ensure_equal_hash_sizes {
for my $i ( 1 .. $#ehashl ) {
if ( $ehashl[$i]->read_size != $hashl->read_size ) {
printf STDERR (
'Cannot list: main database has read size %d, but database'
. " %s has read size %d\n",
$hashl->read_size,
$edb_files[ $i - 1 ],
$ehashl[$i]->read_size
);
exit 1;
}
}
return;
}
sub copy_file {
my ( $file, $to ) = @_;
my $cp_base = substr( $file, length($base) + 1 );
if ( $base =~ s{ / [^/]+ $}{}x ) {
mkdirs( $incoming_dir, $cp_base );
}
copy( $file, "${to}/${cp_base}" )
or die("Cannot copy ${file} to ${to}/${cp_base}: $!\n");
return;
}
sub db_find_new {
my ( $file, $path ) = @_;
if ( not any { $_->file_in_db($path) } @ehashl ) {
print STDERR "\r\e[2K";
say $file;
}
return;
}
sub db_find_known {
my ( $file, $path ) = @_;
if ( any { $_->file_in_db($path) } @ehashl ) {
print STDERR "\r\e[2K";
say $file;
}
return;
}
sub file_info {
my ($file) = @_;
my $entry = $hashl->file($file);
if ( not $entry ) {
die("No such file in database\n");
}
printf(
"File: %s\nSize: %d bytes (%s)\nHash: %s\n",
$file, $entry->{size}, $hashl->si_size( $entry->{size} ),
$entry->{hash},
);
return;
}
sub process_file {
my ( $code, $write ) = ( $find_ref, $find_db_write );
my $file = $File::Find::name;
my $path = $file;
if ( not -f $file
or -l $file
or $file eq $db_file
or ( $xdev_fsno and ( stat($file) )[0] != $xdev_fsno ) )
{
return;
}
if ($rel_paths) {
$file = substr( $file, length($base) + 1 );
}
$cur++;
if ($show_progress) {
print STDERR $timer->report(
"\r\e[2KScanning directory: %p done, %L elapsed, %E remaining",
$cur, );
}
&{$code}( $file, $path );
if ( $write and ( ( $cur % 5000 ) == 0 ) ) {
$hashl->save($db_file);
}
return;
}
sub db_update {
my ( $file, $path ) = @_;
return;
}
sub colorize {
my ( $filename, $cmap ) = @_;
my $filetype = ( split( qr{ [.] }x, $filename ) )[-1];
if ( exists $cmap->{$filetype} ) {
return sprintf( "\e[%sm%s\e[0m", $cmap->{$filetype}, $filename );
}
return $filename;
}
sub cmd_ls {
my ($re) = @_;
my $now = DateTime->now;
ensure_equal_hash_sizes();
my @ls_colors = split( qr{ : }x, $ENV{LS_COLORS} // q{} );
my %cmap;
for my $ls_color (@ls_colors) {
if ( $ls_color =~ m{ ^ [*] [.] (?<filetype> [^=]+ ) = (?<color> .+) }x )
{
$cmap{ $+{filetype} } = $+{color};
}
}
for my $pair (
sort { $a->[1] cmp $b->[1] }
map { map_with_prefix( $_, $_->files ) } @ehashl
)
{
my ( $db, $name ) = @{$pair};
my $file = $db->file($name);
my $dt = DateTime->from_epoch(
epoch => $file->{mtime},
);
my $time_format = '%b %d %H:%M';
# Date math is hard. So we don't account for leap years (or leap seconds) here.
if ( $now->epoch - $dt->epoch >= 31536000 ) {
$time_format = '%b %d %Y';
}
if ( $re and $name !~ m{$re} ) {
next;
}
printf( "%-7s %s %s\n",
$db->si_size( $file->{size} ),
$dt->strftime($time_format),
colorize( $name, \%cmap ) );
}
return;
}
sub cmd_list_files {
say join( "\n", sort map { $_->files } @ehashl );
return;
}
sub cmd_list_ignored {
ensure_equal_hash_sizes();
say join( "\n", map { $_->ignored } @ehashl );
return;
}
sub cmd_update {
drop_deleted();
prepare_db_run();
$find_ref = \&db_update;
$find_db_write = 1;
find( \&process_file, $base );
print "\n";
$hashl->save($db_file);
return;
}
given ($action) {
when ('copy') { cmd_copy(@ARGV) }
when ('find-known') { cmd_find_known(@ARGV) }
when ('find-new') { cmd_find_new(@ARGV) }
when ('ignore') { cmd_ignore(@ARGV) }
when ('info') { cmd_info(@ARGV) }
when ('ls') { cmd_ls(@ARGV) }
when ('list') { cmd_list(@ARGV) }
when ('list-files') { cmd_list_files(@ARGV) }
when ('list-ignored') { cmd_list_ignored(@ARGV) }
when ('update') { cmd_update(@ARGV) }
default { usage() }
}
__END__
=head1 NAME
B<hashl> - Create database with partial file hashes, check if other files are in it
=head1 SYNOPSIS
B<hashl> [B<-fnx>] [B<-d> I<dbfile>] [B<-s> I<read-size>] I<action> [I<args>]
=head1 VERSION
This manual documents hashl version 1.01
=head1 DESCRIPTION
Actions:
=over
=item B<copy> I<newdir>
Copy all files in the current directory which are not in any database to
I<newdir>.
=item B<find-known> [I<directory>]
( run in 0.346 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )