App-Hashl

 view release on metacpan or  search on metacpan

bin/hashl  view on Meta::CPAN

#!/usr/bin/env perl
## Copyright © 2010 by Daniel Friesel <derf@finalrewind.org>
## License: WTFPL <http://sam.zoy.org/wtfpl>
##   0. You just DO WHAT THE FUCK YOU WANT TO.
use strict;
use warnings;
use 5.010;

no if $] >= 5.018, warnings => 'experimental::smartmatch';

use App::Hashl;
use Cwd;
use DateTime;
use File::Copy;
use File::Find;
use Getopt::Long;
use IO::Handle;
use List::MoreUtils qw(any);
use Time::Progress;

my $add_unignore  = 0;
my $base          = getcwd();
my $rel_paths     = 1;
my $db_file       = '.hashl.db';
my $total         = 0;
my $cur           = 0;
my $show_progress = 1;
my $xdev_fsno;
my @edb_files;
my $timer;
my $incoming_dir;
my $read_size;
my ( $find_ref, $find_db_write );

my $hashl;
my @ehashl;

our $VERSION = '1.01';

STDERR->autoflush(1);

GetOptions(
	'd|database=s'      => \$db_file,
	'e|extra-db=s'      => \@edb_files,
	'f|force'           => \$add_unignore,
	'n|no-progress'     => sub { $show_progress = 0 },
	's|read-size=i'     => sub { $read_size = $_[1] * 1024 },
	'V|version'         => sub { say "hashl version ${VERSION}"; exit 0 },
	'x|one-file-system' => sub { $xdev_fsno = ( stat($base) )[0] },
) or usage();

if ( substr( $db_file, 0, 1 ) ne q{/} ) {
	$db_file = "${base}/${db_file}";
}

my $action = shift;

sub usage {
	die(<<"EOF");
Usage: $0 [options] <update|list|info|...> [args]
See 'perldoc -F $0' (or 'man hashl' if it is properly installed)
EOF
}

if ( not defined $action ) {
	usage();
}

if ( -r $db_file ) {
	$hashl = App::Hashl->new_from_file($db_file);
}
else {
	$hashl = App::Hashl->new( read_size => $read_size );
}
@ehashl = ($hashl);

for my $file (@edb_files) {
	if ( -r $file ) {
		push( @ehashl, App::Hashl->new_from_file($file) );
	}
	else {
		die("-e ${file}: database does not exist\n");
	}
}

local $SIG{INT}  = \&quit_save_db;
local $SIG{TERM} = \&quit_save_db;

sub quit_save_db {
	$hashl->save($db_file);
	exit 0;
}

sub get_total {
	my $file = $File::Find::name;

	if (    -f $file
		and not -l $file
		and $file ne $db_file
		and ( not $xdev_fsno or ( stat($file) )[0] == $xdev_fsno ) )
	{
		$total++;
	}

	return;
}

sub drop_deleted {
	for my $file ( $hashl->files ) {
		if ( not -e $file ) {
			$hashl->delete_file($file);
		}
	}

	return;
}

sub ensure_equal_hash_sizes {
	for my $i ( 1 .. $#ehashl ) {
		if ( $ehashl[$i]->read_size != $hashl->read_size ) {
			printf STDERR (
				'Cannot list: main database has read size %d, but database'
				  . " %s has read size %d\n",
				$hashl->read_size,
				$edb_files[ $i - 1 ],
				$ehashl[$i]->read_size
			);
			exit 1;
		}
	}
	return;
}

sub copy_file {
	my ( $file, $to ) = @_;

	my $cp_base = substr( $file, length($base) + 1 );
	if ( $base =~ s{ / [^/]+ $}{}x ) {
		mkdirs( $incoming_dir, $cp_base );
	}

	copy( $file, "${to}/${cp_base}" )
	  or die("Cannot copy ${file} to ${to}/${cp_base}: $!\n");

	return;
}

sub db_find_new {
	my ( $file, $path ) = @_;

	if ( not any { $_->file_in_db($path) } @ehashl ) {
		print STDERR "\r\e[2K";
		say $file;
	}

	return;
}

sub db_find_known {
	my ( $file, $path ) = @_;

	if ( any { $_->file_in_db($path) } @ehashl ) {
		print STDERR "\r\e[2K";
		say $file;
	}

	return;
}

sub file_info {
	my ($file) = @_;

	my $entry = $hashl->file($file);

	if ( not $entry ) {
		die("No such file in database\n");
	}

	printf(
		"File: %s\nSize: %d bytes (%s)\nHash: %s\n",
		$file, $entry->{size}, $hashl->si_size( $entry->{size} ),
		$entry->{hash},
	);

	return;
}

sub process_file {
	my ( $code, $write ) = ( $find_ref, $find_db_write );

	my $file = $File::Find::name;
	my $path = $file;

	if (   not -f $file
		or -l $file
		or $file eq $db_file
		or ( $xdev_fsno and ( stat($file) )[0] != $xdev_fsno ) )
	{
		return;
	}

	if ($rel_paths) {
		$file = substr( $file, length($base) + 1 );
	}

	$cur++;

	if ($show_progress) {
		print STDERR $timer->report(
			"\r\e[2KScanning directory: %p done, %L elapsed, %E remaining",
			$cur, );
	}

	&{$code}( $file, $path );

	if ( $write and ( ( $cur % 5000 ) == 0 ) ) {
		$hashl->save($db_file);
	}

	return;
}

sub db_update {
	my ( $file, $path ) = @_;

bin/hashl  view on Meta::CPAN

	return;
}

sub colorize {
	my ( $filename, $cmap ) = @_;

	my $filetype = ( split( qr{ [.] }x, $filename ) )[-1];
	if ( exists $cmap->{$filetype} ) {
		return sprintf( "\e[%sm%s\e[0m", $cmap->{$filetype}, $filename );
	}
	return $filename;
}

sub cmd_ls {
	my ($re) = @_;
	my $now = DateTime->now;

	ensure_equal_hash_sizes();

	my @ls_colors = split( qr{ : }x, $ENV{LS_COLORS} // q{} );
	my %cmap;

	for my $ls_color (@ls_colors) {
		if ( $ls_color =~ m{ ^ [*] [.] (?<filetype> [^=]+ ) = (?<color> .+) }x )
		{
			$cmap{ $+{filetype} } = $+{color};
		}
	}

	for my $pair (
		sort { $a->[1] cmp $b->[1] }
		map { map_with_prefix( $_, $_->files ) } @ehashl
	  )
	{
		my ( $db, $name ) = @{$pair};
		my $file = $db->file($name);
		my $dt   = DateTime->from_epoch(
			epoch => $file->{mtime},
		);
		my $time_format = '%b %d %H:%M';

 # Date math is hard. So we don't account for leap years (or leap seconds) here.
		if ( $now->epoch - $dt->epoch >= 31536000 ) {
			$time_format = '%b %d  %Y';
		}

		if ( $re and $name !~ m{$re} ) {
			next;
		}

		printf( "%-7s %s %s\n",
			$db->si_size( $file->{size} ),
			$dt->strftime($time_format),
			colorize( $name, \%cmap ) );
	}

	return;
}

sub cmd_list_files {
	say join( "\n", sort map { $_->files } @ehashl );

	return;
}

sub cmd_list_ignored {

	ensure_equal_hash_sizes();

	say join( "\n", map { $_->ignored } @ehashl );

	return;
}

sub cmd_update {
	drop_deleted();
	prepare_db_run();

	$find_ref      = \&db_update;
	$find_db_write = 1;

	find( \&process_file, $base );

	print "\n";

	$hashl->save($db_file);

	return;
}

given ($action) {
	when ('copy')         { cmd_copy(@ARGV) }
	when ('find-known')   { cmd_find_known(@ARGV) }
	when ('find-new')     { cmd_find_new(@ARGV) }
	when ('ignore')       { cmd_ignore(@ARGV) }
	when ('info')         { cmd_info(@ARGV) }
	when ('ls')           { cmd_ls(@ARGV) }
	when ('list')         { cmd_list(@ARGV) }
	when ('list-files')   { cmd_list_files(@ARGV) }
	when ('list-ignored') { cmd_list_ignored(@ARGV) }
	when ('update')       { cmd_update(@ARGV) }
	default               { usage() }
}

__END__

=head1 NAME

B<hashl> - Create database with partial file hashes, check if other files are in it

=head1 SYNOPSIS

B<hashl> [B<-fnx>] [B<-d> I<dbfile>] [B<-s> I<read-size>] I<action> [I<args>]

=head1 VERSION

This manual documents hashl version 1.01

=head1 DESCRIPTION

Actions:

=over

=item B<copy> I<newdir>

Copy all files in the current directory which are not in any database to
I<newdir>.

=item B<find-known> [I<directory>]



( run in 0.346 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )