BuzzSaw
view release on metacpan or search on metacpan
lib/BuzzSaw/DataSource/Files.pm view on Meta::CPAN
# method triggers the files builder method so it's never going to be
# empty if names have been specified.
if ( $self->count_names == 0 && $self->count_files == 0 ) {
$self->log->logdie('You must specify either a set of files or a set of names to find');
}
return;
}
# Searches the configured directories for files whose names match any
# of the configured name patterns and returns the list of files to be
# processed.
#
# Only non-empty regular files are considered. If a (true) size limit
# is set it is passed to the finder as a size rule, and the search
# only descends into sub-directories when the recursive attribute is
# true.
#
# Returns a reference to an array of absolute paths (symbolic links
# resolved, duplicates removed) ordered according to the order_by
# attribute: size_asc, size_desc, name_asc, name_desc, or randomly
# shuffled for any other value.
sub _find_files {
    my ($self) = @_;

    my $finder = File::Find::Rule->new();
    $finder->file;     # Only interested in files
    $finder->nonempty; # No point examining empty files

    if ( $self->has_size_limit && $self->size_limit ) {
        $finder->size($self->size_limit);
    }

    my @rules = map { File::Find::Rule->name( $_ ) } $self->list_names;
    $finder->any(@rules);

    if ( !$self->recursive ) {
        $finder->maxdepth(1);
    }

    my $iter = $finder->start($self->list_directories);

    # A hash is used so that a file reached via several routes (e.g.
    # through a symbolic link as well as directly) is only listed once.
    my %files;
    while ( defined( my $file = $iter->match ) ) {

        # converts relative to absolute path, resolves symbolic links
        my $resolved = Cwd::abs_path($file);

        # abs_path returns undef when the path cannot be resolved, for
        # instance when the file was removed between being found and
        # being resolved. Such a file can no longer be read so it is
        # skipped rather than stored under an empty hash key.
        if ( !defined $resolved ) {
            if ( $self->log->is_debug ) {
                $self->log->debug("Skipping '$file' as the path cannot be resolved");
            }
            next;
        }

        $files{$resolved} = 1;
    }

    # Typically we randomise the order of the list so that multiple
    # processes will pass through the files in different orders which
    # should make the process more efficient. We also support sorting
    # by name and size in ascending or descending order. The size
    # sorting can be handy if you really do need to leave the biggest
    # files until last.

    my @files;
    my $order_by = $self->order_by;
    if ( $order_by =~ m/^size_(asc|desc)$/ ) {
        my $direction = $1; # copy before anything else can clobber $1

        my $sorter;
        if ( $direction eq 'asc' ) {
            $sorter = sub { $a->[1] <=> $b->[1] };
        } else {
            $sorter = sub { $b->[1] <=> $a->[1] };
        }

        # Schwartzian transform for efficient sorting. If stat fails
        # (the file has vanished) the size is treated as zero so that
        # the comparison never sees an undefined value.
        @files = map { $_->[0] }
                 sort $sorter
                 map { [ $_, ( stat($_) )[7] // 0 ] } keys %files;

    } elsif ( $order_by =~ m/^name_(asc|desc)$/ ) {
        my $direction = $1;

        if ( $direction eq 'asc' ) {
            @files = sort { $a cmp $b } keys %files;
        } else {
            @files = sort { $b cmp $a } keys %files;
        }

    } else {
        @files = List::Util::shuffle( keys %files );
    }

    if ( $self->log->is_debug ) {
        my $count = scalar @files;
        $self->log->debug("Found $count log files");
    }

    return \@files;
}
# Returns the data source to its starting state: the current file
# index is set to -1, the current file handle and digest are cleared
# and, when names have been specified, the list of files is rebuilt
# by running the file search again.
sub reset {
    my ($self) = @_;

    $self->log->debug('Resetting data source') if $self->log->is_debug;

    # Per-file state and the value each item is reset to, cleared in
    # this order.
    my @initial_state = (
        [ _current_fileidx => -1 ],
        [ _current_fh      => undef ],
        [ _current_digest  => undef ],
    );

    for my $item (@initial_state) {
        my ( $setter, $value ) = @{$item};
        $self->$setter($value);
    }

    # Files which match the names may have appeared or gone away since
    # the previous search.
    $self->_set_files( $self->_find_files ) if $self->has_names;

    return;
}
# Returns the next line from the set of files with the trailing
# newline removed. When the current file handle is exhausted the next
# handle is requested via _next_fh. Returns nothing (an empty list,
# or undef in scalar context) once no further handle is available.
sub next_entry {
    my ($self) = @_;

    # Carry on with the handle already in use, otherwise ask for the
    # next one.
    my $handle = $self->_current_fh;
    if ( !defined $handle ) {
        $handle = $self->_next_fh;
    }

    # Move past every handle which has nothing left to read so that we
    # never attempt to get a line from an empty file.
    until ( !defined $handle || !$handle->eof ) {
        $handle = $self->_next_fh;
    }

    return if !defined $handle;

    my $entry = $handle->getline;
    chomp $entry;

    return $entry;
}
sub _next_fh {
my ($self) = @_;
( run in 1.206 second using v1.01-cache-2.11-cpan-39bf76dae61 )