BuzzSaw

 view release on metacpan or  search on metacpan

lib/BuzzSaw/DataSource/Files.pm  view on Meta::CPAN

  # method triggers the files builder method so it's never going to be
  # empty if names have been specified.

  if ( $self->count_names == 0 && $self->count_files == 0 ) {
    $self->log->logdie('You must specify either a set of files or a set of names to find');
  }

  return;
}

# Builds the list of files to be processed from the configured names
# and directories.
#
# The directories returned by list_directories are searched for
# non-empty regular files whose names match any of the entries
# returned by list_names. When a size limit has been set it is passed
# to the finder's size rule, and the search is restricted to the top
# level of each directory unless the recursive attribute is true.
#
# Every match is converted to an absolute path with symbolic links
# resolved and stored as a hash key, so a file which is reachable by
# more than one route only appears once in the result.
#
# Returns a reference to an array of absolute paths. The order is
# controlled by the order_by attribute: size_asc, size_desc, name_asc
# or name_desc; any other value gives a randomised order.
sub _find_files {
    my ($self) = @_;

    my $finder = File::Find::Rule->new();
    $finder->file;     # Only interested in files
    $finder->nonempty; # No point examining empty files

    if ( $self->has_size_limit && $self->size_limit ) {
        $finder->size($self->size_limit);
    }

    my @rules = map { File::Find::Rule->name( $_ ) } $self->list_names;

    $finder->any(@rules);

    if ( !$self->recursive ) {
        $finder->maxdepth(1);
    }

    my $iter = $finder->start($self->list_directories);

    my %files;
    while ( defined( my $found = $iter->match ) ) {
        # converts relative to absolute path, resolves symbolic links
        my $file = Cwd::abs_path($found);

        # abs_path returns undef when the path cannot be resolved, for
        # example when the file was removed after it had been matched.
        # Using undef as a hash key would add a bogus empty-string
        # entry to the list of files, so such paths are skipped.
        if ( !defined $file ) {
            if ( $self->log->is_debug ) {
                $self->log->debug("Ignoring unresolvable path '$found'");
            }
            next;
        }

        $files{$file} = 1;
    }

    # Typically we randomise the order of the list so that multiple
    # processes will pass through the files in different orders which
    # should make the process more efficient. We also support sorting
    # by name and size in ascending or descending order. The size
    # sorting can be handy if you really do need to leave the biggest
    # files until last.

    my @files;
    my $order_by = $self->order_by;
    if ( $order_by =~ m/^size_(asc|desc)$/ ) {

        # Copy the capture straight away so that it cannot be
        # clobbered by a later pattern match.
        my $direction = $1;

        # Schwartzian transform for efficient sorting: each file is
        # only passed to stat once. A file which cannot be examined
        # any more is treated as having size zero so that the numeric
        # comparison never sees an undefined value.
        my @sized = map { [ $_, (stat($_))[7] // 0 ] } keys %files;

        if ( $direction eq 'asc' ) {
            @sized = sort { $a->[1] <=> $b->[1] } @sized;
        } else {
            @sized = sort { $b->[1] <=> $a->[1] } @sized;
        }

        @files = map { $_->[0] } @sized;

    } elsif ( $order_by =~ m/^name_(asc|desc)$/ ) {

        my $direction = $1;

        if ( $direction eq 'asc' ) {
            @files = sort { $a cmp $b } keys %files;
        } else {
            @files = sort { $b cmp $a } keys %files;
        }

    } else {
        @files = List::Util::shuffle( keys %files );
    }

    if ( $self->log->is_debug ) {
        my $count = scalar @files;
        $self->log->debug("Found $count log files");
    }

    return \@files;
}

# Puts the data source back into its initial state.
#
# The current file index is set to -1 and the current filehandle and
# digest are cleared. When names have been specified the list of
# files is rebuilt by calling _find_files and stored with _set_files.
#
# Returns nothing.
sub reset {
  my $self = shift;

  $self->log->debug('Resetting data source') if $self->log->is_debug;

  # Forget everything about the file which was being read.
  $self->_current_fileidx(-1);
  $self->_current_fh(undef);
  $self->_current_digest(undef);

  # Without any names there is nothing to search for, so the existing
  # list of files is left untouched.
  return if !$self->has_names;

  $self->_set_files( $self->_find_files );

  return;
}

# Fetches the next line from the data source.
#
# Reading carries on from the current filehandle, or from the handle
# supplied by _next_fh when there is no current one. Handles which
# are already at end-of-file are passed over.
#
# Returns the line with any trailing line ending removed by chomp, or
# nothing once _next_fh has no further handle to offer.
sub next_entry {
    my $self = shift;

    my $handle = $self->_current_fh;
    if ( !defined $handle ) {
        $handle = $self->_next_fh;
    }

    # Move on past any handle which has nothing left to read so that
    # getline is never called on an exhausted or empty file.
    $handle = $self->_next_fh while defined $handle && $handle->eof;

    return if !defined $handle;

    my $entry = $handle->getline;
    chomp $entry;

    return $entry;
}

sub _next_fh {
    my ($self) = @_;



( run in 1.206 second using v1.01-cache-2.11-cpan-39bf76dae61 )