AI-Categorizer
view release on metacpan or search on metacpan
lib/AI/Categorizer/Collection/Files.pm view on Meta::CPAN
package AI::Categorizer::Collection::Files;
use strict;
use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);
use Params::Validate qw(:types);
use File::Spec;
# Constructor parameters declared by this collection type:
#   path    - a single directory name, or a reference to an array of
#             directory names, in which the documents are found
#   recurse - boolean, off by default; when true, subdirectories found
#             while reading a directory are queued and read as well
__PACKAGE__->valid_params
(
path => { type => SCALAR|ARRAYREF },
recurse => { type => BOOLEAN, default => 0 },
);
# Constructor.  All arguments are handed to the parent constructor; the
# resulting object then gets a directory handle, a queue of directories
# still to be read ({path}) and a list of directories already opened
# ({used}).  The first directory is opened before returning, so next()
# can be called right away.
#
# Dies (via _next_path) if the first directory cannot be opened.
sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);

  $self->{dir_fh} = do {local *FH; *FH};  # double *FH avoids a warning

  # Documents are contained in a directory, or list of directories.
  # Always keep a private copy of the list: _next_path() shifts entries
  # off this array and rewind() pushes them back, and neither must alter
  # the array the caller passed in.
  $self->{path} = [ ref $self->{path} ? @{ $self->{path} } : $self->{path} ];
  $self->{used} = [];

  $self->_next_path;
  return $self;
}
# Advance to the next queued directory.
#
# Closes the directory that is currently open (if any), moves the next
# entry from {path} onto {used}, makes it {cur_dir} and opens it on
# {dir_fh}.
#
# Dies if the queue is empty or if the directory cannot be opened.
sub _next_path {
  my $self = shift;

  # Check before touching any state, so an empty queue neither pushes an
  # undef entry onto {used} nor ends in an opendir() on undef.
  die "No more directories to read" unless @{ $self->{path} || [] };

  # Test definedness rather than truth: a directory may be named "0".
  closedir $self->{dir_fh} if defined $self->{cur_dir};

  $self->{cur_dir} = shift @{$self->{path}};
  push @{$self->{used}}, $self->{cur_dir};
  opendir $self->{dir_fh}, $self->{cur_dir} or die "$self->{cur_dir}: $!";
}
# Produce the next document of the collection.
#
# Takes the next file name from _read_file(), looks up its category names
# in {category_hash}, turns each name into a category object via
# AI::Categorizer::Category->by_name, and returns whatever
# call_method('document', 'read', ...) yields for that file.
#
# Returns nothing once _read_file() reports that no files are left.
# Warns when {category_hash} has no entry for the file; the document is
# then read with an empty category list.
sub next {
  my ($self) = @_;

  my $file = $self->_read_file;
  defined $file or return;

  my $cat_names = $self->{category_hash}{$file};
  warn "No category information about '$file'" unless defined $cat_names;

  my @cats;
  for my $cat_name (@{ $cat_names || [] }) {
    push @cats, AI::Categorizer::Category->by_name(name => $cat_name);
  }

  my $full_path = File::Spec->catfile($self->{cur_dir}, $file);
  return $self->call_method(
    'document', 'read',
    path       => $full_path,
    name       => $file,
    categories => \@cats,
  );
}
# Return the name of the next non-directory entry, moving on to the next
# queued directory whenever the current one is exhausted.
#
# The returned name is the bare directory entry (relative to {cur_dir}).
# Returns undef once the current directory is exhausted and {path} is
# empty.  The explicit undef is kept because the caller tests the result
# for definedness in scalar context.
#
# '.' and '..' are skipped.  Subdirectories are never returned; when
# {recurse} is true they are appended to {path} for later processing,
# unless they are already queued in {path} or listed in {used}.
sub _read_file {
  my ($self) = @_;

  # Loop instead of recursing: every skipped entry used to cost one level
  # of recursion, so a directory with many subdirectories grew the call
  # stack (and drew "Deep recursion" warnings when warnings are enabled).
  while (1) {
    my $file = readdir $self->{dir_fh};

    if (!defined $file) {  # Directory has been exhausted
      return undef unless @{$self->{path}};
      $self->_next_path;
      next;
    }

    next if $file eq '.' or $file eq '..';

    my $path = File::Spec->catdir($self->{cur_dir}, $file);
    return $file unless -d $path;

    # Subdirectory: add for later processing, but never queue one twice
    push @{$self->{path}}, $path
      if $self->{recurse} and !grep {$_ eq $path} @{$self->{path}}, @{$self->{used}};
  }
}
# Start reading the collection again.
#
# Every directory recorded in {used} is put back at the end of the {path}
# queue (behind any directories not yet opened), {used} is emptied in
# place, and the directory now at the front of the queue is opened via
# _next_path().
sub rewind {
  my ($self) = @_;

  # splice() with no offset removes and returns all elements, so this
  # moves the whole visited list back onto the queue in one step.
  push @{ $self->{path} }, splice @{ $self->{used} };

  return $self->_next_path;
}
( run in 0.886 second using v1.01-cache-2.11-cpan-99c4e6809bf )