AI-Categorizer

 view release on metacpan or  search on metacpan

lib/AI/Categorizer/Collection/Files.pm  view on Meta::CPAN

package AI::Categorizer::Collection::Files;
use strict;

use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);

use Params::Validate qw(:types);
use File::Spec;

# Constructor parameters for this class, registered through valid_params()
# using the Params::Validate type constants imported above:
#   path    - a plain scalar or an array reference
#   recurse - boolean flag, defaults to 0
__PACKAGE__->valid_params(
  path    => { type => SCALAR | ARRAYREF },
  recurse => { type => BOOLEAN, default => 0 },
);

# Constructor.  Delegates argument handling to the parent class, then sets up
# the directory-iteration state and opens the first directory.
#
# State kept on the object:
#   dir_fh - glob used as the directory handle
#   path   - queue (array ref) of directories still to be read
#   used   - array ref of directories already opened
#
# Dies (via _next_path) if the first directory cannot be opened.
sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);

  # Fresh anonymous glob to serve as the handle for opendir/readdir/closedir.
  $self->{dir_fh} = do {local *FH; *FH};  # double *FH avoids a warning

  # Documents are contained in a directory, or list of directories.
  # Always keep a private copy of the list: _next_path() shifts entries off
  # this queue, and if the parent constructor stored the caller's array
  # reference as-is (presumably it does; not visible here) the caller's own
  # array would otherwise be emptied as a side effect.
  $self->{path} = [ ref $self->{path} ? @{ $self->{path} } : $self->{path} ];
  $self->{used} = [];

  $self->_next_path;
  return $self;
}

# Advances to the next queued directory: closes the currently open directory
# (if any), takes the first entry off the {path} queue, records it in {used}
# and as {cur_dir}, and opens it on {dir_fh}.
#
# Dies if the queue is empty (leaving the object state untouched) or if the
# directory cannot be opened (message is "<dir>: <OS error>").
sub _next_path {
  my $self = shift;

  # Fail early with a meaningful message instead of shifting undef off an
  # empty queue, recording it in {used} and trying to open it.
  die "No directories left to read" unless @{ $self->{path} || [] };

  # Use defined() rather than truth so a directory literally named "0"
  # still gets its handle closed explicitly.
  closedir $self->{dir_fh} if defined $self->{cur_dir};

  my $dir = shift @{ $self->{path} };
  $self->{cur_dir} = $dir;
  push @{ $self->{used} }, $dir;
  opendir $self->{dir_fh}, $dir or die "$dir: $!";
}

# Returns the next document of the collection, or nothing once every
# directory has been exhausted.
#
# The file name obtained from _read_file() is looked up in {category_hash};
# each category name found there is turned into a category object via
# AI::Categorizer::Category->by_name.  A warning is emitted when the file has
# no entry in {category_hash}, and the document then gets an empty category
# list.  The document itself is produced by call_method('document', 'read').
sub next {
  my ($self) = @_;

  my $file = $self->_read_file;
  defined $file or return;

  my $cat_names = $self->{category_hash}{$file};
  warn "No category information about '$file'" unless defined $cat_names;

  my @cats = map { AI::Categorizer::Category->by_name(name => $_) }
             @{ $cat_names || [] };

  # Named arguments handed on to the document reader, in a fixed order.
  my @doc_args = (
    path       => File::Spec->catfile($self->{cur_dir}, $file),
    name       => $file,
    categories => \@cats,
  );

  return $self->call_method('document', 'read', @doc_args);
}

# Returns the name (relative to {cur_dir}) of the next regular entry in the
# collection, or undef when the current directory is exhausted and no
# directories remain in the {path} queue.
#
# Skipped entries: '.' and '..', and any subdirectory.  When {recurse} is
# true, a subdirectory is appended to the {path} queue for later processing
# unless it is already queued or listed in {used}.
#
# Implemented as a loop rather than by recursion so that a directory holding
# many subdirectories does not deepen the call stack once per skipped entry
# (which also triggers "Deep recursion" warnings when warnings are on).
sub _read_file {
  my ($self) = @_;

  while (1) {
    my $file = readdir $self->{dir_fh};

    if (!defined $file) { # Directory has been exhausted
      # Explicit undef (not a bare return) kept for compatibility with the
      # original return value.
      return undef unless @{$self->{path}};
      $self->_next_path;
      next;
    }

    next if $file eq '.' or $file eq '..';

    my $path = File::Spec->catdir($self->{cur_dir}, $file);
    return $file unless -d $path;

    # A subdirectory: never returned as a document, optionally queued.
    if ($self->{recurse}
        and !grep {$_ eq $path} @{$self->{path}}, @{$self->{used}}) {
      push @{$self->{path}}, $path;  # Add for later processing
    }
  }
}

# Resets the iterator so that reading starts again from the first directory.
#
# Every directory recorded in {used} (including the one currently open) is
# put back at the FRONT of the {path} queue, in the order it was visited, so
# a rewind issued before the queue was exhausted restarts from the original
# first directory instead of rotating the not-yet-visited directories ahead
# of it.  {used} is then cleared and the first queued directory is reopened.
#
# Returns whatever _next_path() returns; dies if that directory cannot be
# opened.
sub rewind {
  my $self = shift;
  unshift @{$self->{path}}, @{$self->{used}};
  @{$self->{used}} = ();
  $self->_next_path;
}



( run in 0.886 second using v1.01-cache-2.11-cpan-99c4e6809bf )