AI-Categorizer

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN


 - Added a 'Build.PL' file for an alternate installation method using
   Module::Build.

 - Fixed a problem in the Hypothesis' best_category() method that
   would often result in the wrong category being reported.  Added a
   regression test to exercise the Hypothesis class.  [Spotted by
   Xiaobo Li]

 - The 'categorizer' script now records more useful benchmarking
   information about time & memory in its outfile.

 - The AI::Categorizer->dump_parameters() method now tries to avoid
   showing you its entire list of stopwords.

 - Document objects now use a default 'name' if none is supplied.

 - For some Learner classes, the generated Hypothesis objects had
   non-functioning all_categories() methods.  Fixed.

 - The Collection::Files class now uses File::Spec internally to

eg/categorizer  view on Meta::CPAN

#
# Copyright 2002 Ken Williams, under the same license as the
# AI::Categorizer distribution.


use strict;
use AI::Categorizer;
use Benchmark;
my $HAVE_YAML = eval "use YAML; 1";

# Split the command line into constructor options, the set of stages to
# run, and the path of the results file.
my ($opt, $do_stage, $outfile) = parse_command_line(@ARGV);

# Drop single-digit switches such as "-3" from @ARGV (presumably the stage
# selectors already consumed by parse_command_line -- confirm there).
@ARGV = grep !/^-\d$/, @ARGV;

# Build the categorizer.  Direct method-call syntax is used instead of the
# indirect-object form "new AI::Categorizer(...)", which is parse-ambiguous.
my $c = eval { AI::Categorizer->new(%$opt) };
if ($@ and $@ =~ /^The following parameter/) {
  # Parameter-validation failures get an extra pointer to the documentation.
  die "$@\nPlease see the AI::Categorizer documentation for a description of parameters accepted.\n";
}
die $@ if $@;

# If no stage was selected explicitly, run all of stages 1 through 5.
%$do_stage = map {$_, 1} 1..5 unless keys %$do_stage;

my $out_fh;
if ($outfile) {
  # Three-argument open: the file name comes from the command line or a
  # config file, so it must be taken literally rather than parsed together
  # with the mode string (two-argument open trims whitespace and gives
  # special meaning to some characters in the name).
  open $out_fh, '>>', $outfile or die "Can't create $outfile: $!";

  # Enable autoflush on $out_fh, then re-select the previously selected handle.
  select((select($out_fh), $|=1)[0]);

  # When more than one stage will run, start the log with a timestamped
  # separator and, if YAML is installed, a dump of the parameters in use.
  if (keys(%$do_stage) > 1) {
    print {$out_fh} "~~~~~~~~~~~~~~~~", scalar(localtime), "~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
    if ($HAVE_YAML) {
      print {$out_fh} YAML::Dump($c->dump_parameters);
    } else {
      warn "More detailed parameter dumping is available if you install the YAML module from CPAN.\n";
    }
  }
}

eg/categorizer  view on Meta::CPAN


  while (my ($k, $v) = each %opt) {
    # Allow abbreviations
    if ($k =~ /^(\w+)_class$/) {
      my $name = $1;
      $v =~ s/^::/AI::Categorizer::\u${name}::/;
      $opt{$k} = $v;
    }
  }

  my $outfile;
  unless ($outfile = delete $opt{outfile}) {
    $outfile = $opt{progress_file} ? "$opt{progress_file}-results.txt" : "results.txt";
  }

  return (\%opt, \%do_stage, $outfile);
}

sub usage {
  return <<EOF;
 Usage:

  $0 --parameter_1 <value_1> --parameter_2 <value_2>
      # You may specify a YAML config file as follows:
  $0 --config_file <path> --parameter_3 <value_3>
      # Or, to run only step 3 (of 5)

lib/AI/Categorizer/Learner/Weka.pm  view on Meta::CPAN

}

# Train one yes/no Weka model for a single category.
#
#   $pos, $neg - array refs of document objects; each document's features()
#                result is paired with the label 1 ($pos) or 0 ($neg)
#   $cat       - category object; its name() is used to build the file names
#
# Writes a temporary ARFF training file via create_arff_file(), runs the
# configured Java/Weka command through do_cmd() asking it to save the model
# under $self->{model}{_in_dir}, then removes the training file (warning if
# that fails).  Returns a hash ref { machine_file => "<category name>_model" }.
sub create_boolean_model {
  my ($self, $pos, $neg, $cat) = @_;

  # Positive examples first, then negative ones, each tagged with its label.
  my @positive = map { [$_->features, 1] } @$pos;
  my @negative = map { [$_->features, 0] } @$neg;
  my @docs = (@positive, @negative);
  my $train_file = $self->create_arff_file($cat->name . '_train', \@docs);

  my %info = (machine_file => $cat->name . '_model');
  my $model_path = File::Spec->catfile($self->{model}{_in_dir}, $info{machine_file});

  # Assemble the command one logical group at a time.
  my @command = ($self->{java_path}, @{$self->{java_args}});
  push @command, $self->{weka_classifier}, @{$self->{weka_args}};
  push @command, '-t', $train_file;                   # training data
  push @command, '-T', $self->{model}{dummy_file};    # test-set argument
  push @command, '-d', $model_path;                   # where to save the model
  push @command, '-v';
  push @command, '-p', '0';
  $self->do_cmd(@command);

  # The training file is only needed while the command runs.
  unless (unlink $train_file) {
    warn "Couldn't remove $train_file: $!";
  }

  return \%info;
}

# java -classpath /Applications/Science/weka-3-2-3/weka.jar weka.classifiers.NaiveBayes -l out -T test.arff -p 0



( run in 0.247 second using v1.01-cache-2.11-cpan-a5abf4f5562 )