AI-Categorizer
view release on metacpan or search on metacpan
- Added a 'Build.PL' file for an alternate installation method using
Module::Build.
- Fixed a problem in the Hypothesis class's best_category() method
that would often result in the wrong category being reported. Added
a regression test to exercise the Hypothesis class. [Spotted by
Xiaobo Li]
- The 'categorizer' script now records more useful benchmarking
information about time & memory in its outfile.
- The AI::Categorizer->dump_parameters() method now tries to avoid
showing you its entire list of stopwords.
- Document objects now use a default 'name' if none is supplied.
- For some Learner classes, the generated Hypothesis objects had
non-functioning all_categories() methods. Fixed.
- The Collection::Files class now uses File::Spec internally to
eg/categorizer view on Meta::CPAN
#
# Copyright 2002 Ken Williams, under the same license as the
# AI::Categorizer distribution.
use strict;
use AI::Categorizer;
use Benchmark;
# YAML is optional: it is only used below to dump the categorizer's
# parameters into the outfile.  $HAVE_YAML is true when it loaded.
my $HAVE_YAML = eval "use YAML; 1";

my ($opt, $do_stage, $outfile) = parse_command_line(@ARGV);

# Drop single-digit switches (e.g. "-3") from @ARGV now that the command
# line has been parsed.
@ARGV = grep !/^-\d$/, @ARGV;

# Build the categorizer.  Direct method-call syntax is used instead of the
# ambiguous indirect-object form ("new Class(...)").
my $c = eval { AI::Categorizer->new(%$opt) };
if ($@ and $@ =~ /^The following parameter/) {
  # Parameter-validation failure: point the user at the documentation.
  die "$@\nPlease see the AI::Categorizer documentation for a description of parameters accepted.\n";
}
die $@ if $@;

# When no stages were selected, enable all of stages 1 through 5.
%$do_stage = map {$_, 1} 1..5 unless keys %$do_stage;

my $out_fh;
if ($outfile) {
  # Three-argument open: the append mode is given separately so the
  # filename can never be interpreted as part of the mode.
  open $out_fh, '>>', $outfile or die "Can't create $outfile: $!";

  # Turn on autoflush for $out_fh while leaving the previously selected
  # default output handle in place.
  select((select($out_fh), $|=1)[0]);

  # A run header and parameter dump are written only when more than one
  # stage is enabled.
  if (keys(%$do_stage) > 1) {
    print $out_fh "~~~~~~~~~~~~~~~~", scalar(localtime), "~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
    if ($HAVE_YAML) {
      print {$out_fh} YAML::Dump($c->dump_parameters);
    } else {
      warn "More detailed parameter dumping is available if you install the YAML module from CPAN.\n";
    }
  }
}
eg/categorizer view on Meta::CPAN
while (my ($k, $v) = each %opt) {
# Allow abbreviations
if ($k =~ /^(\w+)_class$/) {
my $name = $1;
$v =~ s/^::/AI::Categorizer::\u${name}::/;
$opt{$k} = $v;
}
}
my $outfile;
unless ($outfile = delete $opt{outfile}) {
$outfile = $opt{progress_file} ? "$opt{progress_file}-results.txt" : "results.txt";
}
return (\%opt, \%do_stage, $outfile);
}
sub usage {
return <<EOF;
Usage:
$0 --parameter_1 <value_1> --parameter_2 <value_2>
# You may specify a YAML config file as follows:
$0 --config_file <path> --parameter_3 <value_3>
# Or, to run only step 3 (of 5)
lib/AI/Categorizer/Learner/Weka.pm view on Meta::CPAN
}
# Train a yes/no model for a single category by running the configured
# Weka classifier through the external java command.
#
#   $pos - array ref of documents that belong to the category
#   $neg - array ref of documents that do not
#   $cat - category object; its name() is used to build the file names
#
# Returns a hash ref { machine_file => "<category name>_model" }.
sub create_boolean_model {
  my ($self, $pos, $neg, $cat) = @_;

  # Pair every document's features with its label: 1 = positive, 0 = negative.
  my @labeled;
  push @labeled, [$_->features, 1] for @$pos;
  push @labeled, [$_->features, 0] for @$neg;

  my $train_file = $self->create_arff_file($cat->name . '_train', \@labeled);

  my $machine_file = $cat->name . '_model';
  my $model_path = File::Spec->catfile($self->{model}{_in_dir}, $machine_file);

  # Command line: java, its arguments, the classifier class, its arguments,
  # then the training file (-t), the dummy file (-T) and the model path (-d).
  # NOTE(review): the meaning of -v and -p 0 is defined by Weka, not by this
  # code -- confirm against the Weka command-line documentation.
  $self->do_cmd(
    $self->{java_path},
    @{$self->{java_args}},
    $self->{weka_classifier},
    @{$self->{weka_args}},
    '-t', $train_file,
    '-T', $self->{model}{dummy_file},
    '-d', $model_path,
    '-v',
    '-p', '0',
  );

  # The training file is no longer needed; failing to remove it is not fatal.
  unless (unlink($train_file)) {
    warn "Couldn't remove $train_file: $!";
  }

  return { machine_file => $machine_file };
}
# java -classpath /Applications/Science/weka-3-2-3/weka.jar weka.classifiers.NaiveBayes -l out -T test.arff -p 0
( run in 0.247 second using v1.01-cache-2.11-cpan-a5abf4f5562 )