Algorithm-AdaBoost

 View the release on MetaCPAN or search on MetaCPAN.

lib/Algorithm/AdaBoost.pm  view on Meta::CPAN

package Algorithm::AdaBoost;

use 5.014;
use Algorithm::AdaBoost::Classifier;
use Carp qw//;
use List::Util;
use Smart::Args;

our $VERSION = '0.01';

# Construct an Algorithm::AdaBoost instance.
#
# Named parameters (both optional here; either may instead be supplied
# later when calling train()):
#   training_set              - ArrayRef of training examples
#   weak_classifier_generator - CodeRef that builds a weak classifier
sub new {
  args
    my $class => 'ClassName',
    my $training_set => +{ isa => 'ArrayRef', optional => 1 },
    my $weak_classifier_generator => +{ isa => 'CodeRef', optional => 1 };

  my $self = +{
    training_set => $training_set,
    weak_classifier_generator => $weak_classifier_generator,
  };
  return bless $self, $class;
}

# Compute the weight (alpha) assigned to a weak classifier:
#
#   alpha = ln((1 - e) / e) / 2
#
# where e is the classifier's weighted error ratio on the current
# distribution.  A smaller error yields a larger positive weight.
sub calculate_classifier_weight {
  args
    my $self,
    my $classifier => 'CodeRef',
    my $distribution => 'ArrayRef[Num]';

  my $error_ratio = $self->evaluate_error_ratio(
    classifier => $classifier,
    distribution => $distribution,
  );
  # Guard the degenerate cases: a perfect (e == 0) or always-wrong (e == 1)
  # classifier would otherwise die below with an obscure division-by-zero
  # or log(0) fatal error.  Croak with an explanatory message instead.
  Carp::croak "Weak classifier error ratio ($error_ratio) must lie strictly between 0 and 1"
    if $error_ratio <= 0 || $error_ratio >= 1;
  return log((1 - $error_ratio) / $error_ratio) / 2;
}

# Classify a single feature with the boosted (strong) classifier.
# Croaks if train() has not been executed yet.
sub classify {
  args_pos
    my $self,
    my $feature => 'Any';
  if (not $self->trained) {
    Carp::croak 'Training phase is undone yet.';
  }
  return $self->final_classifier->classify($feature);
}

# Build the next boosting-round distribution from the previous one.
# Each example's weight is multiplied by exp(-alpha * label * prediction):
# correctly classified examples (label and prediction agree in sign) are
# scaled down, misclassified ones scaled up, then the whole vector is
# normalized to sum to 1.
sub construct_hardest_distribution {
  args
    my $self,
    my $classifier => 'CodeRef',
    my $previous_distribution => 'ArrayRef[Num]',
    my $training_set => 'ArrayRef[HashRef]',
    my $weight => 'Num';

  my @unnormalized;
  for my $i (0 .. $#$previous_distribution) {
    my $example = $training_set->[$i];
    my $margin = $example->{label} * $classifier->($example->{feature});
    push @unnormalized, $previous_distribution->[$i] * exp(-$weight * $margin);
  }

  # Normalize by the partition function so the result is a distribution.
  my $partition_function = List::Util::sum(@unnormalized);
  return [ map { $_ / $partition_function } @unnormalized ];
}

# Weighted error ratio of a classifier under the given distribution:
# 1 minus the distribution mass carried by correctly classified examples.
sub evaluate_error_ratio {
  args
    my $self,
    my $classifier => 'CodeRef',
    my $distribution => 'ArrayRef[Num]';

  # Collect the weight of every correctly classified example; the leading
  # 0 keeps List::Util::sum defined even when nothing matches.
  my $accuracy = List::Util::sum(
    0,
    map {
      my $example = $self->training_set->[$_];
      $classifier->($example->{feature}) == $example->{label}
        ? $distribution->[$_]
        : ();
    } 0 .. $#$distribution
  );
  return 1 - $accuracy;
}

# Accessor for the strong classifier produced by train().
# Croaks when called before any training has happened.
sub final_classifier {
  args my $self;
  $self->trained
    or Carp::croak 'The classifier is not trained';
  return $self->{final_classifier};
}

sub train {
  args
    my $self,
    my $num_iterations => 'Int',
    my $training_set => +{ isa => 'ArrayRef', optional => 1 },
    my $weak_classifier_generator => +{ isa => 'CodeRef', optional => 1 };

  $training_set //= $self->training_set
    // Carp::croak('Given no training set.');
  $weak_classifier_generator //= $self->weak_classifier_generator
    // Carp::croak('Given no weak classifier generator.');
  my $num_training_set = @$training_set;

  # Initial distribution is uniform.
  my $distribution = [ (1 / $num_training_set) x $num_training_set ];

  my ($weak_classifier, $weight);
  my @weak_classifiers;
  while ($num_iterations--) {
    # Construct a weak classifier which classifies data on the distribution.
    $weak_classifier = $weak_classifier_generator->(
      distribution => $distribution,
      training_set => $training_set,
    );
    $weight = $self->calculate_classifier_weight(
      classifier => $weak_classifier,
      distribution => $distribution,
    );
    push @weak_classifiers, +{
      classifier => $weak_classifier,
      weight => $weight,
    };
  } continue {
    $distribution = $self->construct_hardest_distribution(
      classifier => $weak_classifier,
      previous_distribution => $distribution,
      training_set => $training_set,
      weight => $weight,
    );



( run in 0.491 second using v1.01-cache-2.11-cpan-f0fbb3f571b )