Algorithm-DecisionTree

 view release on metacpan or  search on metacpan

lib/Algorithm/RegressionTree.pm  view on Meta::CPAN

        }
    }
    $self->{_YVector} = $YVector;
    return ($XMatrix, $YVector);
}

sub estimate_regression_coefficients {
    my $self = shift;
    my ($XMatrix, $YVector, $display) = @_;
    $display = 0 unless defined $display;
    my ($nrows, $ncols) = $XMatrix->dim;
    print "nrows=$nrows   ncols=$ncols\n" if $self->{_debug2_r};
    my $jacobian_choice = $self->{_jacobian_choice};
    my $X = $XMatrix->copy();
    my $y = $YVector->copy();
    if ($self->{_need_data_normalization}) {
        die "normalization feature is yet to be added to the module --- sorry";
    }
    my $beta0 = (transpose($X) * $X)->inverse() * transpose($X) * $y;
    my ($betarows, $betacols) = $beta0->dim;
    die "Something has gone wrong with the calculation of beta coefficients" if $betacols > 1;
    if ($jacobian_choice == 0) {
#        my $error = sum(abs_vector_as_list( $y - ($X * $beta) )) / $nrows;   
        my $error = sum( map abs, ($y - ($X * $beta0) )->col(0)->as_list ) / $nrows;   
        my $predictions = $X * $beta0;
        if ($display) {
            if ($ncols == 2) {
                my @xvalues = $X->col(0)->as_list;
                my @yvalues = $predictions->col(0)->as_list;
                $self->{_output_for_plots}->{scalar(keys %{$self->{_output_for_plots}}) + 1} = [\@xvalues,\@yvalues];
            } elsif ($ncols == 3) {
                my @xvalues;
                my @yvalues = $predictions->col(0)->as_list;
                foreach my $row_index (0 .. $X->rows - 1) {
                    my @onerow = $X->row($row_index)->as_list;
                    pop @onerow;
                    push @xvalues, "@onerow";
                }
                $self->{_output_for_surface_plots}->{scalar(keys %{$self->{_output_for_surface_plots}}) + 1} = [\@xvalues,\@yvalues];
            } else {
                print "no display when the overall dimensionality of the data exceeds 3\n";
            }
        }
        return ($error, $beta0);    
    }
    my $beta = $beta0; 
    if ($self->{_debug2_r}) {
        print "\ndisplaying beta0 matrix\n";
        display_matrix($beta);
    }
    my $gamma = 0.1;
    my $iterate_again_flag = 1;
    my $delta = 0.001;
    my $master_interation_index = 0;
    $|++;
    while (1) {
        print "*" unless $master_interation_index++ % 100;
        last unless $iterate_again_flag;
        $gamma *= 0.1;
        $beta0 = 0.99 * $beta0;
        print "\n\n======== starting iterations with gamma= $gamma ===========\n\n\n" if $self->{_debug2_r};
        $beta = $beta0;
        my $beta_old = Math::GSL::Matrix->new($betarows, 1)->zero;
        my $error_old = sum( map abs, ($y - ($X * $beta_old) )->col(0)->as_list ) / $nrows;   
        my $error;
        foreach my $iteration (0 .. 1499) {
            print "." unless $iteration % 100;
            $beta_old = $beta->copy;
            my $jacobian;
            if ($jacobian_choice == 1) {
                $jacobian = $X;
            } elsif ($jacobian_choice == 2) {      
                my $x_times_delta_beta = $delta * $X * $beta;
                $jacobian = Math::GSL::Matrix->new($nrows, $ncols);
                foreach my $i (0 .. $nrows - 1) {
                    my @row = ($x_times_delta_beta->get_elem($i,0)) x $ncols;
                    $jacobian->set_row($i, \@row);
                }
                $jacobian = (1.0/$delta) * $jacobian;
            } else {
                die "wrong choice for the jacobian_choice";
            }
#            $beta = $beta_old + 2 * $gamma * transpose($X) * ( $y - ($X * $beta) );
            $beta = $beta_old + 2 * $gamma * transpose($jacobian) * ( $y - ($X * $beta) );
            $error = sum( map abs, ($y - ($X * $beta) )->col(0)->as_list ) / $nrows;   
            if ($error > $error_old) {
                if (vector_norm($beta - $beta_old) < (0.00001 * vector_norm($beta_old))) {
                    $iterate_again_flag = 0;
                    last;
                } else {
                    last;
                }
            }
            if ($self->{_debug2_r}) {
                print "\n\niteration: $iteration   gamma: $gamma   current error: $error\n";
                print "\nnew beta:\n";
                display_matrix $beta;
            }
            if ( vector_norm($beta - $beta_old) < (0.00001 * vector_norm($beta_old)) ) { 
                print "iterations used: $iteration with gamma: $gamma\n" if $self->{_debug2_r};
                $iterate_again_flag = 0;
                last;
            }
            $error_old = $error;
        }
    }
    display_matrix($beta) if $self->{_debug2_r};
    my $predictions = $X * $beta;
    my @error_distribution = ($y - ($X * $beta))->as_list;
    my $squared_error =  sum map abs, @error_distribution;
    my $error = $squared_error / $nrows;
    if ($display) {
        if ($ncols == 2) {
            my @xvalues = $X->col(0)->as_list;
            my @yvalues = $predictions->col(0)->as_list;
            $self->{_output_for_plots}->{scalar(keys %{$self->{_output_for_plots}}) + 1} = [\@xvalues,\@yvalues];
        } elsif ($ncols == 3) {
            my @xvalues;
            my @yvalues = $predictions->col(0)->as_list;
            foreach my $row_index (0 .. $X->rows - 1) {
                my @onerow = $X->row($row_index)->as_list;



( run in 0.765 second using v1.01-cache-2.11-cpan-5a3173703d6 )