Algorithm-LinearManifoldDataClusterer

 view release on metacpan or  search on metacpan

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

        _visualize_each_iteration     =>   $args{visualize_each_iteration} == 0 ? 0 : 1,
        _show_hidden_in_3D_plots      =>   $args{show_hidden_in_3D_plots} == 0 ? 0 : 1,
        _make_png_for_each_iteration  =>   $args{make_png_for_each_iteration} == 0 ? 0 : 1,
        _debug                        =>   $args{debug} || 0,
        _N                            =>   0,
        _KM                           =>   $args{K} * $args{cluster_search_multiplier},
        _data_hash                    =>   {},
        _data_tags                    =>   [],
        _data_dimensions              =>   0,
        _final_clusters               =>   [],
        _auto_retry_flag              =>   0,
        _num_iterations_actually_used =>   undef,
        _scale_factor                 =>   undef,
        _data_tags_to_cluster_label_hash  => {},
        _final_reference_vecs_for_all_subspaces => [],
        _reconstruction_error_as_a_function_of_iteration => [],
        _final_trailing_eigenvec_matrices_for_all_subspaces => [],
        _subspace_construction_error_as_a_function_of_iteration => [],
    }, $class;
}

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

        print "\nHere come sorted eigenvectors --- from the largest to the smallest:\n";
        foreach my $i (0..@sorted_eigenvecs-1) {
            print "eigenvec:  @{$sorted_eigenvecs[$i]}       eigenvalue: $sorted_eigenvals[$i]\n";
        }
    }
    return (\@sorted_eigenvecs, \@sorted_eigenvals);
}

sub auto_retry_clusterer {
    my $self = shift;    
    $self->{_auto_retry_flag} = 1;
    my $clusters;
    $@ = 1;
    my $retry_attempts = 1;
    while ($@) {
        eval {
            $clusters = $self->linear_manifold_clusterer();
        };
        if ($@) {
            if ($self->{_terminal_output}) {
                print "Clustering failed. Trying again. --- $@";

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

        $self->visualize_clusters_on_sphere($visualization_msg, $initial_clusters, "png")
            if $self->{_make_png_for_each_iteration};
    }
    foreach my $cluster (@$initial_clusters) {
        my ($mean, $covariance) = $self->estimate_mean_and_covariance($cluster);
        display_mean_and_covariance($mean, $covariance) if $self->{_debug};
    }
    my @clusters = @$initial_clusters;
    display_clusters(\@clusters) if $self->{_debug};
    my $iteration_index = 0;
    my $unimodal_correction_flag;
    my $previous_min_value_for_unimodality_quotient;
    while ($iteration_index < $self->{_max_iterations}) {
        print "\n\n========================== STARTING ITERATION $iteration_index =====================\n\n"
            if $self->{_terminal_output};
        my $total_reconstruction_error_this_iteration = 0;
        my @subspace_construction_errors_this_iteration;
        my @trailing_eigenvec_matrices_for_all_subspaces;
        my @reference_vecs_for_all_subspaces;
        foreach my $cluster (@clusters) {
            next if @$cluster == 0;

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

              "the least reconstruction error.\n\n" .
              "Total reconstruction error in this iteration: $total_reconstruction_error_this_iteration\n"
                  if $self->{_terminal_output};
        foreach my $i (0..$self->{_KM}-1) {
            $clusters[$i] = $best_subspace_based_partition_of_data{$i};
        }
        display_clusters(\@clusters) if $self->{_terminal_output};
        # Check if any cluster has lost all its elements. If so, fragment the worst
        # existing cluster to create the additional clusters needed:
        if (any {@$_ == 0} @clusters) {
            die "empty cluster found" if $self->{_auto_retry_flag};
            print "\nOne or more clusters have become empty.  Will carve out the needed clusters\n" .
                  "from the cluster with the largest subspace construction error.\n\n";
            $total_reconstruction_error_this_iteration = 0;
            @subspace_construction_errors_this_iteration = ();
            my $how_many_extra_clusters_needed = $self->{_KM} - scalar(grep {@$_ != 0} @clusters);
            print "number of extra clusters needed at iteration $iteration_index: $how_many_extra_clusters_needed\n";
            my $max = List::Util::max @subspace_construction_errors_this_iteration;
            my $maxindex = List::Util::first {$_ == $max} @subspace_construction_errors_this_iteration;
            my @cluster_fragments = cluster_split($clusters[$maxindex], 
                                                  $how_many_extra_clusters_needed + 1);

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

                                                      $reference_vecs_for_all_subspaces[$_])} 0..@clusters-1;
        my $min_value_for_unimodality_quotient = List::Util::min @cluster_unimodality_quotients;
        print "\nCluster unimodality quotients: @cluster_unimodality_quotients\n" if $self->{_terminal_output};
        die "\n\nBailing out!\n" .
            "It does not look like these iterations will lead to a good clustering result.\n" .
            "Program terminating.  Try running again.\n" 
            if defined($previous_min_value_for_unimodality_quotient)
               && ($min_value_for_unimodality_quotient < 0.4)
               && ($min_value_for_unimodality_quotient < (0.5 * $previous_min_value_for_unimodality_quotient));
        if ( $min_value_for_unimodality_quotient < 0.5 ) {
            $unimodal_correction_flag = 1;
            print "\nApplying unimodality correction:\n\n" if $self->{_terminal_output};
            my @sorted_cluster_indexes = 
               sort {$cluster_unimodality_quotients[$b] <=> $cluster_unimodality_quotients[$a]} 0..@clusters-1;
            my @newclusters;
            foreach my $cluster_index (0..@clusters - 1) {
                push @newclusters, $clusters[$sorted_cluster_indexes[$cluster_index]];
            }
            @clusters = @newclusters;
            my $worst_cluster = pop @clusters;
            print "\nthe worst cluster: @$worst_cluster\n" if $self->{_terminal_output};

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

                            P
                            terminal_output
                            cluster_search_multiplier
                            max_iterations
                            delta_reconstruction_error
                            visualize_each_iteration
                            show_hidden_in_3D_plots
                            make_png_for_each_iteration
                            debug
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

sub display_matrix {
    my $matrix = shift;
    my $nrows = $matrix->rows();
    my $ncols = $matrix->cols();
    print "\nDisplaying a matrix of size $nrows rows and $ncols columns:\n";
    foreach my $i (0..$nrows-1) {
        my $row = $matrix->row($i);
        my @row_as_list = $row->as_list;

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

}

##  Validates the option names supplied by a caller against the fixed list of
##  names accepted here.
##
##  Arguments:  a flat list of option names (typically the keys of an args hash).
##
##  Returns:    1      if every supplied name is in the accepted list,
##              0      as soon as a name is found that is not in the list
##                     (names after the first unrecognized one are not examined),
##              undef  if no names were supplied at all.
sub _check_for_illegal_params3 {
    my @params = @_;
    my @legal_params = qw / output_file
                            total_number_of_samples_needed
                            number_of_clusters_on_sphere
                            cluster_width
                            show_hidden_in_3D_plots
                          /;
    # Stays undef when @params is empty, since the loop body never runs.
    my $found_match_flag;
    foreach my $param (@params) {
        # grep in scalar context yields the number of accepted names equal to $param
        my $hits = grep { $_ eq $param } @legal_params;
        $found_match_flag = $hits ? 1 : 0;
        # One unrecognized name is enough to reject the whole list.
        last unless $found_match_flag;
    }
    return $found_match_flag;
}

##  We first generate a set of points randomly on the unit sphere --- the number of
##  points being equal to the number of clusters desired.  These points will serve as
##  cluster means (or, as cluster centroids) subsequently when we ask
##  Math::Random::random_multivariate_normal($N, @m, @covar) to return $N number of
##  points on the sphere.  The second argument is the cluster mean and the third
##  argument the cluster covariance.  For the synthetic data, we set the cluster
##  covariance to a 2x2 diagonal matrix, with the (0,0) element corresponding to the
##  variance along the azimuth direction and the (1,1) element corresponding to the



( run in 0.885 second using v1.01-cache-2.11-cpan-94b05bcf43c )