Algorithm-LinearManifoldDataClusterer

 view release on metacpan or  search on metacpan

examples/data_visualizer.pl  view on Meta::CPAN


# Run the visualizer on the data file whose path is held in $data_source
# (presumably assigned above this excerpt -- TODO confirm).
data_visualizer( $data_source );

sub data_visualizer {
#    my $self = shift;
    my $datafile = shift;
    my $filename = File::Basename::basename($datafile);
    my $temp_file = "__temp_" . $filename;
    $temp_file =~ s/\.\w+$/\.txt/;
    unlink $temp_file if -e $temp_file;
    open OUTPUT, ">$temp_file"
           or die "Unable to open a temp file in this directory: $!";
    open INPUT, "< $filename" or die "Unable to open $filename: $!";
    local $/ = undef;
    my @all_records = split /\s+/, <INPUT>;
    my %clusters;
    foreach my $record (@all_records) {    
        my @splits = split /,/, $record;
        my $record_name = shift @splits;
        $record_name =~ /(\w+?)_.*/;
        my $primary_cluster_label = $1;
        push @{$clusters{$primary_cluster_label}}, \@splits;
    }
    foreach my $key (sort {"\L$a" cmp "\L$b"} keys %clusters) {
        map {print OUTPUT "$_"} map {"@$_\n"} @{$clusters{$key}};
        print OUTPUT "\n\n";
    }
    my @sorted_cluster_keys = sort {"\L$a" cmp "\L$b"} keys %clusters;
    close OUTPUT;   
    my $plot = Graphics::GnuplotIF->new( persist => 1 );
    my $arg_string = "";
    $plot->gnuplot_cmd( "set noclip" );
    $plot->gnuplot_cmd( "set hidden3d" );

#    my attempt to make a sphere look like a sphere:
#    $plot->gnuplot_cmd( "set view 70, 20" );
#    $plot->gnuplot_cmd( "set size 0.75, 1.70" );

    $plot->gnuplot_cmd( "set pointsize 2" );

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

sub visualize_clusters_on_sphere {
    my $self = shift;
    my $visualization_msg = shift;
    my $clusters = deep_copy_AoA(shift);
    my $hardcopy_format = shift;
    my $pause_time = shift;
    my $d = $self->{_data_dimensions};
    my $temp_file = "__temp_" . $self->{_datafile};
    $temp_file =~ s/\.\w+$/\.txt/;
    unlink $temp_file if -e $temp_file;
    open OUTPUT, ">$temp_file"
           or die "Unable to open a temp file in this directory: $!";
    my @all_tags = "A".."Z";
    my @retagged_clusters;
    foreach my $cluster (@$clusters) {
        my $label = shift @all_tags;
        my @retagged_cluster = 
           map {$_ =~ s/^(\w+?)_(\w+)/$label . "_$2 @{$self->{_data_hash}->{$_}}"/e;$_} @$cluster;
        push @retagged_clusters, \@retagged_cluster;
    }
    my %clusters;
    foreach my $cluster (@retagged_clusters) {    
        foreach my $record (@$cluster) { 
            my @splits = grep $_, split /\s+/, $record;
            $splits[0] =~ /(\w+?)_.*/;
            my $primary_cluster_label = $1;
            my @coords = @splits[1..$d];
            push @{$clusters{$primary_cluster_label}}, \@coords;
        }
    }
    foreach my $key (sort {"\L$a" cmp "\L$b"} keys %clusters) {
        map {print OUTPUT "$_"} map {"@$_\n"} @{$clusters{$key}};
        print OUTPUT "\n\n";
    }
    my @sorted_cluster_keys = sort {"\L$a" cmp "\L$b"} keys %clusters;
    close OUTPUT;   
    my $plot;
    unless (defined $pause_time) {
        $plot = Graphics::GnuplotIF->new( persist => 1 );
    } else {
        $plot = Graphics::GnuplotIF->new();
    }
    my $arg_string = "";
    $plot->gnuplot_cmd( "set hidden3d" ) unless $self->{_show_hidden_in_3D_plots};
    $plot->gnuplot_cmd( "set title \"$visualization_msg\"" );
    $plot->gnuplot_cmd( "set noclip" );

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN

        my $label = $point_labels[$i];
        my $j = 0;
        @new_data = map {unshift @$_, $label."_".$j; $j++; $_} @wrapped_data;
        push @data_dump, @new_data;
    }
    if ($self->{_debug}) {
        print "\n\nThe labeled points for clusters:\n";
        map { print "@$_\n"; } @data_dump;
    }
    fisher_yates_shuffle( \@data_dump );
    open OUTPUT, ">$output_file";
    my $total_num_of_points = $N * $K;
    print "Total number of data points that will be written out to the file: $total_num_of_points\n"
        if $self->{_debug};
    foreach my $ele (@data_dump) {
        my ($x,$y,$z);
        my $label = $ele->[0];
        my $azimuth = $ele->[1];
        my $elevation = $ele->[2];
        $x = cos($elevation) * cos($azimuth);
        $y = cos($elevation) * sin($azimuth); 
        $z = sin($elevation);
        my $csv_str = join ",", ($label,$x,$y,$z);
        print OUTPUT "$csv_str\n";
    }
    print "\n\n";
    print "Data written out to file $output_file\n" if $self->{_debug};
    close OUTPUT;
}

# This version is for the embedded class for data generation
sub visualize_data_on_sphere {
    my $self = shift;
    my $datafile = shift;
    my $filename = File::Basename::basename($datafile);
    my $temp_file = "__temp_" . $filename;
    $temp_file =~ s/\.\w+$/\.txt/;
    unlink $temp_file if -e $temp_file;
    open OUTPUT, ">$temp_file"
           or die "Unable to open a temp file in this directory: $!";
    open INPUT, "< $filename" or die "Unable to open $filename: $!";
    local $/ = undef;
    my @all_records = split /\s+/, <INPUT>;
    my %clusters;
    foreach my $record (@all_records) {    
        my @splits = split /,/, $record;
        my $record_name = shift @splits;
        $record_name =~ /(\w+?)_.*/;
        my $primary_cluster_label = $1;
        push @{$clusters{$primary_cluster_label}}, \@splits;
    }
    foreach my $key (sort {"\L$a" cmp "\L$b"} keys %clusters) {
        map {print OUTPUT "$_"} map {"@$_\n"} @{$clusters{$key}};
        print OUTPUT "\n\n";
    }
    my @sorted_cluster_keys = sort {"\L$a" cmp "\L$b"} keys %clusters;
    close OUTPUT;   
    my $plot = Graphics::GnuplotIF->new( persist => 1 );
    my $arg_string = "";
    $plot->gnuplot_cmd( "set noclip" );
    $plot->gnuplot_cmd( "set hidden3d" ) unless $self->{_show_hidden_in_3D_plots};
    $plot->gnuplot_cmd( "set pointsize 2" );
    $plot->gnuplot_cmd( "set parametric" );
    $plot->gnuplot_cmd( "set size ratio 1" );
    $plot->gnuplot_cmd( "set xlabel \"X\"" );
    $plot->gnuplot_cmd( "set ylabel \"Y\"" );
    $plot->gnuplot_cmd( "set zlabel \"Z\"" );

lib/Algorithm/LinearManifoldDataClusterer.pm  view on Meta::CPAN


    $training_data_gen->visualize_data_on_sphere($output_file);

You can use this method to visualize the clusters produced by the data generator.
Since the clusters are located at randomly selected points on a unit sphere, a quick
visual inspection of the output lets you reject what the data generator has produced
and try again.

=back

=head1 HOW THE CLUSTERS ARE OUTPUT

When the option C<terminal_output> is set in the constructor of the
C<LinearManifoldDataClusterer> class, the clusters are displayed on the terminal
screen.

And, when the option C<write_clusters_to_files> is set in the same constructor, the
module dumps the clusters in files named

    cluster0.txt
    cluster1.txt



( run in 0.572 second using v1.01-cache-2.11-cpan-4e96b696675 )