Algorithm-DBSCAN

 view release on metacpan or  search on metacpan

lib/Algorithm/DBSCAN.pm  view on Meta::CPAN

        point_19 48 15
        point_20 52 16
        point_21 50 14';

    my $dataset = Algorithm::DBSCAN::Dataset->new();
    my @lines = split(/\n\s+/, $points_data_file);
    foreach my $line (@lines) {
        $dataset->AddPoint(new Algorithm::DBSCAN::Point(split(/\s+/, $line)));
    }

    my $dbscan = Algorithm::DBSCAN->new($dataset, 4 * 4, 2);

    $dbscan->UseRegionIndex(the filename containg the previous region index);
    $dbscan->FindClusters();
    $dbscan->PrintClustersShort();
  

=head1 SUBROUTINES/METHODS

=cut

=head2 new

The constructor takes 3 parameters:
    
    $dataset: The Algorithm::DBSCAN::Dataset dataset object
        
        Create the Dataset object:
            my $dataset = Algorithm::DBSCAN::Dataset->new();
        
        Add points (the first parameter is a point_id the other are point coordinates)
            $dataset->AddPoint(new Algorithm::DBSCAN::Point('point_1', 1, 2, 3, 4, 5);
            
    $eps: The epsilon parameter used for region density computation
        WARNING: This implementation uses the sqare distance between the points to avoid 
        a useless square root call. If you want to use the euclidian distance you need to 
        convert it to the right value yourself.
        
        For example for the previous point with 5 dimensions 
        $eps = $euclidian_distance * $euclidian_distance * $euclidian_distance * $euclidian_distance * $euclidian_distance; 
        
    $min_points: the minimal number of points in a region with a radius of $eps. $eps 
    and $min_points are the 2 parameters used to compute the denisty of a region. If 
    the number of points in a region with radius $eps is lower than $min_points the 
    point is considered as an outlier point that can't be included in any cluster.

=cut

sub new {
	my($type, $dataset, $eps, $min_points) = @_;
	
	my $self = {};
	$self->{dataset_object} = $dataset;
	$self->{dataset} = $dataset->{points};
	@{$self->{id_list}} = keys %{$dataset->{points}};
	$self->{eps} = $eps;
	$self->{min_points} = $min_points;
	$self->{current_cluster} = 1;
	$self->{use_external_region_index} = 0;
		
	bless($self, $type);

	return($self);
}

=head2 FindClusters

The main method that will run the DBSCAN algorithm on the Dataset.

=cut

sub FindClusters {
	my ($self, $starting_point_id) = @_;

	my $i = 0;
	unshift(@{$self->{id_list}}, $starting_point_id) if (defined $starting_point_id);
	foreach my $id (@{$self->{id_list}}) {
		my $point = $self->{dataset}->{$id};
		say "$i";
		$i++;
		next if ($point->{visited});
		$point->{visited} = 1;
		$self->_one_more_point_visited();
		
		my $neighborPts = $self->GetRegion($point);
#say Dumper($neighborPts);
		
		if (scalar(@$neighborPts) < $self->{min_points}) {
			$point->{cluster_id} = -1;
		}
		else {
			$self->ExpandCluster($point, $neighborPts);
		}
	}
}

=head2 ExpandCluster

This method will expand the cluster starting by the neighborhood of point $point

=cut

sub ExpandCluster {
	my ($self, $point, $neighborPts) = @_;
	
	if (scalar(@$neighborPts) < $self->{min_points}) {
		$point->{cluster_id} = -1;
	}
	else {
		$self->{current_cluster}++;

		$point->{cluster_id} = $self->{current_cluster};
	
		my %cluster_points;
		map { $cluster_points{$_}++ } @$neighborPts;
		my $cluster_expanded = 0;
		do {
			$cluster_expanded = 0;
			foreach my $id (keys %cluster_points) {
				my $p = $self->{dataset}->{$id};
				unless ($p->{visited}) {



( run in 0.910 second using v1.01-cache-2.11-cpan-5b529ec07f3 )