Algorithm-DBSCAN
view release on metacpan or search on metacpan
lib/Algorithm/DBSCAN.pm view on Meta::CPAN
point_19 48 15
point_20 52 16
point_21 50 14';
my $dataset = Algorithm::DBSCAN::Dataset->new();
my @lines = split(/\n\s+/, $points_data_file);
foreach my $line (@lines) {
$dataset->AddPoint(new Algorithm::DBSCAN::Point(split(/\s+/, $line)));
}
my $dbscan = Algorithm::DBSCAN->new($dataset, 4 * 4, 2);
$dbscan->UseRegionIndex(the filename containg the previous region index);
$dbscan->FindClusters();
$dbscan->PrintClustersShort();
=head1 SUBROUTINES/METHODS
=cut
=head2 new
The constructor takes 3 parameters:
$dataset: The Algorithm::DBSCAN::Dataset dataset object
Create the Dataset object:
my $dataset = Algorithm::DBSCAN::Dataset->new();
Add points (the first parameter is a point_id the other are point coordinates)
$dataset->AddPoint(new Algorithm::DBSCAN::Point('point_1', 1, 2, 3, 4, 5);
$eps: The epsilon parameter used for region density computation
WARNING: This implementation uses the sqare distance between the points to avoid
a useless square root call. If you want to use the euclidian distance you need to
convert it to the right value yourself.
For example for the previous point with 5 dimensions
$eps = $euclidian_distance * $euclidian_distance * $euclidian_distance * $euclidian_distance * $euclidian_distance;
$min_points: the minimal number of points in a region with a radius of $eps. $eps
and $min_points are the 2 parameters used to compute the denisty of a region. If
the number of points in a region with radius $eps is lower than $min_points the
point is considered as an outlier point that can't be included in any cluster.
=cut
sub new {
my($type, $dataset, $eps, $min_points) = @_;
my $self = {};
$self->{dataset_object} = $dataset;
$self->{dataset} = $dataset->{points};
@{$self->{id_list}} = keys %{$dataset->{points}};
$self->{eps} = $eps;
$self->{min_points} = $min_points;
$self->{current_cluster} = 1;
$self->{use_external_region_index} = 0;
bless($self, $type);
return($self);
}
=head2 FindClusters
The main method that will run the DBSCAN algorithm on the Dataset.
=cut
sub FindClusters {
my ($self, $starting_point_id) = @_;
my $i = 0;
unshift(@{$self->{id_list}}, $starting_point_id) if (defined $starting_point_id);
foreach my $id (@{$self->{id_list}}) {
my $point = $self->{dataset}->{$id};
say "$i";
$i++;
next if ($point->{visited});
$point->{visited} = 1;
$self->_one_more_point_visited();
my $neighborPts = $self->GetRegion($point);
#say Dumper($neighborPts);
if (scalar(@$neighborPts) < $self->{min_points}) {
$point->{cluster_id} = -1;
}
else {
$self->ExpandCluster($point, $neighborPts);
}
}
}
=head2 ExpandCluster
This method will expand the cluster starting by the neighborhood of point $point
=cut
sub ExpandCluster {
my ($self, $point, $neighborPts) = @_;
if (scalar(@$neighborPts) < $self->{min_points}) {
$point->{cluster_id} = -1;
}
else {
$self->{current_cluster}++;
$point->{cluster_id} = $self->{current_cluster};
my %cluster_points;
map { $cluster_points{$_}++ } @$neighborPts;
my $cluster_expanded = 0;
do {
$cluster_expanded = 0;
foreach my $id (keys %cluster_points) {
my $p = $self->{dataset}->{$id};
unless ($p->{visited}) {
( run in 0.910 second using v1.01-cache-2.11-cpan-5b529ec07f3 )