Algorithm-Cluster

 view release on metacpan or  search on metacpan

perl/Cluster.pm  view on Meta::CPAN

    unless (ref($_[0]) eq 'ARRAY') {
        module_warn("Wanted array reference, but got a reference to ",
                    ref($_[0]), ". Cannot parse matrix");
        return;
    }
    my $nrows = scalar @{ $_[0] };
    unless ($nrows > 0) {
        module_warn("Matrix has zero rows.  Cannot parse matrix");
        return;
    }
    my $firstrow =  $_[0]->[0];
    unless (defined $firstrow) {
        module_warn("First row in matrix is undef scalar (?). Cannot parse matrix",);
        return;
    }
    unless (ref($firstrow) eq 'ARRAY') {
        module_warn("Wanted array reference, but got a reference to ",
                     ref($firstrow), ". Cannot parse matrix");
        return;
    }
    my $ncols = scalar @{ $_[0]->[0] };
    unless ($ncols > 0) {
        module_warn("Row has zero columns. Cannot parse matrix");
        return;
    }
    unless (defined($_[0]->[0]->[0])) {
        module_warn("Cell [0,0] is undefined. Cannot parse matrix");
        return;
    }
    return 1;
}


#-------------------------------------------------------------
# Wrapper for the mean() function
#
sub mean {
    if(ref $_[0] eq 'ARRAY') {
        return _mean($_[0]);
    } else {
        return _mean([@_]);
    }
}

#-------------------------------------------------------------
# Wrapper for the median() function
#
sub median {
    if(ref $_[0] eq 'ARRAY') {
        return _median($_[0]);
    } else {
        return _median([@_]);
    }
}


#------------------------------------------------------
# This function is called by the wrappers for library functions.
# It checks the dimensions of the data, mask and weight parameters.
#
# Return false if any errors are found in the data matrix. 
#
# Detect the dimension (nrows x ncols) of the data matrix,
# and set values in the parameter hash. 
#
# Also check the mask matrix and weight arrays, and set
# the parameters to default values if we find any errors, 
# however, we still return true if we find errors.
#
sub check_matrix_dimensions {
    my ($param, $default) = @_;
    #----------------------------------
    # Check the data matrix
    #
    return unless data_is_valid_matrix($param->{data});
    #----------------------------------
    # Remember the dimensions of the weight array
    #
    $param->{nrows}   = scalar @{ $param->{data}      };
    $param->{ncols}   = scalar @{ $param->{data}->[0] };
    #----------------------------------
    # Check the mask matrix
    #
    unless (data_is_valid_matrix($param->{mask})) {
        module_warn("Parameter 'mask' is not a valid matrix, ignoring it.");
        $param->{mask}      = $default->{mask}     
    } else {
        my $mask_nrows    = scalar @{ $param->{mask}      };
        my $mask_ncols    = scalar @{ $param->{mask}->[0] };
        unless ($param->{nrows} == $mask_nrows and $param->{ncols} == $mask_ncols) {
            module_warn("Data matrix is $param->{nrows}x$param->{ncols}, but mask matrix is ${mask_nrows}x${mask_ncols}.\nIgnoring the mask.");
            $param->{mask}      = $default->{mask};
        }
    }
    #----------------------------------
    # Check the weight array
    #
    unless(ref $param->{weight} eq 'ARRAY') {
            module_warn("Parameter 'weight' does not point to an array, ignoring it.");
            $param->{weight} = $default->{weight};
    } else {
        my $weight_length    = scalar @{ $param->{weight} };
        if ($param->{transpose} eq 0) {
            unless ($param->{ncols} == $weight_length) {
                module_warn("Data matrix has $param->{ncols} columns, but weight array has $weight_length items.\nIgnoring the weight array.");
                $param->{weight}      = $default->{weight}     
            }
        }
        else {
            unless ($param->{nrows} == $weight_length) {
                module_warn("Data matrix has $param->{nrows} rows, but weight array has $weight_length items.\nIgnoring the weight array.");
                $param->{weight}      = $default->{weight}     
            }
        }
    }
    return 1;
}


sub check_distance_matrix {
    my $distances = $_[0];
    my $i;
    my $row;
    my $column;
    #----------------------------------
    # Check the data matrix
    #
    my $reference = ref($distances);

perl/Cluster.pm  view on Meta::CPAN

    #
    if(ref $param->{initialid} ne 'ARRAY') {
        module_warn("Optional parameter 'initialid' should be an array");
        return;
    }
    if (@{ $param->{initialid}} == 0) {
        # no initial clustering solution specified
        if ($param->{nclusters}==-1) {
            $param->{nclusters} = 2; # default value
        }
        if ($param->{nclusters} > $nobjects) {
            module_warn("More clusters requested than elements available");
            return;
        }
        unless($param->{npass} =~ /^\d+$/ and $param->{npass} > 0) {
            module_warn("Parameter 'npass' must be a positive integer (got '$param->{npass}')");
            return;
        }
        return 1;
    }
    if (@{ $param->{initialid}} != $nobjects) {
        module_warn("Optional parameter 'initialid' should contain $nobjects elements");
        return;
    }
    foreach $i (@{ $param->{initialid}}) {
        unless($i =~ /^\d+$/ and $i >= 0) {
            module_warn("Optional parameter 'initialid' should only contain non-negative integers");
            return;
        }
    }
    if ($param->{nclusters} == -1) {
        # number of clusters was not specified. Infer it from initialid
        foreach $i (@{ $param->{initialid}}) {
            if ($i > $param->{nclusters}) {
                $param->{nclusters} = $i;
            }
        }
        $param->{nclusters}++;
    } else {
        # check if initialid is consistent with number of clusters
        foreach $i (@{ $param->{initialid}}) {
            if ($i >= $param->{nclusters}) {
                module_warn("Optional parameter 'initialid' inconsistent with nclusters");
                return;
            }
        }
    }
    # Check that none of the clusters are empty
    for ($i = 0; $i < $param->{nclusters}; $i++) {
        push(@counter, 0);
    }
    foreach $i (@{ $param->{initialid}}) {
        $counter[$i]++;
    }
    for ($i = 0; $i < $param->{nclusters}; $i++) {
        if ($counter[$i]==0) {
            module_warn("Optional parameter 'initialid' contains empty clusters");
            return;
        }
    }
    # No errors detected
    $param->{npass} = 0;
    return 1;
}

#-------------------------------------------------------------
# Wrapper for the kcluster() function
#
sub kcluster {
    #----------------------------------
    # Define default parameters
    #
    my %default = (
        nclusters =>    -1,
        data      =>  [[]],
        mask      =>    '',
        weight    =>    '',
        transpose =>     0,
        npass     =>     1,
        method    =>   'a',
        dist      =>   'e',
        initialid =>    [],
    );
    #----------------------------------
    # Local variable
    #
    my $nobjects = 0;
    #----------------------------------
    # Accept parameters from caller
    #
    my %param = (%default, @_);
    my @data = @{$param{data}};
    #----------------------------------
    # Check the data, matrix and weight parameters
    #
    return unless check_matrix_dimensions(\%param, \%default);
    #----------------------------------
    # Check the transpose parameter
    #
    if ($param{transpose} == 0) {
        $nobjects = $param{nrows};
    } elsif ($param{transpose} == 1) {
        $nobjects = $param{ncols};
    } else {
        module_warn("Parameter 'transpose' must be either 0 or 1 (got '$param{transpose}')");
        return;
    }
    #----------------------------------
    # Check the initial clustering, if specified, and npass
    #
    return unless check_initialid(\%param, \%default, $nobjects);
    #----------------------------------
    # Check the other parameters
    #
    unless($param{method}    =~ /^[am]$/) {
        module_warn("Parameter 'method' must be either 'a' or 'm' (got '$param{method}')");
        return;
    }
    unless($param{dist}      =~ /^[cauxskeb]$/) {
        module_warn("Parameter 'dist' must be one of: [cauxskeb] (got '$param{dist}')");
        return;



( run in 1.182 second using v1.01-cache-2.11-cpan-8450f2e95f3 )