ALBD
view release on metacpan or search on metacpan
utils/datasetCreator/dataStats/getMatrixStats.pl view on Meta::CPAN
# Gets matrix stats for a matrix file
# (number of rows, number of columns, number of keys, number of cooccurrences).
#
# Usage: getMatrixStats.pl [matrixFile]
# When no file is given on the command line, the original hard-coded
# data set is used, so existing invocations behave as before.
use strict;
use warnings;

my $defaultMatrixFile =
    '/home/henryst/lbdData/groupedData/1852_window1_squared_inParts';
getStats(@ARGV ? $ARGV[0] : $defaultMatrixFile);
#############################################
# gets the stats for the matrix
#
# Reads a matrix file in which each line contains
#     <rowKey> <colKey> <count>
# (fields separated by a single whitespace character, count made of digits)
# and prints to STDERR:
#   - the number of distinct row keys
#   - the number of distinct column keys
#   - the number of stored (row, column) pairs
#   - the sum of the counts over all parsed lines
#
# input:  $fileName <- path of the matrix file to summarise
# output: the statistics are printed to STDERR
# dies if the file cannot be opened
#############################################
sub getStats {
    my $fileName = shift;
    print STDERR "$fileName\n";

    #read in the matrix
    # three-argument open with a lexical handle, so characters in the file
    # name are never interpreted as an open mode
    open(my $in, '<', $fileName)
        or die ("unable to open file: $fileName ($!)\n");
    my %matrix = ();
    my $numCooccurrences = 0;
    while (my $line = <$in>) {
        # only use the captures when the line really matched; blank or
        # malformed lines are skipped instead of polluting the counts
        next unless $line =~ /([^\s]+)\s([^\s]+)\s([\d]+)/;
        my ($rowKey, $colKey, $count) = ($1, $2, $3);

        # the inner hash is autovivified on first use of a row key;
        # a repeated (row, column) pair overwrites the stored value
        $matrix{$rowKey}{$colKey} = $count;

        # every parsed line contributes to the total, repeated pairs included
        $numCooccurrences += $count;
    }
    close $in;
    print STDERR " num rows in matrix = ".(scalar keys %matrix)."\n";

    #count the number of columns and the number of keys
    # this needs the complete matrix: a column key can occur in many rows
    # and must be counted once, while every stored pair counts as a key
    my $numKeys = 0;
    my %colKeys = ();
    foreach my $row (keys %matrix) {
        foreach my $colKey (keys %{$matrix{$row}}) {
            $colKeys{$colKey} = 1;
            $numKeys++;
        }
    }
    print STDERR " num columns in matrix = ".(scalar keys %colKeys)."\n";
    print STDERR " number of keys in the matrix = $numKeys\n";
    print STDERR " number of cooccurrences in the matrix = $numCooccurrences\n";
}
( run in 1.013 second using v1.01-cache-2.11-cpan-39bf76dae61 )