ALBD
view release on metacpan or search on metacpan
utils/datasetCreator/makeOrderNotMatter.pl view on Meta::CPAN
# Makes the order of CUIs not matter in a co-occurrence matrix and writes
# the result to a file.
use strict;
use warnings;
use Getopt::Long;
# Command-line state.
#   --debug       accepted but currently has no effect (TODO: add debug output)
#   --help        print a usage line and exit
#   --inputFile   file to read; each line holds row, column and value
#                 separated by tabs
#   --outputFile  file to write; overwritten if it already exists
my $DEBUG = 0;
my $HELP = '';
my %options = ();
GetOptions( 'debug'        => \$DEBUG,
            'help'         => \$HELP,
            'inputFile=s'  => \$options{'inputFile'},
            'outputFile=s' => \$options{'outputFile'},
);

if ($HELP) {
    print "usage: $0 --inputFile <file> --outputFile <file> [--debug] [--help]\n";
    exit 0;
}

# Input checking.
# Taking \$options{...} in the GetOptions call above already creates both
# keys, so `exists` would always be true; `defined` is what tells us whether
# the option was actually supplied.
(defined $options{'inputFile'}) or die ("inputFile must be specified\n");
# Three-argument open: the file name is taken literally, so characters such
# as '<', '>' or '|' in it can never be interpreted as an open mode.
open(my $in_fh, '<', $options{'inputFile'}) or
    die ("unable to open input file: $options{inputFile} ($!)\n");
(defined $options{'outputFile'}) or die ("outputFile must be specified\n");
open(my $out_fh, '>', $options{'outputFile'}) or
    die ("unable to open output file: $options{outputFile} ($!)\n");

# Make order not matter.
# The whole matrix is accumulated in memory as $matrix{row}{col} = summed
# value, and is written out once after the input has been read completely.
my %matrix = ();
while (my $line = <$in_fh>) {
    # The pattern is intentionally left unanchored, as in the original, so
    # the first "field TAB field TAB field" run on the line is used and any
    # trailing columns are ignored.
    my ($row, $col, $val) = $line =~ /(\S+)\t(\S+)\t(\S+)/;

    # A failed match yields an empty list, leaving all three undefined.
    # Report the line and skip it instead of polluting the matrix with
    # undefined keys and values.
    if (!defined $val) {
        print "Not all defined: $line";
        next;
    }

    # Make sure the mirrored cell exists (with value 0) so that both
    # directions of the pair are present when the matrix is written out.
    if (!(exists $matrix{$col}{$row})) {
        $matrix{$col}{$row} = 0;
    }

    # Add the value to the cell in the direction given by the input.
    # A missing cell is created on demand and starts from 0.
    $matrix{$row}{$col} += $val;
}
close $in_fh;

# Output the matrix.
# Every stored cell is written twice: first as (row, col, value), then
# transposed as (col, row, value).
# NOTE(review): this looks designed for a reader that sums duplicate
# coordinate entries, which would then see a symmetric matrix - confirm
# against the consumer of this file.
foreach my $row (keys %matrix) {
    foreach my $col (keys %{$matrix{$row}}) {
        print {$out_fh} "$row\t$col\t$matrix{$row}{$col}\n";
    }
}
foreach my $row (keys %matrix) {
    foreach my $col (keys %{$matrix{$row}}) {
        print {$out_fh} "$col\t$row\t$matrix{$row}{$col}\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close has to be checked for the output file.
close($out_fh) or
    die ("unable to finish writing output file: $options{outputFile} ($!)\n");
print "DONE!\n";
( run in 0.425 second using v1.01-cache-2.11-cpan-39bf76dae61 )