select results from the CPAN

select

ALBD

view release on metacpan or search on metacpan

    All the modules that will be installed in the Perl system directory are
    present in the '/lib' directory tree of the package.

    The package contains a utils/ directory that contains Perl utility
    programs. These utilities use the modules or provide some supporting
    functionality.

    runDiscovery.pl -- runs LBD using the parameters specified in the input
    file, and outputs to an output file.

    The package contains a large selection of functions to manipulate CUI
    Co-occurrence matrices in the utils/datasetCreator/ directory. These are
    short scripts and generally require modifying the code at the top with
    user input parameters specific for each run. These scripts include:

    applyMaxThreshold.pl -- applies a maximum co-occurrence threshold to the
    co-occurrence matrix

    applyMinThreshold.pl -- applies a minimum co-occurrence threshold to the
    co-occurrence matrix

lib/LiteratureBasedDiscovery/Discovery.pm view on Meta::CPAN

    # check tableName
    #TODO check that the table exists in the database
    # or die "Error: table does not exist: $tableName\n";

    #  set up database
    my $db = $cuiFinder->_getDB(); 
    
    # retrieve the table as a nested hash where keys are CUI1,
    # then CUI2, value is N11
     my @keyFields = ('cui_1', 'cui_2');
     my $matrixRef = $db->selectall_hashref(
	"select * from $tableName", \@keyFields);

    # set values of the loaded table to n_11
    # ...default is hash of hash of hash
    foreach my $key1(keys %{$matrixRef}) {
	foreach my $key2(keys %{${$matrixRef}{$key1}}) {
	    ${${$matrixRef}{$key1}}{$key2} = ${${${$matrixRef}{$key1}}{$key2}}{'n_11'};
	}
    }
    return $matrixRef;
}

lib/LiteratureBasedDiscovery/Filters.pm view on Meta::CPAN

	    $termsHash{$key2} = 1;
	}
    }
    print "   number of keys after filtering = ".(scalar keys %termsHash)."\n";
=cut
}


# applies a semantic group filter to the matrix, by removing keys that
# are not of an allowed semantic type. Only removes types from rows,
# so it is applied for time slicing, before randomly selecting terms of
# one semantic type
# input:  $matrixRef <- ref to a sparse matrix to be filtered
#         $acceptTypesRef <- a ref to a hash of accept type strings
#         $umls <- an instance of UMLS::Interface
# output: None, but $matrixRef is updated
sub semanticTypeFilter_rows {
    my $matrixRef = shift;
    my $acceptTypesRef = shift;
    my $umls = shift;

lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN

	    }
	}
    }
    close IN;

    #return the post cutoff matrix
    return \%postCutoffMatrix;
}

#TODO numRows should be read from file and sent with the lbdOptionsRef
# generates a starting matrix of numRows randomly selected terms
# input:  $explicitMatrixRef <- a ref to the explicit sparse matrix
#         $lbdOptionsRef <- the LBD options
#         $startTermAcceptTypesRef <- a reference to a hash of accept
#                                     types for start terms (TUIs)
#         $numRows <- the number of random rows to load (if random)
#         $umls_interface <- an instance of the UMLS::Interface
# output: \%startingMatrix <- a ref to the starting sparse matrix
sub generateStartingMatrix {
    my $explicitMatrixRef = shift;
    my $lbdOptionsRef = shift;

lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN

	    }
	}
    }
    else {
	#randomly grab rows
	#apply semantic filter to the rows (just retrieve appropriate rows)
	my $rowsToKeepRef = getRowsOfSemanticTypes(
	    $explicitMatrixRef, $startTermAcceptTypesRef, $umls_interface);
	((scalar keys %{$rowsToKeepRef}) >= $numRows) or die("ERROR: number of acceptable rows starting terms is less than $numRows\n");

	#randomly select $numRows rows (to generate the 'starting matrix')
	#generate random numbers from 0 to number of rows in the explicit matrix
	my %rowNumbers = ();
	while ((scalar keys %rowNumbers) < $numRows) {
	    $rowNumbers{int(rand(scalar keys %{$rowsToKeepRef}))} = 1;
	}

	#fill starting matrix with keys corresponding to the random numbers 
	my $i = 0;
	foreach my $key (keys %{$rowsToKeepRef}) {
	    if (exists $rowNumbers{$i}) {

lib/LiteratureBasedDiscovery/TimeSlicing.pm view on Meta::CPAN

		}
		$numChecked++;

		#check if the recall rate has been reached
		if ($truePositive > $numTrueForInterval) {
		    last;
		}
	    }
	    #sum precision at this interval, average over number of rows is 
	    # taken outside of the loop
	    $precision{$i} += ($truePositive / $numChecked); #number that are selected that are true
	    $recall{$i} += ($truePositive / $numTrue); #number of true that are selected	
	}
    }

    #calculate the average precision at each interval
    foreach my $i (keys %precision) {
	#divide by the number of rows in the true matrix ref
	# because those are the number of cuis we are testing
	# it is possible that the predictions have rows that are
	# not in the true, and those should be ignored.
	$precision{$i} /= (scalar keys %{$trueMatrixRef});

( run in 0.415 second using v1.01-cache-2.11-cpan-a5abf4f5562 )