ALBD

 view release on metacpan or  search on metacpan

README  view on Meta::CPAN

    All the modules that will be installed in the Perl system directory are
    present in the '/lib' directory tree of the package.

    The package contains a utils/ directory that contains Perl utility
    programs. These utilities use the modules or provide some supporting
    functionality.

    runDiscovery.pl -- runs LBD using the parameters specified in the input
    file, and outputs to an output file.

    The package contains a large selection of functions to manipulate CUI
    Co-occurrence matrices in the utils/datasetCreator/ directory. These are
    short scripts and generally require modifying the code at the top with
    user input parameters specific to each run. These scripts include:

    applyMaxThreshold.pl -- applies a maximum co-occurrence threshold to the
    co-occurrence matrix

    applyMinThreshold.pl -- applies a minimum co-occurrence threshold to the
    co-occurrence matrix

lib/LiteratureBasedDiscovery/Discovery.pm  view on Meta::CPAN

    # check tableName
    #TODO check that the table exists in the database
    # or die "Error: table does not exist: $tableName\n";

    #  set up database
    my $db = $cuiFinder->_getDB(); 
    
    # retrieve the table as a nested hash where keys are CUI1, 
    # then CUI2, value is N11
     my @keyFields = ('cui_1', 'cui_2');
     my $matrixRef = $db->selectall_hashref(
	"select * from $tableName", \@keyFields);

    # set values of the loaded table to n_11
    # ...default is hash of hash of hash
    foreach my $key1(keys %{$matrixRef}) {
	foreach my $key2(keys %{${$matrixRef}{$key1}}) {
	    ${${$matrixRef}{$key1}}{$key2} = ${${${$matrixRef}{$key1}}{$key2}}{'n_11'};
	}
    }
    return $matrixRef;
}

lib/LiteratureBasedDiscovery/Filters.pm  view on Meta::CPAN

	    $termsHash{$key2} = 1;
	}
    }
    print "   number of keys after filtering = ".(scalar keys %termsHash)."\n";
=cut
}


# applies a semantic group filter to the matrix, by removing keys that 
# are not of an allowed semantic type. Only removes types from rows, 
# so is applied for time slicing, before randomly selecting terms of 
# one semantic type
# input:  $matrixRef <- ref to a sparse matrix to be filtered
#         $acceptTypesRef <- a ref to a hash of accept type strings
#         $umls <- an instance of UMLS::Interface
# output: None, but $matrixRef is updated 
sub semanticTypeFilter_rows {
    my $matrixRef = shift;
    my $acceptTypesRef = shift;
    my $umls = shift;
    

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	    }
	}
    }
    close IN;

    #return the post cutoff matrix
    return \%postCutoffMatrix;
}

#TODO numRows should be read from file and sent with the lbdOptionsRef
# generates a starting matrix of numRows randomly selected terms
# input:  $explicitMatrixRef <- a ref to the explicit sparse matrix
#         $lbdOptionsRef <- the LBD options
#         $startTermAcceptTypesRef <- a reference to a hash of accept 
#                                     types for start terms (TUIs)
#         $numRows <- the number of random rows to load (if random)
#         $umls_interface <- an instance of the UMLS::Interface
# output: \%startingMatrix <- a ref to the starting sparse matrix
sub generateStartingMatrix {
    my $explicitMatrixRef = shift;
    my $lbdOptionsRef = shift;

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

	    }
	}
    }
    else {
	#randomly grab rows
	#apply semantic filter to the rows (just retrieve appropriate rows)
	my $rowsToKeepRef = getRowsOfSemanticTypes(
	    $explicitMatrixRef, $startTermAcceptTypesRef, $umls_interface);
	((scalar keys %{$rowsToKeepRef}) >= $numRows) or die("ERROR: number of acceptable rows starting terms is less than $numRows\n");

	#randomly select $numRows rows (to generate the 'starting matrix')
	#generate random numbers from 0 to one less than the number of acceptable rows
	my %rowNumbers = ();
	while ((scalar keys %rowNumbers) < $numRows) {
	    $rowNumbers{int(rand(scalar keys %{$rowsToKeepRef}))} = 1;
	}

	#fill starting matrix with keys corresponding to the random numbers 
	my $i = 0;
	foreach my $key (keys %{$rowsToKeepRef}) {
	    if (exists $rowNumbers{$i}) {

lib/LiteratureBasedDiscovery/TimeSlicing.pm  view on Meta::CPAN

		}
		$numChecked++;

		#check if the recall rate has been reached
		if ($truePositive > $numTrueForInterval) {
		    last;
		}
	    }
	    #sum precision at this interval, average over number of rows is 
	    # taken outside of the loop
	    $precision{$i} += ($truePositive / $numChecked); #number that are selected that are true
	    $recall{$i} += ($truePositive / $numTrue); #number of true that are selected	
	}
    }

    #calculate the average precision at each interval
    foreach my $i (keys %precision) {
	#divide by the number of rows in the true matrix ref
	# because those are the number of cuis we are testing
	# it is possible that the predictions has rows that are 
	# not in the true, and those should be ignored.
	$precision{$i} /= (scalar keys %{$trueMatrixRef});



( run in 0.562 second using v1.01-cache-2.11-cpan-49f99fa48dc )