ALBD

 view release on metacpan or  search on metacpan

utils/datasetCreator/applySemanticFilter.pl  view on Meta::CPAN

#applies a semantic filter to the matrix
use strict;
use warnings;

use LiteratureBasedDiscovery::Discovery;
use LiteratureBasedDiscovery::Evaluation;
use LiteratureBasedDiscovery::Rank;
use LiteratureBasedDiscovery::Filters;
use LiteratureBasedDiscovery;

use UMLS::Association;
use UMLS::Interface;

####### User input
my $matrixFileName = '/home/henryst/lbdData/groupedData/1975_1999_window8_noOrder_threshold5';
my $outputFileName = $matrixFileName.'_filtered';
my $acceptTypesString = ''; #leave blank if none are applied
my $acceptGroupsString = 'CHEM,DISO,GENE,PHYS,ANAT'; #for the explicit matrix
my $interfaceConfig = '/home/share/packages/ALBD/config/interface';

#apply the filter to rows and columns or columns only
# apply to just columns generally for the implicit matrix
#   ...if the rows are just the starting terms
# apply to rows and columns generally for the explicit matrix
my $columnsOnly = 0; #apply to columns only, or rows and columns

&applySemanticFilter($matrixFileName, $outputFileName, 
		     $acceptTypesString, $acceptGroupsString,



###################################################################
###################################################################

# Applies the semantic type filter
sub applySemanticFilter {
    #grab the input
    my $matrixFileName = shift;
    my $outputFileName = shift;
    my $acceptTypesString = shift;
    my $acceptGroupsString = shift;
    my $interfaceConfig = shift;
    my $columnsOnly = shift;

    print STDERR "Applying Semantic Filter to $matrixFileName\n";

    #load the matrix
    my $matrixRef = Discovery::fileToSparseMatrix($matrixFileName);

    #initialize the UMLS::Interface 
    my $componentOptions = 
	LiteratureBasedDiscovery::_readConfigFile('',$interfaceConfig);
    
    my $umls_interface = UMLS::Interface->new($componentOptions) 
	or die "Error: Unable to create UMLS::Interface object.\n";
    
    #get the acceptTypes
    my $acceptTypesRef = &getAcceptTypes(
	$umls_interface, $acceptTypesString, $acceptGroupsString);

    #apply semantic filter
    if ($columnsOnly) {
	Filters::semanticTypeFilter_columns(
	    $matrixRef, $acceptTypesRef, $umls_interface);
    } else {
	Filters::semanticTypeFilter_rowsAndColumns(
	    $matrixRef, $acceptTypesRef, $umls_interface);
    }

    #output the matrix
    Discovery::outputMatrixToFile($outputFileName, $matrixRef);

    #TODO re-enable this and then try to run again
    #disconnect from the database and return
    #$umls_interface->disconnect();
}


# transforms the string of accept types or groups into a hash of accept TUIs
# input:  a string specifying whether linking or target types are being defined
# output: a hash of acceptable TUIs
sub getAcceptTypes {
    my $umls_interface = shift;
    my $acceptTypesString = shift;
    my $acceptGroupsString = shift;

    #get the accept types 
    my %acceptTypes = ();

    #add all types for groups specified
    #accept groups were specified
    my @acceptGroups = split(',',$acceptGroupsString);

    #add all the types of each group
    foreach my $group(@acceptGroups) {
	my $typesRef = Filters::getTypesOfGroup($group, $umls_interface);
	foreach my $key(keys %{$typesRef}) {
	    $acceptTypes{$key} = 1;
	}
    }

    #add all types specified
    #convert each type to a tui and add
    my $tui;
    my @acceptTypes = split(',',$acceptTypesString);
    foreach my $abr(@acceptTypes) {
	$tui = uc $umls_interface->getStTui($abr);
	$acceptTypes{$tui} = 1;
    }
    
    return \%acceptTypes;
}




( run in 1.098 second using v1.01-cache-2.11-cpan-39bf76dae61 )