App-geoCancerPrognosticDatasetsRetriever
view release on metacpan or search on metacpan
bin/geoCancerPrognosticDatasetsRetriever view on Meta::CPAN
system ("gunzip $prog_path/temp/$dsf_zip_file");
system ("mv $prog_path/temp/*.soft $run_dir");
print color ("green"), "done\n", color("reset");
}
return $dsf_unzip_file;
}
############################ SUBROUTINE 10 #######################################################
#This subroutine checks the GSE entries' full abstract for prognostic keywords. If the input
#file's abstract is incomplete, "more..." is found. It calls download_soft_file() to download
#the .soft file and then scans its "!Series_summary" lines for prognostic keywords. If a keyword
#is detected, it calls prognostic_signature_finder() to check for prognostic signatures in the
#same .soft file.
#
#Arguments:
#  $_[0] - passed through unchanged as the first argument of download_soft_file()
#  $_[1] - GSE id, passed through as the second argument of download_soft_file()
#Returns:
#  (1, %hash) if a keyword was found; %hash is a copy of the result of prognostic_signature_finder()
#  0          if no keyword was found
#Dies if the downloaded .soft file cannot be opened.
sub soft_file_abstract_check {
    my ($wget_file, $gse_id) = @_;
    my $keyword_found = 0;

    #download soft file and store filename in variable $unzip_file.
    my $unzip_file = download_soft_file($wget_file, $gse_id);

    #open soft file and search for prognostic keywords in the GSE entry's summary lines.
    #Three-argument open with a lexical handle: the path can never be parsed as an open mode,
    #and the handle cannot collide with a bareword handle used elsewhere in the script.
    open (my $soft_fh, '<', "$run_dir/$unzip_file") or die "Cannot open file: $unzip_file $!\n";

    #A lexical line variable is used so that the caller's $_ is not clobbered by this loop.
    while (my $summary_line = <$soft_fh>) {
        #No trailing ".+" after the keyword group: a summary line that ends exactly on a keyword
        #(e.g. "... poor prognosis") must count as a hit too. No /g either, since this is a plain
        #yes/no test on a fresh line.
        if ($summary_line =~ m/^!Series_summary.+(?:prognosis|prognostic|prognostically|prognosticator|survival|survive|survives|survived|surviving)/i) {
            $keyword_found = 1;
            last;    #one hit is enough; the result is only used as a boolean
        }
    }
    close ($soft_fh);

    if ($keyword_found) {
        print color ("yellow"), "Prognostic Text filter: <Prognostic keywords found>\n", color("reset");
        print FH2 "Prognostic Text filter: <Prognostic keywords found>\n";
        my %local_hash = prognostic_signature_finder($unzip_file);
        #returns count (i.e. 1 => minimum no. of occurrence of keyword), which will be added to count value from main script.
        #and returns a copy of the result of prognostic_signature_finder().
        return (1, %local_hash);
    }

    print color ("yellow"), "Prognostic Text filter: <No prognostic keywords found>\n", color("reset");
    print FH2 "Prognostic Text filter: <No prognostic keywords found>\n";
    return 0;
}
############################ SUBROUTINE 11 #######################################################
#This subroutine uses regular expression analysis to detect prognostic signature patterns.
#The regexes are based on over 50 parsed signatures used for different cancer types.
sub prognostic_signature_finder {
my $soft_file = $_[0];
my %hash_signature = ();
my $soft_line = "";
my $regex_tail = '(:|=)\s*(alive.*|no\sdeath|dea(d|th.*)|deceased|NED|DOD|DOC|0\s|1\s|no|yes|.*patient|died.*|alive|surviv(al|ed)|living|long|short|Y|N|NED|DOD|AWD|Exitus)';
my $regex_keyword_type_A = '(.*stat?us.*|.*(dea(d|th)|.*alive).*|.*\srecur\s.*|.*Die.*)' . $regex_tail;
my $regex_keyword_type_B = '(dss.?event\s?\(.+\)|dfs.*\w+|drfs.*|e\.dmfs.*|e\.rfs.*|e?\.?os.*)' . $regex_tail;
my $regex_keyword_type_C = '((Overall)?\s?survival.*|(overall)?.?event.*|outcome.*|prognosis.*|comort.*|evolution.*)' . $regex_tail;
my $regex_keyword_type_Ca = '(Overall)?\s?survival\s.*(:|=)\s*(\d*)';
my $outcome_1 = "Prognostic Signature filter: <Prognostic signature found>\n";
my $outcome_2 = "Prognostic Signature filter: <No data found>\n";
my $i = 0;
#open SOFT file and read its contents
open (KEY, "$run_dir/$soft_file") or die "Cannot open $soft_file: $!";
while ($soft_line = <KEY>) {
#regexes are evaluated according to their predominance from A - C
if ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_A/ig) {
print color ("yellow"), "$outcome_1", color("reset");
print color ("green"), "$soft_line", color("reset");
$hash_signature{"$soft_file"} = "$outcome_1";
$i++;
last;
}
elsif ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_B/ig) {
print color ("yellow"), "$outcome_1", color("reset");
print color ("green"), "$soft_line", color("reset");
$hash_signature{"$soft_file"} = "$outcome_1";
$i++;
last;
}
elsif ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_C/ig) {
print color ("yellow"), "$outcome_1", color("reset");
print color ("green"), "$soft_line", color("reset");
$hash_signature{"$soft_file"} = "$outcome_1";
$i++;
last;
}
elsif ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_Ca/ig) {
print color ("yellow"), "$outcome_1", color("reset");
print color ("green"), "$soft_line", color("reset");
$hash_signature{"$soft_file"} = "$outcome_1";
$i++;
last;
}
}
#Alert user if no prognostic signature was found in the current GSE .soft file
unless ($i) {
print color ("yellow"), "$outcome_2", color("reset");
$hash_signature{"$soft_file"} = "$outcome_2";
}
else {
( run in 3.344 seconds using v1.01-cache-2.11-cpan-df04353d9ac )