App-GeoCancerPrognosticDatasetsRetriever

 view release on metacpan or  search on metacpan

bin/geoCancerPrognosticDatasetsRetriever  view on Meta::CPAN

		system ("gunzip $dsf_zip_file");
		system ("mv *.soft $run_dir");
		
		print color ("green"), "done\n", color("reset");
	}
	
	return $dsf_unzip_file;
}
############################ SUBROUTINE 11 #######################################################
#This subroutine downloads the .soft file of a GSE entry through download_soft_file() and scans 
#its "!Series_summary" lines (the entry's full abstract) for prognostic keywords. If a keyword is 
#detected, it calls prognostic_signature_finder() to check for prognostic signatures in the same 
#.soft file.
#
#Arguments : $_[0] => first argument handed on to download_soft_file()
#            $_[1] => GSE identifier, handed on to download_soft_file() as second argument
#Returns   : (1, %local_hash) when a prognostic keyword was found, where %local_hash is the result 
#            of prognostic_signature_finder(); 0 when no keyword was found.
#Dies      : when the downloaded .soft file cannot be opened.
#Output    : the outcome of the text filter is printed to the terminal and to the FH2 filehandle
#            (FH2 is not opened here; presumably it is the log file opened by the main script).
sub soft_file_abstract_check {
	
	my $wget_file     = $_[0];
	my $gse_id        = $_[1];
	my $keyword_found = 0;
	my $unzip_file    = download_soft_file($wget_file, $gse_id); #download soft file and store filename in variable $unzip_file.
	
	#open soft file with the three-argument form and a lexical filehandle, so that the path is never
	#interpreted as an open mode and no package-wide bareword handle is shared with other subroutines.
	open (my $soft_fh, '<', "$run_dir/$unzip_file") or die "Cannot open file: $unzip_file $!\n";

	#search for prognostic keywords in the GSE entry abstract. Each line is read into a lexical 
	#variable so that the caller's $_ is left untouched.
	while (my $soft_line = <$soft_fh>) {
		
		if ($soft_line =~ m/^!Series_summary.+(prognosis|prognostic|prognostically|prognosticator|survival|survive|survives|survived|surviving).+/i) {
			
			#a single occurrence is enough: only presence/absence of a keyword is reported.
			$keyword_found = 1;
			last;
		}
	}
	
	close ($soft_fh);
	
	if ($keyword_found) { 
		
		print color ("yellow"), "Prognostic Text filter: <Prognostic keywords found>\n", color("reset");
		print FH2 "Prognostic Text filter: <Prognostic keywords found>\n";
		my %local_hash = prognostic_signature_finder($unzip_file);
		
		#returns count (i.e. 1 => minimum no. of occurrence of keyword), which will be added to count value from main script.
		#and returns a copy of the result of prognostic_signature_finder().
		return (1, %local_hash);
		
	} else {
		
		print color ("yellow"), "Prognostic Text filter: <No prognostic keywords found>\n", color("reset");
		print FH2 "Prognostic Text filter: <No prognostic keywords found>\n";
		return 0;
	}
}
############################ SUBROUTINE 12 #######################################################
#This subroutine uses regular expression analysis to detect prognostic signature patterns. 
#The regexes are based on over 50 parsed signatures used for different cancer types.
sub prognostic_signature_finder {

	my $soft_file             = $_[0];
	my %hash_signature        = ();
	my $soft_line             = "";
	my $regex_tail            = '(:|=)\s*(alive.*|no\sdeath|dea(d|th.*)|deceased|NED|DOD|DOC|0\s|1\s|no|yes|.*patient|died.*|alive|surviv(al|ed)|living|long|short|Y|N|NED|DOD|AWD|Exitus)';
	my $regex_keyword_type_A  = '(.*stat?us.*|.*(dea(d|th)|.*alive).*|.*\srecur\s.*|.*Die.*)' . $regex_tail;
	my $regex_keyword_type_B  = '(dss.?event\s?\(.+\)|dfs.*\w+|drfs.*|e\.dmfs.*|e\.rfs.*|e?\.?os.*)' . $regex_tail;
	my $regex_keyword_type_C  = '((Overall)?\s?survival.*|(overall)?.?event.*|outcome.*|prognosis.*|comort.*|evolution.*)' . $regex_tail;
	my $regex_keyword_type_Ca = '(Overall)?\s?survival\s.*(:|=)\s*(\d*)';
	my $outcome_1             = "Prognostic Signature filter: <Prognostic signature found>\n";
	my $outcome_2             = "Prognostic Signature filter: <No data found>\n";
	my $i                     = 0;
	
	#open SOFT file and read its contents
	open (KEY, "$run_dir/$soft_file") or die "Cannot open $soft_file: $!";
	
	while ($soft_line = <KEY>) {
	
		#regexes are evaluated according to their predominance from A - C
		if ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_A/ig) { 
		
			print color ("yellow"), "$outcome_1", color("reset"); 
			print color ("green"), "$soft_line", color("reset");
			$hash_signature{"$soft_file"} = "$outcome_1";
			$i++; 
			last;
		} 
				
		elsif ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_B/ig) { 
			
			print color ("yellow"), "$outcome_1", color("reset"); 
			print color ("green"), "$soft_line", color("reset");
			$hash_signature{"$soft_file"} = "$outcome_1";
			$i++;
			last;
		}
				
		elsif ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_C/ig) { 
			
			print color ("yellow"), "$outcome_1", color("reset"); 
			print color ("green"), "$soft_line", color("reset"); 
			$hash_signature{"$soft_file"} = "$outcome_1";
			$i++;
			last;
		}
		
		elsif ($soft_line =~ /^!Sample_characteristics_ch1 = $regex_keyword_type_Ca/ig) { 
			
			print color ("yellow"), "$outcome_1", color("reset");  
			print color ("green"), "$soft_line", color("reset");
			$hash_signature{"$soft_file"} = "$outcome_1";
			$i++; 
			last;
		}
		
	}

	#Alert user if no prognostic signature was found in the current GSE .soft file
	unless ($i) { 
		
		print color ("yellow"), "$outcome_2", color("reset");
		$hash_signature{"$soft_file"} = "$outcome_2"; 
		
	}
	
	else {



( run in 2.485 seconds using v1.01-cache-2.11-cpan-df04353d9ac )