Metabolomics-Fragment-Annotation

 view release on metacpan or  search on metacpan

metabolomics-references/utils/update_resources.pl  view on Meta::CPAN

    		}
    		else {
    			warn "\t[WARN] The type for line $entriesNb is undef or unknown ($row->{'type'}) \n" ;
    			$currentFrag{_TYPE_} = undef ;
    		}
    		
    		## Check and control DELTA_MASS
    		if ( ($row->{'delta_mass'} ) and ($row->{'delta_mass'} > 0 or $row->{'delta_mass'} < 0) ) {
    			$currentFrag{_DELTA_MASS_} = $row->{'delta_mass'} ;
    			$checker++ ;
    		}
    		else {
    			warn "\t[WARN] The delta_mass for line $entriesNb is undef or equal to 0\n" ;
    			$currentFrag{_DELTA_MASS_} = undef ;
    		}
    		
    		## Check and control LOSSES OR GAINS
    		if ( ($row->{'losses_or_gains'} ) and ($row->{'losses_or_gains'} ne '' ) ) {
    			$currentFrag{_LOSSES_OR_GAINS_} = $row->{'losses_or_gains'} ;
    			$checker++ ;
    		}
    		else {
    			warn "\t[WARN] The losses or gains for line $entriesNb is undef or void\n" ;
    			$currentFrag{_LOSSES_OR_GAINS_} = undef ;
    		}
    		
    		
    		$currentFrag{_ANNOTATION_IN_POS_MODE_} = $row->{'annotation_in_pos_mode'} ;
    		$currentFrag{_ANNOTATION_IN_NEG_MODE_} = $row->{'annotation_in_neg_mode'} ;
    		
    		my %tmp = %currentFrag ;
    		push(@fragments, \%tmp ) ;
     		
     		## Checker control 
     		if ($checker == 3 ) {	print " line is OK\n" ;		} # end of the message
     		if ($checker < 3 ) {	print "\n" ;		}
     		$checker = 0 ;
     		
     		$entriesNb ++ ;
    	}
    
    print Dumper @fragments ;
    
    return (0) ;
}
### END of SUB





=head2 METHOD updateKnapsack

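	## Description : read the last Knapsack id from the current CSV export, then crawl the Knapsack web portal to look for newer entries
	## Input : $dbFile (path to the current Knapsack CSV export; the first column holds the Knapsack id, e.g. C00000001)
	## Output : $var4
	## Usage : my ( $var4 ) = updateKnapsack ( $dbFile ) ;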
	
=cut
## START of SUB
sub updateKnapsack {
    ## Retrieve Values
    
    my ( $dbFile,  ) = @_;
    my ( $lastID, $nbEntries ) = ( 0, 0 ) ;
    
    ## based on http://www.knapsackfamily.com/knapsack_core/information.php?sname=C_ID&word=C00000001
    
    ## Get Last id in last CSV file version
    
    print "[INFO] Parsing Knapsack current version...\n" ;
    
    my $csv = Text::CSV->new ( {'sep_char' => ",", binary => 1 } )  # should set binary attribute.
    	or die "Cannot use CSV: ".Text::CSV->error_diag ();
    
     
	open my $fh, "<:encoding(utf8)", $dbFile or die "Can't open csv file $dbFile: $!";
	
	while ( my $row = $csv->getline( $fh ) ) {
		
		if ($row->[0] eq 'knapsackid') {
			next ;
		}
		
#		print Dumper $row ;
		if ( $row->[0] =~ /^C([0-9]*)/ ) {
			$nbEntries++ ;
			
			my $id = $1 ;
			
			if ( (defined $id ) and ($id > $lastID) ) {
				$lastID = $id ;
			}
			else {
				next ;
			}
		}
	}
	$csv->eof or $csv->error_diag();
	close $fh;
	
    print "[INFO] Knapsack db parsed with: $nbEntries entries\n" ;
    print "[INFO] Last Knapsack id in exported db is: $lastID\n" ;
    
    
    ## test if new ids exists on knapsack online db
    # Knapsack ID format is 00051737
    
    my $tryAgain = 'TRUE' ;
    my ($runNbTrue, $runNbFalse) = (0, 0) ;
    my $thresholdFalse = 10 ;
    my $thresholdTrue = 1000 ;
    my %NewKnapSackDump = () ;
    my $newId = $lastID ; 
    
    
    while ($tryAgain eq 'TRUE') {
    
	    $newId = $newId+1 ;
	#    my $newId = 53000 ;
	    my $newFormattedId =  'C'.sprintf '%08s', $newId ; 



( run in 0.911 second using v1.01-cache-2.11-cpan-39bf76dae61 )