Metabolomics-Fragment-Annotation
view release on metacpan or search on metacpan
metabolomics-references/utils/update_resources.pl view on Meta::CPAN
}
else {
warn "\t[WARN] The type for line $entriesNb is undef or unknown ($row->{'type'}) \n" ;
$currentFrag{_TYPE_} = undef ;
}
## Check and control DELTA_MASS
if ( ($row->{'delta_mass'} ) and ($row->{'delta_mass'} > 0 or $row->{'delta_mass'} < 0) ) {
$currentFrag{_DELTA_MASS_} = $row->{'delta_mass'} ;
$checker++ ;
}
else {
warn "\t[WARN] The delta_mass for line $entriesNb is undef or equal to 0\n" ;
$currentFrag{_DELTA_MASS_} = undef ;
}
## Check and control LOSSES OR GAINS
if ( ($row->{'losses_or_gains'} ) and ($row->{'losses_or_gains'} ne '' ) ) {
$currentFrag{_LOSSES_OR_GAINS_} = $row->{'losses_or_gains'} ;
$checker++ ;
}
else {
warn "\t[WARN] The losses or gains for line $entriesNb is undef or void\n" ;
$currentFrag{_LOSSES_OR_GAINS_} = undef ;
}
$currentFrag{_ANNOTATION_IN_POS_MODE_} = $row->{'annotation_in_pos_mode'} ;
$currentFrag{_ANNOTATION_IN_NEG_MODE_} = $row->{'annotation_in_neg_mode'} ;
my %tmp = %currentFrag ;
push(@fragments, \%tmp ) ;
## Checker control
if ($checker == 3 ) { print " line is OK\n" ; } # end of the message
if ($checker < 3 ) { print "\n" ; }
$checker = 0 ;
$entriesNb ++ ;
}
print Dumper @fragments ;
return (0) ;
}
### END of SUB
=head2 METHOD updateKnapsack
## Description : update the exported Knapsack database by crawling the Knapsack web portal for new entries
## Input : $dbFile (path to the current Knapsack CSV export)
## Output : $var4
## Usage : my ( $var4 ) = updateKnapsack ( $dbFile ) ;
=cut
## START of SUB
sub updateKnapsack {
## Retrieve Values
my ( $dbFile, ) = @_;
my ( $lastID, $nbEntries ) = ( 0, 0 ) ;
## based on http://www.knapsackfamily.com/knapsack_core/information.php?sname=C_ID&word=C00000001
## Get the last (highest) id in the current version of the CSV file
print "[INFO] Parsing Knapsack current version...\n" ;
my $csv = Text::CSV->new ( {'sep_char' => ",", binary => 1 } ) # should set binary attribute.
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $fh, "<:encoding(utf8)", $dbFile or die "Can't open csv file $dbFile: $!";
while ( my $row = $csv->getline( $fh ) ) {
if ($row->[0] eq 'knapsackid') {
next ;
}
# print Dumper $row ;
if ( $row->[0] =~ /^C([0-9]*)/ ) {
$nbEntries++ ;
my $id = $1 ;
if ( (defined $id ) and ($id > $lastID) ) {
$lastID = $id ;
}
else {
next ;
}
}
}
$csv->eof or $csv->error_diag();
close $fh;
print "[INFO] Knapsack db parsed with: $nbEntries entries\n" ;
print "[INFO] Last Knapsack id in exported db is: $lastID\n" ;
## Test whether new ids exist in the Knapsack online db
# Knapsack ID format is 00051737
my $tryAgain = 'TRUE' ;
my ($runNbTrue, $runNbFalse) = (0, 0) ;
my $thresholdFalse = 10 ;
my $thresholdTrue = 1000 ;
my %NewKnapSackDump = () ;
my $newId = $lastID ;
while ($tryAgain eq 'TRUE') {
$newId = $newId+1 ;
# my $newId = 53000 ;
my $newFormattedId = 'C'.sprintf '%08s', $newId ;
( run in 0.911 second using v1.01-cache-2.11-cpan-39bf76dae61 )