MS

 view release on metacpan or  search on metacpan

bin/unimod2storable.pl  view on Meta::CPAN

    $unimod->{$tag}->{$title}->{avge_mass} = $avg;
    $unimod->{$tag}->{$title}->{full_name} = $attrs->{full_name};
    $unimod->{$tag}->{$title}->{record_id} = $attrs->{record_id};

    # store mappings of record_id to title
    $unimod->{mod_index}->{$attrs->{record_id}} = $title;

    # parse element composition
    for my $atom ($delta->children('umod:element')) {
        my $attrs = $atom->atts;
        for (qw/symbol number/) {
            die "Missing meta $_ for elt\n" if (! defined $attrs->{$_});
        }
        print STDERR "$attrs->{symbol} to $attrs->{number}\n";
        $unimod->{$tag}->{$title}->{atoms}->{ $attrs->{symbol} }
            = $attrs->{number};
    }

    $unimod->{$tag}->{$title}->{hashref} = $elt->simplify(
        forcearray => [qw/
            umod:element
            umod:specificity
            umod:Ignore
            umod:alt_name
            umod:xref
            umod:NeutralLoss
            umod:PepNeutralLoss
       /],
    );

    $twig->purge;
    return;

}

# Add elements listed in the elements file ($fi_elements) to
# $unimod->{elem}, using masses fetched from the PubChem PUG REST
# "fastformula" endpoint.
#
# The elements file is read as comma-separated lines of
# "number,symbol,name"; lines starting with '#' and blank lines are
# ignored, and lines lacking a symbol or name are skipped with a warning.
#
# Arguments:
#   $unimod - hashref. $unimod->{elem} is keyed by element symbol. Symbols
#             already present are only cross-checked against the fetched
#             monoisotopic mass; absent symbols get a new entry with
#             full_name, avge_mass and mono_mass.
#
# Returns: nothing.
#
# Dies if the elements file cannot be opened, if a response cannot be
# parsed, if a response does not contain exactly one monoisotopic and one
# average mass, or if a fetched monoisotopic mass differs from an existing
# one by more than 0.01. A failed HTTP request only warns and moves on to
# the next element.
sub fetch_missing_elements {

    my ($unimod) = @_;

    my %elements;

    open my $in, '<', $fi_elements
        or die "Failed to open $fi_elements for reading: $!\n";
    LINE:
    while (my $line = <$in>) {

        next LINE if ($line =~ /^\s*#/);
        next LINE if ($line !~ /\S/);

        # strip the line ending, tolerating CRLF files
        $line =~ s/\r?\n\z//;

        my (undef, $sym, $name) = split ',', $line;
        if (! length($sym // '') || ! length($name // '')) {
            warn "Skipping malformed line $. in $fi_elements\n";
            next LINE;
        }
        $elements{$name} = $sym;

    }
    close $in;

    my $ua = HTTP::Tiny->new();

    # sorted so that progress output and the first failure are reproducible
    ELEM:
    for my $el (sort keys %elements) {

        my $sym = $elements{$el};

        say STDERR "Fetching $el";

        my $url = sprintf
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastformula/%s/JSON?MaxRecords=1",
            $sym,
        ;

        my $res = $ua->get($url);

        if (! $res->{success}) {
            warn "Failed to fetch data for $el: $res->{reason}\n";
            next ELEM;
        }

        # trap decoder exceptions so the failure names the element
        my $data = eval { decode_json( $res->{content} ) };
        die "Failed to parse response for $el: " . ($@ || "not a JSON object\n")
            if (ref $data ne 'HASH');

        # fall back to an empty list so that a response without properties
        # is reported by the "Missing ..." checks below
        my $props = $data->{PC_Compounds}->[0]->{props} // [];

        my @mono = grep {
               ($_->{urn}->{label} // '') eq 'Weight'
            && ($_->{urn}->{name}  // '') eq 'MonoIsotopic'
        } @{$props};
        die "Missing or too many mono masses for $el\n"
            if (scalar @mono != 1);
        my @avg = grep {
            ($_->{urn}->{label} // '') eq 'Molecular Weight'
        } @{$props};
        die "Missing or too many avg masses for $sym\n"
            if (scalar @avg != 1);

        my $mass_avg = $avg[0]->{value}->{fval}
            // die "Missing avg mass for $el";
        my $mass_mono = $mono[0]->{value}->{fval}
            // die "Missing mono mass for $el";

        my $existing = $unimod->{elem}->{$sym};
        if (defined $existing) {
            # element already known: only verify that both sources agree
            my $prev = $existing->{mono_mass};
            my $delta = abs($prev - $mass_mono);
            if ($delta > 0.01) {
                die "Disagreement in mono mass for $el: prev $prev, curr $mass_mono\n";
            }
        }
        else {
            $unimod->{elem}->{$sym} = {
                full_name => $el,
                avge_mass => $mass_avg,
                mono_mass => $mass_mono,
            };
            say STDERR "\tAdded $el";
        }

    }

    return;

}



( run in 0.597 second using v1.01-cache-2.11-cpan-99c4e6809bf )