Finnigan
view release on metacpan or search on metacpan
bin/mzxml-scan view on Meta::CPAN
# Sniff the file format from the first few lines of the input.
#
# Reading stops at the end of the input: once <> has returned undef, calling
# it again makes it start over on STDIN, so a file shorter than five lines
# must not trigger another read.
for my $i (1 .. 5) {
    my $line = <>;
    last unless defined $line;
    $head .= $line;
}
$head //= '';    # empty input: keep the matches below free of warnings

# The format is recognised by the root element name found in the header.
my $type;
if ($head =~ /<mzML/) {
    $type = 'mzML';
}
elsif ($head =~ /<mzXML/) {
    $type = 'mzXML';
}
else {
    say STDERR "can't recognise file format:";
    say STDERR $head . "...";
    # Without a known format there is nothing sensible to do; carrying on
    # would treat the input as mzML and compare an undefined $type.
    exit 1;
}
# Print the peak list of a single scan as tab-separated "m/z<TAB>intensity"
# lines. The scan printed is the first one in the input whose number is not
# smaller than $args->{'-n'}{'<n>'}; reading stops right after it.
if ( $type eq 'mzXML' ) {
    # Make every read end at the closing tag of a peak list, so that each
    # record carries one scan header followed by its encoded peaks.
    local $/ = '</peaks>';

    SCAN:
    while ( <> ) {
        # Cut the <peaks ...> element out of the record; what remains in $_
        # is the scan header. Records without a peak list are skipped.
        next SCAN unless s/(<peaks[^>]+>)(.*)$//;
        my ($peaks_tag, $data) = ($1, $2);

        my ($scan_no) = /scan num="(\d+)"/
            or die "cannot determine scan number";
        next SCAN if $scan_no < $args->{'-n'}{'<n>'};

        $data =~ s{</peaks>$}{};

        # The peaks are big-endian ("network" order) floats; the precision
        # attribute of the <peaks> tag tells whether they are 32 bits
        # (the default assumed here) or 64 bits wide. Templates with an
        # explicit byte order decode correctly on any host.
        my $template = $peaks_tag =~ /\bprecision\s*=\s*"64"/ ? 'd>*' : 'f>*';
        my @spec = unpack( $template, decode_base64($data) );

        # Values alternate between m/z and intensity; an incomplete trailing
        # pair is not printed.
        for my $i ( 0 .. int( @spec / 2 ) - 1 ) {
            say join( "\t", @spec[ 2 * $i, 2 * $i + 1 ] );
        }
        last SCAN;
    }
} # mzXML
else { # mzML
    # Each record ends where the list of binary arrays of one spectrum (or
    # chromatogram) ends.
    local $/ = '</binaryDataArrayList>';

    # unpack templates for the little-endian number sizes that are understood
    my %template_for = (
        '32-bit' => 'f<*',
        '64-bit' => 'd<*',
    );

    SPECTRUM:
    while ( <> ) {
        chomp;    # drops the closing </binaryDataArrayList> tag

        # Records that do not describe a spectrum with a scan number are
        # passed over.
        my ($scan_no) = /<spectrum[^>]+scan=(\d+)/s
            or next SPECTRUM;
        next SPECTRUM if $scan_no < $args->{'-n'}{'<n>'};

        if ( /^.*<binaryDataArrayList\s+count\s*=\s*"(\d+)">(.+)$/s ) {
            my ($n, $arrays) = ($1, $2);
            die "don't know what to do with <binaryDataArrayList> of size != 2 (read: $n)" unless $n == 2;

            # The arrays are identified by position only: the first one is
            # taken as m/z, the second as intensity.
            my @key      = qw/mz intensity/;
            my $chunk_no = 0;
            my %table;

            foreach my $chunk ( split m{</binaryDataArray>\s*}, $arrays ) {
                my ($attrs, $data)
                    = $chunk =~ m{^(.*<binaryDataArray.+<binary>)(.*)</binary>.*$}s;
                unless ( defined $data ) {
                    say STDERR "<binaryDataArray>...<binary> not matched in: " . substr($chunk, 0, 500) . "...\n";
                    next;
                }

                # The number size is read from the cvParam named "NN-bit ...".
                my ($size) = $attrs =~ /<cvParam.+name="(\d\d-bit)/;
                my $template = defined $size ? $template_for{$size} : undef;
                die "unknown number size: " . ( $size // '(none found)' )
                    unless defined $template;

                $table{ $key[$chunk_no] } = [ unpack( $template, decode_base64($data) ) ];
                $chunk_no++;
            }

            # An array that could not be decoded counts as empty, so that the
            # size check below reports the problem.
            my @mz        = @{ $table{mz}        // [] };
            my @intensity = @{ $table{intensity} // [] };
            my ($n1, $n2) = ( scalar @mz, scalar @intensity );
            die "unequal sizes of the M/z and intensity arrays ($n1 and $n2)" unless $n1 == $n2;

            foreach my $i ( 0 .. $n1 - 1 ) {
                say join( "\t", $mz[$i], $intensity[$i] );
            }
        }
        last SPECTRUM;
    }
}
__END__
=head1 NAME
mzxml-scan - decode the base64-encoded scan data in an mzXML or mzML file
=head1 SYNOPSIS
mzxml-scan [options] <file>
Options:
-r[ange] <from:0+n> .. <to:0+n> write only scans with numbers between <from> and <to>
-hex add the hex encoding of decimals
<file> input file
=head1 OPTIONS
=over 4
=item B<-r[ange] E<lt>from:0+nE<gt> .. E<lt>to:0+nE<gt>>
extract only scans with numbers between E<lt>fromE<gt> and E<lt>toE<gt>
B<Note:> this option breaks the structure of the output file (the parts preceding and following the selected range of scans are not written). It is mainly useful in checking the XML syntax and the contents of a small number of scans. For extracting t...
=item B<-hex>
add the hex encoding of decimals
( run in 0.621 second using v1.01-cache-2.11-cpan-71847e10f99 )