Finnigan
view release on metacpan or search on metacpan
bin/mzxml-unpack view on Meta::CPAN
# Work out which XML dialect we were given by looking at the leading chunk
# of input held in $head. mzML is tested first, then mzXML. When neither
# root element is present the chunk is reported on STDERR and $type is left
# untouched. The chunk is echoed to STDOUT in every case.
for ($head) {
    if (/<mzML/m) {
        $type = 'mzML';
        last;
    }
    if (/<mzXML/) {
        $type = 'mzXML';
        last;
    }
    say STDERR "can't recognise file format:";
    say STDERR $head . "...";
}
print $head;
if ( $type eq 'mzXML' ) {
    # mzXML: the peak list of each scan is the text of a <peaks> element,
    # a base64-encoded run of big-endian numbers. Using '</peaks>' as the
    # input record separator yields one scan per record. `local` keeps the
    # change to $/ confined to this branch.
    local $/ = '</peaks>';
    while ( <> ) {
        # Cut the record into: text before <peaks> (left in $_), the opening
        # tag, and the payload. /s lets a line-wrapped payload match too.
        if ( s/(<peaks[^>]+>)(.*)$//s ) {
            my ($tag, $payload) = ($1, $2);

            # <> does not chomp, so the payload still carries the record
            # separator. '/', 'p', 'e', 'a', 'k', 's' are all legal base64
            # characters, so the terminator must be removed before decoding.
            $payload =~ s{</peaks>\z}{};

            s/\n$//;

            my ($scan) = /scan num="(\d+)"/
                or die "cannot determine scan number";
            next unless $scan >= $from and $scan <= $to;

            say "$_$tag";

            # The data are big-endian, which is why a little-endian "f<*"
            # template misreads them; "f>*" decodes them directly and,
            # unlike a native "f", does not depend on the host byte order.
            # A declared precision of 64 selects doubles; anything else is
            # treated as 32-bit floats, as this script always assumed.
            my $template = $tag =~ /precision\s*=\s*["']64["']/ ? 'd>*' : 'f>*';
            my @spec = unpack( $template, decode_base64($payload) );

            # Values come in pairs; print one tab-separated pair per line.
            # A trailing unpaired value, if any, is ignored.
            for my $i ( 0 .. int( @spec / 2 ) - 1 ) {
                say join( "\t", @spec[ 2 * $i, 2 * $i + 1 ] );
            }
            print "</peaks>";
        }
        else {
            # No <peaks> element in this record (e.g. the tail of the
            # document): pass it through unchanged.
            print;
        }
    }
} # mzXML
else { # mzML
    # mzML: each spectrum or chromatogram keeps its numbers in a
    # <binaryDataArrayList> holding several <binaryDataArray> elements, each
    # with a base64 <binary> payload of little-endian floats. Using the
    # closing list tag as the record separator yields one list per record.
    # NOTE(review): arrays flagged as compressed or integer-typed are not
    # treated specially here -- confirm whether such input must be supported.
    local $/ = '</binaryDataArrayList>';

    # pack/unpack template for a single value of each supported width.
    my %template_for = ( '32-bit' => 'f<', '64-bit' => 'd<' );

    while ( <> ) {
        chomp;    # drop the trailing '</binaryDataArrayList>'

        # Skip spectra whose scan number is outside the requested range.
        if ( /<spectrum[^>]+scan=(\d+)/s ) {
            next unless $1 >= $from and $1 <= $to;
        }

        # Separate everything up to and including the opening list tag from
        # the run of <binaryDataArray> elements that follows it.
        if ( s/^(.*<binaryDataArrayList\s+count\s*=\s*"\d+">)(.+)$//s ) {
            my ($list_open, $arrays) = ($1, $2);
            print "$_$list_open";

            foreach my $chunk ( split m{</binaryDataArray>\s*}, $arrays ) {
                if ( $chunk =~ m{^(.*<binaryDataArray.+<binary>)(.*)(</binary>.*)$}s ) {
                    my ($preamble, $data, $rest) = ($1, $2, $3);

                    # The value width is taken from the cvParam name,
                    # e.g. name="32-bit float".
                    my ($size) = ( $preamble =~ /<cvParam.+name="(\d\d-bit)/ );
                    say $preamble;

                    my $template = $template_for{ $size // '' }
                        or die "unknown number size: " . ( $size // '' );

                    my @list = unpack( "$template*", decode_base64($data) );

                    if ( $args->{-h} ) {
                        # Hex mode: show the bytes of each value next to the
                        # value itself. Packing with the explicit
                        # little-endian template reproduces the byte order
                        # of the file on any host.
                        foreach my $value (@list) {
                            my $hex = join " ",
                                map { sprintf "%2.2x", $_ }
                                unpack( "C*", pack( $template, $value ) );
                            say $hex . "\t" . $value;
                        }
                    }
                    else {
                        say join "\n", @list;
                    }
                    print "$rest</binaryDataArray>\n ";
                }
                else {
                    say STDERR "<binaryDataArray>...<binary> not matched in: " . substr($chunk, 0, 500) . "...\n";
                }
            }
            print '</binaryDataArrayList>';
        }
        else {
            # No binary list in this record: pass it through unchanged.
            print;
        }
    }
}
__END__
=head1 NAME
mzxml-unpack - decode the base64-encoded scan data in an mzXML or mzML file
=head1 SYNOPSIS
mzxml-unpack [options] <file>
Options:
-r[ange] <from:0+n> .. <to:0+n> write only scans with numbers between <from> and <to>
-hex also print the bytes of each decoded number in hex (currently honoured for mzML input only)
<file> input file
( run in 0.493 second using v1.01-cache-2.11-cpan-71847e10f99 )