Finnigan

 view release on metacpan or  search on metacpan

bin/mzxml-unpack  view on Meta::CPAN

# Work out which of the two supported formats the input is, based on the
# leading part of the file held in $head ($head and $type are set up earlier
# in the script).  mzML is tested first, then mzXML.
if ($head =~ /<mzML/) {
  $type = 'mzML';
}
elsif ($head =~ /<mzXML/) {
  $type = 'mzXML';
}
else {
  say STDERR "can't recognise file format:";
  say STDERR $head . "...";
  # Stop here.  The conversion code that follows treats every $type other
  # than 'mzXML' as mzML, so carrying on would run an unknown file through
  # the mzML decoder instead of reporting a failure to the caller.
  exit 1;
}

# The header itself contains nothing to decode; pass it through unchanged.
print $head;

# Main conversion loop.  The rest of the input is read from <> in records
# delimited by the element that closes each block of encoded data, the
# base64 payload is replaced by plain text numbers, and everything else is
# copied through.  Records whose scan number lies outside $from .. $to are
# skipped entirely.  $type, $from, $to and $args come from earlier in the
# script.
if ( $type eq 'mzXML' ) {
  # One record per <peaks> element: everything up to and including '</peaks>'.
  $/ = '</peaks>';
  while ( <> ) {
    if ( s/(<peaks[^>]+>)(.*)$// ) {
      my ($tag, $data) = ($1, $2);
      s/\n$//;
      if ( /scan num="(\d+)"/ ) {
        next unless $1 >= $from and $1 <= $to;
      }
      else {
        die "cannot determine scan number";
      }

      say "$_$tag";

      # The record is not chomped, so the captured payload still ends with
      # the '</peaks>' terminator.  Remove it before decoding: its letters
      # and the slash are valid base64 characters and must not be decoded
      # as if they were data.
      $data =~ s{</peaks>\z}{};

      # The payload is big-endian ("f<*" does not work), so decode it with
      # an explicit big-endian template; this gives the same result on any
      # host.  Values are 32-bit floats unless the <peaks> tag declares
      # precision="64", in which case they are doubles.
      my $template = $tag =~ /\bprecision\s*=\s*["']64["']/ ? 'd>*' : 'f>*';
      my @spec = unpack($template, decode_base64($data));

      # Values come in pairs; print one tab-separated pair per line and
      # ignore an unpaired trailing value.
      foreach my $i (0 .. int(@spec / 2) - 1) {
        say join("\t", @spec[2*$i .. 2*$i+1]);
      }

      print "</peaks>";
    }
    else {
      # No <peaks> element in this record (e.g. the tail of the file).
      print;
    }
  }
} # mzXML

else { # mzML
  # unpack templates for the number sizes named in a binaryDataArray's cvParam
  my %template_for = ( '32-bit' => 'f<', '64-bit' => 'd<' );

  # One record per <binaryDataArrayList> element.
  $/ = '</binaryDataArrayList>';
  while ( <> ) {
    # chomp returns the number of characters it removed, i.e. non-zero when
    # this record really ended with the closing tag (the tail of the file
    # does not).
    my $terminated = chomp;

    if ( /<spectrum[^>]+scan=(\d+)/s ) {
      next unless $1 >= $from and $1 <= $to;
    }

    if ( s/^(.*<binaryDataArrayList\s+count\s*=\s*"(\d+)">)(.+)$//s ) {
      my ($tag, $arrays) = ($1, $3);
      print "$_$tag";

      foreach my $chunk ( split m{</binaryDataArray>\s*}, $arrays ) {
        if ( $chunk =~ m{^(.*<binaryDataArray.+<binary>)(.*)(</binary>.*)$}s ) {
          my ($open, $data, $close) = ($1, $2, $3);

          # The number size is taken from the first cvParam whose name
          # starts with "NN-bit".
          my ($size) = ( $open =~ /<cvParam.+name="(\d\d-bit)/ );

          say $open;

          my $template = defined $size ? $template_for{$size} : undef;
          die "unknown number size: " . ($size // '') unless defined $template;

          my @list = unpack("$template*", decode_base64($data));

          if ($args->{-h}) {
            # Hex mode: each line shows the bytes of the value, then a tab,
            # then the value.  The bytes are packed with the same
            # little-endian template that was used to decode them.
            foreach my $value (@list) {
              my $hex = join(" ", map { sprintf "%2.2x", $_ } unpack("C*", pack $template, $value));
              say "$hex\t$value";
            }
          }
          else {
            say join "\n", @list;
          }

          print "$close</binaryDataArray>\n            ";
        }
        else {
          say STDERR "<binaryDataArray>...<binary> not matched in: " . substr($chunk, 0, 500) . "...\n";
        }
      }
      print '</binaryDataArrayList>';
    }
    else {
      print;
      # Put back the closing tag that chomp removed, so that a record which
      # is merely passed through is not left without it.
      print '</binaryDataArrayList>' if $terminated;
    }
  }
}

__END__
=head1 NAME

mzxml-unpack - decode the base64-encoded scan data in an mzXML or mzML file

=head1 SYNOPSIS

mzxml-unpack [options] <file>

 Options:

  -r[ange] <from:0+n> .. <to:0+n>  write only scans with numbers between <from> and <to>
  -hex                             also print the bytes of each decoded value in hex (mzML input only)
  <file>                           input file



( run in 0.493 second using v1.01-cache-2.11-cpan-71847e10f99 )