Bio-AGP-LowLevel

 view release on metacpan or  search on metacpan

lib/Bio/AGP/LowLevel.pm  view on Meta::CPAN

    next unless @fields >= 5 && @fields <= 10;

    my %r = (linenum => $.); #< the record we're building for this line, starting with line number

    #parse and check the first 5 cols
    @r{qw( objname ostart oend partnum type )} = splice @fields,0,5;
    $r{objname}
      or parse_error "'$r{obj_name}' is a valid object name";
    #end
    if ( defined $last_end && defined $last_objname && $r{objname} eq $last_objname ) {
      $r{ostart} == $last_end+1
	or parse_error "start coordinate not contiguous with previous line's end";
    }
    $last_end = $r{oend};
    $last_objname = $r{objname};

    #start
    $r{oend} >= $r{ostart} or parse_error("end must be >= start");

    #part num
    $last_partnum ||= 0;
    $r{partnum} == $last_partnum + 1
      or parse_error("part numbers not sequential");

    $last_partnum = $r{partnum};

    #type
    if ( $r{type} =~ /^[NU]$/ ) {
      (@r{qw( length gap_type linkage)}, my $empty, my $undefined) = @fields;
      @fields = ();
      my %descmap = qw/ U unknown_gap N known_gap /;
      $r{typedesc} = $descmap{$r{type}}
	or parse_error("unregistered type $r{type}");
      $r{is_gap}   = 1;

      my $gap_size_to_use = $opt{gap_length} || $r{length};

      $r{length} == $r{oend} - $r{ostart} + 1
	or parse_error("gap size of '$r{length}' does not agree with ostart, oend of ($r{ostart},$r{oend})");

      str_in($r{gap_type},qw/fragment clone contig centromere short_arm heterochromatin telomere repeat/)
	or parse_error("invalid gap type '$r{gap_type}'");

      str_in($r{linkage},qw/yes no/)
	or parse_error("linkage (column 8) should be 'yes' or 'no'\n");

      defined $empty && $empty eq ''
	or parse_error("9th column should be present and empty\n");

      push @records,\%r;

  } elsif ( $r{type} =~ /^[ADFGOPW]$/ ) {
      my %descmap = qw/A active_finishing D draft F finished G wgs_finishing N known_gap O other P predraft U unknown_gap W wgs_contig/;
      $r{typedesc} = $descmap{$r{type}}
	or parse_error("unregistered type $r{type}");
      $r{is_gap} = 0;

      @r{qw(ident cstart cend orient)} = @fields;
      if($opt{validate_identifiers}) {
	my $comp_type = identifier_namespace($r{ident})
	  or parse_error("cannot guess type of '$r{ident}'");
      } else {
	$r{ident} or parse_error("invalid identifier '$r{ident}'");
      }

      str_in($r{orient},qw/+ - 0 na/)
	or parse_error("orientation must be one of +,-,0,na");

      $r{cstart} >= 1 && $r{cend} > $r{cstart}
	or parse_error("invalid component start and/or end ($r{cstart},$r{cend})");

      $r{length} = $r{cend}-$r{cstart}+1;

      $r{length} == $r{oend} - $r{ostart} + 1
	or parse_error("distance between object start, end ($r{ostart},$r{oend}) does not agree with distance between component start, end ($r{cstart},$r{cend})");

      push @records, \%r;
    } else {
      parse_error("invalid component type '$r{type}', it should be one of {A D F G N O P U W}");
    }
  }

  return if $parse_error_flag;

  #otherwise, everything was well
  return \@records;
}


=head2 agp_write

  Usage: agp_write($lines,$file);
  Desc : writes a properly formatted AGP file
  Args : arrayref of line records to write, with the line records being
             in the same format as those returned by agp_parse above,
         filename or filehandle to write to,
  Ret :  nothing meaningful

  Side Effects: dies on failure.  if you gave it a filehandle, does
                not close it
  Example:

=cut

sub agp_write {
  my ($lines,$file) = @_;
  $file or confess "must provide file to write to!\n";

  my $out_fh = is_filehandle($file) ? $file
    : do {
      open my $f,">$file" or croak "$! opening '$file' for writing";
      $f
    };

  foreach my $line (@$lines) {
      print $out_fh agp_format_part( $line );
  }

  return;
}



( run in 0.530 second using v1.01-cache-2.11-cpan-f56aa216473 )