Bio-AGP-LowLevel
view release on metacpan or search on metacpan
lib/Bio/AGP/LowLevel.pm view on Meta::CPAN
next unless @fields >= 5 && @fields <= 10;
my %r = (linenum => $.); #< the record we're building for this line, starting with line number
#parse and check the first 5 cols
@r{qw( objname ostart oend partnum type )} = splice @fields,0,5;
$r{objname}
or parse_error "'$r{obj_name}' is a valid object name";
#end
if ( defined $last_end && defined $last_objname && $r{objname} eq $last_objname ) {
$r{ostart} == $last_end+1
or parse_error "start coordinate not contiguous with previous line's end";
}
$last_end = $r{oend};
$last_objname = $r{objname};
#start
$r{oend} >= $r{ostart} or parse_error("end must be >= start");
#part num
$last_partnum ||= 0;
$r{partnum} == $last_partnum + 1
or parse_error("part numbers not sequential");
$last_partnum = $r{partnum};
#type
if ( $r{type} =~ /^[NU]$/ ) {
(@r{qw( length gap_type linkage)}, my $empty, my $undefined) = @fields;
@fields = ();
my %descmap = qw/ U unknown_gap N known_gap /;
$r{typedesc} = $descmap{$r{type}}
or parse_error("unregistered type $r{type}");
$r{is_gap} = 1;
my $gap_size_to_use = $opt{gap_length} || $r{length};
$r{length} == $r{oend} - $r{ostart} + 1
or parse_error("gap size of '$r{length}' does not agree with ostart, oend of ($r{ostart},$r{oend})");
str_in($r{gap_type},qw/fragment clone contig centromere short_arm heterochromatin telomere repeat/)
or parse_error("invalid gap type '$r{gap_type}'");
str_in($r{linkage},qw/yes no/)
or parse_error("linkage (column 8) should be 'yes' or 'no'\n");
defined $empty && $empty eq ''
or parse_error("9th column should be present and empty\n");
push @records,\%r;
} elsif ( $r{type} =~ /^[ADFGOPW]$/ ) {
my %descmap = qw/A active_finishing D draft F finished G wgs_finishing N known_gap O other P predraft U unknown_gap W wgs_contig/;
$r{typedesc} = $descmap{$r{type}}
or parse_error("unregistered type $r{type}");
$r{is_gap} = 0;
@r{qw(ident cstart cend orient)} = @fields;
if($opt{validate_identifiers}) {
my $comp_type = identifier_namespace($r{ident})
or parse_error("cannot guess type of '$r{ident}'");
} else {
$r{ident} or parse_error("invalid identifier '$r{ident}'");
}
str_in($r{orient},qw/+ - 0 na/)
or parse_error("orientation must be one of +,-,0,na");
$r{cstart} >= 1 && $r{cend} > $r{cstart}
or parse_error("invalid component start and/or end ($r{cstart},$r{cend})");
$r{length} = $r{cend}-$r{cstart}+1;
$r{length} == $r{oend} - $r{ostart} + 1
or parse_error("distance between object start, end ($r{ostart},$r{oend}) does not agree with distance between component start, end ($r{cstart},$r{cend})");
push @records, \%r;
} else {
parse_error("invalid component type '$r{type}', it should be one of {A D F G N O P U W}");
}
}
return if $parse_error_flag;
#otherwise, everything was well
return \@records;
}
=head2 agp_write
Usage: agp_write($lines,$file);
Desc : writes a properly formatted AGP file
Args : arrayref of line records to write, with the line records being
in the same format as those returned by agp_parse above,
filename or filehandle to write to,
Ret : nothing meaningful
Side Effects: dies on failure. if you gave it a filehandle, does
not close it
Example:
=cut
sub agp_write {
my ($lines,$file) = @_;
$file or confess "must provide file to write to!\n";
my $out_fh = is_filehandle($file) ? $file
: do {
open my $f,">$file" or croak "$! opening '$file' for writing";
$f
};
foreach my $line (@$lines) {
print $out_fh agp_format_part( $line );
}
return;
}
( run in 0.530 second using v1.01-cache-2.11-cpan-f56aa216473 )