GBrowse
view release on metacpan or search on metacpan
bin/gtf2gff3.pl view on Meta::CPAN
if (! defined $feature->{score}) {
$feature->{score} = '.';
}
if (! defined $feature->{phase}) {
$feature->{phase} = '.';
}
#Set attributes
$feature->{attributes} =
{parent => $trnsc_id,
id => ["$feature_type:" . $trnsc_id->[0] .
":" . ++$count]};
}
}
my $trnsc_type = $coding_flag ? 'mRNA' : 'transcript';
my $attributes = {parent => $gene_id,
parent_name => $gene_name,
id => $trnsc_id,
name => $trnsc_name};
my $trnsc = {seq_id => $seq_id,
source => $source,
type => $trnsc_type,
start => $min,
end => $max,
score => '.',
strand => $strand,
phase => '.',
attributes => $attributes,
features => $features};
return $trnsc;
}
#-----------------------------------------------------------------------------
sub CDS_phase {
my ($feature, $next_phase) = @_;
#If phase isn't already valid assign it
if (! defined $feature->{phase} ||
$feature->{phase} !~ /^0|1|2$/) {
$feature->{phase} = $next_phase;
}
#If phase is valid, calculate the next phase
if ($feature->{phase} =~ /^0|1|2$/) {
my $length = ($feature->{end} -
$feature->{start}) + 1;
# my $hang_3 = $length % 3; # 3' overhang
# my $hang_5 = 3 - $hang_3; # 5' overhang
#
# #The next phase is equal to this phase
# #plus the modulus 3 of the length wrapped
# #at 2.
# $next_phase = $feature->{phase} + $hang_5;
# $next_phase -= 3 if $next_phase > 2;
# This was update 5/24/10 in response to an
# e-mail from Leighton Prichard regarding
# errors in the GFF3 spec. The code above
# calculates the phase correctly, but the
# formula suggested by Leighton is cleaner.
$next_phase = ($feature->{phase} - $length) % 3;
}
return ($feature, $next_phase);
}
#-----------------------------------------------------------------------------
sub validate_and_build_gene {
my $trnscs = shift;
#Get parameter defaults
my $seq_id = $trnscs->[0]{seq_id};
my $source = $trnscs->[0]{source};
my $strand = $trnscs->[0]{strand};
my $gene_id = $trnscs->[0]{attributes}{parent};
my $gene_name = $trnscs->[0]{attributes}{parent_name};
#Get gene boundaries and check all transcripts for agreement with
#parameter defaults
my ($min, $max);
for my $trnsc (@{$trnscs}) {
$min = ! defined $min ?
$trnsc->{start} :
$min > $trnsc->{start} ?
$trnsc->{start} :
$min;
$max = ! defined $max ?
$trnsc->{end} :
$max < $trnsc->{end} ?
$trnsc->{end} :
$max;
print STDERR "ERROR: seq_id conflict: " .
"validate_and_build_gene\n" if $seq_id ne $trnsc->{seq_id};
print STDERR "ERROR: sourc`e conflict: " .
"validate_and_build_gene\n" if $source ne $trnsc->{source};
print STDERR "ERROR: strand conflict: " .
"validate_and_build_gene\n" if $strand ne $trnsc->{strand};
print STDERR "ERROR: gene_id conflict: " .
"validate_and_build_gene\n" if $gene_id->[0] ne
$trnsc->{attributes}{parent}[0];
}
my $attributes = {id => $gene_id,
name => $gene_name};
my $gene = {seq_id => $seq_id,
source => $source,
type => 'gene',
start => $min,
end => $max,
score => '.',
strand => $strand,
phase => '.',
attributes => $attributes,
trnscs => $trnscs};
return $gene;
( run in 0.463 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )