GBrowse

 view release on metacpan or  search on metacpan

bin/gtf2gff3.pl  view on Meta::CPAN

			if (! defined $feature->{score}) {
				$feature->{score} = '.';
			}
			if (! defined $feature->{phase}) {
				$feature->{phase} = '.';
			}
			#Set attributes
			$feature->{attributes} =
			{parent => $trnsc_id,
			 id     => ["$feature_type:" . $trnsc_id->[0] .
				    ":" . ++$count]};
		}
	}

	my $trnsc_type = $coding_flag ? 'mRNA' : 'transcript';

	my $attributes = {parent      => $gene_id,
			  parent_name => $gene_name,
			  id          => $trnsc_id,
			  name        => $trnsc_name};

	my $trnsc = {seq_id     => $seq_id,
		     source     => $source,
		     type       => $trnsc_type,
		     start      => $min,
		     end        => $max,
		     score      => '.',
		     strand     => $strand,
		     phase      => '.',
		     attributes => $attributes,
		     features   => $features};

	return $trnsc;
}
#-----------------------------------------------------------------------------
sub CDS_phase {
	my ($feature, $next_phase) = @_;

	#If phase isn't already valid assign it
	if (! defined $feature->{phase} ||
	    $feature->{phase} !~ /^0|1|2$/) {
		$feature->{phase} = $next_phase;
	}
	#If phase is valid, calculate the next phase
	if ($feature->{phase} =~ /^0|1|2$/) {
		my $length = ($feature->{end} -
			      $feature->{start}) + 1;
		# my $hang_3 = $length % 3; # 3' overhang
		# my $hang_5 = 3 - $hang_3; # 5' overhang
		#
		# #The next phase is equal to this phase
		# #plus the modulus 3 of the length wrapped
		# #at 2.
		# $next_phase = $feature->{phase} + $hang_5;
		# $next_phase -= 3 if $next_phase > 2;

		# This was update 5/24/10 in response to an
		# e-mail from Leighton Prichard regarding
		# errors in the GFF3 spec.  The code above
		# calculates the phase correctly, but the
		# formula suggested by Leighton is cleaner.

		$next_phase = ($feature->{phase} - $length) % 3;

	}

	return ($feature, $next_phase);
}
#-----------------------------------------------------------------------------
sub validate_and_build_gene {
	my $trnscs = shift;

	#Get parameter defaults
	my $seq_id     = $trnscs->[0]{seq_id};
	my $source     = $trnscs->[0]{source};
	my $strand     = $trnscs->[0]{strand};
	my $gene_id    = $trnscs->[0]{attributes}{parent};
	my $gene_name  = $trnscs->[0]{attributes}{parent_name};

	#Get gene boundaries and check all transcripts for agreement with
	#parameter defaults
	my ($min, $max);
	for my $trnsc (@{$trnscs}) {
		$min = ! defined $min      ?
		    $trnsc->{start}        :
		    $min > $trnsc->{start} ?
		    $trnsc->{start}        :
		    $min;

		$max = ! defined $max      ?
		    $trnsc->{end}          :
		    $max < $trnsc->{end}   ?
		    $trnsc->{end}          :
		    $max;

		print STDERR "ERROR: seq_id conflict: " .
		  "validate_and_build_gene\n" if $seq_id ne $trnsc->{seq_id};
		print STDERR "ERROR: sourc`e conflict: " .
		  "validate_and_build_gene\n" if $source ne $trnsc->{source};
		print STDERR "ERROR: strand conflict: " .
		  "validate_and_build_gene\n" if $strand ne $trnsc->{strand};
		print STDERR "ERROR: gene_id conflict: " .
		  "validate_and_build_gene\n" if $gene_id->[0] ne 
		    $trnsc->{attributes}{parent}[0];

	}
	my $attributes = {id   => $gene_id,
			  name => $gene_name};

	my $gene = {seq_id     => $seq_id,
		    source     => $source,
		    type       => 'gene',
		    start      => $min,
		    end        => $max,
		    score      => '.',
		    strand     => $strand,
		    phase      => '.',
		    attributes => $attributes,
		    trnscs     => $trnscs};

	return $gene;



( run in 0.463 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )