Bio-ToolBox

 view release on metacpan or  search on metacpan

lib/Bio/ToolBox/Parser/ucsc/builder.pm  view on Meta::CPAN

	if ( $self->gene_name ne $self->name2 ) {
		$transcript->add_tag_value( 'Alias', $self->gene_name );
	}

	# adjust the primary_tag and biotype values as necessary
	if ( exists $ensembldata->{ $self->name } ) {
		my $t = $ensembldata->{ $self->name }->[1] || q();
		if ( $t =~ /protein.coding/xi ) {
			$transcript->primary_tag('mRNA');
			$transcript->add_tag_value( 'biotype', $t );
		}
		elsif ( $t =~ /(?: rna | transcript )/xi ) {
			$transcript->primary_tag($t);    # this is redundant????
			$transcript->add_tag_value( 'biotype', $t );
		}
		elsif ($t) {
			$transcript->primary_tag('transcript');
			$transcript->add_tag_value( 'biotype', $t );
		}
	}

	# update extra attributes as necessary
	$self->update_attributes($transcript);

	# add transcript specific attributes
	if ( defined $self->completeness ) {
		$transcript->add_tag_value( 'completeness', $self->completeness );
	}
	if ( defined $self->status ) {
		$transcript->add_tag_value( 'status', $self->status );
	}

	# add the exons
	if ( $ucsc->do_exon ) {
		$self->add_exons( $transcript, $gene );
	}

	# add CDS, UTRs, and codons if necessary
	if ( $self->cdsStart - 1 != $self->cdsEnd ) {

		if ( $ucsc->do_utr ) {
			$self->add_utrs( $transcript, $gene );
		}

		if ( $ucsc->do_codon ) {
			$self->add_codons( $transcript, $gene );
		}

		if ( $ucsc->do_cds ) {
			$self->add_cds($transcript);
		}
	}

	# record the type of transcript
	$ucsc->{counts}{ $transcript->primary_tag } += 1;

	# transcript is complete
	return $transcript;
}

sub update_attributes {
	my ( $self, $seqf ) = @_;

	# Copy this record's optional annotation onto $seqf. Every value is
	# routed through add_unique_attribute(), so a value already stored
	# under the same tag is not added a second time.

	# descriptive note
	$self->add_unique_attribute( $seqf, 'Note', $self->note )
		if $self->note;

	# RefSeq identifier, recorded as a database cross-reference
	$self->add_unique_attribute( $seqf, 'Dbxref', 'RefSeq:' . $self->refseq )
		if $self->refseq;

	# SwissProt identifier, recorded as a database cross-reference
	$self->add_unique_attribute( $seqf, 'Dbxref', 'Swiss-Prot:' . $self->{spid} )
		if exists $self->{spid} and $self->{spid};

	# SwissProt display identifier, stored under its own tag
	$self->add_unique_attribute( $seqf, 'swiss-prot_display_id', $self->{spdid} )
		if exists $self->{spdid} and $self->{spdid};

	# NCBI protein accession, filed under the same RefSeq prefix
	$self->add_unique_attribute( $seqf, 'Dbxref', 'RefSeq:' . $self->{protacc} )
		if exists $self->{protacc} and $self->{protacc};
}

sub add_unique_attribute {
	my ( $self, $seqf, $tag, $value ) = @_;

	# Store $value under $tag on $seqf unless an identical value (string
	# comparison) is already recorded for that tag. $self is not consulted.

	# assume the value is new until a stored copy turns up
	my $is_new = 1;
	for my $existing ( $seqf->get_tag_values($tag) ) {
		next unless $existing eq $value;
		$is_new = 0;
		last;
	}

	# only values not yet present under this tag are written
	$seqf->add_tag_value( $tag, $value ) if $is_new;
}

sub add_exons {
	my ( $self, $transcript, $gene ) = @_;
	my $ucsc = $self->ucsc;

	# Add the exons
EXON_LOOP:
	for ( my $i = 0; $i < $self->exonCount; $i++ ) {

		# first look for existing
		if ( $ucsc->share and $gene ) {
			my $exon = $self->find_existing_subfeature(
				$gene, 'exon',
				$self->exonStarts->[$i],
				$self->exonEnds->[$i]
			);
			if ($exon) {



( run in 1.599 second using v1.01-cache-2.11-cpan-140bd7fdf52 )