Bio-Chaos

 view release on metacpan or  search on metacpan

bin/cx-enscore2chaos.pl  view on Meta::CPAN

			 type=>'chromosome',
			 seqlen=>$_->sget_length,
			]);
	 } values %chromh);
    my $gfid = $g->sget_gene_id."-gn";
    my $gfacc = $g->sget("gene_stable_id/stable_id");
    my $gfn = $g->sget("xref/display_label") || $gfacc;
    
    my $gC = 
      N(feature=>[feature_id=>$gfid,
		  uniquename=>$gfacc,
		  name=>$gfn,
		  dbxrefstr=>"ensembl:$gfacc",
		  type=>'gene',
		  featureloc=>[],
		 ]);
    push(@F, $gC);
    my $gfloc = $gC->sget_featureloc;

    my @tC = ();
    foreach my $t (@transcripts) {
	my @exons = $t->get("exon_transcript/exon");
	my $tfid = $t->sget_transcript_id."-tr";
	my $tC =
	  N(feature=>[feature_id=>$tfid,
		      uniquename=>$tfid,
		      type=>'mRNA',
		      featureloc=>[],
		     ]);
	push(@F, $tC);
	my $tfloc = $tC->sget_featureloc;

	my $translation = $t->sget_translation;
	my $tnid = $translation->get_translation_id . "-tn";

	# protein feature - we set floc later
	my $tnC =
	  N(feature=>[
		      feature_id=>$tnid,
		      uniquename=>$tnid,
		      type=>'protein',
		      featureloc=>[
				  ],
		     ]);
	my $tnfloc = $tnC->sget_featureloc;
	my ($tnstart, $tnend) = $translation->getl(qw(seq_start seq_end));
	push(@F, $tnC);
	push(@F,
	     N(feature_relationship=>[subject_id=>$tnid,
				      object_id=>$tfid,
				      type=>'produced_by',
				     ]));
	
	my @exC = ();
	my $mrnaseq = '';
	my $cdsseq = '';
	my $in_cds = 0;
	foreach my $exon (@exons) {
	    my ($min, $max, $strand, $contig_id) = 
	      $exon->getl(qw(contig_start contig_end contig_strand contig_id));
	    my $ctgid = $exon->sget_contig_id."-ct";
	    my ($nb, $ne) = bcmm2ibv($min, $max, $strand);
	    my $contig = $contigh{$contig_id};
	    my $dna = $contig->sget("dna/sequence");
	    my $exonseq = cutseq($dna, $nb, $ne);
	    $mrnaseq .= $exonseq;

	    # cds
	    if ($exon->sget_exon_id eq
		$translation->sget_start_exon_id) {
		$in_cds = 1;
		# start of CDS
		my $tnrelpos = $translation->sget_seq_start -1;
		$cdsseq =
		  cutseq($exonseq, 
			 $tnrelpos,
			 length($exonseq),
			 1);
		$tnfloc->set_nbeg($nb + $tnrelpos * $strand);
		$tnfloc->set_srcfeature_id($ctgid);
		if ($exon->sget_exon_id eq
		    $translation->sget_end_exon_id) {

		    $tnfloc->set_nend($nb + 
				      $translation->sget_seq_end * $strand);
		}
	    }
	    elsif ($exon->sget_exon_id eq
		   $translation->sget_end_exon_id) {
		$in_cds = 0;
		# end of CDS
		my $tnrelpos = $translation->sget_seq_end;
		$cdsseq .=
		  cutseq($exonseq, 
			 0,
			 $tnrelpos,
			 1);
		$tnfloc->set_nend($nb + $tnrelpos * $strand);
		if ($ctgid ne $tnfloc->sget_srcfeature_id) {
		    $tnC->add_featureprop(N(featureprop=>[type=>"problem",
							  value=>"CDS range split across contigs; also on $ctgid"]));
		}
	    }
	    else {
		# continuation of CDS
		$cdsseq .= $exonseq if $in_cds;
	    }
	    

#	    if ($strand == 1) {
#		if ($exon->sget_exon_id eq
#		    $translation->sget_start_exon_id) {
#		    $in_cds = 1;
#		    $cdsseq =
#		      cutseq($exonseq, 
#			     $translation->sget_seq_start -1,
#			     length($exonseq),
#			     1);
#		}
#		elsif ($exon->sget_exon_id eq
#		    $translation->sget_end_exon_id) {
#		    $in_cds = 0;
#		    $cdsseq .=
#		      cutseq($exonseq, 
#			     0,
#			     $translation->sget_seq_end,
#			     1);
#		}
#		else {
#		    $cdsseq .= $exonseq if $in_cds;
#		}
#	    }
#	    else {
#		if ($exon->sget_exon_id eq
#		    $translation->sget_end_exon_id) {
#		    $in_cds = 1;
#		    $cdsseq =
#		      cutseq($exonseq, 
#			     length($exonseq) - $translation->sget_seq_end,
#			     length($exonseq),
#			     1);
#		}
#		elsif ($exon->sget_exon_id eq
#		    $translation->sget_start_exon_id) {
#		    $in_cds = 0;
#		    $cdsseq .=
#		      cutseq($exonseq, 
#			     0,
#			     length($exonseq) - ($translation->sget_seq_start-1),
#			     1);
#		}
#		else {
#		    $cdsseq .= $exonseq if $in_cds;
#		}
#	    }

	    my $fid = $exon->sget_exon_id."-ex";
	    my $f =
	      N(feature=>[
			  feature_id=>$fid,
			  uniquename=>$fid,
			  type=>'exon',
			  featureloc=>[
				       nbeg=>$nb,
				       nend=>$ne,
				       strand=>$strand,
				       srcfeature_id=>$ctgid,
				      ]
			  ]);
	    push(@exC, $f);
	    push(@F, $f);
	    push(@F, 
		 N(feature_relationship=>[subject_id=>$fid,
					  object_id=>$tfid,
					  type=>'part_of',
					 ]));
	}
	$tC->set_residues($mrnaseq);
	$tnC->set_residues(translate($cdsseq)) if $cdsseq;
	$tnC->add_featureprop(N(featureprop=>[type=>"cdsseq",
					      value=>$cdsseq])) if $cdsseq;
	# transcript loc determined by exons
	$tfloc->set_nbeg($exC[0]->sget("featureloc/nbeg"));
	$tfloc->set_nend($exC[-1]->sget("featureloc/nend"));
	$tfloc->set_strand($exC[0]->sget("featureloc/strand"));
	$tfloc->set_srcfeature_id($exC[0]->sget("featureloc/srcfeature_id"));
	push(@tC, $tC);

	push(@F, 
	     N(feature_relationship=>[subject_id=>$tfid,
				      object_id=>$gfid,
				      type=>'part_of',
				     ]));
    }
    $gfloc->set_nbeg($tC[0]->sget("featureloc/nbeg"));
    $gfloc->set_nend($tC[-1]->sget("featureloc/nend"));
    $gfloc->set_strand($tC[0]->sget("featureloc/strand"));
    $gfloc->set_srcfeature_id($tC[0]->sget("featureloc/srcfeature_id"));

#    print $_->sxpr foreach @F;
    my $C =
      Bio::Chaos::ChaosGraph->new;
    my $S = Data::Stag->new(chaos=>[@F]);
    $C->init_from_stag($S);
    $C->name_all_features;
    $S = $C->stag;
    my $md = $S->sget_metadata;
    
    if (@gnames < 2) {
	print $S->xml;
    }
    else {
	$dstore->mkdir($gfacc);
	my $ndir = $dstore->id_to_dir($gfacc);
	print "ndir=$ndir\n";
	open(F, ">$ndir/$gfacc.chaos.xml");
	print F $S->xml;
	close(F);
    }
    
}
$dbh->disconnect;
print STDERR "DONE!\n";

sub N {
    Data::Stag->unflatten(@_);
}



( run in 1.943 second using v1.01-cache-2.11-cpan-d7f47b0818f )