Bio-Polloc

 view release on metacpan or  search on metacpan

scripts/polloc_vntrs.pl  view on Meta::CPAN

	print SUM "ID: Group-".$group->name."\nLoci (".($#{$group->loci}+1)."):";
	print SUM " ", $_->id."(".$_->strand.")" for @{$group->loci};
	print SUM "\nLoci length average: $len\nLoci length standard deviation: $sd\n";
	print SUM "\nUpstream alignment:\n";
	my $alnO = Bio::AlignIO->new(-fh => \*SUM, -format=>'clustalw');
	$alnO->write_aln($left_aln) if defined $left_aln;
	print SUM "\nDownstream alignment:\n";
	$alnO->write_aln($right_aln) if defined $right_aln;
	print SUM "\nLocus alignment:\n";
	$alnO->write_aln($within_aln) if defined $within_aln;
	print SUM "\nDistance matrix:\n\t";
	print SUM $_->id, "\t" for @{$group->loci};
	print SUM "\n";
	for my $i (0 .. $#{$group->loci}){
	   print SUM $group->loci->[$i]->id;
	   for my $j (0 .. $i){
	      print SUM "\t", $group->loci->[$i]->distance(-locus=>$group->loci->[$j]);
	   }
	   print SUM "\n";
	}
	close SUM;
     }
     $procGroups++;
     print GCSV "\n";
  }
  close GCSV;
  close GLIST;
}

&_advance_proto("$csv.done","done");


# ------------------------------------------------- SUB-ROUTINES
sub advance_detection($$$$){
   my($loci, $gF, $gN, $rk) = @_;
   our $out;
   &_advance_proto("$out.nfeats", $loci);
   &_advance_proto("$out.nseqs", "$gF/$gN");
}

sub advance_group($$$){
   my($i,$j,$n) = @_;
   our $out;
   &_advance_proto("$out.ngroups", $i+1);
}

sub advance_extension($$){
   my($i, $n) = @_;
   our $out;
   &_advance_proto("$out.next", "$i/$n");
}

sub _advance_proto($$) {
   my($file, $msg) = @_;
   open ADV, ">", $file or die "I can not open the '$file' file: $!\n";
   print ADV $msg;
   close ADV;
}

sub csv_header() {
   return "ID\tGenome\tSeq\tFrom\tTo\tUnit length\tCopy number\tMismatch percent\tScore\t".
		"Left 500bp\tRight 500bp (rc)\tRepeats\tConsensus/Notes\n";
}
sub csv_line($$) {
   my $f = shift;
   my $n = shift;
   $n||= '';
   my $left = $f->seq->subseq(max(1, $f->from-500), $f->from);
   my $right = Bio::Seq->new(-seq=>$f->seq->subseq($f->to, min($f->seq->length, $f->to+500)));
   $right = $right->revcom->seq;
   my $seq;
   $seq = $f->repeats if $f->can('repeats');
   $seq = $f->seq->subseq($f->from, $f->to) unless defined $seq;
   if(defined $seq and $f->strand eq '-'){
      my $seqI = Bio::Seq->new(-seq=>$seq);
      $seq = $seqI->revcom->seq;
   }
   my $notes = '';
   if($f->can('consensus')){
      $notes = $f->consensus;
   }else{
      $notes = $f->comments if $f->type eq 'extend';
      $notes =~ s/\s*Extended feature\s*/ /i; # <- not really clean, but works ;)
   }
   $notes =~ s/[\n\r]+/ /g;
   return sprintf(
   		"\%s\t\%s\t\%s\t\%d\t\%d\t%.2f\t%.2f\t%.0f%%\t%.2f\t\%s\t\%s\t\%s\t\%s\n",
   		(defined $f->id ? $f->id : ''), $n, $f->seq->display_id,
		$f->from, $f->to, ($f->can('period') ? $f->period : 0),
		($f->can('exponent') ? $f->exponent : 0),
		($f->can('error') ? $f->error : 0), $f->score,
   		$left, $right, $seq, $notes);
}

close CSV;

__END__

=pod

=head1 AUTHOR

Luis M. Rodriguez-R < lmrodriguezr at gmail dot com >

=head1 DESCRIPTION

This script is the core of the VNTRs analysis tool
(L<http://bioinfo-prod.mpl.ird.fr/xantho/utils/#vntrs>).  It requires the C<vntrs.bme>
file, at the C<examples> folder.  Run it with no arguments to check the required parameters.

=head1 LICENSE

This script is distributed under the terms of
I<The Artistic License>.  See LICENSE.txt for details.

=head1 SYNOPSIS

C<perl polloc_vntrs.pl> B<arguments>

The arguments must be in the following order:



( run in 0.294 second using v1.01-cache-2.11-cpan-483215c6ad5 )