Bio-Polloc
view release on metacpan or search on metacpan
scripts/polloc_vntrs.pl view on Meta::CPAN
print SUM "ID: Group-".$group->name."\nLoci (".($#{$group->loci}+1)."):";
print SUM " ", $_->id."(".$_->strand.")" for @{$group->loci};
print SUM "\nLoci length average: $len\nLoci length standard deviation: $sd\n";
print SUM "\nUpstream alignment:\n";
my $alnO = Bio::AlignIO->new(-fh => \*SUM, -format=>'clustalw');
$alnO->write_aln($left_aln) if defined $left_aln;
print SUM "\nDownstream alignment:\n";
$alnO->write_aln($right_aln) if defined $right_aln;
print SUM "\nLocus alignment:\n";
$alnO->write_aln($within_aln) if defined $within_aln;
print SUM "\nDistance matrix:\n\t";
print SUM $_->id, "\t" for @{$group->loci};
print SUM "\n";
for my $i (0 .. $#{$group->loci}){
print SUM $group->loci->[$i]->id;
for my $j (0 .. $i){
print SUM "\t", $group->loci->[$i]->distance(-locus=>$group->loci->[$j]);
}
print SUM "\n";
}
close SUM;
}
$procGroups++;
print GCSV "\n";
}
close GCSV;
close GLIST;
}
&_advance_proto("$csv.done","done");
# ------------------------------------------------- SUB-ROUTINES
sub advance_detection($$$$){
my($loci, $gF, $gN, $rk) = @_;
our $out;
&_advance_proto("$out.nfeats", $loci);
&_advance_proto("$out.nseqs", "$gF/$gN");
}
sub advance_group($$$){
my($i,$j,$n) = @_;
our $out;
&_advance_proto("$out.ngroups", $i+1);
}
sub advance_extension($$){
my($i, $n) = @_;
our $out;
&_advance_proto("$out.next", "$i/$n");
}
sub _advance_proto($$) {
my($file, $msg) = @_;
open ADV, ">", $file or die "I can not open the '$file' file: $!\n";
print ADV $msg;
close ADV;
}
sub csv_header() {
return "ID\tGenome\tSeq\tFrom\tTo\tUnit length\tCopy number\tMismatch percent\tScore\t".
"Left 500bp\tRight 500bp (rc)\tRepeats\tConsensus/Notes\n";
}
sub csv_line($$) {
my $f = shift;
my $n = shift;
$n||= '';
my $left = $f->seq->subseq(max(1, $f->from-500), $f->from);
my $right = Bio::Seq->new(-seq=>$f->seq->subseq($f->to, min($f->seq->length, $f->to+500)));
$right = $right->revcom->seq;
my $seq;
$seq = $f->repeats if $f->can('repeats');
$seq = $f->seq->subseq($f->from, $f->to) unless defined $seq;
if(defined $seq and $f->strand eq '-'){
my $seqI = Bio::Seq->new(-seq=>$seq);
$seq = $seqI->revcom->seq;
}
my $notes = '';
if($f->can('consensus')){
$notes = $f->consensus;
}else{
$notes = $f->comments if $f->type eq 'extend';
$notes =~ s/\s*Extended feature\s*/ /i; # <- not really clean, but works ;)
}
$notes =~ s/[\n\r]+/ /g;
return sprintf(
"\%s\t\%s\t\%s\t\%d\t\%d\t%.2f\t%.2f\t%.0f%%\t%.2f\t\%s\t\%s\t\%s\t\%s\n",
(defined $f->id ? $f->id : ''), $n, $f->seq->display_id,
$f->from, $f->to, ($f->can('period') ? $f->period : 0),
($f->can('exponent') ? $f->exponent : 0),
($f->can('error') ? $f->error : 0), $f->score,
$left, $right, $seq, $notes);
}
close CSV;
__END__
=pod
=head1 AUTHOR
Luis M. Rodriguez-R < lmrodriguezr at gmail dot com >
=head1 DESCRIPTION
This script is the core of the VNTRs analysis tool
(L<http://bioinfo-prod.mpl.ird.fr/xantho/utils/#vntrs>). It requires the C<vntrs.bme>
file, at the C<examples> folder. Run it with no arguments to check the required parameters.
=head1 LICENSE
This script is distributed under the terms of
I<The Artistic License>. See LICENSE.txt for details.
=head1 SYNOPSIS
C<perl polloc_vntrs.pl> B<arguments>
The arguments must be in the following order:
( run in 0.294 second using v1.01-cache-2.11-cpan-483215c6ad5 )