Bio-NEXUS
view release on metacpan or search on metacpan
exec/nexfix.pl view on Meta::CPAN
# > OTU names that contain NEXUS punctuation (i.e., (){}/\,;:=*'"`+-<>) need #
# to be single-quoted. Some taxa in our files are hyphenated. nexfix #
# currently checks the following commands for OTU names: TAXLABELS, MATRIX, #
# TAXSET. It does not check the ADD command in the Span Block. #
# #
################################################################################
# Usage: #######################################################################
# #
# nexfix.pl [-ckP] filename [filenames] #
# #
# -c : clobber the old files with the new ones (don't rename the old ones) #
# -k : keep the old files intact, renaming the new ones (default is to #
# rename the old ones, and write the new files using the old name) #
# -P : do not move non-compliant files to a /problematic/ directory--leave #
# them where they are #
# -v : send error messages to screen instead of error.log file #
# #
################################################################################
my $RCSId = '$Id: nexfix.pl,v 1.11 2007/02/01 04:52:09 vivek Exp $';
my $shortname = join (" v", $RCSId =~ m/(\w+.?\w+,)v (\d+\.\d+)/);
use strict;
use Data::Dumper;
use Bio::NEXUS;
use File::Copy;
use File::Path;
use Getopt::Std;
# read in the command-line options, if any; see Usage above for details
my %flags;
getopts('ckPv', \%flags) or die "ERROR: Unknown options\n";
# get list of NEXUS files to process
my @paths = @ARGV;
my $punctuation_pattern = '[\(\)\{\}\/\\,;:=\*\'"`\+\-<>]';
# loop through them
for my $path (@paths) {
# verify that $path is a file
unless (-e $path) {warn "'$path' is not a valid filename: skipping\n"; next };
print "Processing: <$path>\n";
# split the path into directory and filename components
my ($directory, $filename) = $path =~ /(.*?)([^\/]+)$/;
unless ($flags{v}) {
open(STDERR, ">> ./$directory/error.log") || die "Couldn't open error log in $directory\n";
}
# get the time stamp, so it can be reported later
my $time = localtime time;
# print STDERR "$shortname run on $time\n";
# slurp in the entire NEXUS file
my $nexus_text = do {local(@ARGV, $/) = $path; <>};
# move the old file to filename.old unless the user has specified
# overwriting (-c) or keeping the original file intact (-k)
move($path, "$path.old") unless ($flags{c} || $flags{k});
# use an alternate filename for the new file if the user specifies keeping the original file
if ($flags{k}) {$path = "$path.new";}
# split the file into its commands (and their arguments)
my @commands = split(/;/, $nexus_text);
# loop through them
my $within_history_block = 0;
for my $command (@commands) {
# remove previous comments that this program inserted
$command =~ s/\[This file was checked by nexfix\.pl, v\d+\.\d+ on \w+ \w+\s+\d{1,2}\s+\d{1,2}:\d{2}:\d{2} \d{4}\]\n//;
# if it's the beginning of the file, insert a comment documenting this processing
$command =~ s/(#NEXUS)/$1\n\[This file was checked by $shortname on $time\]/;
$within_history_block = 0 if ($within_history_block and $command =~/(end|endblock)/i);
# match command, capture the arguments
if ($command =~ /^\s*charlabels\s+(.+?)\s*$/si) {
$command = &charlabels($1);
} elsif ($command =~ /^\s*taxlabels\s+(.+?)\s*$/si) {
$command = &taxlabels($1);
} elsif ($command =~ /^\s*matrix\s+(.+?)\s*$/si) {
$command = &matrix($1);
$command = &history_matrix($command) if ($within_history_block);
$command = "\n\tMATRIX\n" . $command;
} elsif ($command =~ /^\s*taxset\s+(.+?)\s+=(.+?)\s*$/si) {
$command = "TAXSET $1 = " . &taxset($2) . "\n";
} elsif ($command =~ /^\s*tree\s+(.+?)\s+=(.+?)\s*$/si) {
$command = "\nTREE $1 = " . &tree($2) . "\n";
} elsif ($command =~ /\s*Begin\s+History/si) {
$within_history_block = 1;
} elsif ($command =~ /\s*format /si and $within_history_block) {
$command .= " statesformat=frequency" if $command !~ /statesformat/;
} #!!!!!! TO ADD MORE ERROR-CHECKING FUNCTUNALITY, PUT AN ELSIF HERE !!!!!!
}
# join the commands back together
$nexus_text = join(";", @commands);
# open a FH, write out the altered text
open( my $fh, ">$path" ) || die "Can't create $path $!" ;
print $fh $nexus_text;
close $fh;
# to make sure that the new file is a well-formed NEXUS file, read it in and
# write it back out with NEXPL. This is done using a system call so that
# die commands within NEXPL do not kill this process.
if ((my $retval = system("perl -MBio::NEXUS -e 'new Bio::NEXUS(\"$path\")->write(\"$path\")'")) == 0) {
# system command finished properly
warn "File: <$path> has been validated and written.\n";
} elsif ( $retval == 2 ) {
# if the system call returned a value of 2, it's because the process was
# interrupted by a SIGINT (such as Ctrl-C)
warn "Processing of File: <$path> interrupted; file written but not validated\n";
print "\n$shortname interrupted by SIGINT\n" unless $flags{v};
die "$shortname interrupted by SIGINT\n";
( run in 0.761 second using v1.01-cache-2.11-cpan-5a3173703d6 )