App-lapidary
view release on metacpan or search on metacpan
script/lapidary view on Meta::CPAN
# Author: Samuel Bloomfield
###
use warnings;
use strict;
use Getopt::Long;
use List::MoreUtils qw(uniq);
use FindBin qw($RealBin);
use File::Basename;
use File::Spec::Functions qw(catfile);
use LWP::Simple;
use Archive::Extract;
# Bail out early when the script is started without any command-line
# arguments, pointing the user at --help instead of running with nothing set.
unless (@ARGV) {
    print "This script requires some arguments to run.\n",
          "use --help to see the options\n";
    exit;
}
# Directory holding this script (from FindBin) and the PATH inherited from
# the environment.
my ($script_location, $path_variable) = ($RealBin, $ENV{PATH});

# Options without a default: these stay undefined unless something assigns them.
my ($read_1, $db, $read_type, $help, $version);

# Options with a default value.
my $read_2                  = '';          # empty string by default
my $threads                 = 1;           # default to 1 thread
my $identity                = 70;          # default to 70% identity
my $coverage                = 50;          # default to 50% coverage
my $sequence_identification = 'identity';  # default calling method
# Make sure the directory holding this script is one of the PATH entries.
# The PATH is split on ":" and every entry is compared literally with the
# script location. Using the location as a bare regex pattern would treat
# characters such as "+" or "(" in a directory name as regex syntax, and a
# substring match would also accept a longer directory that merely contains
# the script location.
{
    # An unset PATH is treated as empty so no "uninitialized" warning is raised.
    my $current_path   = defined $path_variable ? $path_variable : '';
    my $already_listed = grep { $_ eq $script_location } split /:/, $current_path;
    unless ($already_listed) {
        # Add the script location to the PATH; when PATH was empty, avoid a
        # leading ":" (an empty PATH entry stands for the current directory).
        $ENV{'PATH'} = length($current_path)
            ? "$current_path:$script_location"
            : $script_location;
    }
}
# Parse the command line into the option variables declared above.
# All valued options use the ":" (optional value) specifier, so an option
# given without a value is set to "" (strings) or 0 (integers).
# GetOptions returns false after reporting an unknown option or a value of
# the wrong type; stop here in that case instead of running with a
# partially parsed command line.
GetOptions(
    'read_1:s'                  => \$read_1,
    'read_2:s'                  => \$read_2,
    'db:s'                      => \$db,
    'threads:i'                 => \$threads,
    'identity:i'                => \$identity,
    'coverage:i'                => \$coverage,
    'read_type:s'               => \$read_type,
    'sequence_identification:s' => \$sequence_identification,
    'help'                      => \$help,
    'version'                   => \$version,
) or die "Error in command line arguments\nuse --help to see the options\n";
# --help: show the usage text and stop.
# The text is emitted through die, so it goes to STDERR and the script ends
# with a non-zero exit status. The message ends in a newline, so no
# "at ... line N" suffix is appended.
if (defined $help) {
    # One entry per line of help text; joining on "\n" leaves a blank line
    # between consecutive entries in the output.
    my @help_lines = (
        "\n\nLapidary: a software for identifying amino acid sequences using sequenced reads\n\n",
        "Options:\n",
        "read_1\tLocation of first read file (required)\n",
        "read_2\tLocation of second read file if read files are paired\n",
        "db\tFull location to fasta file containing amino acid sequences (required)\n",
        "threads\tNumber of threads to use for Diamond (default: 1)\n",
        "identity\tDiamond identity percentage cut-off to use (default: 70)\n",
        "coverage\tDiamond coverage percentage cut-off to use (default: 50)\n",
        "read_type\tTypes of reads used (required): single or paired\n",
        "sequence_identification\tMethod for calling most likely sequence: identity (default) or consensus\n",
        "help\tDisplay help screen\n",
        "version\tReturn version of Lapidary\n\n",
    );
    die join("\n", @help_lines);
}
# --version: report the release number and stop.
# die is used, so the text goes to STDERR and the exit status is non-zero.
die "\n\nLapidary version 0.5.0\n\n" if defined $version;
# Make sure the diamond aligner is available, downloading it when it is not.
# Release tarball fetched when diamond is missing. The URL is https:// so the
# executable is not requested over an unencrypted connection.
# NOTE(review): https downloads through LWP need LWP::Protocol::https
# installed - confirm it is listed as a dependency.
my $diamond_url = 'https://github.com/bbuchfink/diamond/releases/download/v2.1.8/diamond-linux64.tar.gz';
# Check if diamond is in the PATH (`which` exits 0 only when it is found)
unless (system("which diamond >/dev/null 2>&1") == 0) {
    print "diamond not found in PATH. Downloading and installing...\n";
    # Get the filename from the URL
    my $filename = basename($diamond_url);
    # Both the downloaded archive and the extracted files go into the
    # directory that holds this script.
    my $download_path   = catfile($RealBin, $filename);
    my $extraction_path = $RealBin;
    # Download the diamond tar.gz file; getstore returns the HTTP status code
    my $status = getstore($diamond_url, $download_path);
    # Accept any successful (2xx) status rather than exactly 200
    unless (is_success($status)) {
        die "Failed to download diamond from $diamond_url: HTTP status $status";
    }
    # Extract the tar.gz file
    my $extractor = Archive::Extract->new(archive => $download_path);
    my $result    = $extractor->extract(to => $extraction_path);
    # Check if extraction was successful
    unless ($result) {
        die "Failed to extract diamond: " . $extractor->error;
    }
    # Print success message
    print "diamond has been downloaded and installed in $extraction_path\n";
}
#Create diamond database of protein sequences
my $db_name;
# --db is required; fail with a clear message instead of matching a regex
# against an undefined or empty value.
if (!defined $db || $db eq '') {
    die "No protein database given: --db is required (use --help to see the options)\n";
}
# The path must contain at least one directory component and an extension
# starting with ".f"; the database name is the part of the file name that
# precedes the first ".f".
if ($db =~ m{^.+/(.*?)\.f.*?$}) {
    $db_name = $1;
    # List-form system bypasses the shell, so spaces or shell metacharacters
    # in the path are passed to diamond unchanged.
    my $makedb_status = system('diamond', 'makedb', '--in', $db, '--db', $db_name);
    if ($makedb_status == -1) {
        # -1 means the program could not be started at all; $! says why
        die "Could not run diamond makedb: $!\n";
    }
    elsif ($makedb_status != 0) {
        die "diamond makedb failed for $db (wait status $makedb_status)\n";
    }
} else {
    # No system call failed here, so $! is deliberately left out of the message
    die "File $db not in fasta format (expected a path such as /dir/name.fasta)\n";
}
( run in 1.239 second using v1.01-cache-2.11-cpan-39bf76dae61 )