App-lapidary

 view release on metacpan or  search on metacpan

script/lapidary  view on Meta::CPAN

#	Author: Samuel Bloomfield
###

use warnings;
use strict;
use Getopt::Long;
use List::MoreUtils qw(uniq);
use FindBin qw($RealBin);
use File::Basename;
use File::Spec::Functions qw(catfile);
use LWP::Simple;
use Archive::Extract;


# With no command-line arguments there is nothing to work on: point the user
# at --help and stop right away.
unless (@ARGV) {
    print "This script requires some arguments to run.\n",
          "use --help to see the options\n";
    exit;
}

# Environment captured at start-up.
my $script_location = $RealBin;        # directory this script lives in (FindBin)
my $path_variable   = $ENV{'PATH'};    # PATH as inherited from the caller

# Options that have no default; they stay undefined until set on the
# command line.
my ($read_1, $db, $read_type, $help, $version);

# Options with a default value.
my $read_2                  = "";          # second read file, empty unless given
my $threads                 = 1;           # default to 1 thread
my $identity                = 70;          # default to 70% identity
my $coverage                = 50;          # default to 50% coverage
my $sequence_identification = "identity";

# Make sure the directory holding this script is listed in PATH, appending it
# when it is missing.
#
# PATH is split on ":" and each entry is compared with string equality.
# Interpolating the directory into a regex would treat characters such as
# ".", "+" or "(" in the path as metacharacters (an unbalanced "(" aborts the
# script with a regex compile error) and would also accept the directory as a
# mere substring of a longer entry.
{
    # The -1 limit keeps trailing empty entries; an undefined PATH yields an
    # empty list instead of an "uninitialized value" warning.
    my @path_entries = defined $path_variable
        ? split(/:/, $path_variable, -1)
        : ();

    unless (grep { $_ eq $script_location } @path_entries) {
        # Only prepend the old value when there was one, so that no stray
        # empty entry is created.
        $ENV{'PATH'} = @path_entries
            ? "$path_variable:$script_location"
            : $script_location;
    }
}

# Parse the command line into the option variables.
#
# Every value-taking option uses the ":" specifier, i.e. its value is
# optional: an option given without a value yields "" for strings and 0 for
# integers.
#
# GetOptions returns false when it meets an unknown option or a value of the
# wrong type (it has already printed a warning at that point).  Stop there
# instead of continuing with a half-parsed command line.
GetOptions(
    'read_1:s'                  => \$read_1,
    'read_2:s'                  => \$read_2,
    'db:s'                      => \$db,
    'threads:i'                 => \$threads,
    'identity:i'                => \$identity,
    'coverage:i'                => \$coverage,
    'read_type:s'               => \$read_type,
    'sequence_identification:s' => \$sequence_identification,
    'help'                      => \$help,
    'version'                   => \$version,
) or die "Error in command line arguments; use --help to see the options\n";

# --help: show the usage summary and stop.  The text is emitted through die,
# so it goes to STDERR and the script ends with a non-zero exit status.
if (defined $help) {
    # One [option, description] pair per line of the option list.
    my @usage_rows = (
        [ 'read_1',    'Location of first read file (required)' ],
        [ 'read_2',    'Location of second read file if read files are paired' ],
        [ 'db',        'Full location to fasta file containing amino acid sequences (required)' ],
        [ 'threads',   'Number of threads to use for Diamond (default: 1)' ],
        [ 'identity',  'Diamond identity percentage cut-off to use (default: 70)' ],
        [ 'coverage',  'Diamond coverage percentage cut-off to use (default: 50)' ],
        [ 'read_type', 'Types of reads used (required): single or paired' ],
        [ 'sequence_identification', 'Method for calling most likely sequence: identity (default) or consensus' ],
        [ 'help',      'Display help screen' ],
        [ 'version',   'Return version of Lapidary' ],
    );

    # Consecutive lines are separated by a blank line and indented by four
    # spaces; the message ends with a blank line.
    die join(
        "\n\n    ",
        "\n\nLapidary: a software for identifying amino acid sequences using sequenced reads\n",
        "Options:",
        map { "$_->[0]\t$_->[1]" } @usage_rows
    ) . "\n\n";
}

# --version: report the release number and stop (via die, i.e. on STDERR with
# a non-zero exit status).
die "\n\nLapidary version 0.5.0\n\n" if defined $version;


# Make sure the diamond aligner is available, fetching a pinned release
# (v2.1.8, linux64) when it is not.
#
# The archive is requested over HTTPS: it contains an executable that this
# script runs later, so it must not be fetched over unauthenticated plain
# HTTP.
my $diamond_url = 'https://github.com/bbuchfink/diamond/releases/download/v2.1.8/diamond-linux64.tar.gz';

# "which" exits non-zero when no diamond executable is found on PATH.
if (system("which diamond >/dev/null 2>&1") != 0) {
    print "diamond not found in PATH. Downloading and installing...\n";

    # The archive is saved next to this script and unpacked into the same
    # directory.
    my $filename        = basename($diamond_url);
    my $download_path   = catfile($RealBin, $filename);
    my $extraction_path = $RealBin;

    # getstore() returns the HTTP status code of the transfer.
    my $status = getstore($diamond_url, $download_path);
    die "Failed to download diamond: $status" unless is_success($status);

    # Unpack the tar.gz; extract() returns false on failure and the reason
    # is available from error().
    my $extractor = Archive::Extract->new(archive => $download_path);
    $extractor->extract(to => $extraction_path)
        or die "Failed to extract diamond: " . $extractor->error;

    print "diamond has been downloaded and installed in $extraction_path\n";
}


#Create diamond database of protein sequences
my $db_name;

# --db is mandatory.  Without this check an omitted option only showed up as
# an "uninitialized value" warning followed by a misleading format error.
die "No protein database given: --db is required\n"
    unless defined $db && length $db;

# The database name is the file's base name up to its first ".f", so .fa,
# .faa, .fasta and similar all qualify.  The directory part is optional, so
# a bare file name or a file directly under "/" is accepted as well.
if ($db =~ m{\A (?: .* / )? ([^/]+?) [.]f [^/]* \z}xs) {
    $db_name = $1;

    # List-form system() bypasses the shell, so paths containing spaces or
    # shell metacharacters reach diamond unchanged.
    my @makedb_cmd = ('diamond', 'makedb', '--in', $db, '--db', $db_name);
    if (system(@makedb_cmd) != 0) {
        # $? is -1 when the program could not be started at all; in that
        # case $! holds the reason.
        die $? == -1
            ? "Could not run diamond: $!\n"
            : "diamond makedb failed for $db (wait status $?)\n";
    }
}
else {
    # No system call failed on this path, so $! carries no useful
    # information and is not reported.
    die "File $db not in fasta format\n";
}



( run in 1.239 second using v1.01-cache-2.11-cpan-39bf76dae61 )