App-lapidary

 view release on metacpan or  search on metacpan

script/lapidary  view on Meta::CPAN

    print "This script requires some arguments to run.\n";
    print "use --help to see the options\n";
    exit;
}

my $script_location = $RealBin;
my $path_variable = $ENV{'PATH'};
my $read_1;
my $read_2="";
my $db;
my $threads = 1; #Default to 1 thread
my $identity = 70; #Default to 70% identity
my $coverage = 50; #Default to 50% coverage
my $read_type;
my $help;
my $version;
my $sequence_identification = "identity";

# Check if the script location is already in the PATH
if ($path_variable !~ m/$script_location/) {
    # Add the script location to the PATH
    $ENV{'PATH'} = "$path_variable:$script_location";
}

GetOptions (	'read_1:s'		=> \$read_1,
	'read_2:s'		=> \$read_2,
	'db:s'			=> \$db,
	'threads:i'		=> \$threads,
	'identity:i'	=> \$identity,
	'coverage:i'	=> \$coverage,
	'read_type:s'	=> \$read_type,
	'sequence_identification:s'	=> \$sequence_identification,
	'help'			=> \$help,
	'version'		=> \$version);

#Print out help message
if(defined($help)){
    die "\n\nLapidary: a software for identifying amino acid sequences using sequenced reads\n\n
    Options:\n
    read_1\tLocation of first read file (required)\n
    read_2\tLocation of second read file if read files are paired\n
    db\tFull location to fasta file containing amino acid sequences (required)\n
    threads\tNumber of threads to use for Diamond (default: 1)\n
    identity\tDiamond identity percentage cut-off to use (default: 70)\n
    coverage\tDiamond coverage percentage cut-off to use (default: 50)\n
    read_type\tTypes of reads used (required): single or paired\n
    sequence_identification\tMethod for calling most likely sequence: identity (default) or consensus\n
    help\tDisplay help screen\n
    version\tReturn version of Lapidary\n\n";
} 

#Print out version
if(defined($version)){

script/lapidary  view on Meta::CPAN

if ($read_1=~m/^.+\/(.*?)\.f.*?$/) { 
    $concatenated = ($1 . "_concatenated.fastq.gz");
    $diamond_output = ($1 . "_diamond_read_alignments.txt");
    $protein_cut_off = ($1 . "_protein_cut_offs.txt");

    if($read_type eq "paired") {
	#Concaternate read files
	system "cat $read_1 $read_2 > $concatenated";
	
	#Run diamond on concatenated reads
	system "diamond blastx -k 1000000000000 -e 1.0 -k0 --matrix BLOSUM45 -q $concatenated --db $db_name --threads $threads --id $identity --query-cover $coverage -o $diamond_output -f 6 stitle qseqid qseq_translated qstart qend sstart send qcovhsp piden...
	
	#Remove concatenated reads
	system "rm $concatenated";
    } elsif ($read_type eq "single") {
	#Run diamond on single reads
	system "diamond blastx -k 1000000000000 -e 1.0 -k0 --matrix BLOSUM45 -q $read_1 --db $db_name --threads $threads --id $identity --query-cover $coverage -o $diamond_output -f 6 stitle qseqid qseq_translated qstart qend sstart send qcovhsp pident qlen...
    }

    #Read in diamond_output and extract read information
    @sample_proteins = ();	
#	@sample_reads = ();
    @sample_positions = ();
    @sample_amino_acids = ();
    @sample_identity = ();
    @sample_length = ();



( run in 0.580 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )