App-Anchr
view release on metacpan or search on metacpan
lib/App/Anchr/Command/quorum.pm view on Meta::CPAN
RED='\033[0;31m'
NC='\033[0m' # No Color
fi
log_warn () {
echo >&2 -e "${RED}==> $@ <==${NC}"
}
log_info () {
echo >&2 -e "${GREEN}==> $@${NC}"
}
log_debug () {
echo >&2 -e " * $@"
}
#----------------------------#
# masurca
#----------------------------#
set +e
# Set some paths and prime system to save environment variables
save () {
printf ". + {%s: \"%s\"}" $1 $(eval "echo -n \"\$$1\"") > jq.filter.txt
if [ -e environment.json ]; then
cat environment.json \
| jq -f jq.filter.txt \
> environment.json.new
rm environment.json
else
jq -f jq.filter.txt -n \
> environment.json.new
fi
mv environment.json.new environment.json
rm jq.filter.txt
}
signaled () {
log_warn Interrupted
exit 1
}
trap signaled TERM QUIT INT
START_TIME=$(date +%s)
save START_TIME
NUM_THREADS=[% opt.parallel %]
save NUM_THREADS
#----------------------------#
# Renaming reads
#----------------------------#
log_info 'Processing pe and/or se library reads'
rm -rf meanAndStdevByPrefix.pe.txt
echo 'pe [% opt.size %] [% opt.std %]' >> meanAndStdevByPrefix.pe.txt
if [ ! -e pe.renamed.fastq ]; then
rename_filter_fastq \
'pe' \
<(exec expand_fastq '[% args.0 %]' ) \
<(exec expand_fastq '[% args.1 %]' ) \
> 'pe.renamed.fastq'
fi
[% IF args.2 -%]
echo 'se [% opt.size %] [% opt.std %]' >> meanAndStdevByPrefix.pe.txt
if [ ! -e se.renamed.fastq ]; then
rename_filter_fastq \
'se' \
<(exec expand_fastq '[% args.2 %]' ) \
'' \
> 'se.renamed.fastq'
fi
[% END -%]
#----------------------------#
# Stats of PE reads
#----------------------------#
head -n 80000 pe.renamed.fastq > pe_data.tmp
export PE_AVG_READ_LENGTH=$(
head -n 40000 pe_data.tmp \
| grep --text -v '^+' \
| grep --text -v '^@' \
| awk '{if(length($1)>31){n+=length($1);m++;}}END{print int(n/m)}'
)
save PE_AVG_READ_LENGTH
log_debug "Average PE read length $PE_AVG_READ_LENGTH"
KMER=$(
tail -n 40000 pe_data.tmp \
| perl -e '
my @lines;
while ( my $line = <STDIN> ) {
$line = <STDIN>;
chomp($line);
push( @lines, $line );
$line = <STDIN>;
$line = <STDIN>;
}
my @legnths;
my $min_len = 100000;
my $base_count = 0;
for my $l (@lines) {
$base_count += length($l);
push( @lengths, length($l) );
for $base ( split( "", $l ) ) {
if ( uc($base) eq "G" or uc($base) eq "C" ) { $gc_count++; }
}
}
@lengths = sort { $b <=> $a } @lengths;
$min_len = $lengths[ int( $#lengths * .75 ) ];
$gc_ratio = $gc_count / $base_count;
$kmer = 0;
if ( $gc_ratio < 0.5 ) {
$kmer = int( $min_len * .7 );
}
elsif ( $gc_ratio >= 0.5 && $gc_ratio < 0.6 ) {
$kmer = int( $min_len * .5 );
}
else {
$kmer = int( $min_len * .33 );
}
$kmer++ if ( $kmer % 2 == 0 );
$kmer = 31 if ( $kmer < 31 );
$kmer = 127 if ( $kmer > 127 );
print $kmer;
' )
save KMER
log_debug "Choosing kmer size of $KMER"
( run in 1.266 second using v1.01-cache-2.11-cpan-5b529ec07f3 )