App-Anchr

 view release on metacpan or  search on metacpan

lib/App/Anchr/Command/quorum.pm  view on Meta::CPAN

    RED='\033[0;31m'
    NC='\033[0m' # No Color
fi

log_warn () {
    echo >&2 -e "${RED}==> $@ <==${NC}"
}

log_info () {
    echo >&2 -e "${GREEN}==> $@${NC}"
}

log_debug () {
    echo >&2 -e "  * $@"
}

#----------------------------#
# masurca
#----------------------------#
set +e
# Set some paths and prime system to save environment variables
save () {
    printf ". + {%s: \"%s\"}" $1 $(eval "echo -n \"\$$1\"") > jq.filter.txt

    if [ -e environment.json ]; then
        cat environment.json \
            | jq -f jq.filter.txt \
            > environment.json.new
        rm environment.json
    else
        jq -f jq.filter.txt -n \
            > environment.json.new
    fi

    mv environment.json.new environment.json
    rm jq.filter.txt
}

signaled () {
    log_warn Interrupted
    exit 1
}
trap signaled TERM QUIT INT

START_TIME=$(date +%s)
save START_TIME

NUM_THREADS=[% opt.parallel %]
save NUM_THREADS

#----------------------------#
# Renaming reads
#----------------------------#
log_info 'Processing pe and/or se library reads'
rm -rf meanAndStdevByPrefix.pe.txt
echo 'pe [% opt.size %] [% opt.std %]' >> meanAndStdevByPrefix.pe.txt

if [ ! -e pe.renamed.fastq ]; then
    rename_filter_fastq \
        'pe' \
        <(exec expand_fastq '[% args.0 %]' ) \
        <(exec expand_fastq '[% args.1 %]' ) \
        > 'pe.renamed.fastq'
fi

[% IF args.2 -%]
echo 'se [% opt.size %] [% opt.std %]' >> meanAndStdevByPrefix.pe.txt

if [ ! -e se.renamed.fastq ]; then
    rename_filter_fastq \
        'se' \
        <(exec expand_fastq '[% args.2 %]' ) \
        '' \
        > 'se.renamed.fastq'
fi
[% END -%]

#----------------------------#
# Stats of PE reads
#----------------------------#
head -n 80000 pe.renamed.fastq > pe_data.tmp
export PE_AVG_READ_LENGTH=$(
    head -n 40000 pe_data.tmp \
    | grep --text -v '^+' \
    | grep --text -v '^@' \
    | awk '{if(length($1)>31){n+=length($1);m++;}}END{print int(n/m)}'
)
save PE_AVG_READ_LENGTH
log_debug "Average PE read length $PE_AVG_READ_LENGTH"

KMER=$(
    tail -n 40000 pe_data.tmp \
    | perl -e '
        my @lines;
        while ( my $line = <STDIN> ) {
            $line = <STDIN>;
            chomp($line);
            push( @lines, $line );
            $line = <STDIN>;
            $line = <STDIN>;
        }
        my @legnths;
        my $min_len    = 100000;
        my $base_count = 0;
        for my $l (@lines) {
            $base_count += length($l);
            push( @lengths, length($l) );
            for $base ( split( "", $l ) ) {
                if ( uc($base) eq "G" or uc($base) eq "C" ) { $gc_count++; }
            }
        }
        @lengths  = sort { $b <=> $a } @lengths;
        $min_len  = $lengths[ int( $#lengths * .75 ) ];
        $gc_ratio = $gc_count / $base_count;
        $kmer     = 0;
        if ( $gc_ratio < 0.5 ) {
            $kmer = int( $min_len * .7 );
        }
        elsif ( $gc_ratio >= 0.5 && $gc_ratio < 0.6 ) {
            $kmer = int( $min_len * .5 );
        }
        else {
            $kmer = int( $min_len * .33 );
        }
        $kmer++ if ( $kmer % 2 == 0 );
        $kmer = 31  if ( $kmer < 31 );
        $kmer = 127 if ( $kmer > 127 );
        print $kmer;
    ' )
save KMER
log_debug "Choosing kmer size of $KMER"



( run in 1.266 second using v1.01-cache-2.11-cpan-5b529ec07f3 )