App-Anchr
view release on metacpan or search on metacpan
doc/model_organisms.md view on Meta::CPAN
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} -ge '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 35 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 35 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|------:|------:|-------:|-------:|---------:|--------:|------:|-------:|-------:|---------:|----------:|
| Q20L60 | 2.67G | 219.3 | 2.28G | 187.9 | 14.327% | 149 | "105" | 12.16M | 12.66M | 1.04 | 0:07'19'' |
| Q25L60 | 2.5G | 205.9 | 2.2G | 181.2 | 11.967% | 149 | "105" | 12.16M | 12.16M | 1.00 | 0:06'55'' |
| Q30L60 | 2.44G | 201.0 | 2.18G | 179.5 | 10.664% | 148 | "105" | 12.16M | 12.06M | 0.99 | 0:06'49'' |
| Q35L60 | 2.19G | 180.4 | 1.98G | 163.0 | 9.633% | 146 | "105" | 12.16M | 11.95M | 0.98 | 0:10'16'' |
* Clear intermediate files.
```bash
BASE_NAME=s288c
cd "${HOME}/data/anchr/${BASE_NAME}"

# Remove the intermediate files left under 2_illumina in one traversal.
# `-delete` is used instead of `| xargs rm` so that the snippet can be re-run:
# with no matches nothing is executed (GNU xargs would call `rm` without
# operands and fail), and paths containing whitespace are handled correctly.
find 2_illumina -type f \
    \( -name "quorum_mer_db.jf" \
    -o -name "k_u_hash_0" \
    -o -name "*.tmp" \
    -o -name "pe.renamed.fastq" \
    -o -name "se.renamed.fastq" \
    -o -name "pe.cor.sub.fa" \) \
    -delete
```
## s288c: down sampling
```bash
BASE_NAME=s288c
REAL_G=12157105
cd "${HOME}/data/anchr/${BASE_NAME}"

# For every trimming group (QxxLxx) and every target coverage X, split the
# corrected reads with `faops split-about` using a size of REAL_G * X, then
# move each numbered part into its own 2_illumina/QxxLxxX<X>P<part> directory
# together with a copy of the group's environment.json.
# `-k` keeps the groups in the order of the arguments.
for QxxLxx in $( parallel -k "echo 'Q{1}L{2}'" ::: 20 25 30 35 ::: 60 ); do
    echo "==> ${QxxLxx}"

    if [ ! -e "2_illumina/${QxxLxx}/pe.cor.fa" ]; then
        echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
        continue;
    fi

    for X in 40 80 120 160; do
        printf "==> Coverage: %s\n" "${X}"

        # Drop the results of earlier runs (split directory and P### directories)
        rm -fr "2_illumina/${QxxLxx}X${X}"*

        faops split-about -l 0 \
            "2_illumina/${QxxLxx}/pe.cor.fa" \
            $(( REAL_G * X )) \
            "2_illumina/${QxxLxx}X${X}"

        # Highest part number to keep: floor(SUM_OUT / REAL_G / X) - 1
        # (presumably so that a trailing, smaller part is never used)
        MAX_SERIAL=$(
            jq ".SUM_OUT | tonumber | . / ${REAL_G} / ${X} | floor | . - 1" \
                "2_illumina/${QxxLxx}/environment.json"
        )

        # An empty MAX_SERIAL used to turn `seq 0 1 ${MAX_SERIAL}` into
        # `seq 0 1`, i.e. parts 000 and 001 were processed by mistake.
        if [[ ! "${MAX_SERIAL}" =~ ^-?[0-9]+$ ]]; then
            echo >&2 "Can't get SUM_OUT from 2_illumina/${QxxLxx}/environment.json"
            continue;
        fi

        # Runs zero times when MAX_SERIAL is -1 (less than X coverage available)
        for (( i = 0; i <= MAX_SERIAL; i++ )); do
            P=$( printf "%03d" "${i}" )
            printf " * Part: %s\n" "${P}"

            mkdir -p "2_illumina/${QxxLxx}X${X}P${P}"

            mv "2_illumina/${QxxLxx}X${X}/${P}.fa" \
                "2_illumina/${QxxLxx}X${X}P${P}/pe.cor.fa"
            cp "2_illumina/${QxxLxx}/environment.json" "2_illumina/${QxxLxx}X${X}P${P}"
        done
    done
done
```
## s288c: k-unitigs and anchors (sampled)
```bash
BASE_NAME=s288c
REAL_G=12157105
cd ${HOME}/data/anchr/${BASE_NAME}
# k-unitigs (sampled)
parallel --no-run-if-empty -j 1 "
echo >&2 '==> Group Q{1}L{2}X{3}P{4}'
doc/model_organisms.md view on Meta::CPAN
platanus scaffold -t 16 \
-c out_contig.fa -b out_contigBubble.fa \
-ip1 pe.fa \
2>&1 | tee sca_log.txt
platanus gap_close -t 16 \
-c out_scaffold.fa \
-ip1 pe.fa \
2>&1 | tee gap_log.txt
```
## iso_1: quorum
```bash
BASE_NAME=iso_1
cd "${HOME}/data/anchr/${BASE_NAME}"

# Generate and run quorum.sh inside every 2_illumina/Q{1}L{2} directory, one
# group at a time (-j 1). A group is skipped when its directory or R1.fq.gz is
# missing, or when pe.cor.fa already exists. Rs.fq.gz is passed to
# `anchr quorum` only for groups with Q >= 30.
# The command string is double-quoted, so it is expanded by the calling shell
# before parallel substitutes {1} and {2}.
parallel --no-run-if-empty -j 1 "
    # Without this guard a failed cd left the job running in the base directory
    if ! cd 2_illumina/Q{1}L{2}; then
        echo >&2 '==> Group Q{1}L{2}: directory not exists'
        exit;
    fi

    echo >&2 '==> Group Q{1}L{2} <=='

    if [ ! -e R1.fq.gz ]; then
        echo >&2 ' R1.fq.gz not exists'
        exit;
    fi

    if [ -e pe.cor.fa ]; then
        echo >&2 ' pe.cor.fa exists'
        exit;
    fi

    if [[ {1} -ge '30' ]]; then
        anchr quorum \
            R1.fq.gz R2.fq.gz Rs.fq.gz \
            -p 16 \
            -o quorum.sh
    else
        anchr quorum \
            R1.fq.gz R2.fq.gz \
            -p 16 \
            -o quorum.sh
    fi

    bash quorum.sh

    echo >&2
    " ::: 20 25 30 ::: 60
```
* Clear intermediate files.
```bash
BASE_NAME=iso_1
cd "${HOME}/data/anchr/${BASE_NAME}"

# Remove the intermediate files left under 2_illumina in one traversal.
# `-delete` is used instead of `| xargs rm` so that the snippet can be re-run:
# with no matches nothing is executed (GNU xargs would call `rm` without
# operands and fail), and paths containing whitespace are handled correctly.
find 2_illumina -type f \
    \( -name "quorum_mer_db.jf" \
    -o -name "k_u_hash_0" \
    -o -name "*.tmp" \
    -o -name "pe.renamed.fastq" \
    -o -name "se.renamed.fastq" \
    -o -name "pe.cor.sub.fa" \) \
    -delete
```
* Stats of processed reads
```bash
BASE_NAME=iso_1
REAL_G=137567477
cd ${HOME}/data/anchr/${BASE_NAME}

# Build stat1.md: the header row first, then one row per existing
# 2_illumina/Q{1}L{2} directory (-k keeps the rows in argument order).
# ${REAL_G} is expanded by the calling shell because the command string is
# double-quoted.
{
    bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header

    parallel -k --no-run-if-empty -j 3 "
        [ -d 2_illumina/Q{1}L{2} ] || exit 0

        bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
        " ::: 20 25 30 ::: 60
} > stat1.md

cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|-------:|------:|-------:|-------:|---------:|--------:|-----:|--------:|--------:|---------:|----------:|
| Q20L60 | 15.65G | 113.7 | 13.86G | 100.8 | 11.383% | 100 | "71" | 137.57M | 129M | 0.94 | 1:02'31'' |
| Q25L60 | 14.66G | 106.5 | 13.26G | 96.4 | 9.506% | 99 | "71" | 137.57M | 127.11M | 0.92 | 0:58'16'' |
| Q30L60 | 14G | 101.7 | 12.97G | 94.3 | 7.364% | 99 | "71" | 137.57M | 126.4M | 0.92 | 0:56'23'' |
* kmergenie
```bash
# Run kmergenie on the raw R1 reads, the raw R2 reads and the Q30L60 corrected
# reads of iso_1; everything is executed inside 2_illumina/kmergenie.
BASE_NAME=iso_1
cd ${HOME}/data/anchr/${BASE_NAME}
# Working directory for the kmergenie runs (created if missing)
mkdir -p 2_illumina/kmergenie
cd 2_illumina/kmergenie
# Inputs are relative to 2_illumina/kmergenie, hence the ../ prefix.
# NOTE(review): -l 21 / -k 121 / -s 10 look like the k-mer range and step,
# -t 8 the thread count and -o the output prefix - confirm with the kmergenie usage.
kmergenie -l 21 -k 121 -s 10 -t 8 ../R1.fq.gz -o oriR1
kmergenie -l 21 -k 121 -s 10 -t 8 ../R2.fq.gz -o oriR2
kmergenie -l 21 -k 121 -s 10 -t 8 ../Q30L60/pe.cor.fa -o Q30L60
```
## iso_1: down sampling
```bash
BASE_NAME=iso_1
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=137567477
for QxxLxx in $( parallel "echo 'Q{1}L{2}'" ::: 25 30 ::: 60 ); do
echo "==> ${QxxLxx}"
if [ ! -e 2_illumina/${QxxLxx}/pe.cor.fa ]; then
echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
doc/model_organisms.md view on Meta::CPAN
echo >&2 '==> Group Q{1}L{2} <=='
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} -ge '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|-------:|------:|-------:|-------:|---------:|--------:|-----:|--------:|-------:|---------:|----------:|
| Q20L60 | 10.55G | 105.2 | 6.37G | 63.5 | 39.659% | 99 | "71" | 100.29M | 99.05M | 0.99 | 0:54'45'' |
| Q25L60 | 9.88G | 98.5 | 6.38G | 63.6 | 35.443% | 97 | "71" | 100.29M | 98.89M | 0.99 | 0:53'22'' |
| Q30L60 | 8.88G | 88.5 | 7.42G | 73.9 | 16.455% | 91 | "69" | 100.29M | 98.82M | 0.99 | 0:51'43'' |
* Clear intermediate files.
```bash
BASE_NAME=n2
cd "${HOME}/data/anchr/${BASE_NAME}"

# Remove the intermediate files left under 2_illumina in one traversal.
# `-delete` is used instead of `| xargs rm` so that the snippet can be re-run:
# with no matches nothing is executed (GNU xargs would call `rm` without
# operands and fail), and paths containing whitespace are handled correctly.
find 2_illumina -type f \
    \( -name "quorum_mer_db.jf" \
    -o -name "k_u_hash_0" \
    -o -name "*.tmp" \
    -o -name "pe.renamed.fastq" \
    -o -name "se.renamed.fastq" \
    -o -name "pe.cor.sub.fa" \) \
    -delete
```
* kmergenie
```bash
# Run kmergenie on the raw R1 reads, the raw R2 reads and the Q30L60 corrected
# reads of n2; everything is executed inside 2_illumina/kmergenie.
BASE_NAME=n2
cd ${HOME}/data/anchr/${BASE_NAME}
# Working directory for the kmergenie runs (created if missing)
mkdir -p 2_illumina/kmergenie
cd 2_illumina/kmergenie
# Inputs are relative to 2_illumina/kmergenie, hence the ../ prefix.
# NOTE(review): -l 21 / -k 91 / -s 10 look like the k-mer range and step,
# -t 8 the thread count and -o the output prefix - confirm with the kmergenie usage.
kmergenie -l 21 -k 91 -s 10 -t 8 ../R1.fq.gz -o oriR1
kmergenie -l 21 -k 91 -s 10 -t 8 ../R2.fq.gz -o oriR2
kmergenie -l 21 -k 91 -s 10 -t 8 ../Q30L60/pe.cor.fa -o Q30L60
```
## n2: down sampling
```bash
BASE_NAME=n2
REAL_G=100286401
cd ${HOME}/data/anchr/${BASE_NAME}
for QxxLxx in $( parallel "echo 'Q{1}L{2}'" ::: 25 30 ::: 60 ); do
echo "==> ${QxxLxx}"
if [ ! -e 2_illumina/${QxxLxx}/pe.cor.fa ]; then
echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
continue;
fi
for X in 30 60; do
printf "==> Coverage: %s\n" ${X}
rm -fr 2_illumina/${QxxLxx}X${X}*
faops split-about -l 0 \
2_illumina/${QxxLxx}/pe.cor.fa \
$(( ${REAL_G} * ${X} )) \
"2_illumina/${QxxLxx}X${X}"
MAX_SERIAL=$(
cat 2_illumina/${QxxLxx}/environment.json \
| jq ".SUM_OUT | tonumber | . / ${REAL_G} / ${X} | floor | . - 1"
)
for i in $( seq 0 1 ${MAX_SERIAL} ); do
P=$( printf "%03d" ${i})
printf " * Part: %s\n" ${P}
mkdir -p "2_illumina/${QxxLxx}X${X}P${P}"
mv "2_illumina/${QxxLxx}X${X}/${P}.fa" \
"2_illumina/${QxxLxx}X${X}P${P}/pe.cor.fa"
cp 2_illumina/${QxxLxx}/environment.json "2_illumina/${QxxLxx}X${X}P${P}"
done
done
doc/model_organisms.md view on Meta::CPAN
echo >&2 '==> Group Q{1}L{2} <=='
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} -ge '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|-------:|------:|-------:|-------:|---------:|--------:|------:|--------:|--------:|---------:|----------:|
| Q20L60 | 13.36G | 111.6 | 7.48G | 62.5 | 43.986% | 254 | "127" | 119.67M | 131.52M | 1.10 | 1:03'05'' |
| Q25L60 | 11.82G | 98.8 | 8.43G | 70.4 | 28.710% | 236 | "127" | 119.67M | 125.44M | 1.05 | 0:56'14'' |
| Q30L60 | 10.37G | 86.7 | 8.73G | 73.0 | 15.807% | 218 | "127" | 119.67M | 119.21M | 1.00 | 0:49'53'' |
* Clear intermediate files.
```bash
BASE_NAME=col_0
cd "${HOME}/data/anchr/${BASE_NAME}"

# Remove the intermediate files left under 2_illumina in one traversal.
# `-delete` is used instead of `| xargs rm` so that the snippet can be re-run:
# with no matches nothing is executed (GNU xargs would call `rm` without
# operands and fail), and paths containing whitespace are handled correctly.
find 2_illumina -type f \
    \( -name "quorum_mer_db.jf" \
    -o -name "k_u_hash_0" \
    -o -name "*.tmp" \
    -o -name "pe.renamed.fastq" \
    -o -name "se.renamed.fastq" \
    -o -name "pe.cor.sub.fa" \) \
    -delete
```
* kmergenie
```bash
# Run kmergenie on the Q30L60 corrected reads of col_0 inside
# 2_illumina/kmergenie.
BASE_NAME=col_0
cd ${HOME}/data/anchr/${BASE_NAME}
# Working directory for the kmergenie run (created if missing)
mkdir -p 2_illumina/kmergenie
cd 2_illumina/kmergenie
# The input is relative to 2_illumina/kmergenie, hence the ../ prefix.
# NOTE(review): -l 21 / -k 91 / -s 10 look like the k-mer range and step,
# -t 8 the thread count and -o the output prefix - confirm with the kmergenie usage.
kmergenie -l 21 -k 91 -s 10 -t 8 ../Q30L60/pe.cor.fa -o Q30L60
```
## col_0: down sampling
```bash
BASE_NAME=col_0
REAL_G=119667750
cd "${HOME}/data/anchr/${BASE_NAME}"

# For every trimming group (QxxLxx) and every target coverage X, split the
# corrected reads with `faops split-about` using a size of REAL_G * X, then
# move each numbered part into its own 2_illumina/QxxLxxX<X>P<part> directory
# together with a copy of the group's environment.json.
# `-k` keeps the groups in the order of the arguments.
for QxxLxx in $( parallel -k "echo 'Q{1}L{2}'" ::: 25 30 ::: 60 ); do
    echo "==> ${QxxLxx}"

    if [ ! -e "2_illumina/${QxxLxx}/pe.cor.fa" ]; then
        echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
        continue;
    fi

    for X in 30 60; do
        printf "==> Coverage: %s\n" "${X}"

        # Drop the results of earlier runs (split directory and P### directories)
        rm -fr "2_illumina/${QxxLxx}X${X}"*

        faops split-about -l 0 \
            "2_illumina/${QxxLxx}/pe.cor.fa" \
            $(( REAL_G * X )) \
            "2_illumina/${QxxLxx}X${X}"

        # Highest part number to keep: floor(SUM_OUT / REAL_G / X) - 1
        # (presumably so that a trailing, smaller part is never used)
        MAX_SERIAL=$(
            jq ".SUM_OUT | tonumber | . / ${REAL_G} / ${X} | floor | . - 1" \
                "2_illumina/${QxxLxx}/environment.json"
        )

        # An empty MAX_SERIAL used to turn `seq 0 1 ${MAX_SERIAL}` into
        # `seq 0 1`, i.e. parts 000 and 001 were processed by mistake.
        if [[ ! "${MAX_SERIAL}" =~ ^-?[0-9]+$ ]]; then
            echo >&2 "Can't get SUM_OUT from 2_illumina/${QxxLxx}/environment.json"
            continue;
        fi

        # Runs zero times when MAX_SERIAL is -1 (less than X coverage available)
        for (( i = 0; i <= MAX_SERIAL; i++ )); do
            P=$( printf "%03d" "${i}" )
            printf " * Part: %s\n" "${P}"

            mkdir -p "2_illumina/${QxxLxx}X${X}P${P}"

            mv "2_illumina/${QxxLxx}X${X}/${P}.fa" \
                "2_illumina/${QxxLxx}X${X}P${P}/pe.cor.fa"
            cp "2_illumina/${QxxLxx}/environment.json" "2_illumina/${QxxLxx}X${X}P${P}"
        done
    done
done
( run in 0.507 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )