App-Anchr
view release on metacpan or search on metacpan
doc/gage_b.md view on Meta::CPAN
cd 2_illumina/Q{1}L{2}
echo >&2 '==> Group Q{1}L{2} <=='
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} == '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 25 30 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 25 30 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|------:|------:|------:|---------:|----------:|
| Q25L60 | 381.76M | 70.3 | 343.49M | 63.2 | 10.023% | 218 | "127" | 5.43M | 5.34M | 0.98 | 0:01'18'' |
| Q30L60 | 372.08M | 68.5 | 348.42M | 64.1 | 6.360% | 210 | "121" | 5.43M | 5.34M | 0.98 | 0:01'16'' |
* Clear intermediate files.
```bash
BASE_NAME=Bcer

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
* kmergenie
```bash
BASE_NAME=Bcer

# Run kmergenie on the corrected reads of group Q30L60 from inside a dedicated
# 2_illumina/kmergenie directory, so its output files (prefix Q30L60) stay
# together.
# Every step is chained with &&: if the project directory is missing, nothing
# is created or written in whatever directory the shell happens to be in.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    mkdir -p 2_illumina/kmergenie &&
    cd 2_illumina/kmergenie &&
    kmergenie -l 21 -k 151 -s 10 -t 8 ../Q30L60/pe.cor.fa -o Q30L60
```
## Bcer: k-unitigs and anchors
```bash
BASE_NAME=Bcer
REAL_G=5432652
cd ${HOME}/data/anchr/${BASE_NAME}
# k-unitigs
parallel --no-run-if-empty -j 1 "
echo >&2 '==> Group Q{1}L{2} '
if [ -e Q{1}L{2}/k_unitigs.fasta ]; then
echo >&2 ' k_unitigs.fasta already presents'
exit;
fi
mkdir -p Q{1}L{2}
cd Q{1}L{2}
anchr kunitigs \
../2_illumina/Q{1}L{2}/pe.cor.fa \
../2_illumina/Q{1}L{2}/environment.json \
-p 16 \
--kmer 31,41,51,61,71,81,59,91 \
-o kunitigs.sh
bash kunitigs.sh
echo >&2
" ::: 25 30 ::: 60
# anchors
parallel --no-run-if-empty -j 3 "
echo >&2 '==> Group Q{1}L{2}'
if [ -e Q{1}L{2}/anchor/pe.anchor.fa ]; then
exit;
fi
if [ ! -e Q{1}L{2}/k_unitigs.fasta ]; then
exit;
fi
rm -fr Q{1}L{2}/anchor
mkdir -p Q{1}L{2}/anchor
doc/gage_b.md view on Meta::CPAN
echo >&2 '==> Group Q{1}L{2} <=='
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} == '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 174.29M | 37.9 | 154.91M | 33.7 | 11.122% | 136 | "37" | 4.6M | 4.55M | 0.99 | 0:00'54'' |
| Q25L60 | 144.88M | 31.5 | 138.36M | 30.1 | 4.502% | 126 | "35" | 4.6M | 4.53M | 0.99 | 0:00'55'' |
| Q30L60 | 126.32M | 27.4 | 123.22M | 26.8 | 2.454% | 111 | "31" | 4.6M | 4.52M | 0.98 | 0:00'50'' |
* Clear intermediate files.
```bash
BASE_NAME=Rsph

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
* kmergenie
```bash
BASE_NAME=Rsph

# Run kmergenie on the corrected reads of group Q20L60 from inside a dedicated
# 2_illumina/kmergenie directory, so its output files (prefix Q20L60) stay
# together.
# Every step is chained with &&: if the project directory is missing, nothing
# is created or written in whatever directory the shell happens to be in.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    mkdir -p 2_illumina/kmergenie &&
    cd 2_illumina/kmergenie &&
    kmergenie -l 21 -k 151 -s 10 -t 8 ../Q20L60/pe.cor.fa -o Q20L60
```
## Rsph: k-unitigs and anchors
```bash
BASE_NAME=Rsph
REAL_G=4602977
cd ${HOME}/data/anchr/${BASE_NAME}
# k-unitigs
parallel --no-run-if-empty -j 1 "
echo >&2 '==> Group Q{1}L{2} '
if [ -e Q{1}L{2}/k_unitigs.fasta ]; then
echo >&2 ' k_unitigs.fasta already presents'
exit;
fi
mkdir -p Q{1}L{2}
cd Q{1}L{2}
anchr kunitigs \
../2_illumina/Q{1}L{2}/pe.cor.fa \
../2_illumina/Q{1}L{2}/environment.json \
-p 16 \
--kmer 31,41,51,61,71,81 \
-o kunitigs.sh
bash kunitigs.sh
echo >&2
" ::: 20 25 30 ::: 60
# anchors
parallel --no-run-if-empty -j 3 "
echo >&2 '==> Group Q{1}L{2}'
if [ -e Q{1}L{2}/anchor/pe.anchor.fa ]; then
exit;
fi
if [ ! -e Q{1}L{2}/k_unitigs.fasta ]; then
exit;
fi
rm -fr Q{1}L{2}/anchor
mkdir -p Q{1}L{2}/anchor
doc/gage_b.md view on Meta::CPAN
echo >&2 '==> Group Q{1}L{2} <=='
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} == '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 291.68M | 57.3 | 228.29M | 44.8 | 21.734% | 167 | "47" | 5.09M | 5.23M | 1.03 | 0:01'09'' |
| Q25L60 | 251.42M | 49.4 | 210.79M | 41.4 | 16.164% | 162 | "43" | 5.09M | 5.21M | 1.02 | 0:01'02'' |
| Q30L60 | 222.24M | 43.7 | 194.38M | 38.2 | 12.534% | 152 | "39" | 5.09M | 5.19M | 1.02 | 0:01'05'' |
* Clear intermediate files.
```bash
BASE_NAME=Mabs

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
* kmergenie
```bash
BASE_NAME=Mabs

# Run kmergenie on the corrected reads of group Q20L60 from inside a dedicated
# 2_illumina/kmergenie directory, so its output files (prefix Q20L60) stay
# together.
# Every step is chained with &&: if the project directory is missing, nothing
# is created or written in whatever directory the shell happens to be in.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    mkdir -p 2_illumina/kmergenie &&
    cd 2_illumina/kmergenie &&
    kmergenie -l 21 -k 151 -s 10 -t 8 ../Q20L60/pe.cor.fa -o Q20L60
```
## Mabs: k-unitigs and anchors
```bash
BASE_NAME=Mabs
REAL_G=5090491
cd ${HOME}/data/anchr/${BASE_NAME}
# k-unitigs
parallel --no-run-if-empty -j 1 "
echo >&2 '==> Group Q{1}L{2} '
if [ -e Q{1}L{2}/k_unitigs.fasta ]; then
echo >&2 ' k_unitigs.fasta already presents'
exit;
fi
mkdir -p Q{1}L{2}
cd Q{1}L{2}
anchr kunitigs \
../2_illumina/Q{1}L{2}/pe.cor.fa \
../2_illumina/Q{1}L{2}/environment.json \
-p 16 \
--kmer 31,41,51,61,71,81 \
-o kunitigs.sh
bash kunitigs.sh
echo >&2
" ::: 20 25 30 ::: 60
# anchors
parallel --no-run-if-empty -j 3 "
echo >&2 '==> Group Q{1}L{2}'
if [ -e Q{1}L{2}/anchor/pe.anchor.fa ]; then
exit;
fi
if [ ! -e Q{1}L{2}/k_unitigs.fasta ]; then
exit;
fi
rm -fr Q{1}L{2}/anchor
mkdir -p Q{1}L{2}/anchor
doc/gage_b.md view on Meta::CPAN
| Genome | 2961149 | 4033464 | 2 |
| Paralogs | 3483 | 114707 | 48 |
| Illumina | 251 | 399999624 | 1593624 |
| uniq | 251 | 397989616 | 1585616 |
| scythe | 198 | 303351043 | 1585616 |
| Q20L60 | 192 | 276676322 | 1504034 |
| Q20L90 | 192 | 271399426 | 1460080 |
| Q25L60 | 189 | 254738206 | 1415632 |
| Q25L90 | 189 | 248113857 | 1359224 |
| Q30L60 | 182 | 231416118 | 1354988 |
| Q30L90 | 183 | 227344381 | 1300876 |
## Vcho: quorum
```bash
BASE_NAME=Vcho

# Generate and run a quorum script inside every 2_illumina/Q{1}L{2} group
# directory, one group at a time (-j 1).
#
# The job text is double-quoted: {1} and {2} are substituted by parallel from
# the ::: lists, and each backslash-newline is joined by the calling shell
# before parallel sees the text.
#
# Both directory changes are checked: the outer one so no job is started from
# the wrong place, the per-job one so a missing group directory stops that job
# instead of letting it test for and write files in the project root.
if cd "${HOME}/data/anchr/${BASE_NAME}"; then
    parallel --no-run-if-empty -j 1 "
        echo >&2 '==> Group Q{1}L{2} <=='

        cd 2_illumina/Q{1}L{2} || exit 1

        if [ ! -e R1.fq.gz ]; then
            echo >&2 ' R1.fq.gz not exists'
            exit;
        fi

        if [ -e pe.cor.fa ]; then
            echo >&2 ' pe.cor.fa exists'
            exit;
        fi

        if [[ {1} == '30' ]]; then
            anchr quorum \
                R1.fq.gz R2.fq.gz Rs.fq.gz \
                -p 16 \
                -o quorum.sh
        else
            anchr quorum \
                R1.fq.gz R2.fq.gz \
                -p 16 \
                -o quorum.sh
        fi

        bash quorum.sh

        echo >&2
    " ::: 20 25 30 ::: 60 90
fi
```
* Clear intermediate files.
```bash
BASE_NAME=Vcho

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
* Stats of processed reads
```bash
BASE_NAME=Vcho
REAL_G=4033464

# Build stat1.md: one header line from sr_stat.sh followed by one row per
# existing 2_illumina/Q{1}L{2} directory, then print the table.
#
# * Nothing is written unless the cd succeeds, so stat1.md cannot end up in an
#   unrelated directory.
# * Header and rows share one redirection, so the file is opened once and
#   truncated exactly once.
# * ${REAL_G} sits inside double quotes and is therefore expanded by this
#   shell before parallel receives the job text; {1}/{2} are filled in by
#   parallel. -k is kept so rows are emitted in the order of the ::: lists.
if cd "${HOME}/data/anchr/${BASE_NAME}"; then
    {
        bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header

        parallel -k --no-run-if-empty -j 3 "
            if [ ! -d 2_illumina/Q{1}L{2} ]; then
                exit;
            fi

            bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
        " ::: 20 25 30 ::: 60 90
    } > stat1.md

    cat stat1.md
fi
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|------:|------:|------:|---------:|----------:|
| Q20L60 | 276.68M | 68.6 | 224.36M | 55.6 | 18.911% | 183 | "113" | 4.03M | 3.96M | 0.98 | 0:01'05'' |
| Q20L90 | 271.4M | 67.3 | 220.69M | 54.7 | 18.684% | 184 | "113" | 4.03M | 3.96M | 0.98 | 0:01'04'' |
| Q25L60 | 254.74M | 63.2 | 217.57M | 53.9 | 14.590% | 179 | "109" | 4.03M | 3.95M | 0.98 | 0:01'04'' |
| Q25L90 | 248.11M | 61.5 | 212.22M | 52.6 | 14.465% | 182 | "111" | 4.03M | 3.95M | 0.98 | 0:01'02'' |
| Q30L60 | 231.51M | 57.4 | 205.43M | 50.9 | 11.266% | 174 | "105" | 4.03M | 3.94M | 0.98 | 0:01'02'' |
| Q30L90 | 227.45M | 56.4 | 201.7M | 50.0 | 11.322% | 177 | "107" | 4.03M | 3.94M | 0.98 | 0:00'58'' |
* kmergenie
```bash
BASE_NAME=Vcho

# Run kmergenie three times from inside a dedicated 2_illumina/kmergenie
# directory: on the original R1 and R2 reads and on the corrected reads of
# group Q20L60. Each run writes under its own -o prefix.
# The && chain stops before any output is produced if the project directory is
# missing; the three runs themselves stay independent of each other.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    mkdir -p 2_illumina/kmergenie &&
    cd 2_illumina/kmergenie && {
        kmergenie -l 21 -k 151 -s 10 -t 8 ../R1.fq.gz -o oriR1
        kmergenie -l 21 -k 151 -s 10 -t 8 ../R2.fq.gz -o oriR2
        kmergenie -l 21 -k 151 -s 10 -t 8 ../Q20L60/pe.cor.fa -o Q20L60
    }
```
## Vcho: generate k-unitigs
```bash
BASE_NAME=Vcho
cd ${HOME}/data/anchr/${BASE_NAME}
parallel --no-run-if-empty -j 1 "
echo >&2 '==> Group Q{1}L{2} '
if [ -e Q{1}L{2}/k_unitigs.fasta ]; then
doc/gage_b.md view on Meta::CPAN
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} -ge '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 35 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 35 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 578.89M | 113.7 | 451.73M | 88.7 | 21.966% | 167 | "45" | 5.09M | 5.39M | 1.06 | 0:06'06'' |
| Q25L60 | 498.47M | 97.9 | 415.13M | 81.5 | 16.719% | 161 | "43" | 5.09M | 5.28M | 1.04 | 0:05'27'' |
| Q30L60 | 439.78M | 86.4 | 382.34M | 75.1 | 13.060% | 152 | "39" | 5.09M | 5.26M | 1.03 | 0:04'49'' |
| Q35L60 | 237.66M | 46.7 | 221M | 43.4 | 7.010% | 126 | "31" | 5.09M | 5.19M | 1.02 | 0:02'27'' |
* Clear intermediate files.
```bash
BASE_NAME=MabsF

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
## MabsF: down sampling
```bash
BASE_NAME=MabsF
REAL_G=5090491

# Split each group's corrected reads into pieces of REAL_G * X bases and move
# every piece into its own directory:
#   2_illumina/<QxxLxx>X<X>P<part>/pe.cor.fa   (+ a copy of environment.json)
#
# Nothing runs unless the cd succeeds, because the loop starts with rm -fr on
# relative paths.
if cd "${HOME}/data/anchr/${BASE_NAME}"; then
    # Brace expansion lists the groups in a fixed order; spawning parallel
    # only to print the names gave no ordering guarantee.
    for QxxLxx in Q{20,25,30,35}L60; do
        echo "==> ${QxxLxx}"

        if [ ! -e "2_illumina/${QxxLxx}/pe.cor.fa" ]; then
            echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
            continue
        fi

        for X in 40 80; do
            printf "==> Coverage: %s\n" "${X}"

            # Drop results of an earlier run (split directory and P### dirs).
            rm -fr "2_illumina/${QxxLxx}X${X}"*

            faops split-about -l 0 \
                "2_illumina/${QxxLxx}/pe.cor.fa" \
                "$(( REAL_G * X ))" \
                "2_illumina/${QxxLxx}X${X}"

            # floor(SUM_OUT / REAL_G / X) - 1; presumably the index of the
            # last piece that holds a full X-fold sample -- confirm against
            # faops split-about numbering.
            MAX_SERIAL=$(
                jq ".SUM_OUT | tonumber | . / ${REAL_G} / ${X} | floor | . - 1" \
                    "2_illumina/${QxxLxx}/environment.json"
            )

            # An empty value (jq failed, SUM_OUT missing) would have turned
            # `seq 0 1 ${MAX_SERIAL}` into `seq 0 1`, i.e. parts 000 and 001.
            if ! [[ "${MAX_SERIAL}" =~ ^-?[0-9]+$ ]]; then
                echo "cannot determine part count for ${QxxLxx}X${X}" >&2
                continue
            fi

            # Arithmetic loop: simply does nothing when MAX_SERIAL is -1
            # (less than one full sample available).
            for (( i = 0; i <= MAX_SERIAL; i++ )); do
                P=$( printf "%03d" "${i}" )
                printf " * Part: %s\n" "${P}"

                mkdir -p "2_illumina/${QxxLxx}X${X}P${P}"

                mv "2_illumina/${QxxLxx}X${X}/${P}.fa" \
                    "2_illumina/${QxxLxx}X${X}P${P}/pe.cor.fa"
                cp "2_illumina/${QxxLxx}/environment.json" "2_illumina/${QxxLxx}X${X}P${P}"
            done
        done
    done
fi
```
## MabsF: k-unitigs and anchors (sampled)
```bash
BASE_NAME=MabsF
REAL_G=5090491
cd ${HOME}/data/anchr/${BASE_NAME}
# k-unitigs (sampled)
parallel --no-run-if-empty -j 1 "
echo >&2 '==> Group Q{1}L{2}X{3}P{4}'
doc/gage_b.md view on Meta::CPAN
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} -ge '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 ::: 60
# Stats of processed reads
REAL_G=4602977
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 536.78M | 116.6 | 477.56M | 103.8 | 11.032% | 137 | "37" | 4.6M | 4.58M | 1.00 | 0:06'12'' |
| Q25L60 | 446.18M | 96.9 | 426.13M | 92.6 | 4.494% | 127 | "35" | 4.6M | 4.55M | 0.99 | 0:04'57'' |
| Q30L60 | 389.16M | 84.5 | 379.54M | 82.5 | 2.473% | 112 | "31" | 4.6M | 4.55M | 0.99 | 0:04'09'' |
* Clear intermediate files.
```bash
BASE_NAME=RsphF

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
* kmergenie
```bash
BASE_NAME=RsphF

# Run kmergenie three times from inside a dedicated 2_illumina/kmergenie
# directory: on the original R1 and R2 reads and on the corrected reads of
# group Q20L60. Each run writes under its own -o prefix.
# The && chain stops before any output is produced if the project directory is
# missing; the three runs themselves stay independent of each other.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    mkdir -p 2_illumina/kmergenie &&
    cd 2_illumina/kmergenie && {
        kmergenie -l 21 -k 151 -s 10 -t 8 ../R1.fq.gz -o oriR1
        kmergenie -l 21 -k 151 -s 10 -t 8 ../R2.fq.gz -o oriR2
        kmergenie -l 21 -k 151 -s 10 -t 8 ../Q20L60/pe.cor.fa -o Q20L60
    }
```
## RsphF: down sampling
```bash
BASE_NAME=RsphF
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=4602977
for QxxLxx in $( parallel "echo 'Q{1}L{2}'" ::: 20 25 30 ::: 60 ); do
echo "==> ${QxxLxx}"
if [ ! -e 2_illumina/${QxxLxx}/pe.cor.fa ]; then
echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
continue;
fi
for X in 40 80; do
printf "==> Coverage: %s\n" ${X}
rm -fr 2_illumina/${QxxLxx}X${X}*
faops split-about -l 0 \
2_illumina/${QxxLxx}/pe.cor.fa \
$(( ${REAL_G} * ${X} )) \
"2_illumina/${QxxLxx}X${X}"
MAX_SERIAL=$(
cat 2_illumina/${QxxLxx}/environment.json \
| jq ".SUM_OUT | tonumber | . / ${REAL_G} / ${X} | floor | . - 1"
)
for i in $( seq 0 1 ${MAX_SERIAL} ); do
P=$( printf "%03d" ${i})
printf " * Part: %s\n" ${P}
mkdir -p "2_illumina/${QxxLxx}X${X}P${P}"
mv "2_illumina/${QxxLxx}X${X}/${P}.fa" \
"2_illumina/${QxxLxx}X${X}P${P}/pe.cor.fa"
cp 2_illumina/${QxxLxx}/environment.json "2_illumina/${QxxLxx}X${X}P${P}"
done
doc/gage_b.md view on Meta::CPAN
if [ ! -e R1.fq.gz ]; then
echo >&2 ' R1.fq.gz not exists'
exit;
fi
if [ -e pe.cor.fa ]; then
echo >&2 ' pe.cor.fa exists'
exit;
fi
if [[ {1} -ge '30' ]]; then
anchr quorum \
R1.fq.gz R2.fq.gz Rs.fq.gz \
-p 16 \
-o quorum.sh
else
anchr quorum \
R1.fq.gz R2.fq.gz \
-p 16 \
-o quorum.sh
fi
bash quorum.sh
echo >&2
" ::: 20 25 30 35 ::: 60
# Stats of processed reads
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> stat1.md
parallel -k --no-run-if-empty -j 3 "
if [ ! -d 2_illumina/Q{1}L{2} ]; then
exit;
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
" ::: 20 25 30 35 ::: 60 \
>> stat1.md
cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|------:|------:|------:|---------:|----------:|
| Q20L60 | 558.81M | 138.5 | 449.06M | 111.3 | 19.639% | 183 | "111" | 4.03M | 4.05M | 1.00 | 0:01'45'' |
| Q25L60 | 513.69M | 127.4 | 435.83M | 108.1 | 15.156% | 179 | "109" | 4.03M | 4.01M | 0.99 | 0:01'42'' |
| Q30L60 | 466.22M | 115.6 | 411.28M | 102.0 | 11.783% | 173 | "103" | 4.03M | 3.98M | 0.99 | 0:01'35'' |
| Q35L60 | 310.41M | 77.0 | 291.79M | 72.3 | 6.001% | 147 | "83" | 4.03M | 3.94M | 0.98 | 0:01'05'' |
* Clear intermediate files.
```bash
BASE_NAME=VchoF

# Delete quorum/k-unitig scratch files below 2_illumina.
#
# * The find only runs when the cd succeeds, so a missing project directory
#   can never make it operate on some other 2_illumina in the current directory.
# * One find pass with `-exec rm {} +` replaces six `find | xargs rm`
#   pipelines: rm is not invoked at all when nothing matches (GNU xargs would
#   still run it once and print "rm: missing operand"), and paths are handed
#   over intact even if they contain whitespace.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    find 2_illumina -type f \( \
        -name "quorum_mer_db.jf" \
        -o -name "k_u_hash_0" \
        -o -name "*.tmp" \
        -o -name "pe.renamed.fastq" \
        -o -name "se.renamed.fastq" \
        -o -name "pe.cor.sub.fa" \
    \) -exec rm {} +
```
* kmergenie
```bash
BASE_NAME=VchoF

# Run kmergenie three times from inside a dedicated 2_illumina/kmergenie
# directory: on the original R1 and R2 reads and on the corrected reads of
# group Q20L60. Each run writes under its own -o prefix.
# The && chain stops before any output is produced if the project directory is
# missing; the three runs themselves stay independent of each other.
cd "${HOME}/data/anchr/${BASE_NAME}" &&
    mkdir -p 2_illumina/kmergenie &&
    cd 2_illumina/kmergenie && {
        kmergenie -l 21 -k 151 -s 10 -t 8 ../R1.fq.gz -o oriR1
        kmergenie -l 21 -k 151 -s 10 -t 8 ../R2.fq.gz -o oriR2
        kmergenie -l 21 -k 151 -s 10 -t 8 ../Q20L60/pe.cor.fa -o Q20L60
    }
```
## VchoF: down sampling
```bash
BASE_NAME=VchoF
REAL_G=4033464
cd ${HOME}/data/anchr/${BASE_NAME}
for QxxLxx in $( parallel "echo 'Q{1}L{2}'" ::: 20 25 30 35 ::: 60 ); do
echo "==> ${QxxLxx}"
if [ ! -e 2_illumina/${QxxLxx}/pe.cor.fa ]; then
echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
continue;
fi
for X in 40 80; do
printf "==> Coverage: %s\n" ${X}
rm -fr 2_illumina/${QxxLxx}X${X}*
faops split-about -l 0 \
2_illumina/${QxxLxx}/pe.cor.fa \
$(( ${REAL_G} * ${X} )) \
"2_illumina/${QxxLxx}X${X}"
MAX_SERIAL=$(
cat 2_illumina/${QxxLxx}/environment.json \
| jq ".SUM_OUT | tonumber | . / ${REAL_G} / ${X} | floor | . - 1"
)
for i in $( seq 0 1 ${MAX_SERIAL} ); do
P=$( printf "%03d" ${i})
printf " * Part: %s\n" ${P}
mkdir -p "2_illumina/${QxxLxx}X${X}P${P}"
mv "2_illumina/${QxxLxx}X${X}/${P}.fa" \
"2_illumina/${QxxLxx}X${X}P${P}/pe.cor.fa"
cp 2_illumina/${QxxLxx}/environment.json "2_illumina/${QxxLxx}X${X}P${P}"
done
done
( run in 3.160 seconds using v1.01-cache-2.11-cpan-d7f47b0818f )