App-Anchr
view release on metacpan or search on metacpan
doc/bacteria_2_3.md view on Meta::CPAN
head -n 160000 3_pacbio/pacbio.fasta > 3_pacbio/pacbio.40x.fasta
faops n50 -S -C 3_pacbio/pacbio.40x.fasta
head -n 320000 3_pacbio/pacbio.fasta > 3_pacbio/pacbio.80x.fasta
faops n50 -S -C 3_pacbio/pacbio.80x.fasta
```
## Sfle: generate super-reads
```bash
# Sfle: run the super-reads step for every QxxLxx group, three groups at a time.
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}

# Brace expansion emits the six group names, one per line, in the order
# Q20L60 Q20L90 Q25L60 Q25L90 Q30L60 Q30L90; GNU parallel substitutes each
# name for {}. A job stops early when its group directory is missing or when
# k_unitigs.fasta is already there; otherwise `anchr superreads` writes
# superreads.sh (-o) inside the group directory and that script is executed.
printf '%s\n' Q{20,25,30}L{60,90} |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'

        [ -d ${BASE_DIR}/{} ] || {
            echo ' directory not exists'
            exit
        }

        [ -e ${BASE_DIR}/{}/k_unitigs.fasta ] && {
            echo ' k_unitigs.fasta already presents'
            exit
        }

        cd ${BASE_DIR}/{}
        anchr superreads R1.fq.gz R2.fq.gz --nosr -p 8 --kmer 41,61,81,101,121 -o superreads.sh
        bash superreads.sh
    "
```
Clear intermediate files.
```bash
BASE_DIR=$HOME/data/anchr/Sfle

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${BASE_DIR}"
# The absolute path keeps the deletion inside BASE_DIR even if the cd failed.
remove_files_named "${BASE_DIR}" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    readPositionsInSuperReads \
    '*.tmp' \
    pe.renamed.fastq \
    pe.cor.sub.fa
```
## Sfle: create anchors
```bash
# Sfle: create anchors for every QxxLxx group that does not have them yet.
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}

# Group names come from brace expansion (Q20L60 Q20L90 ... Q30L90), one per
# line. A group whose anchor/pe.anchor.fa already exists is left untouched;
# for any other group the anchor directory is removed first (presumably to
# discard an incomplete earlier run) and anchor.sh is run on the group
# directory with the arguments "8 false".
printf '%s\n' Q{20,25,30}L{60,90} |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'
        [ -e ${BASE_DIR}/{}/anchor/pe.anchor.fa ] && exit
        rm -fr ${BASE_DIR}/{}/anchor
        bash ~/Scripts/cpan/App-Anchr/share/anchor.sh ${BASE_DIR}/{} 8 false
    "
```
## Sfle: results
* Stats of super-reads
```bash
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}
REAL_G=4607202
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> ${BASE_DIR}/stat1.md
perl -e '
for my $n (
qw{
Q20L60 Q20L90
Q25L60 Q25L90
Q30L60 Q30L90
}
)
{
printf qq{%s\n}, $n;
}
' \
| parallel -k --no-run-if-empty -j 4 "
doc/bacteria_2_3.md view on Meta::CPAN
head -n 100000 3_pacbio/pacbio.fasta > 3_pacbio/pacbio.80x.fasta
faops n50 -S -C 3_pacbio/pacbio.80x.fasta
```
## Vpar: generate super-reads
```bash
# Vpar: run the super-reads step for every down-sampled group, three at a time.
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}

# Brace expansion emits fifteen group names, one per line:
# Q20L60_1000000 ... Q20L60_5000000, then the same for Q25L60 and Q30L60.
# GNU parallel substitutes each name for {}. A job stops early when its group
# directory is missing or when k_unitigs.fasta is already there; otherwise
# `anchr superreads` writes superreads.sh (-o) inside the group directory and
# that script is executed.
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'

        [ -d ${BASE_DIR}/{} ] || {
            echo ' directory not exists'
            exit
        }

        [ -e ${BASE_DIR}/{}/k_unitigs.fasta ] && {
            echo ' k_unitigs.fasta already presents'
            exit
        }

        cd ${BASE_DIR}/{}
        anchr superreads R1.fq.gz R2.fq.gz --nosr -p 8 --kmer 41,61,81 -o superreads.sh
        bash superreads.sh
    "
```
Clear intermediate files.
```bash
BASE_DIR=$HOME/data/anchr/Vpar

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${BASE_DIR}"
# The absolute path keeps the deletion inside BASE_DIR even if the cd failed.
remove_files_named "${BASE_DIR}" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    readPositionsInSuperReads \
    '*.tmp' \
    pe.renamed.fastq \
    pe.cor.sub.fa
```
## Vpar: create anchors
```bash
# Vpar: create anchors for every down-sampled group that does not have them yet.
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}

# Group names come from brace expansion (Q20L60_1000000 ... Q30L60_5000000),
# one per line. A group whose anchor/pe.anchor.fa already exists is left
# untouched; for any other group the anchor directory is removed first
# (presumably to discard an incomplete earlier run) and anchor.sh is run on
# the group directory with the arguments "8 false".
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'
        [ -e ${BASE_DIR}/{}/anchor/pe.anchor.fa ] && exit
        rm -fr ${BASE_DIR}/{}/anchor
        bash ~/Scripts/cpan/App-Anchr/share/anchor.sh ${BASE_DIR}/{} 8 false
    "
```
## Vpar: results
* Stats of super-reads
```bash
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}
REAL_G=5165770
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> ${BASE_DIR}/stat1.md
perl -e '
for my $n (
qw{
Q20L60
Q25L60
Q30L60
}
)
{
for my $i ( 1 .. 5 ) {
printf qq{%s_%d\n}, $n, ( 1000000 * $i );
doc/bacteria_2_3.md view on Meta::CPAN
head -n 100000 3_pacbio/pacbio.fasta > 3_pacbio/pacbio.80x.fasta
faops n50 -S -C 3_pacbio/pacbio.80x.fasta
```
## Lpne: generate super-reads
```bash
# Lpne: run the super-reads step for every down-sampled group, three at a time.
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}

# Brace expansion emits fifteen group names, one per line:
# Q20L60_1000000 ... Q20L60_5000000, then the same for Q25L60 and Q30L60.
# GNU parallel substitutes each name for {}. A job stops early when its group
# directory is missing or when k_unitigs.fasta is already there; otherwise
# `anchr superreads` writes superreads.sh (-o) inside the group directory and
# that script is executed.
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'

        [ -d ${BASE_DIR}/{} ] || {
            echo ' directory not exists'
            exit
        }

        [ -e ${BASE_DIR}/{}/k_unitigs.fasta ] && {
            echo ' k_unitigs.fasta already presents'
            exit
        }

        cd ${BASE_DIR}/{}
        anchr superreads R1.fq.gz R2.fq.gz --nosr -p 8 --kmer 41,61,81 -o superreads.sh
        bash superreads.sh
    "
```
Clear intermediate files.
```bash
BASE_DIR=$HOME/data/anchr/Lpne

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${BASE_DIR}"
# The absolute path keeps the deletion inside BASE_DIR even if the cd failed.
remove_files_named "${BASE_DIR}" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    readPositionsInSuperReads \
    '*.tmp' \
    pe.renamed.fastq \
    pe.cor.sub.fa
```
## Lpne: create anchors
```bash
# Lpne: create anchors for every down-sampled group that does not have them yet.
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}

# Group names come from brace expansion (Q20L60_1000000 ... Q30L60_5000000),
# one per line. A group whose anchor/pe.anchor.fa already exists is left
# untouched; for any other group the anchor directory is removed first
# (presumably to discard an incomplete earlier run) and anchor.sh is run on
# the group directory with the arguments "8 false".
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'
        [ -e ${BASE_DIR}/{}/anchor/pe.anchor.fa ] && exit
        rm -fr ${BASE_DIR}/{}/anchor
        bash ~/Scripts/cpan/App-Anchr/share/anchor.sh ${BASE_DIR}/{} 8 false
    "
```
## Lpne: results
* Stats of super-reads
```bash
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}
REAL_G=3397754
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> ${BASE_DIR}/stat1.md
perl -e '
for my $n (
qw{
Q20L60
Q25L60
Q30L60
}
)
{
for my $i ( 1 .. 5 ) {
printf qq{%s_%d\n}, $n, ( 1000000 * $i );
doc/bacteria_2_3.md view on Meta::CPAN
head -n 50000 3_pacbio/pacbio.fasta > 3_pacbio/pacbio.80x.fasta
faops n50 -S -C 3_pacbio/pacbio.80x.fasta
```
## Ngon: generate super-reads
```bash
# Ngon: run the super-reads step for every down-sampled group, three at a time.
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}

# Brace expansion emits fifteen group names, one per line:
# Q20L60_1000000 ... Q20L60_5000000, then the same for Q25L60 and Q30L60.
# GNU parallel substitutes each name for {}. A job stops early when its group
# directory is missing or when k_unitigs.fasta is already there; otherwise
# `anchr superreads` writes superreads.sh (-o) inside the group directory and
# that script is executed.
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'

        [ -d ${BASE_DIR}/{} ] || {
            echo ' directory not exists'
            exit
        }

        [ -e ${BASE_DIR}/{}/k_unitigs.fasta ] && {
            echo ' k_unitigs.fasta already presents'
            exit
        }

        cd ${BASE_DIR}/{}
        anchr superreads R1.fq.gz R2.fq.gz --nosr -p 8 --kmer 41,61,81 -o superreads.sh
        bash superreads.sh
    "
```
Clear intermediate files.
```bash
BASE_DIR=$HOME/data/anchr/Ngon

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${BASE_DIR}"
# The absolute path keeps the deletion inside BASE_DIR even if the cd failed.
remove_files_named "${BASE_DIR}" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    readPositionsInSuperReads \
    '*.tmp' \
    pe.renamed.fastq \
    pe.cor.sub.fa
```
## Ngon: create anchors
```bash
# Ngon: create anchors for every down-sampled group that does not have them yet.
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}

# Group names come from brace expansion (Q20L60_1000000 ... Q30L60_5000000),
# one per line. A group whose anchor/pe.anchor.fa already exists is left
# untouched; for any other group the anchor directory is removed first
# (presumably to discard an incomplete earlier run) and anchor.sh is run on
# the group directory with the arguments "8 false".
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'
        [ -e ${BASE_DIR}/{}/anchor/pe.anchor.fa ] && exit
        rm -fr ${BASE_DIR}/{}/anchor
        bash ~/Scripts/cpan/App-Anchr/share/anchor.sh ${BASE_DIR}/{} 8 false
    "
```
## Ngon: results
* Stats of super-reads
```bash
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}
REAL_G=2153922
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> ${BASE_DIR}/stat1.md
perl -e '
for my $n (
qw{
Q20L60
Q25L60
Q30L60
}
)
{
for my $i ( 1 .. 5 ) {
printf qq{%s_%d\n}, $n, ( 1000000 * $i );
doc/bacteria_2_3.md view on Meta::CPAN
| Name | N50 | Sum | # |
|:---------|--------:|-----------:|---------:|
| Genome | 2272360 | 2272360 | 1 |
| Paralogs | 0 | 0 | 0 |
| PacBio | 9603 | 402166610 | 58711 |
| Illumina | 101 | 1395253390 | 13814390 |
| uniq | 101 | 1389594158 | 13758358 |
| 200x | 101 | 454471922 | 4499722 |
| Q20L60 | 101 | 383136380 | 3873164 |
| Q25L60 | 101 | 330579421 | 3379270 |
| Q30L60 | 101 | 277833033 | 2980121 |
## Nmen: quorum
```bash
# Nmen: run `anchr quorum` in each 2_illumina/QxxLxx directory, one at a time.
BASE_NAME=Nmen
cd ${HOME}/data/anchr/${BASE_NAME}

# {1} and {2} are filled from the two ::: lists, giving Q20L60, Q25L60 and
# Q30L60. A job stops early when R1.fq.gz is missing or pe.cor.fa already
# exists. Groups with {1} below 30 pass only R1/R2 to `anchr quorum`; the
# others also pass Rs.fq.gz. The generated quorum.sh is then executed.
parallel --no-run-if-empty -j 1 "
    cd 2_illumina/Q{1}L{2}
    echo >&2 '==> Group Q{1}L{2} <=='

    [ -e R1.fq.gz ] || {
        echo >&2 ' R1.fq.gz not exists'
        exit
    }
    [ -e pe.cor.fa ] && {
        echo >&2 ' pe.cor.fa exists'
        exit
    }

    if [[ {1} -lt '30' ]]; then
        anchr quorum R1.fq.gz R2.fq.gz -p 16 -o quorum.sh
    else
        anchr quorum R1.fq.gz R2.fq.gz Rs.fq.gz -p 16 -o quorum.sh
    fi

    bash quorum.sh
    echo >&2
" ::: 20 25 30 ::: 60
```
Clear intermediate files.
```bash
BASE_NAME=Nmen

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${HOME}/data/anchr/${BASE_NAME}"
# The absolute path keeps the deletion inside 2_illumina even if the cd failed.
remove_files_named "${HOME}/data/anchr/${BASE_NAME}/2_illumina" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    '*.tmp' \
    pe.renamed.fastq \
    se.renamed.fastq \
    pe.cor.sub.fa
```
* Stats of processed reads
```bash
# Nmen: collect sr_stat.sh output for the processed reads into stat1.md.
BASE_NAME=Nmen
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=2272360

# The header call and the per-group calls share one redirection, so stat1.md
# is rewritten from scratch: header first, then one sr_stat.sh call per
# existing 2_illumina/QxxLxx directory (-k keeps them in input order).
# Missing group directories are skipped with a zero exit status.
{
    bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header
    parallel -k --no-run-if-empty -j 3 "
        [ -d 2_illumina/Q{1}L{2} ] || exit 0
        bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
    " ::: 20 25 30 ::: 60
} > stat1.md

cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 383.14M | 168.6 | 342.54M | 150.7 | 10.596% | 98 | "71" | 2.27M | 2.95M | 1.30 | 0:01'27'' |
| Q25L60 | 330.58M | 145.5 | 302.59M | 133.2 | 8.466% | 98 | "71" | 2.27M | 2.77M | 1.22 | 0:01'18'' |
| Q30L60 | 278.35M | 122.5 | 259.5M | 114.2 | 6.772% | 95 | "65" | 2.27M | 2.64M | 1.16 | 0:01'03'' |
* kmergenie
```bash
# Nmen: run kmergenie on both raw read files and on the corrected Q30L60 reads.
BASE_NAME=Nmen
cd ${HOME}/data/anchr/${BASE_NAME}
mkdir -p 2_illumina/kmergenie
cd 2_illumina/kmergenie

# Each entry is "input:output-name"; all runs share the same option set and
# are executed one after another in the listed order.
for pair in ../R1.fq.gz:oriR1 ../R2.fq.gz:oriR2 ../Q30L60/pe.cor.fa:Q30L60; do
    kmergenie -l 21 -k 91 -s 10 -t 8 "${pair%%:*}" -o "${pair##*:}"
done
```
```bash
# Nmen: cut two subsets from the head of the PacBio reads and report on each.
BASE_DIR=$HOME/data/anchr/Nmen
cd ${BASE_DIR}

# Each entry is "label:line-count": the first 25000 lines of pacbio.fasta go
# to pacbio.40x.fasta and the first 50000 to pacbio.80x.fasta; `faops n50`
# is run on every subset right after it is written.
for spec in 40x:25000 80x:50000; do
    subset=3_pacbio/pacbio.${spec%%:*}.fasta
    head -n "${spec##*:}" 3_pacbio/pacbio.fasta > "${subset}"
    faops n50 -S -C "${subset}"
done
```
## Nmen: down sampling
doc/bacteria_2_3.md view on Meta::CPAN
"
done
```
## Bper: generate super-reads
```bash
# Bper: run the super-reads step for every down-sampled group, three at a time.
BASE_DIR=$HOME/data/anchr/Bper
cd ${BASE_DIR}

# Brace expansion emits fifteen group names, one per line:
# Q20L60_1000000 ... Q20L60_5000000, then the same for Q25L60 and Q30L60.
# GNU parallel substitutes each name for {}. A job stops early when its group
# directory is missing or when k_unitigs.fasta is already there; otherwise
# `anchr superreads` writes superreads.sh (-o) inside the group directory and
# that script is executed.
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'

        [ -d ${BASE_DIR}/{} ] || {
            echo ' directory not exists'
            exit
        }

        [ -e ${BASE_DIR}/{}/k_unitigs.fasta ] && {
            echo ' k_unitigs.fasta already presents'
            exit
        }

        cd ${BASE_DIR}/{}
        anchr superreads R1.fq.gz R2.fq.gz --nosr -p 8 --kmer 41,61,81 -o superreads.sh
        bash superreads.sh
    "
```
Clear intermediate files.
```bash
BASE_DIR=$HOME/data/anchr/Bper

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${BASE_DIR}"
# The absolute path keeps the deletion inside BASE_DIR even if the cd failed.
remove_files_named "${BASE_DIR}" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    readPositionsInSuperReads \
    '*.tmp' \
    pe.renamed.fastq \
    pe.cor.sub.fa
```
## Bper: create anchors
```bash
# Bper: create anchors for every down-sampled group that does not have them yet.
BASE_DIR=$HOME/data/anchr/Bper
cd ${BASE_DIR}

# Group names come from brace expansion (Q20L60_1000000 ... Q30L60_5000000),
# one per line. A group whose anchor/pe.anchor.fa already exists is left
# untouched; for any other group the anchor directory is removed first
# (presumably to discard an incomplete earlier run) and anchor.sh is run on
# the group directory with the arguments "8 false".
printf '%s\n' Q{20,25,30}L60_{1..5}000000 |
    parallel --no-run-if-empty -j 3 "
        echo '==> Group {}'
        [ -e ${BASE_DIR}/{}/anchor/pe.anchor.fa ] && exit
        rm -fr ${BASE_DIR}/{}/anchor
        bash ~/Scripts/cpan/App-Anchr/share/anchor.sh ${BASE_DIR}/{} 8 false
    "
```
## Bper: results
* Stats of super-reads
```bash
BASE_DIR=$HOME/data/anchr/Bper
cd ${BASE_DIR}
REAL_G=4086189
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header \
> ${BASE_DIR}/stat1.md
perl -e '
for my $n (
qw{
Q20L60
Q25L60
Q30L60
}
)
{
for my $i ( 1 .. 5 ) {
printf qq{%s_%d\n}, $n, ( 1000000 * $i );
doc/bacteria_2_3.md view on Meta::CPAN
| Name | N50 | Sum | # |
|:---------|--------:|-----------:|---------:|
| Genome | 2488635 | 2488635 | 1 |
| Paralogs | 5635 | 56210 | 18 |
| PacBio | 8966 | 665803465 | 110317 |
| Illumina | 101 | 1124010012 | 11128812 |
| uniq | 101 | 1120677416 | 11095816 |
| Q20L60 | 101 | 942374034 | 9521902 |
| Q25L60 | 101 | 811857109 | 8299530 |
| Q30L60 | 101 | 674398728 | 7270020 |
## Cdip: quorum
```bash
# Cdip: run `anchr quorum` in each 2_illumina/QxxLxx directory, one at a time.
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}

# {1} and {2} are filled from the two ::: lists, giving Q20L60, Q25L60 and
# Q30L60. A job stops early when R1.fq.gz is missing or pe.cor.fa already
# exists. Groups with {1} below 30 pass only R1/R2 to `anchr quorum`; the
# others also pass Rs.fq.gz. The generated quorum.sh is then executed.
parallel --no-run-if-empty -j 1 "
    cd 2_illumina/Q{1}L{2}
    echo >&2 '==> Group Q{1}L{2} <=='

    [ -e R1.fq.gz ] || {
        echo >&2 ' R1.fq.gz not exists'
        exit
    }
    [ -e pe.cor.fa ] && {
        echo >&2 ' pe.cor.fa exists'
        exit
    }

    if [[ {1} -lt '30' ]]; then
        anchr quorum R1.fq.gz R2.fq.gz -p 16 -o quorum.sh
    else
        anchr quorum R1.fq.gz R2.fq.gz Rs.fq.gz -p 16 -o quorum.sh
    fi

    bash quorum.sh
    echo >&2
" ::: 20 25 30 ::: 60
```
Clear intermediate files.
```bash
BASE_NAME=Cdip

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${HOME}/data/anchr/${BASE_NAME}"
# The absolute path keeps the deletion inside 2_illumina even if the cd failed.
remove_files_named "${HOME}/data/anchr/${BASE_NAME}/2_illumina" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    '*.tmp' \
    pe.renamed.fastq \
    se.renamed.fastq \
    pe.cor.sub.fa
```
* Stats of processed reads
```bash
# Cdip: collect sr_stat.sh output for the processed reads into stat1.md.
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=2488635

# The header call and the per-group calls share one redirection, so stat1.md
# is rewritten from scratch: header first, then one sr_stat.sh call per
# existing 2_illumina/QxxLxx directory (-k keeps them in input order).
# Missing group directories are skipped with a zero exit status.
{
    bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header
    parallel -k --no-run-if-empty -j 3 "
        [ -d 2_illumina/Q{1}L{2} ] || exit 0
        bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
    " ::: 20 25 30 ::: 60
} > stat1.md

cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 942.37M | 378.7 | 839.01M | 337.1 | 10.969% | 98 | "51" | 2.49M | 2.92M | 1.17 | 0:03'14'' |
| Q25L60 | 811.86M | 326.2 | 742.27M | 298.3 | 8.571% | 97 | "51" | 2.49M | 2.58M | 1.04 | 0:02'48'' |
| Q30L60 | 675.61M | 271.5 | 631.39M | 253.7 | 6.545% | 93 | "43" | 2.49M | 2.48M | 0.99 | 0:02'21'' |
* kmergenie
```bash
# Cdip: run kmergenie on both raw read files and on the corrected Q30L60 reads.
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}
mkdir -p 2_illumina/kmergenie
cd 2_illumina/kmergenie

# Each entry is "input:output-name"; all runs share the same option set and
# are executed one after another in the listed order.
for pair in ../R1.fq.gz:oriR1 ../R2.fq.gz:oriR2 ../Q30L60/pe.cor.fa:Q30L60; do
    kmergenie -l 21 -k 91 -s 10 -t 8 "${pair%%:*}" -o "${pair##*:}"
done
```
## Cdip: down sampling
```bash
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=2488635
for QxxLxx in $( parallel "echo 'Q{1}L{2}'" ::: 25 30 ::: 60 ); do
echo "==> ${QxxLxx}"
if [ ! -e 2_illumina/${QxxLxx}/pe.cor.fa ]; then
echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
doc/bacteria_2_3.md view on Meta::CPAN
| Name | N50 | Sum | # |
|:---------|--------:|-----------:|---------:|
| Genome | 1892775 | 1892775 | 1 |
| Paralogs | 33912 | 93531 | 10 |
| PacBio | 10022 | 1161069478 | 151564 |
| Illumina | 101 | 2144257270 | 21230270 |
| uniq | 101 | 2122919000 | 21019000 |
| 200x | 101 | 378554868 | 3748068 |
| Q20L60 | 101 | 367096899 | 3645544 |
| Q25L60 | 101 | 358221620 | 3563774 |
| Q30L60 | 101 | 348913664 | 3507509 |
## Ftul: quorum
```bash
# Ftul: run `anchr quorum` in each 2_illumina/QxxLxx directory, one at a time.
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}

# {1} and {2} are filled from the two ::: lists, giving Q20L60, Q25L60 and
# Q30L60. A job stops early when R1.fq.gz is missing or pe.cor.fa already
# exists. Groups with {1} below 30 pass only R1/R2 to `anchr quorum`; the
# others also pass Rs.fq.gz. The generated quorum.sh is then executed.
parallel --no-run-if-empty -j 1 "
    cd 2_illumina/Q{1}L{2}
    echo >&2 '==> Group Q{1}L{2} <=='

    [ -e R1.fq.gz ] || {
        echo >&2 ' R1.fq.gz not exists'
        exit
    }
    [ -e pe.cor.fa ] && {
        echo >&2 ' pe.cor.fa exists'
        exit
    }

    if [[ {1} -lt '30' ]]; then
        anchr quorum R1.fq.gz R2.fq.gz -p 16 -o quorum.sh
    else
        anchr quorum R1.fq.gz R2.fq.gz Rs.fq.gz -p 16 -o quorum.sh
    fi

    bash quorum.sh
    echo >&2
" ::: 20 25 30 ::: 60
```
Clear intermediate files.
```bash
BASE_NAME=Ftul

# remove_files_named DIR PATTERN...
#   Delete every regular file below DIR whose basename matches one of the
#   PATTERNs (find -name globs). Returns 1, deleting nothing, when DIR is not
#   a directory.
#   `-exec rm -f -- {} +` is used instead of `| xargs rm`: nothing is run when
#   there is no match (GNU xargs would call a bare `rm` and fail), and paths
#   containing whitespace or quotes are passed through intact.
remove_files_named() {
    local dir=$1 pattern
    shift
    if [ ! -d "${dir}" ]; then
        echo "remove_files_named: not a directory: ${dir}" >&2
        return 1
    fi
    for pattern in "$@"; do
        find "${dir}" -type f -name "${pattern}" -exec rm -f -- {} +
    done
}

cd "${HOME}/data/anchr/${BASE_NAME}"
# The absolute path keeps the deletion inside 2_illumina even if the cd failed.
remove_files_named "${HOME}/data/anchr/${BASE_NAME}/2_illumina" \
    quorum_mer_db.jf \
    k_u_hash_0 \
    '*.tmp' \
    pe.renamed.fastq \
    se.renamed.fastq \
    pe.cor.sub.fa
```
* Stats of processed reads
```bash
# Ftul: collect sr_stat.sh output for the processed reads into stat1.md.
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=1892775

# The header call and the per-group calls share one redirection, so stat1.md
# is rewritten from scratch: header first, then one sr_stat.sh call per
# existing 2_illumina/QxxLxx directory (-k keeps them in input order).
# Missing group directories are skipped with a zero exit status.
{
    bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 header
    parallel -k --no-run-if-empty -j 3 "
        [ -d 2_illumina/Q{1}L{2} ] || exit 0
        bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 1 2_illumina/Q{1}L{2} ${REAL_G}
    " ::: 20 25 30 ::: 60
} > stat1.md

cat stat1.md
```
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|------:|-------:|-------:|-------:|---------:|--------:|-----:|------:|------:|---------:|----------:|
| Q20L60 | 2.06G | 1087.9 | 1.96G | 1033.3 | 5.015% | 100 | "71" | 1.89M | 1.92M | 1.01 | 0:05'31'' |
| Q25L60 | 2.01G | 1061.5 | 1.92G | 1016.4 | 4.253% | 100 | "71" | 1.89M | 1.89M | 1.00 | 0:05'29'' |
| Q30L60 | 1.96G | 1034.1 | 1.89G | 998.3 | 3.465% | 99 | "71" | 1.89M | 1.86M | 0.98 | 0:05'29'' |
| Name | SumIn | CovIn | SumOut | CovOut | Discard% | AvgRead | Kmer | RealG | EstG | Est/Real | RunTime |
|:-------|--------:|------:|--------:|-------:|---------:|--------:|-----:|------:|-----:|---------:|----------:|
| Q20L60 | 367.1M | 193.9 | 348.22M | 184.0 | 5.143% | 100 | "71" | 1.89M | 1.8M | 0.95 | 0:01'15'' |
| Q25L60 | 358.22M | 189.3 | 342.63M | 181.0 | 4.353% | 100 | "71" | 1.89M | 1.8M | 0.95 | 0:01'10'' |
| Q30L60 | 349.03M | 184.4 | 336.65M | 177.9 | 3.546% | 99 | "71" | 1.89M | 1.8M | 0.95 | 0:01'10'' |
* kmergenie
```bash
# Ftul: run kmergenie on both raw read files and on the corrected Q30L60 reads.
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}
mkdir -p 2_illumina/kmergenie
cd 2_illumina/kmergenie

# Each entry is "input:output-name"; all runs share the same option set and
# are executed one after another in the listed order.
for pair in ../R1.fq.gz:oriR1 ../R2.fq.gz:oriR2 ../Q30L60/pe.cor.fa:Q30L60; do
    kmergenie -l 21 -k 91 -s 10 -t 8 "${pair%%:*}" -o "${pair##*:}"
done
```
## Ftul: down sampling
```bash
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}
REAL_G=1892775
( run in 1.172 second using v1.01-cache-2.11-cpan-99c4e6809bf )