view release on metacpan or search on metacpan
doc/bacteria_2_3.md view on Meta::CPAN
| Name | N50SR | Sum | # | N50Anchor | Sum | # | N50Others | Sum | # | RunTime |
|:-------|------:|------:|----:|----------:|------:|----:|----------:|-------:|----:|----------:|
| Q20L60 | 8637 | 4.26M | 778 | 8821 | 4.17M | 660 | 765 | 87.11K | 118 | 0:00'51'' |
| Q20L90 | 9406 | 4.26M | 725 | 9482 | 4.18M | 615 | 766 | 80.73K | 110 | 0:00'53'' |
| Q25L60 | 19847 | 4.22M | 398 | 20462 | 4.18M | 337 | 770 | 44.81K | 61 | 0:00'58'' |
| Q25L90 | 21495 | 4.22M | 378 | 21517 | 4.18M | 321 | 765 | 41.64K | 57 | 0:00'56'' |
| Q30L60 | 29285 | 4.22M | 316 | 29285 | 4.18M | 264 | 760 | 37.42K | 52 | 0:00'57'' |
| Q30L90 | 29285 | 4.22M | 314 | 29570 | 4.18M | 261 | 760 | 37.88K | 53 | 0:00'56'' |
## Sfle: merge anchors
```bash
BASE_DIR="${HOME}/data/anchr/Sfle"
# Everything below uses relative paths and the cleanup steps call `rm`,
# so stop immediately if the working directory cannot be entered.
cd "${BASE_DIR}" || exit 1

# merge anchors
# Pool the anchors of all six QxxLxx runs, then filter, orient and merge them.
mkdir -p merge
anchr contained \
    Q20L60/anchor/pe.anchor.fa \
    Q20L90/anchor/pe.anchor.fa \
    Q25L60/anchor/pe.anchor.fa \
    Q25L90/anchor/pe.anchor.fa \
    Q30L60/anchor/pe.anchor.fa \
    Q30L90/anchor/pe.anchor.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta --len 1000 --idt 0.98 -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Same three steps for the pe.others.fa files of the same runs.
anchr contained \
    Q20L60/anchor/pe.others.fa \
    Q20L90/anchor/pe.others.fa \
    Q25L60/anchor/pe.others.fa \
    Q25L90/anchor/pe.others.fa \
    Q30L60/anchor/pe.others.fa \
    Q30L90/anchor/pe.others.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta --len 1000 --idt 0.98 -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# sort on ref
# sort_on_ref.sh is given the prefix merge/anchor.sort; nucmer then aligns
# merge/anchor.sort.fa to the reference and mummerplot draws out.delta.
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Keep only the PNG; the other plot by-products in the working directory go.
rm *.[fr]plot
rm out.delta
rm *.gp
mv anchor.sort.png merge/

# quast
# Drop any earlier report so 9_qa only holds results from this run.
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## Sfle: 3GS
```bash
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}

# Run canu on the raw PacBio reads at each subsampled depth.
# gnuplot= is built from the Homebrew prefix plus the "name version" pair
# printed by `brew list --versions`, with the first space turned into "/".
for depth in 40x 80x; do
    canu \
        -p Sfle -d canu-raw-${depth} \
        gnuplot=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot \
        genomeSize=4.8m \
        -pacbio-raw 3_pacbio/pacbio.${depth}.fasta
done

# Report on the trimmed reads found in each canu output directory.
for depth in 40x 80x; do
    faops n50 -S -C canu-raw-${depth}/Sfle.trimmedReads.fasta.gz
done
```
## Sfle: expand anchors
* anchorLong
```bash
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/Sfle.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/Sfle.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
-o stdout \
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/Sfle.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd ${BASE_DIR}/contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_DIR="${HOME}/data/anchr/Sfle"
# The next command is a recursive delete with a relative path, so never
# continue from the wrong directory.
cd "${BASE_DIR}" || exit 1

# Drop any earlier report so 9_qa_contig only holds results from this run.
rm -fr 9_qa_contig

# Compare every assembly stage against the reference genome.
# --label carries one name per input FASTA, in the order the files are listed.
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/anchor.cover.fasta \
    anchorLong/contig.fasta \
    contigTrim/contig.fasta \
    canu-raw-40x/Sfle.contigs.fasta \
    canu-raw-80x/Sfle.contigs.fasta \
    1_genome/paralogs.fas \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}

# Build stat3.md: a Markdown table with one row per assembly stage.
# Each entry below is "<row label>:<FASTA path>".
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for entry in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"
    do
        # Left unquoted on purpose: the label and each word printed by faops
        # become separate printf arguments, i.e. separate table cells.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${entry%%:*}"; faops n50 -H -S -C "${entry#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|----:|
| Genome | 4607202 | 4828820 | 2 |
| Paralogs | 1377 | 543111 | 334 |
| anchor.merge | 29718 | 4177514 | 258 |
| others.merge | 1013 | 5268 | 5 |
| anchor.cover | 21445 | 4065033 | 337 |
| anchorLong | 21727 | 4064559 | 333 |
| contigTrim | 59768 | 4286051 | 140 |
* Clear QxxLxxx.
```bash
BASE_DIR=$HOME/data/anchr/Sfle
cd ${BASE_DIR}
rm -fr 2_illumina/Q{20,25,30}L*
rm -fr Q{20,25,30}L*
doc/bacteria_2_3.md view on Meta::CPAN
| Q30L60_1000000 | 179300 | 5.05M | 110 | 179300 | 5.04M | 82 | 656 | 18.77K | 28 | 0:00'53'' |
| Q30L60_2000000 | 175008 | 5.06M | 100 | 175008 | 5.04M | 77 | 710 | 16.14K | 23 | 0:01'22'' |
| Q30L60_3000000 | 155751 | 5.06M | 106 | 155751 | 5.04M | 82 | 693 | 16.62K | 24 | 0:01'46'' |
| Q30L60_4000000 | 123854 | 5.05M | 107 | 123854 | 5.04M | 84 | 710 | 16.14K | 23 | 0:02'14'' |
| Q30L60_5000000 | 99637 | 5.05M | 123 | 99637 | 5.04M | 99 | 697 | 16.64K | 24 | 0:02'38'' |
## Vpar: merge anchors
```bash
BASE_DIR="${HOME}/data/anchr/Vpar"
# Everything below uses relative paths and the cleanup steps call `rm`,
# so stop immediately if the working directory cannot be entered.
cd "${BASE_DIR}" || exit 1

# merge anchors
# Pool the anchors of the listed QxxL60_<reads> runs, then filter, orient
# and merge them.
mkdir -p merge
anchr contained \
    Q20L60_2000000/anchor/pe.anchor.fa \
    Q20L60_3000000/anchor/pe.anchor.fa \
    Q20L60_4000000/anchor/pe.anchor.fa \
    Q20L60_5000000/anchor/pe.anchor.fa \
    Q25L60_2000000/anchor/pe.anchor.fa \
    Q25L60_3000000/anchor/pe.anchor.fa \
    Q25L60_4000000/anchor/pe.anchor.fa \
    Q25L60_5000000/anchor/pe.anchor.fa \
    Q30L60_2000000/anchor/pe.anchor.fa \
    Q30L60_3000000/anchor/pe.anchor.fa \
    Q30L60_4000000/anchor/pe.anchor.fa \
    Q30L60_5000000/anchor/pe.anchor.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta --len 1000 --idt 0.98 -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Only the three *_2000000 runs contribute their pe.others.fa here.
anchr contained \
    Q20L60_2000000/anchor/pe.others.fa \
    Q25L60_2000000/anchor/pe.others.fa \
    Q30L60_2000000/anchor/pe.others.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta --len 1000 --idt 0.98 -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# sort on ref
# sort_on_ref.sh is given the prefix merge/anchor.sort; nucmer then aligns
# merge/anchor.sort.fa to the reference and mummerplot draws out.delta.
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Keep only the PNG; the other plot by-products in the working directory go.
rm *.[fr]plot
rm out.delta
rm *.gp
mv anchor.sort.png merge/

# quast
# Drop any earlier report so 9_qa only holds results from this run.
# NOTE(review): merge/others.merge.fasta is built above but not passed to
# quast in this recipe - confirm that is intended.
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,paralogs" \
    -o 9_qa
```
## Vpar: 3GS
```bash
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}

# Run canu on the raw PacBio reads at each subsampled depth.
# gnuplot= is built from the Homebrew prefix plus the "name version" pair
# printed by `brew list --versions`, with the first space turned into "/".
for depth in 40x 80x; do
    canu \
        -p Vpar -d canu-raw-${depth} \
        gnuplot=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot \
        genomeSize=5.2m \
        -pacbio-raw 3_pacbio/pacbio.${depth}.fasta
done

# Report on the trimmed reads found in each canu output directory.
for depth in 40x 80x; do
    faops n50 -S -C canu-raw-${depth}/Vpar.trimmedReads.fasta.gz
done
```
## Vpar: expand anchors
* anchorLong
```bash
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/Vpar.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/Vpar.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
-o stdout \
| perl -nla -e '
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/Vpar.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd ${BASE_DIR}/contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_DIR="${HOME}/data/anchr/Vpar"
# The next command is a recursive delete with a relative path, so never
# continue from the wrong directory.
cd "${BASE_DIR}" || exit 1

# Drop any earlier report so 9_qa_contig only holds results from this run.
rm -fr 9_qa_contig

# Compare every assembly stage against the reference genome.
# --label carries one name per input FASTA, in the order the files are listed.
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/anchor.cover.fasta \
    anchorLong/contig.fasta \
    contigTrim/contig.fasta \
    canu-raw-40x/Vpar.contigs.fasta \
    canu-raw-80x/Vpar.contigs.fasta \
    1_genome/paralogs.fas \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}

# Build stat3.md: a Markdown table with one row per assembly stage.
# Each entry below is "<row label>:<FASTA path>".
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for entry in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"
    do
        # Left unquoted on purpose: the label and each word printed by faops
        # become separate printf arguments, i.e. separate table cells.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${entry%%:*}"; faops n50 -H -S -C "${entry#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|---:|
| Genome | 3288558 | 5165770 | 2 |
| Paralogs | 3333 | 155714 | 62 |
| anchor.merge | 175008 | 5047543 | 73 |
| others.merge | 20100 | 20100 | 1 |
| anchor.cover | 175008 | 5024576 | 78 |
| anchorLong | 208183 | 5023596 | 63 |
| contigTrim | 1488730 | 5148305 | 11 |
* Clear QxxLxxx.
```bash
BASE_DIR=$HOME/data/anchr/Vpar
cd ${BASE_DIR}
rm -fr 2_illumina/Q{20,25,30}L*
rm -fr Q{20,25,30}L*
doc/bacteria_2_3.md view on Meta::CPAN
| Q25L60_4000000 | 20322 | 3.43M | 304 | 20440 | 3.38M | 273 | 14979 | 50.13K | 31 | 0:04'18'' |
| Q30L60_1000000 | 219482 | 3.41M | 83 | 219482 | 3.4M | 56 | 734 | 18.18K | 27 | 0:01'41'' |
| Q30L60_2000000 | 128845 | 3.42M | 93 | 128845 | 3.36M | 68 | 21578 | 59.28K | 25 | 0:02'42'' |
| Q30L60_3000000 | 68045 | 3.42M | 123 | 68045 | 3.37M | 97 | 15040 | 46.9K | 26 | 0:03'30'' |
| Q30L60_4000000 | 49793 | 3.42M | 146 | 49793 | 3.38M | 126 | 15040 | 42.66K | 20 | 0:03'54'' |
## Lpne: merge anchors
```bash
BASE_DIR="${HOME}/data/anchr/Lpne"
# Everything below uses relative paths and the cleanup steps call `rm`,
# so stop immediately if the working directory cannot be entered.
cd "${BASE_DIR}" || exit 1

# merge anchors
# Pool the anchors of the listed QxxL60_<reads> runs, then filter, orient
# and merge them.
mkdir -p merge
anchr contained \
    Q20L60_1000000/anchor/pe.anchor.fa \
    Q20L60_2000000/anchor/pe.anchor.fa \
    Q20L60_3000000/anchor/pe.anchor.fa \
    Q20L60_4000000/anchor/pe.anchor.fa \
    Q25L60_1000000/anchor/pe.anchor.fa \
    Q25L60_2000000/anchor/pe.anchor.fa \
    Q25L60_3000000/anchor/pe.anchor.fa \
    Q25L60_4000000/anchor/pe.anchor.fa \
    Q30L60_1000000/anchor/pe.anchor.fa \
    Q30L60_2000000/anchor/pe.anchor.fa \
    Q30L60_3000000/anchor/pe.anchor.fa \
    Q30L60_4000000/anchor/pe.anchor.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta --len 1000 --idt 0.98 -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Only the three *_2000000 runs contribute their pe.others.fa here.
anchr contained \
    Q20L60_2000000/anchor/pe.others.fa \
    Q25L60_2000000/anchor/pe.others.fa \
    Q30L60_2000000/anchor/pe.others.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta --len 1000 --idt 0.98 -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# sort on ref
# sort_on_ref.sh is given the prefix merge/anchor.sort; nucmer then aligns
# merge/anchor.sort.fa to the reference and mummerplot draws out.delta.
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Keep only the PNG; the other plot by-products in the working directory go.
rm *.[fr]plot
rm out.delta
rm *.gp
mv anchor.sort.png merge/

# quast
# Drop any earlier report so 9_qa only holds results from this run.
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## Lpne: 3GS
```bash
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}

# Run canu on the raw PacBio reads at each subsampled depth.
# gnuplot= is built from the Homebrew prefix plus the "name version" pair
# printed by `brew list --versions`, with the first space turned into "/".
for depth in 40x 80x; do
    canu \
        -p Lpne -d canu-raw-${depth} \
        gnuplot=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot \
        genomeSize=3.4m \
        -pacbio-raw 3_pacbio/pacbio.${depth}.fasta
done

# Report on the trimmed reads found in each canu output directory.
for depth in 40x 80x; do
    faops n50 -S -C canu-raw-${depth}/Lpne.trimmedReads.fasta.gz
done
```
## Lpne: expand anchors
* anchorLong
```bash
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/Lpne.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/Lpne.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
-o stdout \
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/Lpne.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd ${BASE_DIR}/contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_DIR="${HOME}/data/anchr/Lpne"
# The next command is a recursive delete with a relative path, so never
# continue from the wrong directory.
cd "${BASE_DIR}" || exit 1

# Drop any earlier report so 9_qa_contig only holds results from this run.
rm -fr 9_qa_contig

# Compare every assembly stage against the reference genome.
# --label carries one name per input FASTA, in the order the files are listed.
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/anchor.cover.fasta \
    anchorLong/contig.fasta \
    contigTrim/contig.fasta \
    canu-raw-40x/Lpne.contigs.fasta \
    canu-raw-80x/Lpne.contigs.fasta \
    1_genome/paralogs.fas \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}

# Build stat3.md: a Markdown table with one row per assembly stage.
# Each entry below is "<row label>:<FASTA path>".
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for entry in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"
    do
        # Left unquoted on purpose: the label and each word printed by faops
        # become separate printf arguments, i.e. separate table cells.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${entry%%:*}"; faops n50 -H -S -C "${entry#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|---:|
| Genome | 3397754 | 3397754 | 1 |
| Paralogs | 2793 | 100722 | 43 |
| anchor.merge | 248586 | 3426711 | 53 |
| others.merge | 43054 | 47381 | 5 |
| anchor.cover | 248586 | 3375327 | 45 |
| anchorLong | 261851 | 3373001 | 39 |
| contigTrim | 3408430 | 3418287 | 7 |
* Clear QxxLxxx.
```bash
BASE_DIR=$HOME/data/anchr/Lpne
cd ${BASE_DIR}
rm -fr 2_illumina/Q{20,25,30}L*
rm -fr Q{20,25,30}L*
doc/bacteria_2_3.md view on Meta::CPAN
| Q25L60_3000000 | 3585 | 2.03M | 824 | 3879 | 1.88M | 611 | 765 | 155.13K | 213 | 0:02'58'' |
| Q25L60_4000000 | 2270 | 2M | 1139 | 2532 | 1.72M | 757 | 786 | 283.29K | 382 | 0:03'38'' |
| Q30L60_1000000 | 20872 | 2.04M | 205 | 20872 | 2.01M | 165 | 721 | 28.47K | 40 | 0:01'57'' |
| Q30L60_2000000 | 20188 | 2.04M | 219 | 20628 | 2.01M | 178 | 762 | 31.84K | 41 | 0:02'36'' |
| Q30L60_3000000 | 16937 | 2.04M | 242 | 17918 | 2.01M | 201 | 726 | 29.25K | 41 | 0:03'19'' |
| Q30L60_4000000 | 14239 | 2.03M | 279 | 14269 | 2M | 228 | 752 | 37.14K | 51 | 0:03'35'' |
## Ngon: merge anchors
```bash
BASE_DIR="${HOME}/data/anchr/Ngon"
# Everything below uses relative paths and the cleanup steps call `rm`,
# so stop immediately if the working directory cannot be entered.
cd "${BASE_DIR}" || exit 1

# merge anchors
# Pool the anchors of the listed QxxL60_<reads> runs, then filter, orient
# and merge them.
# NOTE(review): Q20L60_4000000 is absent from this list while the Q25 and Q30
# series include their 4000000 run - confirm the omission is deliberate.
mkdir -p merge
anchr contained \
    Q20L60_1000000/anchor/pe.anchor.fa \
    Q20L60_2000000/anchor/pe.anchor.fa \
    Q20L60_3000000/anchor/pe.anchor.fa \
    Q25L60_1000000/anchor/pe.anchor.fa \
    Q25L60_2000000/anchor/pe.anchor.fa \
    Q25L60_3000000/anchor/pe.anchor.fa \
    Q25L60_4000000/anchor/pe.anchor.fa \
    Q30L60_1000000/anchor/pe.anchor.fa \
    Q30L60_2000000/anchor/pe.anchor.fa \
    Q30L60_3000000/anchor/pe.anchor.fa \
    Q30L60_4000000/anchor/pe.anchor.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta --len 1000 --idt 0.98 -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Only the three *_2000000 runs contribute their pe.others.fa here.
anchr contained \
    Q20L60_2000000/anchor/pe.others.fa \
    Q25L60_2000000/anchor/pe.others.fa \
    Q30L60_2000000/anchor/pe.others.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta --len 1000 --idt 0.98 -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# sort on ref
# sort_on_ref.sh is given the prefix merge/anchor.sort; nucmer then aligns
# merge/anchor.sort.fa to the reference and mummerplot draws out.delta.
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Keep only the PNG; the other plot by-products in the working directory go.
rm *.[fr]plot
rm out.delta
rm *.gp
mv anchor.sort.png merge/

# quast
# Drop any earlier report so 9_qa only holds results from this run.
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## Ngon: 3GS
```bash
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}

# Run canu on the raw PacBio reads at each subsampled depth.
# gnuplot= is built from the Homebrew prefix plus the "name version" pair
# printed by `brew list --versions`, with the first space turned into "/".
for depth in 40x 80x; do
    canu \
        -p Ngon -d canu-raw-${depth} \
        gnuplot=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot \
        genomeSize=2.3m \
        -pacbio-raw 3_pacbio/pacbio.${depth}.fasta
done

# Report on the trimmed reads found in each canu output directory.
for depth in 40x 80x; do
    faops n50 -S -C canu-raw-${depth}/Ngon.trimmedReads.fasta.gz
done
```
## Ngon: expand anchors
* anchorLong
```bash
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/Ngon.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/Ngon.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
-o stdout \
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/Ngon.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd ${BASE_DIR}/contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_DIR="${HOME}/data/anchr/Ngon"
# The next command is a recursive delete with a relative path, so never
# continue from the wrong directory.
cd "${BASE_DIR}" || exit 1

# Drop any earlier report so 9_qa_contig only holds results from this run.
rm -fr 9_qa_contig

# Compare every assembly stage against the reference genome.
# --label carries one name per input FASTA, in the order the files are listed.
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/anchor.cover.fasta \
    anchorLong/contig.fasta \
    contigTrim/contig.fasta \
    canu-raw-40x/Ngon.contigs.fasta \
    canu-raw-80x/Ngon.contigs.fasta \
    1_genome/paralogs.fas \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}

# Build stat3.md: a Markdown table with one row per assembly stage.
# Each entry below is "<row label>:<FASTA path>".
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for entry in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"
    do
        # Left unquoted on purpose: the label and each word printed by faops
        # become separate printf arguments, i.e. separate table cells.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${entry%%:*}"; faops n50 -H -S -C "${entry#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|----:|
| Genome | 2153922 | 2153922 | 1 |
| Paralogs | 4318 | 142093 | 53 |
| anchor.merge | 23012 | 2014543 | 152 |
| others.merge | 2347 | 2347 | 1 |
| anchor.cover | 19415 | 1876453 | 161 |
| anchorLong | 29718 | 1874102 | 124 |
| contigTrim | 465723 | 1839660 | 22 |
* Clear QxxLxxx.
```bash
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}
rm -fr 2_illumina/Q{20,25,30}L*
rm -fr Q{20,25,30}L*
doc/bacteria_2_3.md view on Meta::CPAN
| Q25L60X40P002 | 90.89M | 40.0 | 8424 | 2.16M | 401 | 8526 | 2.03M | 320 | 5808 | 138.15K | 81 | "31,41,51,61,71,81" | 0:02'47'' | 0:01'12'' |
| Q25L60X80P000 | 181.79M | 80.0 | 8256 | 2.14M | 424 | 8379 | 2.05M | 347 | 951 | 84.91K | 77 | "31,41,51,61,71,81" | 0:04'33'' | 0:01'37'' |
| Q25L60X120P000 | 272.68M | 120.0 | 7764 | 2.15M | 477 | 7854 | 2.05M | 372 | 832 | 98.28K | 105 | "31,41,51,61,71,81" | 0:05'58'' | 0:02'04'' |
| Q30L60X40P000 | 90.89M | 40.0 | 8732 | 2.12M | 392 | 8901 | 2.04M | 310 | 867 | 76.48K | 82 | "31,41,51,61,71,81" | 0:03'06'' | 0:01'17'' |
| Q30L60X40P001 | 90.89M | 40.0 | 8720 | 2.11M | 392 | 8871 | 2.06M | 315 | 706 | 55.09K | 77 | "31,41,51,61,71,81" | 0:02'44'' | 0:01'05'' |
| Q30L60X80P000 | 181.79M | 80.0 | 9268 | 2.13M | 369 | 9632 | 2.05M | 302 | 935 | 76.24K | 67 | "31,41,51,61,71,81" | 0:02'41'' | 0:01'29'' |
## Nmen: merge anchors
```bash
BASE_NAME=Nmen
# Everything below uses relative paths and the cleanup steps call `rm`,
# so stop immediately if the working directory cannot be entered.
cd "${HOME}/data/anchr/${BASE_NAME}" || exit 1

# merge anchors
# The $( ... ) is left unquoted on purpose: each path echoed by parallel
# becomes one argument of `anchr contained`. parallel walks every
# Q{25,30} x L60 x X{40,80,120,160} x P{000..006} combination and echoes only
# the anchor files that exist; -k keeps the output in input order.
mkdir -p merge
anchr contained \
    $(
        parallel -k --no-run-if-empty -j 6 "
            if [ -e Q{1}L{2}X{3}P{4}/anchor/pe.anchor.fa ]; then
                echo Q{1}L{2}X{3}P{4}/anchor/pe.anchor.fa
            fi
            " ::: 25 30 ::: 60 ::: 40 80 120 160 ::: 000 001 002 003 004 005 006
    ) \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta --len 1000 --idt 0.98 -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Same selection and steps for pe.others.fa; merge/ already exists from above.
anchr contained \
    $(
        parallel -k --no-run-if-empty -j 6 "
            if [ -e Q{1}L{2}X{3}P{4}/anchor/pe.others.fa ]; then
                echo Q{1}L{2}X{3}P{4}/anchor/pe.others.fa
            fi
            " ::: 25 30 ::: 60 ::: 40 80 120 160 ::: 000 001 002 003 004 005 006
    ) \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta --len 1000 --idt 0.98 -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchors sorted on ref
# sort_on_ref.sh is given the prefix merge/anchor.sort; nucmer then aligns
# merge/anchor.sort.fa to the reference and mummerplot draws out.delta.
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Keep only the PNG; the other plot by-products in the working directory go.
rm *.[fr]plot
rm out.delta
rm *.gp
mv anchor.sort.png merge/

# quast
# Drop any earlier report so 9_qa only holds results from this run.
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## Nmen: 3GS
```bash
BASE_DIR=$HOME/data/anchr/Nmen
cd ${BASE_DIR}

# Run canu on the raw PacBio reads at each subsampled depth.
# gnuplot= is built from the Homebrew prefix plus the "name version" pair
# printed by `brew list --versions`, with the first space turned into "/".
for depth in 40x 80x; do
    canu \
        -p Nmen -d canu-raw-${depth} \
        gnuplot=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot \
        genomeSize=2.3m \
        -pacbio-raw 3_pacbio/pacbio.${depth}.fasta
done

# Report on the trimmed reads found in each canu output directory.
for depth in 40x 80x; do
    faops n50 -S -C canu-raw-${depth}/Nmen.trimmedReads.fasta.gz
done
```
## Nmen: expand anchors
* anchorLong
```bash
BASE_NAME=Nmen
cd ${HOME}/data/anchr/${BASE_NAME}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
-o stdout \
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/${BASE_NAME}.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 --all \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_NAME=Nmen
# The next command is a recursive delete with a relative path, so never
# continue from the wrong directory.
cd "${HOME}/data/anchr/${BASE_NAME}" || exit 1

# Drop any earlier report so 9_qa_contig only holds results from this run.
rm -fr 9_qa_contig

# Compare every assembly stage against the reference genome.
# --label carries one name per input FASTA, in the order the files are listed.
# The canu contig files are named via BASE_NAME so the strain name is set in
# one place only.
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/anchor.cover.fasta \
    anchorLong/contig.fasta \
    contigTrim/contig.fasta \
    "canu-raw-40x/${BASE_NAME}.contigs.fasta" \
    "canu-raw-80x/${BASE_NAME}.contigs.fasta" \
    1_genome/paralogs.fas \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_NAME=Nmen
cd ${HOME}/data/anchr/${BASE_NAME}

# Build stat3.md: a Markdown table with one row per assembly stage.
# Each entry below is "<row label>:<FASTA path>".
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for entry in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"
    do
        # Left unquoted on purpose: the label and each word printed by faops
        # become separate printf arguments, i.e. separate table cells.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${entry%%:*}"; faops n50 -H -S -C "${entry#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|----:|
| Genome | 2272360 | 2272360 | 1 |
| Paralogs | 0 | 0 | 0 |
| anchor.merge | 9877 | 2075643 | 295 |
| others.merge | 9917 | 47940 | 11 |
| anchor.cover | 6814 | 1583544 | 313 |
| anchorLong | 7239 | 1582929 | 302 |
| contigTrim | 15255 | 1826314 | 225 |
* Clear QxxLxxXxx.
```bash
BASE_NAME=Nmen
cd ${HOME}/data/anchr/${BASE_NAME}
rm -fr 2_illumina/Q{20,25,30}L{1,60,90,120}X*
rm -fr Q{20,25,30}L{1,60,90,120}X*
doc/bacteria_2_3.md view on Meta::CPAN
| Q25L60_3000000 | 2914 | 3.53M | 1679 | 3223 | 3.14M | 1148 | 766 | 391.06K | 531 | 0:02'53'' |
| Q25L60_4000000 | 2532 | 3.58M | 1905 | 2838 | 3.1M | 1247 | 767 | 482.46K | 658 | 0:03'36'' |
| Q25L60_5000000 | 2192 | 3.61M | 2149 | 2563 | 3.01M | 1333 | 759 | 592.4K | 816 | 0:04'09'' |
| Q30L60_1000000 | 2433 | 3.16M | 1703 | 2778 | 2.7M | 1076 | 758 | 459.36K | 627 | 0:01'29'' |
| Q30L60_2000000 | 2858 | 3.36M | 1610 | 3251 | 2.98M | 1087 | 731 | 372.53K | 523 | 0:01'44'' |
| Q30L60_3000000 | 3136 | 3.46M | 1580 | 3503 | 3.1M | 1088 | 742 | 353.88K | 492 | 0:02'18'' |
## Bper: merge anchors
```bash
BASE_DIR=$HOME/data/anchr/Bper
cd ${BASE_DIR}

# merge anchors
# Inputs: Q20 and Q25 subsamples of 1-4 million, Q30 subsamples of 1-3 million
# (brace expansion yields the files in exactly that order).
mkdir -p merge
anchr contained \
    Q{20,25}L60_{1,2,3,4}000000/anchor/pe.anchor.fa \
    Q30L60_{1,2,3}000000/anchor/pe.anchor.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Only the 2-million subsample of each quality level is used here.
anchr contained \
    Q{20,25,30}L60_2000000/anchor/pe.others.fa \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# sort on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files: keep only the PNG, drop the intermediates
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
* Stats
```bash
BASE_DIR=$HOME/data/anchr/Bper
cd ${BASE_DIR}

# Build stat3.md: two header lines, then one table row per FASTA file.
# Each entry is "row label:path", with the path relative to the project dir.
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for ENTRY in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta"; do
        # The substitution is left unquoted on purpose: the label and each
        # value printed by faops become separate printf arguments.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${ENTRY%%:*}"; faops n50 -H -S -C "${ENTRY#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|----:|
| Genome | 4086189 | 4086189 | 1 |
| Paralogs | | | |
| anchor.merge | 4674 | 3478267 | 986 |
| others.merge | 1024 | 52609 | 49 |
* Clear QxxLxxx.
```bash
BASE_DIR=$HOME/data/anchr/Bper
cd ${BASE_DIR}

# Remove the per-sample QxxLxxx directories, both under 2_illumina/ and at
# the project top level.
rm -fr \
    2_illumina/Q{20,25,30}L* \
    Q{20,25,30}L*
```
# Corynebacterium diphtheriae FDAARGOS_197, 白喉杆菌
* Project
[SRP040661](https://trace.ncbi.nlm.nih.gov/Traces/sra/?study=SRP040661)
* Other name: ATCC 700971D-5; NCTC 13129;
* BioSample: [SAMN04875534](https://www.ncbi.nlm.nih.gov/biosample/SAMN04875534)
## Cdip: download
doc/bacteria_2_3.md view on Meta::CPAN
| Q30L60X80P001 | 199.09M | 80.0 | 68973 | 2.45M | 64 | 68973 | 2.45M | 57 | 844 | 5.27K | 7 | "31,41,51,61,71,81" | 0:04'02'' | 0:02'00'' |
| Q30L60X80P002 | 199.09M | 80.0 | 89791 | 2.45M | 65 | 89791 | 2.45M | 58 | 809 | 5.53K | 7 | "31,41,51,61,71,81" | 0:04'01'' | 0:02'08'' |
| Q30L60X120P000 | 298.64M | 120.0 | 60425 | 2.45M | 74 | 60427 | 2.45M | 67 | 727 | 5.37K | 7 | "31,41,51,61,71,81" | 0:05'23'' | 0:02'27'' |
| Q30L60X120P001 | 298.64M | 120.0 | 71924 | 2.45M | 62 | 71924 | 2.44M | 56 | 844 | 4.9K | 6 | "31,41,51,61,71,81" | 0:05'18'' | 0:02'36'' |
| Q30L60X160P000 | 398.18M | 160.0 | 60427 | 2.45M | 75 | 60427 | 2.45M | 68 | 727 | 5.37K | 7 | "31,41,51,61,71,81" | 0:06'26'' | 0:03'04'' |
| Q30L60X240P000 | 597.27M | 240.0 | 57594 | 2.45M | 86 | 59198 | 2.44M | 74 | 844 | 9.05K | 12 | "31,41,51,61,71,81" | 0:06'47'' | 0:03'28'' |
## Cdip: merge anchors
```bash
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}

# merge anchors
# The input list is every Q{25,30} L60 X{40..240} P{000..006} anchor file
# that actually exists, enumerated in that fixed order.
mkdir -p merge
anchr contained \
    $(
        for CANDIDATE in Q{25,30}L60X{40,80,120,160,240}P00{0..6}/anchor/pe.anchor.fa; do
            if [ -e "${CANDIDATE}" ]; then
                echo "${CANDIDATE}"
            fi
        done
    ) \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Same enumeration as above, for the pe.others.fa files.
mkdir -p merge
anchr contained \
    $(
        for CANDIDATE in Q{25,30}L60X{40,80,120,160,240}P00{0..6}/anchor/pe.others.fa; do
            if [ -e "${CANDIDATE}" ]; then
                echo "${CANDIDATE}"
            fi
        done
    ) \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchors sorted on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files: keep only the PNG, drop the intermediates
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## Cdip: 3GS
```bash
BASE_NAME=Cdip
REAL_G=2488635
cd "${HOME}/data/anchr/${BASE_NAME}"

# Locate gnuplot through Homebrew's per-formula prefix. Building the path
# from `brew list --versions gnuplot` breaks as soon as more than one version
# is installed (the output then holds several space-separated versions).
GNUPLOT_BIN="$(brew --prefix gnuplot)/bin/gnuplot"

# Assemble the 40x and the 80x PacBio subsets, each into canu-raw-<coverage>.
for COVERAGE in 40x 80x; do
    canu \
        -p "${BASE_NAME}" -d "canu-raw-${COVERAGE}" \
        gnuplot="${GNUPLOT_BIN}" \
        genomeSize="${REAL_G}" \
        -pacbio-raw "3_pacbio/pacbio.${COVERAGE}.fasta"
done

# Summarise the trimmed reads produced by each run.
faops n50 -S -C "canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz"
faops n50 -S -C "canu-raw-80x/${BASE_NAME}.trimmedReads.fasta.gz"
```
## Cdip: expand anchors
* anchorLong
```bash
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/${BASE_NAME}.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 --all \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}

# Start from a clean report directory.
rm -fr 9_qa_contig

# Assemblies to evaluate, listed in the same order as the --label names.
QUAST_INPUTS=(
    merge/anchor.merge.fasta
    merge/anchor.cover.fasta
    anchorLong/contig.fasta
    contigTrim/contig.fasta
    canu-raw-40x/${BASE_NAME}.contigs.fasta
    canu-raw-80x/${BASE_NAME}.contigs.fasta
    1_genome/paralogs.fas
)

quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    "${QUAST_INPUTS[@]}" \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}

# Build stat3.md: two header lines, then one table row per FASTA file.
# Each entry is "row label:path", with the path relative to the project dir.
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for ENTRY in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"; do
        # The substitution is left unquoted on purpose: the label and each
        # value printed by faops become separate printf arguments.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${ENTRY%%:*}"; faops n50 -H -S -C "${ENTRY#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|---:|
| Genome | 2488635 | 2488635 | 1 |
| Paralogs | 5635 | 56210 | 18 |
| anchor.merge | 115948 | 2447558 | 46 |
| others.merge | 2541 | 23728 | 10 |
| anchor.cover | 108033 | 2442070 | 45 |
| anchorLong | 125030 | 2441001 | 30 |
| contigTrim | 2488479 | 2488479 | 1 |
* Clear QxxLxxXxx.
```bash
BASE_NAME=Cdip
cd ${HOME}/data/anchr/${BASE_NAME}
rm -fr 2_illumina/Q{20,25,30}L{1,60,90,120}X*
rm -fr Q{20,25,30}L{1,60,90,120}X*
doc/bacteria_2_3.md view on Meta::CPAN
| Q30L60X40P002 | 75.71M | 40.0 | 32751 | 1.8M | 73 | 32751 | 1.8M | 72 | 855 | 855 | 1 | "31,41,51,61,71,81" | 0:01'13'' | 0:00'44'' |
| Q30L60X40P003 | 75.71M | 40.0 | 32741 | 1.8M | 75 | 32741 | 1.8M | 74 | 865 | 865 | 1 | "31,41,51,61,71,81" | 0:01'13'' | 0:00'45'' |
| Q30L60X80P000 | 151.42M | 80.0 | 32751 | 1.8M | 74 | 32751 | 1.8M | 73 | 865 | 865 | 1 | "31,41,51,61,71,81" | 0:01'49'' | 0:01'08'' |
| Q30L60X80P001 | 151.42M | 80.0 | 32751 | 1.8M | 74 | 32751 | 1.8M | 73 | 865 | 865 | 1 | "31,41,51,61,71,81" | 0:01'50'' | 0:01'12'' |
| Q30L60X120P000 | 227.13M | 120.0 | 32751 | 1.8M | 77 | 32751 | 1.8M | 75 | 865 | 1.49K | 2 | "31,41,51,61,71,81" | 0:02'26'' | 0:01'32'' |
| Q30L60X160P000 | 302.84M | 160.0 | 32404 | 1.8M | 79 | 32404 | 1.8M | 77 | 865 | 1.49K | 2 | "31,41,51,61,71,81" | 0:03'00'' | 0:01'37'' |
## Ftul: merge anchors
```bash
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}

# merge anchors
# The input list is every Q{25,30} L60 X{40..160} P{000..006} anchor file
# that actually exists, enumerated in that fixed order.
mkdir -p merge
anchr contained \
    $(
        for CANDIDATE in Q{25,30}L60X{40,80,120,160}P00{0..6}/anchor/pe.anchor.fa; do
            if [ -e "${CANDIDATE}" ]; then
                echo "${CANDIDATE}"
            fi
        done
    ) \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta
anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/anchor.orient.fasta
anchr merge merge/anchor.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Same enumeration as above, for the pe.others.fa files.
mkdir -p merge
anchr contained \
    $(
        for CANDIDATE in Q{25,30}L60X{40,80,120,160}P00{0..6}/anchor/pe.others.fa; do
            if [ -e "${CANDIDATE}" ]; then
                echo "${CANDIDATE}"
            fi
        done
    ) \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta
anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta
anchr merge merge/others.orient.fasta --len 1000 --idt 0.999 -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchors sorted on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files: keep only the PNG, drop the intermediates
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## Ftul: 3GS
```bash
BASE_NAME=Ftul
GENOME_SIZE=1.9m
cd "${HOME}/data/anchr/${BASE_NAME}"

# Locate gnuplot through Homebrew's per-formula prefix. Building the path
# from `brew list --versions gnuplot` breaks as soon as more than one version
# is installed (the output then holds several space-separated versions).
GNUPLOT_BIN="$(brew --prefix gnuplot)/bin/gnuplot"

# Assemble the 40x and the 80x PacBio subsets, each into canu-raw-<coverage>.
for COVERAGE in 40x 80x; do
    canu \
        -p "${BASE_NAME}" -d "canu-raw-${COVERAGE}" \
        gnuplot="${GNUPLOT_BIN}" \
        genomeSize="${GENOME_SIZE}" \
        -pacbio-raw "3_pacbio/pacbio.${COVERAGE}.fasta"
done

# Summarise the trimmed reads produced by each run.
faops n50 -S -C "canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz"
faops n50 -S -C "canu-raw-80x/${BASE_NAME}.trimmedReads.fasta.gz"
```
## Ftul: expand anchors
* anchorLong
```bash
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 20 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz \
-o merge/anchor.cover.fasta
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.cover.fasta \
canu-raw-40x/${BASE_NAME}.trimmedReads.fasta.gz \
-d anchorLong \
-b 20 --len 1000 --idt 0.98
anchr overlap \
merge/anchor.cover.fasta \
--serial --len 10 --idt 0.9999 \
doc/bacteria_2_3.md view on Meta::CPAN
rm -fr contigTrim
anchr overlap2 \
--parallel 16 \
anchorLong/contig.fasta \
canu-raw-40x/${BASE_NAME}.contigs.fasta \
-d contigTrim \
-b 20 --len 1000 --idt 0.98 --all
CONTIG_COUNT=$(faops n50 -H -N 0 -C contigTrim/anchor.fasta)
echo ${CONTIG_COUNT}
rm -fr contigTrim/group
anchr group \
--parallel 16 \
--keep \
contigTrim/anchorLong.db \
contigTrim/anchorLong.ovlp.tsv \
--range "1-${CONTIG_COUNT}" --len 1000 --idt 0.98 --max 20000 -c 1
pushd contigTrim
cat group/groups.txt \
| parallel --no-run-if-empty -j 8 '
echo {};
anchr orient \
--len 1000 --idt 0.98 \
group/{}.anchor.fasta \
group/{}.long.fasta \
-r group/{}.restrict.tsv \
-o group/{}.strand.fasta;
anchr overlap --len 1000 --idt 0.98 --all \
group/{}.strand.fasta \
-o stdout \
| anchr restrict \
stdin group/{}.restrict.tsv \
-o group/{}.ovlp.tsv;
anchr layout \
group/{}.ovlp.tsv \
group/{}.relation.tsv \
group/{}.strand.fasta \
-o group/{}.contig.fasta
'
popd
cat \
contigTrim/group/non_grouped.fasta \
contigTrim/group/*.contig.fasta \
> contigTrim/contig.fasta
```
* quast
```bash
BASE_NAME=Ftul
cd "${HOME}/data/anchr/${BASE_NAME}"

# Compare every assembly stage against the reference genome.
# Seven input FASTA files are given and --label carries seven names in the
# same order. The canu contig files are named after ${BASE_NAME}, so the
# paths are derived from it instead of repeating the species tag.
rm -fr 9_qa_contig
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/anchor.cover.fasta \
    anchorLong/contig.fasta \
    contigTrim/contig.fasta \
    "canu-raw-40x/${BASE_NAME}.contigs.fasta" \
    "canu-raw-80x/${BASE_NAME}.contigs.fasta" \
    1_genome/paralogs.fas \
    --label "merge,cover,contig,contigTrim,canu-40x,canu-80x,paralogs" \
    -o 9_qa_contig
```
* Stats
```bash
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}

# Build stat3.md: two header lines, then one table row per FASTA file.
# Each entry is "row label:path", with the path relative to the project dir.
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"
    for ENTRY in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta"; do
        # The substitution is left unquoted on purpose: the label and each
        # value printed by faops become separate printf arguments.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${ENTRY%%:*}"; faops n50 -H -S -C "${ENTRY#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:-------------|--------:|--------:|---:|
| Genome | 1892775 | 1892775 | 1 |
| Paralogs | 33912 | 93531 | 10 |
| anchor.merge | 32813 | 1801122 | 73 |
| others.merge | 32404 | 64274 | 3 |
| anchor.cover | 32813 | 1796007 | 71 |
| anchorLong | 35248 | 1795927 | 70 |
| contigTrim | 1027458 | 1856949 | 4 |
* Clear QxxLxxXxx.
```bash
BASE_NAME=Ftul
cd ${HOME}/data/anchr/${BASE_NAME}
rm -fr 2_illumina/Q{20,25,30}L{1,60,90,120}X*
rm -fr Q{20,25,30}L{1,60,90,120}X*