App-Anchr
view release on metacpan or search on metacpan
doc/model_organisms.md view on Meta::CPAN
| Q35L60X40P001 | 486.28M | 40.0 | 11870 | 11.33M | 1722 | 12095 | 11.12M | 1438 | 777 | 217.61K | 284 | "31,41,51,61,71,81" | 0:05'24'' | 0:01'25'' |
| Q35L60X40P002 | 486.28M | 40.0 | 11706 | 11.43M | 1712 | 11825 | 11.09M | 1431 | 975 | 335.6K | 281 | "31,41,51,61,71,81" | 0:07'22'' | 0:01'18'' |
| Q35L60X40P003 | 486.28M | 40.0 | 11991 | 11.36M | 1667 | 12276 | 11.16M | 1414 | 786 | 192.34K | 253 | "31,41,51,61,71,81" | 0:07'32'' | 0:01'16'' |
| Q35L60X80P000 | 972.57M | 80.0 | 9921 | 11.42M | 2046 | 10046 | 11.11M | 1683 | 816 | 311.6K | 363 | "31,41,51,61,71,81" | 0:10'27'' | 0:01'23'' |
| Q35L60X80P001 | 972.57M | 80.0 | 9705 | 11.43M | 2013 | 9937 | 11.13M | 1652 | 822 | 299.37K | 361 | "31,41,51,61,71,81" | 0:10'18'' | 0:01'41'' |
| Q35L60X120P000 | 1.46G | 120.0 | 7747 | 11.45M | 2499 | 8018 | 11.03M | 2012 | 822 | 422.78K | 487 | "31,41,51,61,71,81" | 0:13'16'' | 0:01'55'' |
| Q35L60X160P000 | 1.95G | 160.0 | 6698 | 11.44M | 2786 | 6923 | 10.97M | 2220 | 812 | 470.54K | 566 | "31,41,51,61,71,81" | 0:16'02'' | 0:01'52'' |
## s288c: merge anchors with Qxx and QxxL60Xxx
```bash
BASE_NAME=s288c
cd ${HOME}/data/anchr/${BASE_NAME}

# keep_existing PATH...
# Echo, one per line and in the order given, every PATH that exists on disk.
keep_existing() {
    local candidate
    for candidate in "$@"; do
        if [ -e "${candidate}" ]; then
            echo "${candidate}"
        fi
    done
}

# merge_anchor_set OUT_DIR FASTA...
# Runs anchr contained -> anchr orient -> anchr merge -> anchr contained on
# the given FASTA files. Every intermediate file and the final
# anchor.merge.fasta are written into OUT_DIR.
merge_anchor_set() {
    local out_dir=$1
    shift

    mkdir -p ${out_dir}

    anchr contained \
        "$@" \
        --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
        -o stdout \
        | faops filter -a 1000 -l 0 stdin ${out_dir}/anchor.contained.fasta

    anchr orient ${out_dir}/anchor.contained.fasta \
        --len 1000 --idt 0.98 \
        -o ${out_dir}/anchor.orient.fasta

    anchr merge ${out_dir}/anchor.orient.fasta \
        --len 1000 --idt 0.999 \
        -o ${out_dir}/anchor.merge0.fasta

    anchr contained ${out_dir}/anchor.merge0.fasta \
        --len 1000 --idt 0.98 --proportion 0.99 --parallel 16 \
        -o stdout \
        | faops filter -a 1000 -l 0 stdin ${out_dir}/anchor.merge.fasta
}

# merge anchors with Qxx
# One merge per quality cutoff, pooling all four coverages (X40..X160).
for Q in 20 25 30 35; do
    merge_anchor_set mergeQ${Q} \
        $(keep_existing Q${Q}L60X{40,80,120,160}P{000,001,002,003,004,005}/anchor/pe.anchor.fa)
done

# merge anchors with QxxL60Xxx
# One merge per (quality cutoff, coverage) pair, X40 and X80 only.
for Q in 20 25 30 35; do
    for X in 40 80; do
        merge_anchor_set mergeQ${Q}X${X} \
            $(keep_existing Q${Q}L60X${X}P{000,001,002,003,004,005}/anchor/pe.anchor.fa)
    done
done

# quast
rm -fr 9_qa_mergeQX

# Build the input list and the comma-separated labels together so that
# they always stay in the same order.
quast_inputs=()
quast_labels=""
for merge_dir in mergeQ{20,25,30,35} mergeQ{20,25,30,35}X{40,80}; do
    quast_inputs+=(${merge_dir}/anchor.merge.fasta)
    quast_labels+="${merge_dir},"
done

quast --no-check --threads 16 \
    --eukaryote \
    -R 1_genome/genome.fa \
    "${quast_inputs[@]}" \
    1_genome/paralogs.fas \
    --label "${quast_labels}paralogs" \
    -o 9_qa_mergeQX
```
## s288c: merge anchors
```bash
BASE_NAME=s288c
cd ${HOME}/data/anchr/${BASE_NAME}

mkdir -p merge

# merge anchors
# Gather pe.anchor.fa from every Q{25,30} L60 X{40,80,120,160} P{000..005}
# run directory that exists; brace expansion keeps the Q -> X -> P order.
anchor_files=()
for fa in Q{25,30}L60X{40,80,120,160}P{000,001,002,003,004,005}/anchor/pe.anchor.fa; do
    if [ -e "${fa}" ]; then
        anchor_files+=("${fa}")
    fi
done

anchr contained \
    "${anchor_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta

anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/anchor.orient.fasta

anchr merge merge/anchor.orient.fasta \
    --len 1000 --idt 0.999 \
    -o merge/anchor.merge0.fasta

anchr contained merge/anchor.merge0.fasta \
    --len 1000 --idt 0.98 --proportion 0.99 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge1.fasta

# Reorder merge1 by the second column of `faops size`, largest value first.
faops order merge/anchor.merge1.fasta \
    <(faops size merge/anchor.merge1.fasta | sort -n -r -k2,2 | cut -f 1) \
    merge/anchor.merge.fasta

# No need for this step
#mkdir -p merge/anchor
#pushd merge/anchor
#anchr anchors \
# ../anchor.merge.fasta \
# ../../2_illumina/Q25L60/pe.cor.fa \
# -p 16 \
# -o anchors.sh
#bash anchors.sh
#popd

# merge others
# Same run directories as above, but collecting pe.others.fa.
others_files=()
for fa in Q{25,30}L60X{40,80,120,160}P{000,001,002,003,004,005}/anchor/pe.others.fa; do
    if [ -e "${fa}" ]; then
        others_files+=("${fa}")
    fi
done

anchr contained \
    "${others_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta

anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta

anchr merge merge/others.orient.fasta \
    --len 1000 --idt 0.999 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchor sort on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Remove the plotting by-products; only the PNG is kept, under merge/.
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## s288c: 3GS
```bash
BASE_NAME=s288c
REAL_G=12157105
cd ${HOME}/data/anchr/${BASE_NAME}

# Path of the Homebrew gnuplot binary handed to every canu run below:
# <brew prefix>/Cellar/<name>/<version>/bin/gnuplot, where the sed turns the
# first space of `brew list --versions gnuplot` into a slash.
GNUPLOT_BIN=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot

# Six canu assemblies: raw and trimmed reads at 20x, 40x and 80x.
for kind in raw trim; do
    for depth in 20x 40x 80x; do
        case ${kind} in
            raw)  reads=3_pacbio/pacbio.${depth}.fasta ;;
            trim) reads=3_pacbio/pacbio.${depth}.trim.fasta ;;
        esac

        canu \
            -p ${BASE_NAME} -d canu-${kind}-${depth} \
            gnuplot=${GNUPLOT_BIN} \
            genomeSize=${REAL_G} \
            -pacbio-raw ${reads}
    done
done

# quast
rm -fr 9_qa_canu

# Contig files ordered raw/trim within each depth, matching --label.
quast_inputs=()
for depth in 20x 40x 80x; do
    for kind in raw trim; do
        quast_inputs+=(canu-${kind}-${depth}/${BASE_NAME}.contigs.fasta)
    done
done

quast --no-check --threads 16 \
    --eukaryote \
    -R 1_genome/genome.fa \
    "${quast_inputs[@]}" \
    1_genome/paralogs.fas \
    --label "20x,20x.trim,40x,40x.trim,80x,80x.trim,paralogs" \
    -o 9_qa_canu

# Drop the "correction" directories located under the canu-* outputs.
find . -type d -name "correction" -path "*canu-*" | xargs rm -fr

# Statistics of the trimmed reads of each run.
for depth in 20x 40x 80x; do
    for kind in raw trim; do
        faops n50 -S -C canu-${kind}-${depth}/${BASE_NAME}.trimmedReads.fasta.gz
    done
done

# Dot plots for the raw-read assemblies only.
for depth in 20x 40x 80x; do
    minimap canu-raw-${depth}/${BASE_NAME}.contigs.fasta 1_genome/genome.fa \
        | minidot - > canu-raw-${depth}/minidot.eps
done
```
## s288c: local corrections
```bash
BASE_NAME=s288c
REAL_G=12157105
cd ${HOME}/data/anchr/${BASE_NAME}

# Start from an empty localCor working directory.
rm -fr localCor
anchr overlap2 \
    --parallel 16 \
    merge/anchor.merge.fasta \
    3_pacbio/pacbio.40x.trim.fasta \
    -d localCor \
    -b 20 --len 1000 --idt 0.85 --all

pushd localCor

# The upper bound of --range is whatever `faops n50 -H -N 0 -C` prints
# for anchor.fasta.
anchr cover \
    --range "1-$(faops n50 -H -N 0 -C anchor.fasta)" \
    --len 1000 --idt 0.85 -c 2 \
    anchorLong.ovlp.tsv \
    -o anchor.cover.json
jq "." < anchor.cover.json > environment.json

rm -fr group
anchr localcor \
    anchorLong.db \
    anchorLong.ovlp.tsv \
    --parallel 16 \
    --range $(jq -r '.TRUSTED' < environment.json) \
    --len 1000 --idt 0.85 -v

faops some -i -l 0 \
    long.fasta \
    group/overlapped.long.txt \
    independentLong.fasta

find . -type d -name "correction" | xargs rm -fr

# localCor
# Decompress every *.correctedReads.fasta.gz under group/, keep each header
# line starting with ">long" plus the line after it (grep -A 1), strip the
# "--" separators grep inserts, and recompress the result.
gzip -d -c -f $(find group -type f -name "*.correctedReads.fasta.gz") \
    | faops filter -l 0 stdin stdout \
    | grep -E '^>long' -A 1 \
    | sed '/^--$/d' \
    | faops dazz -a -l 0 stdin stdout \
    | pigz -c > localCor.fasta.gz

# run_canu OUT_DIR READ_OPTION FASTA...
# Calls canu once, passing every FASTA preceded by READ_OPTION.
run_canu() {
    local out_dir=$1 read_opt=$2
    shift 2

    local inputs=() fasta
    for fasta in "$@"; do
        inputs+=(${read_opt} "${fasta}")
    done

    canu \
        -p ${BASE_NAME} -d ${out_dir} \
        gnuplotTested=true \
        genomeSize=${REAL_G} \
        "${inputs[@]}"
}

run_canu localCor      -pacbio-corrected localCor.fasta.gz anchor.fasta
run_canu localCorRaw   -pacbio-raw       localCor.fasta.gz anchor.fasta
run_canu localCorIndep -pacbio-raw       localCor.fasta.gz anchor.fasta independentLong.fasta

popd

# quast
rm -fr 9_qa_localCor
quast --no-check --threads 16 \
    --eukaryote \
    -R 1_genome/genome.fa \
    localCor/anchor.fasta \
    localCor/localCor/${BASE_NAME}.contigs.fasta \
    localCor/localCorRaw/${BASE_NAME}.contigs.fasta \
    localCor/localCorIndep/${BASE_NAME}.contigs.fasta \
    1_genome/paralogs.fas \
    --label "anchor,localCor,localCorRaw,localCorIndep,paralogs" \
    -o 9_qa_localCor

find . -type d -name "correction" | xargs rm -fr
```
## s288c: expand anchors
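在酿酒酵母中, 有下列几组完全相同的序列, 它们都是新近发生的片段重复
(in *Saccharomyces cerevisiae*, the following groups of sequences are completely identical; all of them are recent segmental duplications):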
* I:216563-218385, VIII:537165-538987
* I:223713-224783, VIII:550350-551420
* IV:528442-530427, IV:532327-534312, IV:536212-538197
* IV:530324-531519, IV:534209-535404
* IV:5645-7725, X:738076-740156
* IV:7810-9432, X:736368-737990
* IX:9683-11043, X:9666-11026
* IV:1244112-1245373, XV:575980-577241
* VIII:212266-214124, VIII:214264-216122
* IX:11366-14953, X:11349-14936
* XII:468935-470576, XII:472587-474228, XII:482167-483808, XII:485819-487460
* XII:483798-485798, XII:487450-489450
* anchorLong
```bash
BASE_NAME=s288c
cd ${HOME}/data/anchr/${BASE_NAME}
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.merge.fasta \
3_pacbio/pacbio.40x.trim.fasta \
-d anchorLong \
-b 20 --len 1000 --idt 0.85 --all
pushd anchorLong
anchr cover \
--range "1-$(faops n50 -H -N 0 -C anchor.fasta)" \
--len 1000 --idt 0.85 -c 2 \
anchorLong.ovlp.tsv \
-o anchor.cover.json
cat anchor.cover.json | jq "." > environment.json
anchr overlap \
anchor.fasta \
--serial --len 20 --idt 0.9999 \
-o stdout \
| perl -nla -e '
BEGIN {
doc/model_organisms.md view on Meta::CPAN
```bash
BASE_NAME=s288c
cd ${HOME}/data/anchr/${BASE_NAME}

# stat_row LABEL FASTA
# Append one table row to stat3.md: LABEL followed by the fields printed by
# `faops n50 -H -S -C FASTA`. The command substitution is deliberately left
# unquoted so that each printed field fills its own %s column.
stat_row() {
    printf "| %s | %s | %s | %s |\n" \
        $(echo "$1"; faops n50 -H -S -C $2;) >> stat3.md
}

# Header and alignment rows; the first write truncates any previous file.
printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#" > stat3.md
printf "|:--|--:|--:|--:|\n" >> stat3.md

stat_row "Genome"            1_genome/genome.fa
stat_row "Paralogs"          1_genome/paralogs.fas
stat_row "anchor.merge"      merge/anchor.merge.fasta
stat_row "others.merge"      merge/others.merge.fasta
stat_row "anchorLong"        anchorLong/contig.fasta
stat_row "contigTrim"        contigTrim/contig.fasta
stat_row "canu-raw"          canu-raw-40x/${BASE_NAME}.contigs.fasta
stat_row "canu-trim"         canu-trim-40x/${BASE_NAME}.contigs.fasta
stat_row "spades.contig"     8_spades/contigs.fasta
stat_row "spades.scaffold"   8_spades/scaffolds.fasta
stat_row "platanus.contig"   8_platanus/out_contig.fa
stat_row "platanus.scaffold" 8_platanus/out_gapClosed.fa

cat stat3.md
```
| Name | N50 | Sum | # |
|:------------------|-------:|---------:|-----:|
| Genome | 924431 | 12157105 | 17 |
| Paralogs | 3851 | 1059148 | 366 |
| anchor.merge | 29017 | 11359547 | 665 |
| others.merge | 2625 | 282212 | 127 |
| anchorLong | 38821 | 11269708 | 478 |
| contigTrim | 460244 | 11555366 | 37 |
| canu-raw | 475272 | 12333950 | 41 |
| canu-trim | 475066 | 12194521 | 36 |
| spades.contig | 89836 | 11731746 | 1189 |
| spades.scaffold | 98572 | 11732702 | 1167 |
| platanus.contig | 5983 | 12437850 | 7727 |
| platanus.scaffold | 55443 | 12073445 | 4735 |
* quast
```bash
BASE_NAME=s288c
cd ${HOME}/data/anchr/${BASE_NAME}

rm -fr 9_qa_contig

# Assemblies to evaluate, listed in the same order as the --label entries.
assemblies=(
    merge/anchor.merge.fasta
    anchorLong/contig.fasta
    contigTrim/contig.fasta
    canu-trim-40x/${BASE_NAME}.contigs.fasta
    8_spades/scaffolds.fasta
    8_platanus/out_gapClosed.fa
    1_genome/paralogs.fas
)

quast --no-check --threads 16 \
    --eukaryote \
    -R 1_genome/genome.fa \
    "${assemblies[@]}" \
    --label "merge,contig,contigTrim,canu-40x,spades,platanus,paralogs" \
    -o 9_qa_contig
```
* Clear QxxLxxXxx.
```bash
BASE_NAME=s288c

# Remove the per-parameter QxxLxxXxx working directories.
# The globs are relative, so only delete once the cd has succeeded;
# otherwise rm -fr would run against whatever directory the shell is in.
if cd "${HOME}/data/anchr/${BASE_NAME}"; then
    rm -fr 2_illumina/Q{20,25,30,35}L{30,60,90,120}X*
    rm -fr Q{20,25,30,35}L{30,60,90,120}X*
fi
```
# *Drosophila melanogaster* iso-1
* Genome: [Ensembl 82](http://sep2015.archive.ensembl.org/Drosophila_melanogaster/Info/Index)
* Proportion of paralogs (> 1000 bp): 0.0661
## iso_1: download
* Reference genome
```bash
mkdir -p ~/data/anchr/iso_1/1_genome
cd ~/data/anchr/iso_1/1_genome

wget -N ftp://ftp.ensembl.org/pub/release-82/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.dna_sm.toplevel.fa.gz

# faops order receives the chromosome names one per line and writes genome.fa.
faops order Drosophila_melanogaster.BDGP6.dna_sm.toplevel.fa.gz \
    <(printf '%s\n' 2L 2R 3L 3R 4 X Y dmel_mitochondrion_genome) \
    genome.fa

# The shell is already inside 1_genome, so the destination is a bare file
# name. The previous target "1_genome/paralogs.fas" pointed at a nested
# 1_genome/1_genome directory that does not exist, so the copy failed and
# later steps never found 1_genome/paralogs.fas.
cp ~/data/anchr/paralogs/model/Results/iso_1/iso_1.multi.fas paralogs.fas
```
* Illumina
* [ERX645969](http://www.ebi.ac.uk/ena/data/view/ERX645969): ERR701706-ERR701711
* SRR306628 labels ycnbwsp instead of iso-1.
```bash
mkdir -p ~/data/anchr/iso_1/2_illumina
cd ~/data/anchr/iso_1/2_illumina
cat << EOF > sra_ftp.txt
ftp://ftp.sra.ebi.ac.uk/vol1/err/ERR701/ERR701706
ftp://ftp.sra.ebi.ac.uk/vol1/err/ERR701/ERR701707
ftp://ftp.sra.ebi.ac.uk/vol1/err/ERR701/ERR701708
ftp://ftp.sra.ebi.ac.uk/vol1/err/ERR701/ERR701709
ftp://ftp.sra.ebi.ac.uk/vol1/err/ERR701/ERR701710
doc/model_organisms.md view on Meta::CPAN
| Q30L60X80P000 | 11.01G | 80.0 | 12544 | 120.37M | 20203 | 13096 | 115.53M | 15333 | 867 | 4.84M | 4870 | "31,41,43,45,51,61,71,81" | 2:35'52'' | 0:13'13'' |
## iso_1: merge anchors
```bash
BASE_NAME=iso_1
cd ${HOME}/data/anchr/${BASE_NAME}

mkdir -p merge

# merge anchors
# Gather pe.anchor.fa from every Q{25,30} L60 X{40,80} P{000..005} run
# directory that exists; brace expansion keeps the Q -> X -> P order.
anchor_files=()
for fa in Q{25,30}L60X{40,80}P{000,001,002,003,004,005}/anchor/pe.anchor.fa; do
    if [ -e "${fa}" ]; then
        anchor_files+=("${fa}")
    fi
done

anchr contained \
    "${anchor_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta

anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/anchor.orient.fasta

anchr merge merge/anchor.orient.fasta \
    --len 1000 --idt 0.999 \
    -o merge/anchor.merge0.fasta

anchr contained merge/anchor.merge0.fasta \
    --len 1000 --idt 0.98 --proportion 0.99 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge1.fasta

# Reorder merge1 by the second column of `faops size`, largest value first.
faops order merge/anchor.merge1.fasta \
    <(faops size merge/anchor.merge1.fasta | sort -n -r -k2,2 | cut -f 1) \
    merge/anchor.merge.fasta

# merge others
# Same run directories as above, but collecting pe.others.fa.
others_files=()
for fa in Q{25,30}L60X{40,80}P{000,001,002,003,004,005}/anchor/pe.others.fa; do
    if [ -e "${fa}" ]; then
        others_files+=("${fa}")
    fi
done

anchr contained \
    "${others_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta

anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta

anchr merge merge/others.orient.fasta \
    --len 1000 --idt 0.999 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchor sort on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Remove the plotting by-products; only the PNG is kept, under merge/.
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    --eukaryote \
    --no-icarus \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## iso_1: 3GS
```bash
BASE_NAME=iso_1
REAL_G=137567477
cd ${HOME}/data/anchr/${BASE_NAME}

# Path of the Homebrew gnuplot binary handed to both canu runs:
# <brew prefix>/Cellar/<name>/<version>/bin/gnuplot, where the sed turns the
# first space of `brew list --versions gnuplot` into a slash.
GNUPLOT_BIN=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot

# Two canu assemblies at 40x: raw reads, then trimmed reads.
for kind in raw trim; do
    case ${kind} in
        raw)  reads=3_pacbio/pacbio.40x.fasta ;;
        trim) reads=3_pacbio/pacbio.40x.trim.fasta ;;
    esac

    canu \
        -p ${BASE_NAME} -d canu-${kind}-40x \
        gnuplot=${GNUPLOT_BIN} \
        genomeSize=${REAL_G} \
        -pacbio-raw ${reads}
done

# Drop the "correction" directories located under the canu-* outputs.
find . -type d -name "correction" -path "*canu-*" | xargs rm -fr

# Dot plot for each assembly.
for kind in raw trim; do
    minimap canu-${kind}-40x/${BASE_NAME}.contigs.fasta 1_genome/genome.fa \
        | minidot - > canu-${kind}-40x/minidot.eps
done

# Statistics of the trimmed reads of each run.
for kind in raw trim; do
    faops n50 -S -C canu-${kind}-40x/${BASE_NAME}.trimmedReads.fasta.gz
done
```
## iso_1: expand anchors
* anchorLong
```bash
BASE_NAME=iso_1
cd ${HOME}/data/anchr/${BASE_NAME}
rm -fr anchorLong
anchr overlap2 \
--parallel 16 \
merge/anchor.merge.fasta \
3_pacbio/pacbio.40x.trim.fasta \
-d anchorLong \
-b 50 --len 1000 --idt 0.85 --all
pushd anchorLong
doc/model_organisms.md view on Meta::CPAN
```
## iso_1: final stats
* Stats
```bash
BASE_NAME=iso_1
cd ${HOME}/data/anchr/${BASE_NAME}

# stat_row LABEL FASTA
# Append one table row to stat3.md: LABEL followed by the fields printed by
# `faops n50 -H -S -C FASTA`. The command substitution is deliberately left
# unquoted so that each printed field fills its own %s column.
stat_row() {
    printf "| %s | %s | %s | %s |\n" \
        $(echo "$1"; faops n50 -H -S -C $2;) >> stat3.md
}

# Header and alignment rows; the first write truncates any previous file.
printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#" > stat3.md
printf "|:--|--:|--:|--:|\n" >> stat3.md

stat_row "Genome"            1_genome/genome.fa
stat_row "Paralogs"          1_genome/paralogs.fas
stat_row "anchor.merge"      merge/anchor.merge.fasta
stat_row "others.merge"      merge/others.merge.fasta
stat_row "anchorLong"        anchorLong/contig.fasta
stat_row "contigTrim"        contigTrim/contig.fasta
stat_row "spades.contig"     8_spades/contigs.fasta
stat_row "spades.scaffold"   8_spades/scaffolds.fasta
stat_row "platanus.contig"   8_platanus/out_contig.fa
stat_row "platanus.scaffold" 8_platanus/out_gapClosed.fa

cat stat3.md
```
| Name | N50 | Sum | # |
|:------------------|---------:|----------:|-------:|
| Genome | 25286936 | 137567477 | 8 |
| Paralogs | 4031 | 13665900 | 4492 |
| anchor.merge | 26860 | 117041459 | 9566 |
| others.merge | 8732 | 3092289 | 1004 |
| anchor.cover | 26199 | 116199529 | 9576 |
| anchorLong | 69814 | 115806088 | 4924 |
| contigTrim | 1238480 | 123572499 | 603 |
| spades.contig | 108756 | 132705321 | 61620 |
| spades.scaffold | 142273 | 132725706 | 61182 |
| platanus.contig | 11503 | 156820565 | 359399 |
| platanus.scaffold | 146404 | 129134232 | 71416 |
* quast
```bash
BASE_NAME=iso_1
cd ${HOME}/data/anchr/${BASE_NAME}

rm -fr 9_qa_contig

# Assemblies to evaluate, listed in the same order as the --label entries.
assemblies=(
    merge/anchor.merge.fasta
    anchorLong/contig.fasta
    contigTrim/contig.fasta
    canu-raw-40x/${BASE_NAME}.contigs.fasta
    canu-trim-40x/${BASE_NAME}.contigs.fasta
    8_spades/scaffolds.fasta
    8_platanus/out_gapClosed.fa
    1_genome/paralogs.fas
)

quast --no-check --threads 16 \
    --eukaryote \
    --no-icarus \
    -R 1_genome/genome.fa \
    "${assemblies[@]}" \
    --label "merge,contig,contigTrim,canu-40x,canu-40x.trim,spades,platanus,paralogs" \
    -o 9_qa_contig
```
* Clear QxxLxxXxx.
```bash
BASE_NAME=iso_1

# Remove the per-parameter QxxLxxXxx working directories.
# The globs are relative, so only delete once the cd has succeeded;
# otherwise rm -fr would run against whatever directory the shell is in.
if cd "${HOME}/data/anchr/${BASE_NAME}"; then
    rm -fr 2_illumina/Q{20,25,30,35}L{30,60,90,120}X*
    rm -fr Q{20,25,30,35}L{30,60,90,120}X*
fi
```
# *Caenorhabditis elegans* N2
* Genome: [Ensembl 82](http://sep2015.archive.ensembl.org/Caenorhabditis_elegans/Info/Index)
* Proportion of paralogs (> 1000 bp): 0.0472
## n2: download
* Reference genome
```bash
mkdir -p ~/data/anchr/n2/1_genome
cd ~/data/anchr/n2/1_genome

wget -N ftp://ftp.ensembl.org/pub/release-82/fasta/caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.dna_sm.toplevel.fa.gz

# faops order receives the chromosome names one per line and writes genome.fa.
faops order Caenorhabditis_elegans.WBcel235.dna_sm.toplevel.fa.gz \
    <(printf '%s\n' I II III IV V X MtDNA) \
    genome.fa

# The shell is already inside 1_genome, so the destination is a bare file
# name. The previous target "1_genome/paralogs.fas" pointed at a nested
# 1_genome/1_genome directory that does not exist, so the copy failed and
# later steps never found 1_genome/paralogs.fas.
cp ~/data/anchr/paralogs/model/Results/n2/n2.multi.fas paralogs.fas
```
* Illumina
* Other SRA
* SRX770040 - [insert size](https://www.ncbi.nlm.nih.gov/sra/SRX770040[accn]) is 500-600 bp
* ERR1039478 - adaptor contamination "ACTTCCAGGGATTTATAAGCCGATGACGTCATAACATCCCTGACCCTTTA"
* DRR008443
* SRR065390
```bash
# Downloading from ena with aria2
mkdir -p ~/data/anchr/n2/2_illumina
cd ~/data/anchr/n2/2_illumina
doc/model_organisms.md view on Meta::CPAN
| Q25L60X60P000 | 6.02G | 60.0 | 11787 | 99.24M | 19543 | 12467 | 88.53M | 12969 | 4629 | 10.71M | 6574 | "31,41,51,61,71,81" | 1:25'03'' | 0:08'23'' |
| Q30L60X30P000 | 3.01G | 30.0 | 10914 | 97.81M | 22843 | 11752 | 85.35M | 13764 | 1510 | 12.46M | 9079 | "31,41,51,61,71,81" | 0:59'47'' | 0:07'06'' |
| Q30L60X30P001 | 3.01G | 30.0 | 10160 | 97.39M | 24052 | 10924 | 84.51M | 14218 | 1255 | 12.87M | 9834 | "31,41,51,61,71,81" | 0:45'09'' | 0:07'05'' |
| Q30L60X60P000 | 6.02G | 60.0 | 12462 | 99.3M | 19363 | 12943 | 88.29M | 12773 | 5901 | 11.01M | 6590 | "31,41,51,61,71,81" | 0:59'46'' | 0:08'02'' |
## n2: merge anchors
```bash
BASE_NAME=n2
cd ${HOME}/data/anchr/${BASE_NAME}

mkdir -p merge

# merge anchors
# Gather pe.anchor.fa from every Q{25,30} L60 X{30,60} P{000..005} run
# directory that exists; brace expansion keeps the Q -> X -> P order.
anchor_files=()
for fa in Q{25,30}L60X{30,60}P{000,001,002,003,004,005}/anchor/pe.anchor.fa; do
    if [ -e "${fa}" ]; then
        anchor_files+=("${fa}")
    fi
done

anchr contained \
    "${anchor_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta

anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/anchor.orient.fasta

anchr merge merge/anchor.orient.fasta \
    --len 1000 --idt 0.999 \
    -o merge/anchor.merge0.fasta

anchr contained merge/anchor.merge0.fasta \
    --len 1000 --idt 0.98 --proportion 0.99 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Same run directories as above, but collecting pe.others.fa.
others_files=()
for fa in Q{25,30}L60X{30,60}P{000,001,002,003,004,005}/anchor/pe.others.fa; do
    if [ -e "${fa}" ]; then
        others_files+=("${fa}")
    fi
done

anchr contained \
    "${others_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta

anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta

anchr merge merge/others.orient.fasta \
    --len 1000 --idt 0.999 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchor sort on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot -png out.delta -p anchor.sort --large

# mummerplot files
# Remove the plotting by-products; only the PNG is kept, under merge/.
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    --eukaryote \
    --no-icarus \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## n2: 3GS
```bash
BASE_NAME=n2
REAL_G=100286401
cd ${HOME}/data/anchr/${BASE_NAME}

# Path of the Homebrew gnuplot binary handed to every canu run below:
# <brew prefix>/Cellar/<name>/<version>/bin/gnuplot, where the sed turns the
# first space of `brew list --versions gnuplot` into a slash.
GNUPLOT_BIN=$(brew --prefix)/Cellar/$(brew list --versions gnuplot | sed 's/ /\//')/bin/gnuplot

# Four canu assemblies: raw reads at 40x/80x, then trimmed reads at 40x/80x.
for kind in raw trim; do
    for depth in 40x 80x; do
        case ${kind} in
            raw)  reads=3_pacbio/pacbio.${depth}.fasta ;;
            trim) reads=3_pacbio/pacbio.${depth}.trim.fasta ;;
        esac

        canu \
            -p ${BASE_NAME} -d canu-${kind}-${depth} \
            gnuplot=${GNUPLOT_BIN} \
            genomeSize=${REAL_G} \
            -pacbio-raw ${reads}
    done
done

# Drop the "correction" directories located under the canu-* outputs.
find . -type d -name "correction" -path "*canu-*" | xargs rm -fr

# Dot plots for the two 40x assemblies.
for kind in raw trim; do
    minimap canu-${kind}-40x/${BASE_NAME}.contigs.fasta 1_genome/genome.fa \
        | minidot - > canu-${kind}-40x/minidot.eps
done

# Statistics of the trimmed reads of each run.
for depth in 40x 80x; do
    for kind in raw trim; do
        faops n50 -S -C canu-${kind}-${depth}/${BASE_NAME}.trimmedReads.fasta.gz
    done
done
```
## n2: expand anchors
* anchorLong
doc/model_organisms.md view on Meta::CPAN
## n2: final stats
* Stats
```bash
BASE_NAME=n2
cd ${HOME}/data/anchr/${BASE_NAME}

# stat_row LABEL FASTA
# Append one table row to stat3.md: LABEL followed by the fields printed by
# `faops n50 -H -S -C FASTA`. The command substitution is deliberately left
# unquoted so that each printed field fills its own %s column.
stat_row() {
    printf "| %s | %s | %s | %s |\n" \
        $(echo "$1"; faops n50 -H -S -C $2;) >> stat3.md
}

# Header and alignment rows; the first write truncates any previous file.
printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#" > stat3.md
printf "|:--|--:|--:|--:|\n" >> stat3.md

stat_row "Genome"            1_genome/genome.fa
stat_row "Paralogs"          1_genome/paralogs.fas
stat_row "anchor.merge"      merge/anchor.merge.fasta
stat_row "others.merge"      merge/others.merge.fasta
stat_row "anchor.cover"      merge/anchor.cover.fasta
stat_row "anchorLong"        anchorLong/contig.fasta
stat_row "contigTrim"        contigTrim/contig.fasta
stat_row "spades.contig"     8_spades/contigs.fasta
stat_row "spades.scaffold"   8_spades/scaffolds.fasta
stat_row "platanus.contig"   8_platanus/out_contig.fa
stat_row "platanus.scaffold" 8_platanus/out_gapClosed.fa

cat stat3.md
```
| Name | N50 | Sum | # |
|:------------------|---------:|----------:|-------:|
| Genome | 17493829 | 100286401 | 7 |
| Paralogs | 2013 | 5313653 | 2637 |
| anchor.merge | 15525 | 90530693 | 11777 |
| others.merge | 10805 | 11087395 | 3278 |
| anchor.cover | 15486 | 90231476 | 11704 |
| anchorLong | 22963 | 89834518 | 8438 |
| contigTrim | 335060 | 95965683 | 607 |
| spades.contig | 34792 | 105621949 | 39732 |
| spades.scaffold | 39185 | 105667774 | 39154 |
| platanus.contig | 9540 | 108908253 | 143264 |
| platanus.scaffold | 28158 | 99589056 | 35182 |
* quast
```bash
BASE_NAME=n2
cd ${HOME}/data/anchr/${BASE_NAME}

rm -fr 9_qa_contig

# Assemblies to evaluate, listed in the same order as the --label entries.
assemblies=(
    merge/anchor.merge.fasta
    merge/anchor.cover.fasta
    anchorLong/contig.fasta
    contigTrim/contig.fasta
    canu-raw-40x/${BASE_NAME}.contigs.fasta
    8_spades/scaffolds.fasta
    8_platanus/out_gapClosed.fa
    1_genome/paralogs.fas
)

quast --no-check --threads 16 \
    --eukaryote \
    --no-icarus \
    -R 1_genome/genome.fa \
    "${assemblies[@]}" \
    --label "merge,cover,contig,contigTrim,canu-40x,spades,platanus,paralogs" \
    -o 9_qa_contig
```
* Clear QxxLxxXxx.
```bash
BASE_DIR=$HOME/data/anchr/n2

# Remove the per-parameter QxxLxxXxx working directories.
# The globs are relative, so only delete once the cd has succeeded;
# otherwise rm -fr would run against whatever directory the shell is in.
if cd "${BASE_DIR}"; then
    rm -fr 2_illumina/Q{20,25,30,35}L{30,60,90,120}X*
    rm -fr Q{20,25,30,35}L{30,60,90,120}X*
fi
```
# *Arabidopsis thaliana* Col-0
* Genome: [Ensembl Genomes](http://plants.ensembl.org/Arabidopsis_thaliana/Info/Index)
* Proportion of paralogs (> 1000 bp): 0.1158
## col_0: download
* Reference genome
```bash
mkdir -p ~/data/anchr/col_0/1_genome
cd ~/data/anchr/col_0/1_genome

# Name of the compressed toplevel assembly, shared by the download and
# the reordering step.
GENOME_GZ=Arabidopsis_thaliana.TAIR10.29.dna_sm.toplevel.fa.gz

wget -N ftp://ftp.ensemblgenomes.org/pub/release-29/plants/fasta/arabidopsis_thaliana/dna/${GENOME_GZ}

# faops order receives the chromosome names one per line and writes genome.fa.
faops order ${GENOME_GZ} \
    <(printf '%s\n' 1 2 3 4 5 Mt Pt) \
    genome.fa
```
* Illumina HiSeq (100 bp)
[SRX202246](https://www.ncbi.nlm.nih.gov/sra/SRX202246[accn])
```bash
# Downloading from ena with aria2
mkdir -p ~/data/anchr/col_0/2_illumina
cd ~/data/anchr/col_0/2_illumina
cat << EOF > sra_ftp.txt
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR611/SRR611086
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR616/SRR616966
EOF
aria2c -x 9 -s 3 -c -i sra_ftp.txt
doc/model_organisms.md view on Meta::CPAN
```bash
BASE_NAME=col_0
cd ${HOME}/data/anchr/${BASE_NAME}

mkdir -p merge

# merge anchors
# Gather pe.anchor.fa from every Q{25,30} L60 X{30,60} P{000..005} run
# directory that exists; brace expansion keeps the Q -> X -> P order.
anchor_files=()
for fa in Q{25,30}L60X{30,60}P{000,001,002,003,004,005}/anchor/pe.anchor.fa; do
    if [ -e "${fa}" ]; then
        anchor_files+=("${fa}")
    fi
done

anchr contained \
    "${anchor_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.contained.fasta

anchr orient merge/anchor.contained.fasta \
    --len 1000 --idt 0.98 --parallel 16 \
    -o merge/anchor.orient.fasta

anchr merge merge/anchor.orient.fasta \
    --len 1000 --idt 0.999 --parallel 16 \
    -o merge/anchor.merge0.fasta

anchr contained merge/anchor.merge0.fasta \
    --len 1000 --idt 0.98 --proportion 0.99 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/anchor.merge.fasta

# merge others
# Same run directories as above, but collecting pe.others.fa.
others_files=()
for fa in Q{25,30}L60X{30,60}P{000,001,002,003,004,005}/anchor/pe.others.fa; do
    if [ -e "${fa}" ]; then
        others_files+=("${fa}")
    fi
done

anchr contained \
    "${others_files[@]}" \
    --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.contained.fasta

anchr orient merge/others.contained.fasta \
    --len 1000 --idt 0.98 \
    -o merge/others.orient.fasta

anchr merge merge/others.orient.fasta \
    --len 1000 --idt 0.999 \
    -o stdout \
    | faops filter -a 1000 -l 0 stdin merge/others.merge.fasta

# anchor sort on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh \
    merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort

# mummerplot files
nucmer -l 200 1_genome/genome.fa merge/anchor.sort.fa
mummerplot out.delta --png --large -p anchor.sort
# Remove the plotting by-products; only the PNG is kept, under merge/.
rm *.[fr]plot out.delta *.gp
mv anchor.sort.png merge/

# minidot
minimap 1_genome/genome.fa merge/anchor.sort.fa \
    | minidot - > merge/anchor.minidot.eps

# quast
rm -fr 9_qa
quast --no-check --threads 16 \
    --eukaryote \
    --no-icarus \
    -R 1_genome/genome.fa \
    merge/anchor.merge.fasta \
    merge/others.merge.fasta \
    1_genome/paralogs.fas \
    --label "merge,others,paralogs" \
    -o 9_qa
```
## col_0: 3GS
```bash
BASE_NAME=col_0
REAL_G=119667750
cd "${HOME}/data/anchr/${BASE_NAME}"

# Resolve the Homebrew gnuplot binary once and reuse it for every canu run.
# `brew --prefix gnuplot` is used instead of assembling a Cellar path from
# `brew list --versions gnuplot`: that output has one extra space-separated
# field per installed version, which would corrupt a hand-built path.
GNUPLOT_BIN="$(brew --prefix gnuplot)/bin/gnuplot"

# Raw reads, 40x subset.
canu \
    -p ${BASE_NAME} -d canu-raw-40x \
    gnuplot="${GNUPLOT_BIN}" \
    genomeSize=${REAL_G} \
    -pacbio-raw 3_pacbio/pacbio.40x.fasta

# Raw reads, 80x subset.
canu \
    -p ${BASE_NAME} -d canu-raw-80x \
    gnuplot="${GNUPLOT_BIN}" \
    genomeSize=${REAL_G} \
    -pacbio-raw 3_pacbio/pacbio.80x.fasta

# 80x subset from the .trim.fasta input (still passed via -pacbio-raw).
canu \
    -p ${BASE_NAME} -d canu-trim-80x \
    gnuplot="${GNUPLOT_BIN}" \
    genomeSize=${REAL_G} \
    -pacbio-raw 3_pacbio/pacbio.80x.trim.fasta

# Report N50 statistics for the trimmed reads of each run.
for RUN_DIR in canu-raw-40x canu-raw-80x canu-trim-80x; do
    faops n50 -S -C ${RUN_DIR}/${BASE_NAME}.trimmedReads.fasta.gz
done

# Remove each run's correction/ directory once the statistics are printed.
for RUN_DIR in canu-raw-40x canu-raw-80x canu-trim-80x; do
    rm -fr ${RUN_DIR}/correction
done
```
## col_0: expand anchors
* anchorLong
```bash
BASE_NAME=col_0
cd ${HOME}/data/anchr/${BASE_NAME}
# Run `anchr cover` with the merged anchors (merge/anchor.merge.fasta) and the
# trimmed reads of the canu-trim-80x run as its two inputs.
# NOTE(review): the meaning of -c/-m/-b is not shown here; confirm against
# `anchr cover --help` before changing these values.
anchr cover \
--parallel 16 \
-c 2 -m 40 \
-b 50 --len 1000 --idt 0.9 \
merge/anchor.merge.fasta \
canu-trim-80x/${BASE_NAME}.trimmedReads.fasta.gz \
doc/model_organisms.md view on Meta::CPAN
```bash
BASE_NAME=col_0
cd ${HOME}/data/anchr/${BASE_NAME}

# Build stat3.md: a four-column Markdown table with one row per assembly.
# Each ENTRY is "label:fasta". The label fills the first column and the
# whitespace-separated output of `faops n50 -H -S -C` fills the rest
# (presumably N50, total length and sequence count, matching the header).
{
    printf "| %s | %s | %s | %s |\n" "Name" "N50" "Sum" "#"
    printf "|:--|--:|--:|--:|\n"

    for ENTRY in \
        "Genome:1_genome/genome.fa" \
        "Paralogs:1_genome/paralogs.fas" \
        "anchor.merge:merge/anchor.merge.fasta" \
        "others.merge:merge/others.merge.fasta" \
        "anchor.cover:merge/anchor.cover.fasta" \
        "anchorLong:anchorLong/contig.fasta" \
        "contigTrim:contigTrim/contig.fasta" \
        "canu-trim:canu-trim-80x/${BASE_NAME}.contigs.fasta" \
        "spades.contig:8_spades/contigs.fasta" \
        "spades.scaffold:8_spades/scaffolds.fasta" \
        "platanus.contig:8_platanus/out_contig.fa" \
        "platanus.scaffold:8_platanus/out_gapClosed.fa"
    do
        # The substitution is unquoted on purpose: word splitting turns the
        # label and each reported number into separate printf arguments.
        printf "| %s | %s | %s | %s |\n" \
            $(echo "${ENTRY%%:*}"; faops n50 -H -S -C "${ENTRY#*:}";)
    done
} > stat3.md

cat stat3.md
```
| Name | N50 | Sum | # |
|:------------------|---------:|----------:|-------:|
| Genome | 23459830 | 119667750 | 7 |
| Paralogs | 2007 | 16447809 | 8055 |
| anchor.merge | 28391 | 108282399 | 8601 |
| others.merge | 2939 | 2073808 | 882 |
| anchor.cover | 28398 | 107735288 | 8339 |
| anchorLong | 45080 | 107528125 | 5622 |
| contigTrim | 694603 | 108985292 | 666 |
| canu-trim | 2880862 | 119217587 | 244 |
| spades.contig | 55516 | 154715185 | 115087 |
| spades.scaffold | 67856 | 154750615 | 114703 |
| platanus.contig | 15019 | 139807772 | 106870 |
| platanus.scaffold | 192019 | 128497152 | 67429 |
* quast
```bash
BASE_NAME=col_0
cd ${HOME}/data/anchr/${BASE_NAME}

# Remove any previous report directory before writing a new one.
rm -fr 9_qa_contig

# Assemblies to evaluate, kept in the same order as the --label names below.
ASSEMBLIES=(
    merge/anchor.merge.fasta
    merge/anchor.cover.fasta
    anchorLong/contig.fasta
    contigTrim/contig.fasta
    canu-trim-80x/${BASE_NAME}.contigs.fasta
    1_genome/paralogs.fas
)

quast --no-check --threads 16 --eukaryote --no-icarus \
    -R 1_genome/genome.fa \
    "${ASSEMBLIES[@]}" \
    --label "merge,cover,contig,contigTrim,canu-trim,paralogs" \
    -o 9_qa_contig
```
* Remove the QxxLxxXxx working directories.
```bash
BASE_DIR=$HOME/data/anchr/col_0

# Both globs are relative paths, so delete only after the cd has succeeded.
# Otherwise `rm -fr` would remove matching Q..L..X* entries from whatever
# directory the shell happens to be in.
if cd "${BASE_DIR}"; then
    rm -fr 2_illumina/Q{20,25,30,35}L{30,60,90,120}X*
    rm -fr Q{20,25,30,35}L{30,60,90,120}X*
fi
```
( run in 1.405 second using v1.01-cache-2.11-cpan-39bf76dae61 )