view release on metacpan or search on metacpan
doc/bacteria_2_3.md view on Meta::CPAN
Project
[SRP055199](https://trace.ncbi.nlm.nih.gov/Traces/sra/?study=SRP055199)
## lambda: download
* Reference genome
* Strain: Escherichia virus Lambda (viruses)
* Taxid: [10710](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=10710&lvl=3&lin=f&keep=1&srchmode=1&unlock)
* RefSeq assembly accession:
[GCF_000840245.1](ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/840/245/GCF_000840245.1_ViralProj14204/GCF_000840245.1_ViralProj14204_assembly_report.txt)
* Proportion of paralogs (> 1000 bp): 0.0
```bash
# Work inside a dedicated directory for the lambda reference genome.
mkdir -p ~/data/anchr/lambda/1_genome
cd ~/data/anchr/lambda/1_genome
# Download the gzipped RefSeq genomic FASTA of assembly GCF_000840245.1.
aria2c -x 9 -s 3 -c ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/840/245/GCF_000840245.1_ViralProj14204/GCF_000840245.1_ViralProj14204_genomic.fna.gz
# Write a one-line, two-column table (NC_001416.1 <TAB> 1) to replace.tsv.
# The heredoc delimiter is unquoted, so ${TAB} expands to a literal tab.
TAB=$'\t'
cat <<EOF > replace.tsv
NC_001416.1${TAB}1
EOF
# Apply replace.tsv to the downloaded FASTA and write the result to genome.fa
# (presumably renaming sequence NC_001416.1 to "1" -- confirm with `faops help`).
faops replace GCF_000840245.1_ViralProj14204_genomic.fna.gz replace.tsv genome.fa
#cp ~/data/anchr/paralogs/otherbac/Results/lambda/lambda.multi.fas paralogs.fas
doc/bacteria_2_3.md view on Meta::CPAN
fi
bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 2 ${BASE_DIR}/{}
" >> ${BASE_DIR}/stat2.md
cat stat2.md
```
| Name | SumFq | CovFq | AvgRead | Kmer | SumFa | Discard% | RealG | EstG | Est/Real | SumKU | SumSR | RunTime |
|:---------------|--------:|------:|--------:|-----------:|--------:|---------:|------:|------:|---------:|------:|------:|----------:|
| Q20L60_1000000 | 200.24M | 58.9 | 100 | "41,61,81" | 181.17M | 9.524% | 3.4M | 3.4M | 1.00 | 3.42M | 0 | 0:05'07'' |
| Q20L60_2000000 | 400.49M | 117.9 | 100 | "41,61,81" | 362.45M | 9.500% | 3.4M | 3.41M | 1.00 | 3.43M | 0 | 0:08'38'' |
| Q20L60_3000000 | 600.74M | 176.8 | 99 | "41,61,81" | 543.81M | 9.476% | 3.4M | 3.43M | 1.01 | 3.43M | 0 | 0:12'31'' |
| Q20L60_4000000 | 801M | 235.7 | 99 | "41,61,81" | 725.8M | 9.388% | 3.4M | 3.47M | 1.02 | 3.43M | 0 | 0:15'59'' |
| Q25L60_1000000 | 199.18M | 58.6 | 99 | "41,61,81" | 183.62M | 7.812% | 3.4M | 3.4M | 1.00 | 3.48M | 0 | 0:05'12'' |
| Q25L60_2000000 | 398.32M | 117.2 | 99 | "41,61,81" | 367.28M | 7.792% | 3.4M | 3.41M | 1.00 | 3.43M | 0 | 0:08'33'' |
| Q25L60_3000000 | 597.52M | 175.9 | 99 | "41,61,81" | 551.1M | 7.769% | 3.4M | 3.41M | 1.00 | 3.42M | 0 | 0:12'32'' |
| Q25L60_4000000 | 796.67M | 234.5 | 99 | "41,61,81" | 734.9M | 7.753% | 3.4M | 3.42M | 1.01 | 3.43M | 0 | 0:14'31'' |
| Q30L60_1000000 | 195.64M | 57.6 | 98 | "41,61,81" | 183.5M | 6.208% | 3.4M | 3.4M | 1.00 | 3.41M | 0 | 0:05'13'' |
| Q30L60_2000000 | 391.27M | 115.2 | 98 | "41,61,81" | 367.04M | 6.193% | 3.4M | 3.4M | 1.00 | 3.42M | 0 | 0:07'40'' |
| Q30L60_3000000 | 586.89M | 172.7 | 97 | "41,61,81" | 550.6M | 6.183% | 3.4M | 3.41M | 1.00 | 3.42M | 0 | 0:10'04'' |
doc/bacteria_2_3.md view on Meta::CPAN
| Name | N50 | Sum | # |
|:---------|--------:|-----------:|---------:|
| Genome | 2153922 | 2153922 | 1 |
| Paralogs | 4318 | 142093 | 53 |
| Illumina | 101 | 1491583958 | 14768158 |
| PacBio | 11808 | 1187845820 | 137516 |
| uniq | 101 | 1485449016 | 14707416 |
| scythe | 101 | 1460356291 | 14707416 |
| Q20L60 | 101 | 1239834586 | 12544518 |
| Q25L60 | 101 | 1062429395 | 10873960 |
| Q30L60 | 101 | 734805677 | 7775198 |
## Ngon: down sampling
```bash
BASE_DIR=$HOME/data/anchr/Ngon
cd ${BASE_DIR}
ARRAY=(
"2_illumina/Q20L60:Q20L60:4000000"
doc/bacteria_2_3.md view on Meta::CPAN
REAL_G=2488635
for QxxLxx in $( parallel "echo 'Q{1}L{2}'" ::: 25 30 ::: 60 ); do
echo "==> ${QxxLxx}"
if [ ! -e 2_illumina/${QxxLxx}/pe.cor.fa ]; then
echo "2_illumina/${QxxLxx}/pe.cor.fa not exists"
continue;
fi
for X in 40 80 120 160 240; do
printf "==> Coverage: %s\n" ${X}
rm -fr 2_illumina/${QxxLxx}X${X}*
faops split-about -l 0 \
2_illumina/${QxxLxx}/pe.cor.fa \
$(( ${REAL_G} * ${X} )) \
"2_illumina/${QxxLxx}X${X}"
MAX_SERIAL=$(
doc/bacteria_2_3.md view on Meta::CPAN
anchr kunitigs \
../2_illumina/Q{1}L{2}X{3}P{4}/pe.cor.fa \
../2_illumina/Q{1}L{2}X{3}P{4}/environment.json \
-p 8 \
--kmer 31,41,51,61,71,81 \
-o kunitigs.sh
bash kunitigs.sh
echo >&2
" ::: 25 30 ::: 60 ::: 40 80 120 160 240 ::: 000 001 002 003 004 005 006
# anchors (sampled)
# Run anchor.sh for every Q{quality}L{length}X{coverage}P{part} group, three
# groups at a time. A group that already has anchor/pe.anchor.fa is skipped;
# otherwise any stale anchor/ directory is removed before the run.
parallel --no-run-if-empty -j 3 "
    echo >&2 '==> Group Q{1}L{2}X{3}P{4}'

    [ ! -e Q{1}L{2}X{3}P{4}/anchor/pe.anchor.fa ] || exit 0

    rm -fr Q{1}L{2}X{3}P{4}/anchor
    bash ~/Scripts/cpan/App-Anchr/share/anchor.sh Q{1}L{2}X{3}P{4} 8 false

    echo >&2
    " ::: 25 30 ::: 60 ::: 40 80 120 160 240 ::: 000 001 002 003 004 005 006

# Stats of anchors
# REAL_G is expanded by this shell before parallel sees the command string.
REAL_G=2488635

# Header first, then one row per group that produced pe.anchor.fa.
# -k keeps the rows in argument order. Everything lands in stat2.md.
{
    bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 2 header

    parallel -k --no-run-if-empty -j 6 "
        [ -e Q{1}L{2}X{3}P{4}/anchor/pe.anchor.fa ] || exit 0

        bash ~/Scripts/cpan/App-Anchr/share/sr_stat.sh 2 Q{1}L{2}X{3}P{4} ${REAL_G}
        " ::: 25 30 ::: 60 ::: 40 80 120 160 240 ::: 000 001 002 003 004 005 006
} > stat2.md

cat stat2.md
```
| Name | SumCor | CovCor | N50SR | Sum | # | N50Anchor | Sum | # | N50Others | Sum | # | Kmer | RunTimeKU | RunTimeAN |
|:---------------|:--------|-------:|------:|------:|----:|----------:|------:|----:|----------:|-------:|----:|--------------------:|----------:|----------:|
| Q25L60X40P000 | 99.55M | 40.0 | 34190 | 2.46M | 140 | 34190 | 2.45M | 131 | 844 | 7.28K | 9 | "31,41,51,61,71,81" | 0:02'48'' | 0:01'25'' |
| Q25L60X40P001 | 99.55M | 40.0 | 30045 | 2.46M | 148 | 30045 | 2.45M | 132 | 844 | 13.48K | 16 | "31,41,51,61,71,81" | 0:02'49'' | 0:01'35'' |
| Q25L60X40P002 | 99.55M | 40.0 | 27638 | 2.47M | 162 | 27680 | 2.45M | 145 | 742 | 13.08K | 17 | "31,41,51,61,71,81" | 0:02'46'' | 0:01'35'' |
| Q25L60X40P003 | 99.55M | 40.0 | 33236 | 2.46M | 131 | 33236 | 2.45M | 117 | 684 | 9.59K | 14 | "31,41,51,61,71,81" | 0:02'53'' | 0:01'23'' |
| Q25L60X40P004 | 99.55M | 40.0 | 49674 | 2.45M | 99 | 49674 | 2.45M | 91 | 748 | 6.37K | 8 | "31,41,51,61,71,81" | 0:02'58'' | 0:01'31'' |
| Q25L60X40P005 | 99.55M | 40.0 | 46364 | 2.46M | 108 | 46364 | 2.45M | 97 | 727 | 7.86K | 11 | "31,41,51,61,71,81" | 0:02'55'' | 0:01'33'' |
| Q25L60X40P006 | 99.55M | 40.0 | 47421 | 2.46M | 117 | 47421 | 2.45M | 105 | 783 | 9.12K | 12 | "31,41,51,61,71,81" | 0:02'45'' | 0:01'35'' |
| Q25L60X80P000 | 199.09M | 80.0 | 19434 | 2.46M | 238 | 19434 | 2.44M | 213 | 822 | 19.67K | 25 | "31,41,51,61,71,81" | 0:03'58'' | 0:02'06'' |
| Q25L60X80P001 | 199.09M | 80.0 | 15365 | 2.46M | 246 | 15447 | 2.45M | 227 | 727 | 13.98K | 19 | "31,41,51,61,71,81" | 0:03'53'' | 0:02'11'' |
| Q25L60X80P002 | 199.09M | 80.0 | 27534 | 2.46M | 163 | 27534 | 2.45M | 151 | 707 | 8.48K | 12 | "31,41,51,61,71,81" | 0:03'57'' | 0:01'56'' |
| Q25L60X120P000 | 298.64M | 120.0 | 9278 | 2.47M | 391 | 9498 | 2.44M | 351 | 770 | 29.14K | 40 | "31,41,51,61,71,81" | 0:05'22'' | 0:02'46'' |
| Q25L60X120P001 | 298.64M | 120.0 | 13839 | 2.46M | 290 | 13936 | 2.44M | 261 | 727 | 20.61K | 29 | "31,41,51,61,71,81" | 0:05'23'' | 0:02'34'' |
| Q25L60X160P000 | 398.18M | 160.0 | 6698 | 2.47M | 550 | 6848 | 2.42M | 479 | 727 | 50.73K | 71 | "31,41,51,61,71,81" | 0:07'01'' | 0:03'12'' |
| Q25L60X240P000 | 597.27M | 240.0 | 4746 | 2.47M | 759 | 4908 | 2.38M | 627 | 778 | 95.9K | 132 | "31,41,51,61,71,81" | 0:09'39'' | 0:03'43'' |
| Q30L60X40P000 | 99.55M | 40.0 | 55218 | 2.46M | 91 | 55218 | 2.44M | 81 | 10398 | 17.13K | 10 | "31,41,51,61,71,81" | 0:03'05'' | 0:01'35'' |
| Q30L60X40P001 | 99.55M | 40.0 | 55749 | 2.45M | 93 | 55749 | 2.45M | 85 | 844 | 6.42K | 8 | "31,41,51,61,71,81" | 0:02'52'' | 0:01'35'' |
| Q30L60X40P002 | 99.55M | 40.0 | 65454 | 2.46M | 75 | 65454 | 2.44M | 62 | 1126 | 13.91K | 13 | "31,41,51,61,71,81" | 0:03'01'' | 0:01'37'' |
| Q30L60X40P003 | 99.55M | 40.0 | 97954 | 2.45M | 68 | 97954 | 2.45M | 62 | 834 | 4.77K | 6 | "31,41,51,61,71,81" | 0:02'53'' | 0:01'25'' |
| Q30L60X40P004 | 99.55M | 40.0 | 71924 | 2.45M | 76 | 71924 | 2.45M | 67 | 727 | 6.18K | 9 | "31,41,51,61,71,81" | 0:02'54'' | 0:01'26'' |
| Q30L60X40P005 | 99.55M | 40.0 | 63766 | 2.45M | 88 | 63766 | 2.44M | 76 | 727 | 8.73K | 12 | "31,41,51,61,71,81" | 0:02'37'' | 0:01'26'' |
| Q30L60X80P000 | 199.09M | 80.0 | 60425 | 2.45M | 76 | 60425 | 2.45M | 70 | 753 | 4.59K | 6 | "31,41,51,61,71,81" | 0:04'00'' | 0:02'03'' |
| Q30L60X80P001 | 199.09M | 80.0 | 68973 | 2.45M | 64 | 68973 | 2.45M | 57 | 844 | 5.27K | 7 | "31,41,51,61,71,81" | 0:04'02'' | 0:02'00'' |
| Q30L60X80P002 | 199.09M | 80.0 | 89791 | 2.45M | 65 | 89791 | 2.45M | 58 | 809 | 5.53K | 7 | "31,41,51,61,71,81" | 0:04'01'' | 0:02'08'' |
| Q30L60X120P000 | 298.64M | 120.0 | 60425 | 2.45M | 74 | 60427 | 2.45M | 67 | 727 | 5.37K | 7 | "31,41,51,61,71,81" | 0:05'23'' | 0:02'27'' |
| Q30L60X120P001 | 298.64M | 120.0 | 71924 | 2.45M | 62 | 71924 | 2.44M | 56 | 844 | 4.9K | 6 | "31,41,51,61,71,81" | 0:05'18'' | 0:02'36'' |
| Q30L60X160P000 | 398.18M | 160.0 | 60427 | 2.45M | 75 | 60427 | 2.45M | 68 | 727 | 5.37K | 7 | "31,41,51,61,71,81" | 0:06'26'' | 0:03'04'' |
| Q30L60X240P000 | 597.27M | 240.0 | 57594 | 2.45M | 86 | 59198 | 2.44M | 74 | 844 | 9.05K | 12 | "31,41,51,61,71,81" | 0:06'47'' | 0:03'28'' |
## Cdip: merge anchors
```bash
BASE_NAME=Cdip
# Quoted so a HOME containing whitespace still resolves to one path.
cd "${HOME}/data/anchr/${BASE_NAME}"

# merge anchors, then merge others
# Both sets go through the same three steps, so one loop handles them:
#   1. anchr contained - pool every existing per-group pe.<KIND>.fa
#      (the unquoted $(...) is deliberate: each echoed path must become its
#      own argument), then keep records >= 1000 bp
#   2. anchr orient    - write merge/<KIND>.orient.fasta
#   3. anchr merge     - write merge/<KIND>.merge.fasta (>= 1000 bp)
# ${KIND} is expanded by this shell; {1}..{4} are filled in by parallel.
mkdir -p merge
for KIND in anchor others; do
    anchr contained \
        $(
            parallel -k --no-run-if-empty -j 6 "
                if [ -e Q{1}L{2}X{3}P{4}/anchor/pe.${KIND}.fa ]; then
                    echo Q{1}L{2}X{3}P{4}/anchor/pe.${KIND}.fa
                fi
                " ::: 25 30 ::: 60 ::: 40 80 120 160 240 ::: 000 001 002 003 004 005 006
        ) \
        --len 1000 --idt 0.98 --proportion 0.99999 --parallel 16 \
        -o stdout \
        | faops filter -a 1000 -l 0 stdin merge/${KIND}.contained.fasta
    anchr orient merge/${KIND}.contained.fasta --len 1000 --idt 0.98 -o merge/${KIND}.orient.fasta
    anchr merge merge/${KIND}.orient.fasta --len 1000 --idt 0.999 -o stdout \
        | faops filter -a 1000 -l 0 stdin merge/${KIND}.merge.fasta
done

# anchors sorted on ref
bash ~/Scripts/cpan/App-Anchr/share/sort_on_ref.sh merge/anchor.merge.fasta 1_genome/genome.fa merge/anchor.sort
doc/bacteria_2_3.md view on Meta::CPAN
* BioSample: [SAMN04875536](https://www.ncbi.nlm.nih.gov/biosample/SAMN04875536)
## Hinf: download
* Reference genome
* Strain: Haemophilus influenzae Rd KW20 (g-proteobacteria)
* Taxid: [71421](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=71421)
* RefSeq assembly accession:
[GCF_000027305.1](ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/027/305/GCF_000027305.1_ASM2730v1/GCF_000027305.1_ASM2730v1_assembly_report.txt)
* Proportion of paralogs (> 1000 bp): 0.0324
```bash
mkdir -p ~/data/anchr/Hinf/1_genome
cd ~/data/anchr/Hinf/1_genome
aria2c -x 9 -s 3 -c ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/027/305/GCF_000027305.1_ASM2730v1/GCF_000027305.1_ASM2730v1_genomic.fna.gz
TAB=$'\t'
cat <<EOF > replace.tsv
NC_000907.1${TAB}1
doc/e_coli.md view on Meta::CPAN
| Q20L60X40P005 | 185.67M | 40.0 | 5322 | 4.59M | 1291 | 5531 | 4.41M | 1050 | 805 | 182.01K | 241 | "31,41,51,61,71,81" | 0:07'43'' | 0:00'33'' |
| Q20L60X80P000 | 371.33M | 80.0 | 2129 | 4.51M | 2683 | 2448 | 3.8M | 1737 | 783 | 702.3K | 946 | "31,41,51,61,71,81" | 0:13'15'' | 0:00'36'' |
| Q20L60X80P001 | 371.33M | 80.0 | 2153 | 4.51M | 2634 | 2477 | 3.83M | 1719 | 782 | 679.02K | 915 | "31,41,51,61,71,81" | 0:12'05'' | 0:00'47'' |
| Q20L60X80P002 | 371.33M | 80.0 | 2186 | 4.5M | 2653 | 2525 | 3.8M | 1702 | 772 | 698.13K | 951 | "31,41,51,61,71,81" | 0:12'02'' | 0:00'46'' |
| Q20L60X120P000 | 557M | 120.0 | 1468 | 4.28M | 3375 | 1822 | 3.05M | 1698 | 772 | 1.23M | 1677 | "31,41,51,61,71,81" | 0:17'10'' | 0:00'46'' |
| Q20L60X120P001 | 557M | 120.0 | 1461 | 4.28M | 3372 | 1846 | 3.09M | 1727 | 750 | 1.19M | 1645 | "31,41,51,61,71,81" | 0:16'54'' | 0:00'46'' |
| Q20L60X160P000 | 742.66M | 160.0 | 1207 | 4.05M | 3683 | 1644 | 2.49M | 1527 | 756 | 1.56M | 2156 | "31,41,51,61,71,81" | 0:22'14'' | 0:01'06'' |
| Q20L60X200P000 | 928.33M | 200.0 | 1089 | 3.87M | 3791 | 1557 | 2.13M | 1361 | 740 | 1.74M | 2430 | "31,41,51,61,71,81" | 0:28'42'' | 0:01'10'' |
| Q20L90X40P000 | 185.67M | 40.0 | 6570 | 4.61M | 1008 | 6725 | 4.49M | 879 | 856 | 117.92K | 129 | "31,41,51,61,71,81" | 0:08'32'' | 0:00'31'' |
| Q20L90X40P001 | 185.67M | 40.0 | 7208 | 4.59M | 1012 | 7356 | 4.5M | 890 | 795 | 93.56K | 122 | "31,41,51,61,71,81" | 0:09'17'' | 0:00'23'' |
| Q20L90X40P002 | 185.67M | 40.0 | 6970 | 4.59M | 1005 | 7253 | 4.5M | 890 | 769 | 84.54K | 115 | "31,41,51,61,71,81" | 0:08'24'' | 0:00'24'' |
| Q20L90X40P003 | 185.67M | 40.0 | 7017 | 4.59M | 1014 | 7125 | 4.49M | 886 | 832 | 97.74K | 128 | "31,41,51,61,71,81" | 0:08'37'' | 0:00'27'' |
| Q20L90X40P004 | 185.67M | 40.0 | 6957 | 4.59M | 1005 | 7184 | 4.49M | 888 | 800 | 92.41K | 117 | "31,41,51,61,71,81" | 0:07'13'' | 0:00'23'' |
| Q20L90X40P005 | 185.67M | 40.0 | 6736 | 4.59M | 1010 | 6980 | 4.49M | 876 | 811 | 101.03K | 134 | "31,41,51,61,71,81" | 0:06'35'' | 0:00'25'' |
| Q20L90X80P000 | 371.33M | 80.0 | 3098 | 4.59M | 2005 | 3287 | 4.24M | 1525 | 777 | 358.32K | 480 | "31,41,51,61,71,81" | 0:12'00'' | 0:00'27'' |
| Q20L90X80P001 | 371.33M | 80.0 | 3045 | 4.59M | 2019 | 3280 | 4.24M | 1540 | 783 | 355.31K | 479 | "31,41,51,61,71,81" | 0:13'23'' | 0:00'25'' |
| Q20L90X80P002 | 371.33M | 80.0 | 3172 | 4.58M | 1969 | 3342 | 4.23M | 1492 | 770 | 350.42K | 477 | "31,41,51,61,71,81" | 0:11'49'' | 0:00'27'' |
| Q20L90X120P000 | 557M | 120.0 | 2165 | 4.55M | 2630 | 2473 | 3.91M | 1768 | 773 | 639.84K | 862 | "31,41,51,61,71,81" | 0:14'55'' | 0:00'41'' |
| Q20L90X120P001 | 557M | 120.0 | 2232 | 4.53M | 2568 | 2519 | 3.9M | 1716 | 774 | 630.62K | 852 | "31,41,51,61,71,81" | 0:14'05'' | 0:01'03'' |
| Q20L90X160P000 | 742.66M | 160.0 | 1852 | 4.48M | 2899 | 2174 | 3.68M | 1811 | 772 | 804.27K | 1088 | "31,41,51,61,71,81" | 0:18'06'' | 0:01'07'' |
| Q20L90X200P000 | 928.33M | 200.0 | 1732 | 4.45M | 3035 | 2069 | 3.56M | 1826 | 763 | 886.28K | 1209 | "31,41,51,61,71,81" | 0:24'00'' | 0:00'51'' |
| Q20L120X40P000 | 185.67M | 40.0 | 8832 | 4.6M | 838 | 8954 | 4.49M | 740 | 891 | 101.73K | 98 | "31,41,51,61,71,81" | 0:06'15'' | 0:00'24'' |
| Q20L120X40P001 | 185.67M | 40.0 | 8577 | 4.58M | 836 | 8892 | 4.51M | 742 | 788 | 69.07K | 94 | "31,41,51,61,71,81" | 0:06'47'' | 0:00'26'' |
| Q20L120X40P002 | 185.67M | 40.0 | 8147 | 4.58M | 859 | 8263 | 4.5M | 756 | 795 | 75.92K | 103 | "31,41,51,61,71,81" | 0:06'57'' | 0:00'24'' |
| Q20L120X40P003 | 185.67M | 40.0 | 8864 | 4.57M | 819 | 8970 | 4.5M | 727 | 860 | 75.55K | 92 | "31,41,51,61,71,81" | 0:06'51'' | 0:00'24'' |
| Q20L120X40P004 | 185.67M | 40.0 | 8495 | 4.58M | 856 | 8659 | 4.5M | 755 | 810 | 75.85K | 101 | "31,41,51,61,71,81" | 0:06'23'' | 0:00'27'' |
| Q20L120X80P000 | 371.33M | 80.0 | 4501 | 4.59M | 1519 | 4648 | 4.38M | 1231 | 809 | 217.48K | 288 | "31,41,51,61,71,81" | 0:10'09'' | 0:00'33'' |
| Q20L120X80P001 | 371.33M | 80.0 | 4373 | 4.6M | 1530 | 4690 | 4.38M | 1240 | 804 | 219.06K | 290 | "31,41,51,61,71,81" | 0:10'30'' | 0:00'35'' |
| Q20L120X120P000 | 557M | 120.0 | 3256 | 4.58M | 1931 | 3501 | 4.24M | 1474 | 795 | 344.09K | 457 | "31,41,51,61,71,81" | 0:14'16'' | 0:00'43'' |
| Q20L120X160P000 | 742.66M | 160.0 | 2868 | 4.58M | 2131 | 3138 | 4.16M | 1576 | 795 | 417.96K | 555 | "31,41,51,61,71,81" | 0:20'19'' | 0:00'56'' |
| Q20L120X200P000 | 928.33M | 200.0 | 2730 | 4.57M | 2203 | 2999 | 4.12M | 1604 | 801 | 451.19K | 599 | "31,41,51,61,71,81" | 0:23'31'' | 0:00'49'' |
| Q25L30X40P000 | 185.67M | 40.0 | 50567 | 4.55M | 196 | 50567 | 4.53M | 179 | 754 | 13K | 17 | "31,41,51,61,71,81" | 0:07'33'' | 0:00'45'' |
| Q25L30X40P001 | 185.67M | 40.0 | 38554 | 4.55M | 215 | 40089 | 4.53M | 196 | 754 | 14.03K | 19 | "31,41,51,61,71,81" | 0:06'56'' | 0:00'29'' |
| Q25L30X40P002 | 185.67M | 40.0 | 41181 | 4.55M | 203 | 41181 | 4.53M | 184 | 812 | 14.21K | 19 | "31,41,51,61,71,81" | 0:05'51'' | 0:00'29'' |
| Q25L30X40P003 | 185.67M | 40.0 | 39467 | 4.55M | 210 | 39467 | 4.53M | 193 | 812 | 12.79K | 17 | "31,41,51,61,71,81" | 0:06'00'' | 0:00'25'' |
doc/model_organisms.md view on Meta::CPAN
```
## s288c: expand anchors
在酿酒酵母中, 有下列几组完全相同的序列, 它们都是新近发生的片段重复:
* I:216563-218385, VIII:537165-538987
* I:223713-224783, VIII:550350-551420
* IV:528442-530427, IV:532327-534312, IV:536212-538197
* IV:530324-531519, IV:534209-535404
* IV:5645-7725, X:738076-740156
* IV:7810-9432, X:736368-737990
* IX:9683-11043, X:9666-11026
* IV:1244112-1245373, XV:575980-577241
* VIII:212266-214124, VIII:214264-216122
* IX:11366-14953, X:11349-14936
* XII:468935-470576, XII:472587-474228, XII:482167-483808, XII:485819-487460,
* XII:483798-485798, XII:487450-489450
* anchorLong
doc/model_organisms.md view on Meta::CPAN
tar xvfz Dro5_29NOV2013_402.tgz --directory untar
tar xvfz Dro6_1DEC2013_403.tgz --directory untar
find . -type f -name "*.ba?.h5" | parallel -j 1 "mv {} untar"
# convert .bax.h5 to .subreads.bam
mkdir -p ~/data/anchr/iso_1/3_pacbio/bam
cd ~/data/anchr/iso_1/3_pacbio/bam
source ~/share/pitchfork/deployment/setup-env.sh
for movie in m131124_190051 m131124_221952 m131125_013854 m131125_045830 m131130_054035 m131130_091217 m131130_124231 m131130_161213 m131130_194336 m131130_231441 m131201_024805 m131201_061903 m131201_223357 m131202_020424 m131202_053545 m131202_0905...
do
if [ -e ~/data/anchr/iso_1/3_pacbio/bam/${movie}*.subreads.bam ]; then
continue
fi
bax2bam ~/data/anchr/iso_1/3_pacbio/untar/${movie}*.bax.h5
done
# convert .subreads.bam to fasta
mkdir -p ~/data/anchr/iso_1/3_pacbio/fasta
for movie in m131124_190051 m131124_221952 m131125_013854 m131125_045830 m131130_054035 m131130_091217 m131130_124231 m131130_161213 m131130_194336 m131130_231441 m131201_024805 m131201_061903 m131201_223357 m131202_020424 m131202_053545 m131202_0905...
do
if [ ! -e ~/data/anchr/iso_1/3_pacbio/bam/${movie}*.subreads.bam ]; then
continue
fi
samtools fasta \
~/data/anchr/iso_1/3_pacbio/bam/${movie}*.subreads.bam \
> ~/data/anchr/iso_1/3_pacbio/fasta/${movie}.fasta
done
doc/model_organisms.md view on Meta::CPAN
cat stat2.md
```
| Name | SumCor | CovCor | N50SR | Sum | # | N50Anchor | Sum | # | N50Others | Sum | # | Kmer | RunTimeKU | RunTimeAN |
|:--------------|-------:|-------:|------:|-------:|------:|----------:|-------:|------:|----------:|-------:|-----:|--------------------:|----------:|:----------|
| Q25L60X30P000 | 3.01G | 30.0 | 10992 | 98.21M | 22237 | 11789 | 85.96M | 13658 | 2311 | 12.25M | 8579 | "31,41,51,61,71,81" | 1:14'53'' | 0:07'05'' |
| Q25L60X30P001 | 3.01G | 30.0 | 10433 | 97.91M | 23035 | 11351 | 85.44M | 13871 | 1562 | 12.47M | 9164 | "31,41,51,61,71,81" | 1:11'02'' | 0:07'05'' |
| Q25L60X60P000 | 6.02G | 60.0 | 11787 | 99.24M | 19543 | 12467 | 88.53M | 12969 | 4629 | 10.71M | 6574 | "31,41,51,61,71,81" | 1:25'03'' | 0:08'23'' |
| Q30L60X30P000 | 3.01G | 30.0 | 10914 | 97.81M | 22843 | 11752 | 85.35M | 13764 | 1510 | 12.46M | 9079 | "31,41,51,61,71,81" | 0:59'47'' | 0:07'06'' |
| Q30L60X30P001 | 3.01G | 30.0 | 10160 | 97.39M | 24052 | 10924 | 84.51M | 14218 | 1255 | 12.87M | 9834 | "31,41,51,61,71,81" | 0:45'09'' | 0:07'05'' |
| Q30L60X60P000 | 6.02G | 60.0 | 12462 | 99.3M | 19363 | 12943 | 88.29M | 12773 | 5901 | 11.01M | 6590 | "31,41,51,61,71,81" | 0:59'46'' | 0:08'02'' |
## n2: merge anchors
```bash
BASE_NAME=n2
cd ${HOME}/data/anchr/${BASE_NAME}
# merge anchors
mkdir -p merge
doc/model_organisms.md view on Meta::CPAN
# Reference genome for col_0: download the soft-masked TAIR10 toplevel FASTA
# into its own directory.
mkdir -p ~/data/anchr/col_0/1_genome
cd ~/data/anchr/col_0/1_genome

wget -N ftp://ftp.ensemblgenomes.org/pub/release-29/plants/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.29.dna_sm.toplevel.fa.gz

# Hand `faops order` the sequence names one per line (1-5, Mt, Pt) through a
# process substitution; the result is written to genome.fa.
faops order Arabidopsis_thaliana.TAIR10.29.dna_sm.toplevel.fa.gz \
    <(printf '%s\n' 1 2 3 4 5 Mt Pt) \
    genome.fa
```
* Illumina HiSeq (100 bp)
[SRX202246](https://www.ncbi.nlm.nih.gov/sra/SRX202246[accn])
```bash
# Downloading from ena with aria2
mkdir -p ~/data/anchr/col_0/2_illumina
cd ~/data/anchr/col_0/2_illumina
cat << EOF > sra_ftp.txt
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR611/SRR611086
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR616/SRR616966
EOF
doc/model_organisms.md view on Meta::CPAN
https://www.ncbi.nlm.nih.gov/biosample/4539665
[SRX1715692](https://www.ncbi.nlm.nih.gov/sra/SRX1715692[accn])
```bash
mkdir -p ~/data/anchr/col_0/3_pacbio
cd ~/data/anchr/col_0/3_pacbio
cat <<EOF > sra_ftp.txt
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/002/SRR3405242
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/003/SRR3405243
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/004/SRR3405244
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/006/SRR3405246
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/008/SRR3405248
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/000/SRR3405250
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/002/SRR3405252
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/003/SRR3405253
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/004/SRR3405254
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/005/SRR3405255
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/006/SRR3405256
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/007/SRR3405257
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/008/SRR3405258
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/009/SRR3405259
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/005/SRR3405245
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/007/SRR3405247
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/009/SRR3405249
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/001/SRR3405251
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/000/SRR3405260
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/003/SRR3405263
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/005/SRR3405265
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/007/SRR3405267
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/009/SRR3405269
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/001/SRR3405271
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/004/SRR3405274
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/005/SRR3405275
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/006/SRR3405276
doc/model_organisms.md view on Meta::CPAN
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/006/SRR3405266
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/008/SRR3405268
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/000/SRR3405270
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/002/SRR3405272
ftp://ftp.sra.ebi.ac.uk/vol1/srr/SRR340/003/SRR3405273
EOF
aria2c -x 6 -s 3 -c -i sra_ftp.txt
cat << EOF > sra_md5.txt
be9c803f847ff1c81d153110cc699390 SRR3405242
c68a2c3b62245a697722fd3f8fda7a2d SRR3405243
7116e8a0de87b1acd016d9b284e4795c SRR3405244
51f8e5ee4565aace4e5a5cba73e3e597 SRR3405246
f339f580e86aad3a5487b5cec8ae80d4 SRR3405248
1a8246ed1f7c38801cfc603e088abb70 SRR3405250
a0ce8435a7fa2e7ddbd6ac181902f751 SRR3405252
8754f69a1c8c1f00b58b48454c1c01ad SRR3405253
367508500303325e855666133505a5af SRR3405254
d250f69fcf2975c89ceab5a4f9425b36 SRR3405255
badd9b2d23f94d1c98263d2e786742ae SRR3405256
6c5cbd3bce9459283a415d8a5c05c86e SRR3405257
32da7a364c8cbda5cf76b87f7c51b475 SRR3405258
eb3819adf483451ac670f89d1ea6b76e SRR3405259
5337862eeb0945f932de74e8f7b9ec4f SRR3405245
4545ce4666878fcbcda1e7737be1896b SRR3405247
71d61bc64e3ca9b91f08b1c6b1389f16 SRR3405249
b9a911b8eb4fbfe29dff8cf920429f18 SRR3405251
99bae070fa90d53c8f15b9cf42c634f6 SRR3405260
830e02f1f3cb66b9e085803a21ad8040 SRR3405263
86d28c63f00095ae0ff1151e7e0bf7b4 SRR3405265
3e048ad8dbb526d4a533ee1d5ec10a43 SRR3405267
1b73ed3a1124f5f025c511672c1e18d3 SRR3405269
fa07c85b9e6258abcef8bdb730ab812f SRR3405271
aeb6ab7edfa42e5e27704b7625c659c1 SRR3405274
0eb24fcc9b40f6fe0f013fe79dd7edf7 SRR3405275
f051e0065602477e0a1d13a6d0a42d3d SRR3405276
doc/model_organisms.md view on Meta::CPAN
| Name | N50 | Sum | # |
|:----------------|---------:|------------:|---------:|
| Genome | 23459830 | 119667750 | 7 |
| Paralogs | 2007 | 16447809 | 8055 |
| Illumina | 301 | 15529845059 | 53786130 |
| uniq | 301 | 15528150050 | 53779068 |
| Q20L60 | 301 | 13359936477 | 52318516 |
| Q25L60 | 301 | 11821537855 | 49650904 |
| Q30L60 | 301 | 10366980114 | 48122656 |
| PacBio | 6754 | 18768526777 | 5721958 |
| PacBio.40x | 7830 | 4906030224 | 1300000 |
| PacBio.40x.trim | 6904 | 2032710549 | 381134 |
| PacBio.80x | 7448 | 9473394614 | 2600000 |
| PacBio.80x.trim | 6975 | 3942522483 | 729527 |
## col_0: spades
```bash
BASE_NAME=col_0
cd ${HOME}/data/anchr/${BASE_NAME}
doc/pacbio_consensus.md view on Meta::CPAN
```bash
# Convert the two Sequel demo movies from .subreads.bam to FASTA, one output
# file per movie, inside the ler0_test/fasta working directory.
mkdir -p $HOME/data/pacbio/rawdata/ler0_test/fasta
cd $HOME/data/pacbio/rawdata/ler0_test/fasta

SEQ_DIR=~/data/pacbio/rawdata/public/SequelData/ArabidopsisDemoData/SequenceData
# Each entry is <cell directory>/<movie name>; the movie name alone names the
# output file.
for MOVIE in \
    1_A01_customer/m54113_160913_184949 \
    3_C01_customer/m54113_160914_092411
do
    samtools fasta "${SEQ_DIR}/${MOVIE}.subreads.bam" > "${MOVIE##*/}.fasta"
done

# Recorded output of the command below:
#N50 70763
#S 10753458447
#C 1135065
faops n50 -C -S *.fasta
```
## 其它模式生物
用这篇文章里提供的样例, doi:10.1038/sdata.2014.45.
t/24_4.ovlp.tsv view on Meta::CPAN
anchor/282/0_2680 long/5011/0_28061 2680 0.905 0 0 2680 2680 0 5864 8699 28061 contained
anchor/282/0_2680 long/4614/0_23784 2680 0.904 0 0 2680 2680 0 18900 21712 23784 contained
anchor/282/0_2680 long/14357/0_23576 1372 0.914 0 0 1372 2680 0 22147 23576 23576 overlap
anchor/282/0_2680 long/7809/0_20200 2680 0.909 0 0 2680 2680 0 9909 12648 20200 contained
anchor/282/0_2680 long/6479/0_19161 2680 0.872 0 0 2680 2680 0 12964 15616 19161 contained
anchor/282/0_2680 long/14135/0_18323 2680 0.901 0 0 2680 2680 0 10784 13531 18323 contained
anchor/282/0_2680 long/9192/0_18207 2680 0.897 0 0 2680 2680 0 3663 6411 18207 contained
anchor/282/0_2680 long/9672/0_17039 2680 0.902 0 0 2680 2680 0 271 3121 17039 contained
anchor/282/0_2680 long/6096/0_9538 2680 0.886 0 0 2680 2680 0 6157 8862 9538 contained
anchor/282/0_2680 long/6019/0_9345 2680 0.874 0 0 2680 2680 0 4944 7805 9345 contained
anchor/282/0_2680 long/6005/0_8970 2680 0.878 0 0 2680 2680 0 2443 5282 8970 contained
anchor/282/0_2680 long/2618/0_8586 1146 0.865 0 1534 2680 2680 0 0 1221 8586 overlap
anchor/306/0_2073 long/5011/0_28061 2073 0.901 0 0 2073 2073 0 8657 10858 28061 contained
anchor/306/0_2073 long/14475/0_26732 2073 0.888 0 0 2073 2073 0 160 2354 26732 contained
anchor/306/0_2073 long/4614/0_23784 2010 0.908 0 0 2010 2073 0 21673 23784 23784 overlap
anchor/306/0_2073 long/5288/0_22087 2073 0.909 0 0 2073 2073 0 1530 3659 22087 contained
anchor/306/0_2073 long/7809/0_20200 2073 0.922 0 0 2073 2073 0 12613 14731 20200 contained
anchor/306/0_2073 long/6479/0_19161 2073 0.862 0 0 2073 2073 0 15580 17617 19161 contained
anchor/306/0_2073 long/14135/0_18323 2073 0.905 0 0 2073 2073 0 13496 15644 18323 contained
anchor/306/0_2073 long/9192/0_18207 2073 0.912 0 0 2073 2073 0 6375 8501 18207 contained
anchor/306/0_2073 long/4795/0_17190 1361 0.909 0 712 2073 2073 0 0 1392 17190 overlap
t/24_4.ovlp.tsv view on Meta::CPAN
long/4795/0_17190 anchor/306/0_2073 1392 0.909 0 0 1392 17190 0 712 2073 2073 overlap
long/4795/0_17190 anchor/311/0_1888 1944 0.917 0 1358 3302 17190 0 0 1888 1888 contains
long/9672/0_17039 anchor/282/0_2680 2850 0.902 0 271 3121 17039 0 0 2680 2680 contains
long/9672/0_17039 anchor/306/0_2073 2198 0.904 0 3082 5280 17039 0 0 2073 2073 contains
long/9672/0_17039 anchor/311/0_1888 1991 0.914 0 5244 7235 17039 0 0 1888 1888 contains
long/6096/0_9538 anchor/201/0_4965 4986 0.888 0 1186 6172 9538 0 0 4965 4965 contains
long/6096/0_9538 anchor/282/0_2680 2705 0.886 0 6157 8862 9538 0 0 2680 2680 contains
long/6019/0_9345 anchor/201/0_4965 4960 0.885 0 0 4960 9345 0 209 4965 4965 overlap
long/6019/0_9345 anchor/282/0_2680 2861 0.874 0 4944 7805 9345 0 0 2680 2680 contains
long/6019/0_9345 anchor/306/0_2073 1581 0.857 0 7764 9345 9345 0 0 1507 2073 overlap
long/6005/0_8970 anchor/282/0_2680 2839 0.878 0 2443 5282 8970 0 0 2680 2680 contains
long/6005/0_8970 anchor/306/0_2073 2204 0.855 0 5245 7449 8970 0 0 2073 2073 contains
long/2618/0_8586 anchor/282/0_2680 1221 0.865 0 0 1221 8586 0 1534 2680 2680 overlap
long/2618/0_8586 anchor/306/0_2073 2188 0.910 0 1185 3373 8586 0 0 2073 2073 contains
long/2618/0_8586 anchor/311/0_1888 1962 0.911 0 3344 5306 8586 0 0 1888 1888 contains
long/2002/0_8381 anchor/306/0_2073 1834 0.912 0 0 1834 8381 0 313 2073 2073 overlap
long/2002/0_8381 anchor/311/0_1888 1991 0.899 0 1802 3793 8381 0 0 1888 1888 contains
long/10968/0_7486 anchor/306/0_2073 2154 0.899 0 1474 3628 7486 0 0 2073 2073 contains
long/10968/0_7486 anchor/311/0_1888 1948 0.906 0 3596 5544 7486 0 0 1888 1888 contains
long/4796/0_6610 anchor/306/0_2073 1399 0.883 0 0 1399 6610 0 713 2073 2073 overlap
long/4796/0_6610 anchor/311/0_1888 1903 0.911 0 1367 3270 6610 0 0 1888 1888 contains