App-Anchr
view release on metacpan or search on metacpan
doc/masurca.md view on Meta::CPAN
do
echo "==> ${d}"
if [ -e ${d}/work1/superReadSequences.fasta ];
then
continue
fi
pushd ~/data/test/rhodobacter_PE_SJ_Sanger4 > /dev/null
$HOME/share/MaSuRCA/bin/masurca sr_config.txt
bash assemble.sh
popd > /dev/null
done
```
### Rhodobacter sphaeroides with `anchr superreads`
```bash
# gzip original fastq
mkdir -p ~/data/test/rhodobacter/PEgz
gzip -c ~/data/test/rhodobacter/PE/frag_1.fastq > ~/data/test/rhodobacter/PEgz/frag_1.fq.gz
gzip -c ~/data/test/rhodobacter/PE/frag_2.fastq > ~/data/test/rhodobacter/PEgz/frag_2.fq.gz
mkdir -p ~/data/test/rhodobacter_superreads
cd ~/data/test/rhodobacter_superreads
perl ~/Scripts/sra/superreads.pl \
~/data/test/rhodobacter/PEgz/frag_1.fq.gz \
~/data/test/rhodobacter/PEgz/frag_2.fq.gz \
-s 180 -d 20
```
### ç»ææ¯è¾
```bash
cd ~/data/test/
printf "| %s | %s | %s | %s | %s | %s | %s | %s |\n" \
"Name" "N50SR" "#SR" "N50Contig" "#Contig" "N50Scaffold" "#Scaffold" "EstG" \
> stat.md
printf "|:--|--:|--:|--:|--:|--:|--:|--:|\n" >> stat.md
for d in rhodobacter_PE_SJ_Sanger4 rhodobacter_PE_SJ_Sanger rhodobacter_PE_SJ rhodobacter_PE_Sanger4 rhodobacter_PE_Sanger rhodobacter_PE rhodobacter_superreads;
do
printf "| %s | %s | %s | %s | %s | %s | %s | %s |\n" \
${d} \
$( faops n50 -H -N 50 -C ${d}/work1/superReadSequences.fasta ) \
$( faops n50 -H -N 50 -C ${d}/CA/10-gapclose/genome.ctg.fasta ) \
$( faops n50 -H -N 50 -C ${d}/CA/10-gapclose/genome.scf.fasta ) \
$( cat ${d}/environment.sh \
| perl -n -e '/ESTIMATED_GENOME_SIZE=\"(\d+)\"/ and print $1' )
done >> stat.md
cat stat.md
```
| name | N50SR | #SR | N50Contig | #Contig | N50Scaffold | #Scaffold | EstG |
|:--------------|------:|-----:|----------:|--------:|------------:|----------:|--------:|
| PE_SJ_Sanger4 | 4586 | 4187 | 205225 | 69 | 3196849 | 35 | 4602968 |
| PE_SJ_Sanger | 4586 | 4187 | 63274 | 141 | 3070846 | 28 | 4602968 |
| PE_SJ | 4586 | 4187 | 43125 | 219 | 3058404 | 59 | 4602968 |
| PE_Sanger4 | 4705 | 4042 | 125228 | 67 | 534852 | 30 | 4595684 |
| PE_Sanger | 4705 | 4042 | 19435 | 412 | 21957 | 359 | 4595684 |
| PE | 4705 | 4043 | 20826 | 407 | 34421 | 278 | 4595684 |
| superreads | 4705 | 4043 | | | | | 4595684 |
æè¶³å¤å¤ç long reads æ¯æä¸, ä¸éè¦ short jump.
# SuperReads 3.1.3
2017 å¹´ 2 æ, UMD ftp ä¸å¤äºä¸ä¸ªæ°ç¨åº
[SuperReads_RNA](ftp://ftp.genome.umd.edu/pub/MaSuRCA/beta/SuperReads_RNA-1.0.1.tar.gz), æ¯ MaSuRCA
3.2.1 çç®åç. å¾å¯è½æ¯ `StringTie` ç¨äº super-reads æ¥å¤ç RNA-seq, å¨å¾å¤äººçè¦æ±ä¸åç.
æ ¹æ®è¿ä¸ªçæ¬, æå° MaSuRCA 3.1.3 ç®å, 廿ææçä¾èµ, 廿é
å `Celera Assembler` çé¨å, åªçä¸äº
`SuperReads`, å¯ä»¥ç¨ `Linuxbrew` å®è£
.
```bash
brew install homebrew/science/jellyfish
brew install wang-q/tap/quorum@1.1.1
brew install wang-q/tap/superreads
```
# Super-reads and anchors
## E. coli: link anchors
```bash
cd ~/zlc/Ecoli/anchorAlign
for id in 0_11 10_13 11_7 12_3 13_33 14_8 15_11 16_20 17_4 18_17 19_19 1_4 20_15 21_13 22_8 23_15 24_34 25_8 26_3 27_30 28_2 29_13 2_27 30_25 31_15 32_28 33_2 34_16 35_3 36_23 37_5 38_29 39_5 3_12 40_9 41_19 4_5 5_7 6_56 7_12 8_15 9_6;
do
bash ~/Scripts/cpan/App-Anchr/share/link_anchor.sh ${id}.anchor.fasta ${id}.pac.fasta ${id};
GROUP_COUNT=$(id=${id} perl -e '@p = split q{_}, $ENV{id}; print $p[1];')
perl ~/Scripts/cpan/App-Anchr/share/ovlp_layout.pl ${id}.ovlp.tsv --range "1-${GROUP_COUNT}"
done
# Exceeded memory bound: 502169772
#poa -preserve_seqorder -read_fasta 9_2.renamed.fasta -clustal 9_2.aln -hb ~/Scripts/sra/poa-blosum80.mat
#cp 9_2.renamed.fasta myDB.pp.fasta
#
#DBrm myDB
#fasta2DB myDB myDB.pp.fasta
#DBdust myDB
#
#if [ -e myDB.las ]; then
# rm myDB.las
#fi
#HPC.daligner myDB -v -M4 -e.70 -l1000 -s1000 -mdust > job.sh
#bash job.sh
#rm job.sh
#
#LA4Falcon -o myDB.db myDB.las 1-2
#
#perl ~/Scripts/sra/las2ovlp.pl 9_2.renamed.fasta <(LAshow -o myDB.db myDB.las 1)
#
#perl ~/Scripts/sra/las2ovlp.pl 9_2.renamed.fasta 9_2.show.txt -r 9_2.replace.tsv
# 3 5 10 8 4 9 7 2 11 6 1
perl ~/Scripts/egaz/sparsemem_exact.pl \
-f 0_11.renamed.fasta -g ~/data/dna-seq/e_coli/superreads/NC_000913.fa \
--length 500 -o 0_11.chr.tsv
perl ~/Scripts/sra/ovlp_layout.pl 0_11.ovlp.tsv --range 1-11
( run in 0.653 second using v1.01-cache-2.11-cpan-0d23b851a93 )