App-Dazz

 view release on metacpan or  search on metacpan

lib/App/Dazz/Command/group.pm  view on Meta::CPAN

        [ "dir|d=s", "output directory", ],
        [ "range|r=s",    "ranges of anchors",            { required => 1 }, ],
        [ "coverage|c=i", "minimal coverage",             { default  => 2 }, ],
        [ "max=i",        "max distance",                 { default  => 5000 }, ],
        [ "len|l=i",      "minimal length of overlaps",   { default  => 1000 }, ],
        [ "idt|i=f",      "minimal identity of overlaps", { default  => 0.85 }, ],
        [ "keep",         "don't remove multi-matched reads", ],
        [ 'oa=s',         'overlaps between anchors', ],
        [ "parallel|p=i", "number of threads",            { default  => 4 }, ],
        [ "verbose|v",    "verbose mode", ],
        [ "png",          "write a png file via graphviz", ],
        { show_defaults => 1, }
    );
}


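# A common error in third-generation reads: within the output of a single ZMW, the adapter
# region is sequenced with many errors, so the adapter is not removed cleanly and the resulting
# subreads contain several copies of the same genomic fragment, separated by adapter sequences.
#
# By default `dazz group` removes such third-generation reads. The `--keep` option retains them,
# which is appropriate for already-assembled third-generation sequences.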
#

lib/App/Dazz/Command/group.pm  view on Meta::CPAN

            $cmd .= " | faops filter -l 0 stdin stdout";
            $cmd .= " > " . $out_dir->child("$basename.long.fasta")->stringify;

            system $cmd;
        }

        $cc_serial++;
    }
    printf STDERR "CC count %d\n", scalar(@ccs);

    if ( $opt->{png} ) {
        App::Dazz::Common::g2gv0( $graph, $fn_dazz . ".png" );
    }
}

1;

lib/App/Dazz/Command/layout.pm  view on Meta::CPAN

    return 'layout anchors within a group';
}

# Command-line option specification for `dazz layout`.
#
# Returns a list whose entries are array references of the form
# [ option spec, description, optional hashref of extras such as a default ],
# followed by a final hash reference of global settings (show_defaults).
sub opt_spec {
    return (
        [ 'outfile|o=s' => 'output filename' ],
        [ 'border=i'    => 'length of borders in anchors', { default => 500 } ],
        [ 'max=i'       => 'max distance',                 { default => 5000 } ],
        [ 'pa=s'        => 'prefix of anchors',            { default => 'anchor' } ],
        [ 'oa=s'        => 'overlaps between anchors' ],
        [ 'png'         => 'write a png file via graphviz' ],
        { show_defaults => 1 },
    );
}

# One-line usage synopsis for the layout command: the three positional
# arguments are shown after the options placeholder.
sub usage_desc {
    my $synopsis = 'dazz layout [options] <strand.fasta> <.ovlp.tsv> <.relation.tsv>';
    return $synopsis;
}

sub description {
    my $desc;

lib/App/Dazz/Command/layout.pm  view on Meta::CPAN

                    = AlignDB::IntSpan->new->add_pair( $beg, $end );
            }
            elsif ( $is_anchor{ $info->{g_id} } and !$is_anchor{ $info->{f_id} } ) {
                my ( $beg, $end ) = App::Dazz::Common::beg_end( $info->{f_B}, $info->{f_E}, );
                $links_of->{ $info->{f_id} }{ $info->{g_id} }
                    = AlignDB::IntSpan->new->add_pair( $beg, $end );
            }
        }
        close $in_fh;
    }
    if ( $opt->{png} ) {
        App::Dazz::Common::g2gv( $graph, $args->[1] . ".linker.png" );
    }

    #----------------------------#
    # Graph of anchors
    #----------------------------#
    my $anchor_graph = Graph->new( directed => 1 );
    {
        my @nodes = $graph->vertices;
        my @linkers = grep { !$is_anchor{$_} } @nodes;

lib/App/Dazz/Command/layout.pm  view on Meta::CPAN

            }
        }

        # add anchors missed in $anchor_graph, i.e., not correctly linked to long.fasta
        for my $id ( keys %is_anchor ) {
            if ( !$anchor_graph->has_vertex($id) ) {
                $anchor_graph->add_vertex($id);
            }
        }

        if ( $opt->{png} ) {
            App::Dazz::Common::g2gv( $anchor_graph, $args->[1] . ".png" );
        }
        App::Dazz::Common::transitive_reduction($anchor_graph);
        if ( $opt->{png} ) {
            App::Dazz::Common::g2gv( $anchor_graph, $args->[1] . ".reduced.png" );
        }
    }

    #----------------------------#
    # existing relations
    #----------------------------#
    my $relation_of = {};
    {
        for my $line ( Path::Tiny::path( $args->[2] )->lines( { chomp => 1 } ) ) {
            my @fields = split "\t", $line;

lib/App/Dazz/Command/merge.pm  view on Meta::CPAN

# Short, one-line summary of this command, exposed as the constant `abstract`.
use constant abstract => "merge overlapped unitigs";

# Command-line option specification for `dazz merge`.
#
# Returns a list whose entries are array references of the form
# [ option spec, description, optional hashref of extras such as a default ],
# followed by a final hash reference of global settings (show_defaults).
sub opt_spec {
    return (
        [ 'outfile|o=s'  => 'output filename, [stdout] for screen' ],
        [ 'len|l=i'      => 'minimal length of overlaps',   { default => 1000 } ],
        [ 'idt|i=f'      => 'minimal identity of overlaps', { default => 0.98 } ],
        [ 'tmp=s'        => 'user defined tempdir' ],
        [ 'parallel|p=i' => 'number of threads',            { default => 8 } ],
        [ 'verbose|v'    => 'verbose mode' ],
        [ 'png'          => 'write a png file via graphviz' ],
        { show_defaults => 1 },
    );
}

# One-line usage synopsis for the merge command: a single input file
# follows the options placeholder.
sub usage_desc {
    my $synopsis = 'dazz merge [options] <infile>';
    return $synopsis;
}

sub description {
    my $desc;

lib/App/Dazz/Command/merge.pm  view on Meta::CPAN

        #
        # Remove branching nodes
        #        for my $v ( $graph->vertices ) {
        #            if ( $graph->out_degree($v) > 1 or $graph->in_degree($v) > 1 ) {
        #                $graph->delete_vertex($v);
        #            }
        #        }

        $tempdir->child("overlapped.txt")->spew( map {"$_\n"} $graph->vertices );

        if ( $opt->{png} ) {
            App::Dazz::Common::g2gv( $graph, $infile . ".png" );
        }
    }

    {    # Output non-overlapped
        my $cmd;
        $cmd .= "faops some -i -l 0 $infile overlapped.txt non-overlapped.fasta";
        App::Dazz::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );
    }

    #----------------------------#

lib/App/Dazz/Common.pm  view on Meta::CPAN


    for my $e ( $g->edges ) {
        if ( $g->has_edge_weight( @{$e} ) ) {
            $gv->add_edge( @{$e}, label => $g->get_edge_weight( @{$e} ) );
        }
        else {
            $gv->add_edge( @{$e} );
        }
    }

    Path::Tiny::path($fn)->spew_raw( $gv->as_png );
}

# Draw a graph as an undirected GraphViz picture and save it as a PNG file.
#
# Arguments:
#   $g  - graph object; must provide `vertices` and `edges`
#         (annotated as a Graph instance)
#   $fn - path of the PNG file to write
#
# GraphViz is loaded on demand, so it is only needed when this sub is called.
sub g2gv0 {
    require GraphViz;

    #@type Graph
    my ( $graph, $png_file ) = @_;

    my $viz = GraphViz->new( directed => 0 );

    # Add every vertex explicitly, then every edge (each edge is an
    # array reference that is flattened into add_edge's argument list).
    $viz->add_node($_) for $graph->vertices;
    $viz->add_edge( @{$_} ) for $graph->edges;

    Path::Tiny::path($png_file)->spew_raw( $viz->as_png );
}

sub transitive_reduction {

    #@type Graph
    my $g = shift;

    my $count = 0;
    my $prev_count;
    while (1) {

t/11-group.t  view on Meta::CPAN

            or IPC::Cmd::can_run('fasta2DB')
            or IPC::Cmd::can_run('LAshow')
            or IPC::Cmd::can_run('ovlpr');

    my $tempdir = Path::Tiny->tempdir;
    test_app( 'App::Dazz' =>
            [ qw(overlap2 t/1_4.anchor.fasta t/1_4.pac.fasta), "-d", $tempdir->stringify, ] );

    $result = test_app(
        'App::Dazz' => [
            qw(group --png --range 1-4),
            $tempdir->child("anchorLong.db")->stringify,
            $tempdir->child("anchorLong.ovlp.tsv")->stringify,
        ]
    );

    # The "group" directory is expected inside the temporary directory.
    ok( $tempdir->child("group")->is_dir, 'output directory exists' );

    # child() only builds a path object, which is always true; the file's
    # presence has to be tested explicitly with is_file.
    ok( $tempdir->child("group")->child("groups.txt")->is_file, 'groups.txt exists' );
}

done_testing();



( run in 1.653 second using v1.01-cache-2.11-cpan-df04353d9ac )