App-Anchr

 view release on metacpan or  search on metacpan

lib/App/Anchr/Command/orient.pm  view on Meta::CPAN

    }
    for ( @{$args} ) {
        if ( !Path::Tiny::path($_)->is_file ) {
            $self->usage_error("The input file [$_] doesn't exist.");
        }
    }

    if ( exists $opt->{restrict} ) {
        if ( !Path::Tiny::path( $opt->{restrict} )->is_file ) {
            $self->usage_error("The restrict file [$opt->{restrict}] doesn't exist.\n");
        }
    }

    if ( !exists $opt->{outfile} ) {
        $opt->{outfile} = Path::Tiny::path( $args->[0] )->absolute . ".rc.fasta";
    }
}

sub execute {
    my ( $self, $opt, $args ) = @_;

    # absolute pathes as we will chdir to tempdir later
    my @infiles;
    for my $infile ( @{$args} ) {
        push @infiles, Path::Tiny::path($infile)->absolute->stringify;
    }

    if ( lc $opt->{outfile} ne "stdout" ) {
        $opt->{outfile} = Path::Tiny::path( $opt->{outfile} )->absolute->stringify;
    }

    if ( $opt->{restrict} ) {
        $opt->{restrict} = Path::Tiny::path( $opt->{restrict} )->absolute->stringify;
    }

    # record cwd, we'll return there
    my $cwd     = Path::Tiny->cwd;
    my $tempdir = Path::Tiny->tempdir("anchr_orient_XXXXXXXX");
    chdir $tempdir;

    my $basename = $tempdir->basename();
    $basename =~ s/\W+/_/g;

    {    # Sort by lengths
        for my $i ( 0 .. $#infiles ) {
            App::Anchr::Common::exec_cmd(
                "faops size $infiles[$i] | sort -n -r -k2,2 | cut -f 1 > infile.$i.order.txt",
                { verbose => $opt->{verbose}, },
            );
            App::Anchr::Common::exec_cmd(
                "faops order $infiles[$i] infile.$i.order.txt infile.$i.fasta",
                { verbose => $opt->{verbose}, },
            );
        }
    }

    {    # Preprocess reads to format them for dazzler
        my $cmd = "cat";
        $cmd .= sprintf " infile.%d.fasta", $_ for ( 0 .. $#infiles );
        $cmd .= " | anchr dazzname stdin -o stdout";
        $cmd .= " | faops filter -l 0 stdin renamed.fasta";
        App::Anchr::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );

        if ( !$tempdir->child("renamed.fasta")->is_file ) {
            Carp::croak "Failed: create renamed.fasta\n";
        }

        if ( !$tempdir->child("stdout.replace.tsv")->is_file ) {
            Carp::croak "Failed: create stdout.replace.tsv\n";
        }
    }

    {    # overlaps
        my $cmd;
        $cmd .= "anchr overlap renamed.fasta";
        $cmd .= " --len $opt->{len} --idt $opt->{idt} --parallel $opt->{parallel}";
        $cmd .= " -o renamed.ovlp.tsv";
        App::Anchr::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );

        if ( !$tempdir->child("renamed.ovlp.tsv")->is_file ) {
            Carp::croak "Failed: create renamed.ovlp.tsv\n";
        }
    }

    # filter overlaps
    if ( $opt->{restrict} ) {
        my $cmd;
        $cmd .= "anchr replace renamed.ovlp.tsv stdout.replace.tsv -o stdout";
        $cmd .= " | anchr restrict stdin $opt->{restrict} -o stdout";
        $cmd .= " | anchr replace stdin stdout.replace.tsv -r -o stdout";
        $cmd .= " > restrict.ovlp.tsv";
        App::Anchr::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );

        if ( !$tempdir->child("restrict.ovlp.tsv")->is_file ) {
            Carp::croak "Failed: create restrict.ovlp.tsv\n";
        }
    }

    my $graph = Graph->new( directed => 0 );
    {    # Build ovlp graph
        my @lines;
        if ( $tempdir->child("restrict.ovlp.tsv")->is_file ) {
            @lines = $tempdir->child("restrict.ovlp.tsv")->lines( { chomp => 1, } );
        }
        else {
            @lines = $tempdir->child("renamed.ovlp.tsv")->lines( { chomp => 1, } );
        }

        # load strands
        for my $line (@lines) {
            my $info = App::Anchr::Common::parse_ovlp_line($line);

            # ignore self overlapping
            next if $info->{f_id} eq $info->{g_id};

            # ignore poor overlaps
            next if $info->{ovlp_idt} < $opt->{idt};
            next if $info->{ovlp_len} < $opt->{len};

            $graph->add_edge( $info->{f_id}, $info->{g_id}, );
            $graph->set_edge_attribute( $info->{f_id}, $info->{g_id}, q{strand},
                $info->{g_strand} );
        }
    }

    {    # To positive strands in each cc
        for my $cc ( $graph->connected_components ) {
            my @pieces = @{$cc};
            my $copy   = scalar @pieces;

            next if $copy == 1;

            # set first sequence to positive strand
            my $assigned = AlignDB::IntSpan->new(0);
            $graph->set_vertex_attribute( $pieces[0], q{strand}, 0 );

            # need to be handled
            my $unhandled = AlignDB::IntSpan->new->add_pair( 1, $copy - 1 );

            my $prev_size = $assigned->size;
            my $cur_loop  = 0;                 # existing point
            while ( $assigned->size < $copy ) {
                if ( $prev_size == $assigned->size ) {
                    $cur_loop++;
                    last if $cur_loop > 10;
                }
                else {
                    $cur_loop = 0;
                }
                $prev_size = $assigned->size;

                for my $i ( $assigned->elements ) {
                    for my $j ( $unhandled->elements ) {
                        next unless $graph->has_edge( $pieces[$i], $pieces[$j] );

                        # assign strands
                        my $i_strand = $graph->get_vertex_attribute( $pieces[$i], q{strand} );
                        my $edge_strand
                            = $graph->get_edge_attribute( $pieces[$i], $pieces[$j], q{strand} );
                        if ( $edge_strand == 0 ) {
                            $graph->set_vertex_attribute( $pieces[$j], q{strand}, $i_strand );
                        }
                        else {
                            if ( $i_strand == 0 ) {
                                $graph->set_vertex_attribute( $pieces[$j], q{strand}, 1 );
                            }
                            else {
                                $graph->set_vertex_attribute( $pieces[$j], q{strand}, 0 );
                            }
                        }
                        $unhandled->remove($j);
                        $assigned->add($j);
                    }
                }
            }
        }
    }

    {    # RC
        my @negs;
        for my $i ( sort $graph->vertices ) {
            my $i_strand = $graph->get_vertex_attribute( $i, q{strand} );
            push @negs, $i if ( defined $i_strand and $i_strand == 1 );
        }

        $tempdir->child("rc.list")->spew( map {"$_\n"} @negs );

        my $cmd;
        $cmd .= "faops rc -l 0 -n -f rc.list";
        $cmd .= " renamed.fasta renamed.rc.fasta";
        App::Anchr::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );

        if ( !$tempdir->child("renamed.rc.fasta")->is_file ) {
            Carp::croak "Failed: create renamed.rc.fasta\n";
        }
    }

    {    # Outputs. stdout is handeld by faops
        my $cmd;
        $cmd .= "faops replace -l 0 renamed.rc.fasta stdout.replace.tsv";
        $cmd .= " $opt->{outfile}";
        App::Anchr::Common::exec_cmd( $cmd, { verbose => $opt->{verbose}, } );
    }

    chdir $cwd;
}

1;



( run in 0.639 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )