App-RecordStream-Bio

 view release on metacpan or  search on metacpan

lib/App/RecordStream/Operation/tofasta.pm  view on Meta::CPAN

package App::RecordStream::Operation::tofasta;

use strict;
use warnings;

use base qw(App::RecordStream::Operation);

# Parses the operation's command-line options and stores the resulting
# configuration on the object.
#
# Arguments:
#   $self - the operation object
#   $args - the argument list, handed unchanged to $self->parse_options()
#
# After a successful call:
#   $self->{WIDTH}    - sequence line width; defaults to 60 when not given
#                       (or given as 0)
#   $self->{ONELINE}  - true when --oneline was given
#   $self->{PASSTHRU} - true when --passthru was given
#   $self->{KEYS}     - hash with the key specs for id, desc and seq
#
# Dies when --passthru is combined with --oneline or --width, or when
# --width is negative.
sub init {
    my $self = shift;
    my $args = shift;

    # Defaults for the three key specs; overwritten by the matching options.
    my ($id, $desc, $seq) = qw(id description sequence);
    my $spec = {
        "id|i=s"            => \$id,
        "description|d=s"   => \$desc,
        "sequence|s=s"      => \$seq,
        "width|w=i"         => \($self->{WIDTH}),
        "oneline"           => \($self->{ONELINE}),
        "passthru"          => \($self->{PASSTHRU}),
    };

    $self->parse_options($args, $spec);

    # This check must run before the width default is applied below,
    # otherwise WIDTH would always be true and --passthru could never be used.
    die "--passthru is incompatible with --oneline and --width\n\n"
        if $self->{PASSTHRU} and ($self->{ONELINE} or $self->{WIDTH});

    # The "=i" option type accepts negative integers, but a negative number
    # is not a usable line width, so reject it here instead of letting it
    # reach the sequence formatting code.
    die "--width must not be negative\n\n"
        if defined $self->{WIDTH} and $self->{WIDTH} < 0;

    $self->{WIDTH} ||= 60;

    $self->{KEYS}{id}    = $id;
    $self->{KEYS}{desc}  = $desc;
    $self->{KEYS}{seq}   = $seq;
}

# Formats a single record as one FASTA entry and emits it with push_line().
#
# Arguments:
#   $self   - the operation object; reads KEYS, PASSTHRU, ONELINE and WIDTH
#   $record - the record; must provide guess_key_from_spec(), whose return
#             value is dereferenced as a scalar reference
#
# Returns 1.
sub accept_record {
    my $self   = shift;
    my $record = shift;

    # Fetch the value for every configured key spec, skipping any that was
    # set to the special name NONE.  The results are stored under the names
    # "-id", "-desc" and "-seq".
    my %props = map {; "-$_" => ${$record->guess_key_from_spec($self->{KEYS}{$_})} }
                grep { $self->{KEYS}{$_} ne 'NONE' }
                keys %{$self->{KEYS}};

    if (not $self->{PASSTHRU} and defined $props{'-seq'}) {
        # Remove all whitespace, including existing line breaks, so the
        # sequence is one unbroken string.  (Marked "fixme" by the original
        # author without further explanation.)
        $props{'-seq'} =~ s/\s+//g;

        # With --oneline the unbroken string is already the final form;
        # otherwise insert a line break after every WIDTH characters.
        if (not $self->{ONELINE} and $self->{WIDTH}) {
            my $width = $self->{WIDTH} + 0;
            $props{'-seq'} =~ s/(.{$width})/$1\n/g;
        }
    }

    # Retain previous behaviour of preserving a leading space before any
    # description without --passthru
    $props{'-id'} = ""
        unless defined $props{'-id'} or $self->{PASSTHRU};

    # The header line must not contain line breaks, so strip them from the
    # id and description before joining the parts that are present.
    my $header = join " ", map { s/[\n\r]//g; $_ }
                           grep { defined }
                                @props{'-id', '-desc'};

    # Test definedness rather than truth so that a sequence consisting of
    # the single character "0" is not mistaken for a missing sequence.
    my $sequence = defined $props{'-seq'} ? $props{'-seq'} : "";

    my $fasta = sprintf ">%s\n%s", $header, $sequence;

    # Wrapping leaves a trailing newline when the sequence length is an
    # exact multiple of the width; drop it (and the one after an empty
    # sequence).
    chomp $fasta;
    $self->push_line($fasta);

    return 1;
}

# Registers the 'keyspecs' and 'keys' help types, in that order, by calling
# use_help_type() on the operation object.  Returns whatever the final
# use_help_type() call returns.
sub add_help_types {
    my ($self) = @_;

    $self->use_help_type('keyspecs');

    return $self->use_help_type('keys');
}

sub usage {
    my $self = shift;

    my $options = [
        [ 'id|-i <keyspec>',            'Record field to use for the sequence id' ],
        [ 'description|-d <keyspec>',   'Record field to use for the sequence description' ],
        [ 'sequence|-s <keyspec>',      'Record field to use for the sequence itself' ],
        [ 'width|w <#>',                'Format sequence blocks to # characters wide' ],
        [ 'oneline',                    'Format sequences on a single long line' ],
        [ 'passthru',                   'Pass through nucleotides unformatted' ],
    ];

    my $args_string = $self->options_string($options);

    return <<USAGE;
Usage: recs-tofasta <options> [files]
   __FORMAT_TEXT__
   Outputs a FASTA-formatted sequence for each record.

   By default the keys "id", "description", and "sequence" are used to build
   the FASTA format.  These defaults match up with what recs-fromfasta produces.
   The special key name "NONE" may be used to indicate that no key should be
   used, disabling the defaults.  Note that specifying NONE for --id will cause



( run in 0.483 second using v1.01-cache-2.11-cpan-39bf76dae61 )