BioPerl
view release on metacpan or search on metacpan
Bio/DB/GFF.pm view on Meta::CPAN
a scalar corresponding to a GFF file on the system
A pathname to a local GFF file. Any files ending with the .gz, .Z, or
.bz2 suffixes will be transparently decompressed with the appropriate
command-line utility.
=item *
an array reference containing a list of GFF files on the system
For example ['/home/gff/gff1.gz','/home/gff/gff2.gz']
=item *
directory path
The indicated directory will be searched for all files ending in the
suffixes .gff, .gff.gz, .gff.Z or .gff.bz2.
=item *
filehandle
An open filehandle from which to read the GFF data. Tied filehandles
now work as well.
=item *
a pipe expression
A pipe expression will also work. For example, a GFF file on a remote
web server can be loaded with an expression like this:
$db->load_gff("lynx -dump -source http://stein.cshl.org/gff_test |");
=back
The optional second argument, if true, will turn on verbose status
reports that indicate the progress.
If successful, the method will return the number of GFF lines
successfully loaded.
NOTE: this method used to be called load(), but has been changed. The
old method name is also recognized.
=cut
# Load GFF data from a path, list of paths, directory, pipe expression or
# tied filehandle.
#
# Args   : $file_or_directory - source to load (defaults to '.')
#          $verbose           - true to enable progress reports for this call
# Returns: the value returned by do_load_gff(); nothing (empty list / undef)
#          when setup_argv() produces no inputs.
sub load_gff {
    my $self              = shift;
    my $file_or_directory = shift || '.';
    my $verbose           = shift;

    # Verbosity applies to this call only; local() puts the old value back.
    local $self->{__verbose__} = $verbose;

    # A tied handle is handed straight to the loader, bypassing the ARGV path.
    return $self->do_load_gff($file_or_directory)
        if ref($file_or_directory) && tied *$file_or_directory;

    # Keep a duplicate of STDIN so it can be put back after the ARGV-based
    # read. A tied STDIN is left untouched.
    my $tied_stdin = tied(*STDIN);
    my $SAVEIN;
    my $saved_stdin = !$tied_stdin && open($SAVEIN, '<&', \*STDIN);

    local @ARGV = $self->setup_argv($file_or_directory, 'gff', 'gff3')
        or return;    # to play tricks with reader
    my $result = $self->do_load_gff('ARGV');

    # Restore STDIN by duplicating the saved handle. The mode has to be '<&':
    # with a plain '<' the handle is stringified and used as a file name, the
    # open fails, and STDIN ends up closed.
    if ($saved_stdin) {
        open STDIN, '<&', $SAVEIN
            or warn "load_gff: could not restore STDIN: $!";
        close $SAVEIN;
    }

    return $result;
}
*load = \&load_gff;
=head2 load_gff_file
Title : load_gff_file
Usage : $db->load_gff_file($file [,$verbose]);
Function: load GFF data into database
Returns : count of records loaded
Args : a path to a file
Status : Public
This is provided as an alternative to load_gff. It doesn't munge
STDIN or play tricks with ARGV.
=cut
# Load GFF data from a single file without touching STDIN or @ARGV.
#
# Args   : $file    - path handed to IO::File->new
#          $verbose - optional; when defined, sets the object's verbose flag
#                     for the duration of this call
# Returns: the value returned by do_load_gff(); nothing if the file cannot
#          be opened.
sub load_gff_file {
    my $self    = shift;
    my $file    = shift;
    my $verbose = shift;

    # Honor the documented $verbose argument the same way load_gff() does.
    # When it is omitted, the current setting is left as it is.
    local $self->{__verbose__} = $verbose if defined $verbose;

    my $fh = IO::File->new($file) or return;
    return $self->do_load_gff($fh);
}
=head2 load_fasta
Title : load_fasta
Usage : $db->load_fasta($file|$directory|$filehandle);
Function: load FASTA data into database
Returns : count of records loaded
Args : a directory, a file, a list of files,
or a filehandle
Status : Public
This method takes a single overloaded argument, which can be any of:
=over 4
=item *
scalar corresponding to a FASTA file on the system
A pathname to a local FASTA file. Any files ending with the .gz, .Z, or
.bz2 suffixes will be transparently decompressed with the appropriate
command-line utility.
=item *
array reference containing a list of FASTA files on the
system
For example ['/home/fasta/genomic.fa.gz','/home/fasta/genomic.fa.gz']
=item *
path to a directory
The indicated directory will be searched for all files ending in the
suffixes .fa, .fa.gz, .fa.Z or .fa.bz2.
=item *
filehandle
An open filehandle from which to read the FASTA data.
=item *
pipe expression
A pipe expression will also work. For example, a FASTA file on a remote
web server can be loaded with an expression like this:
$db->load_fasta("lynx -dump -source http://stein.cshl.org/fasta_test.fa |");
=back
=cut
# Load FASTA data from a path, list of paths, directory, pipe expression or
# tied filehandle.
#
# Args   : $file_or_directory - source to load (defaults to '.')
#          $verbose           - true to enable progress reports for this call
# Returns: the value returned by load_sequence(); nothing (empty list /
#          undef) when setup_argv() produces no inputs.
sub load_fasta {
    my $self              = shift;
    my $file_or_directory = shift || '.';
    my $verbose           = shift;

    # Verbosity applies to this call only; local() puts the old value back.
    local $self->{__verbose__} = $verbose;

    # A tied handle is handed straight to the loader, bypassing the ARGV path.
    return $self->load_sequence($file_or_directory)
        if ref($file_or_directory) && tied *$file_or_directory;

    # Keep a duplicate of STDIN so it can be put back after the ARGV-based
    # read. A tied STDIN is left untouched.
    my $tied = tied(*STDIN);
    my $SAVEIN;
    my $saved_stdin = !$tied && open($SAVEIN, '<&', \*STDIN);

    local @ARGV = $self->setup_argv($file_or_directory, 'fa', 'dna', 'fasta')
        or return;    # to play tricks with reader
    my $result = $self->load_sequence('ARGV');

    # Restore STDIN by duplicating the saved handle. The mode has to be '<&':
    # with a plain '<' the handle is stringified and used as a file name, the
    # open fails, and STDIN ends up closed.
    if ($saved_stdin) {
        open STDIN, '<&', $SAVEIN
            or warn "load_fasta: could not restore STDIN: $!";
        close $SAVEIN;
    }

    return $result;
}
=head2 load_fasta_file
Title : load_fasta_file
Usage : $db->load_fasta_file($file [,$verbose]);
Function: load FASTA data into database
Returns : count of records loaded
Args : a path to a file
Status : Public
This is provided as an alternative to load_fasta. It doesn't munge
STDIN or play tricks with ARGV.
=cut
# Load FASTA data from a single file without touching STDIN or @ARGV.
#
# Args   : $file    - path handed to IO::File->new
#          $verbose - optional; when defined, sets the object's verbose flag
#                     for the duration of this call
# Returns: the value returned by do_load_fasta(); nothing if the file cannot
#          be opened.
sub load_fasta_file {
    my $self    = shift;
    my $file    = shift;
    my $verbose = shift;

    # Honor the documented $verbose argument the same way load_fasta() does.
    # When it is omitted, the current setting is left as it is.
    local $self->{__verbose__} = $verbose if defined $verbose;

    my $fh = IO::File->new($file) or return;

    # NOTE(review): load_fasta() dispatches to load_sequence(), whereas this
    # method calls do_load_fasta(); confirm that do_load_fasta() is defined
    # for every adaptor that is expected to support this entry point.
    return $self->do_load_fasta($fh);
}
=head2 load_sequence_string
Title : load_sequence_string
Usage : $db->load_sequence_string($id,$dna)
Function: load a single DNA entry
Returns : true if successfully loaded
Args : a sequence ID and a raw sequence string (DNA, RNA, protein)
Status : Public
=cut
# Store one sequence under the given ID.
#
# Args   : $id  - identifier the sequence is stored under
#          $dna - raw sequence string
# Returns: 1 on success; nothing if either insert step reports failure.
sub load_sequence_string {
    my ($self, $id, $dna) = @_;

    # Offset and sequence are passed by reference so that
    # insert_sequence_chunk() can update them in place; whatever is left
    # afterwards is handed to insert_sequence().
    my $position = 0;
    return unless $self->insert_sequence_chunk($id, \$position, \$dna);
    return unless $self->insert_sequence($id, $position, $dna);

    return 1;
}
sub setup_argv {
my $self = shift;
my $file_or_directory = shift;
my @suffixes = @_;
no strict 'refs'; # so that we can call fileno() on the argument
my @argv;
if (-d $file_or_directory) {
# Because glob() is broken with long file names that contain spaces
$file_or_directory = Win32::GetShortPathName($file_or_directory)
( run in 0.917 second using v1.01-cache-2.11-cpan-5a3173703d6 )