Bio-Phylo-Forest-DBTree

 view release on metacpan or  search on metacpan

MANIFEST  view on Meta::CPAN

COPYING
lib/Bio/Phylo/Forest/DBTree.pm
lib/Bio/Phylo/Forest/DBTree/Result/Node.pm
LICENSE
Makefile.PL
MANIFEST			This list of files
README.md
script/megatree-bold-loader
script/megatree-loader
script/megatree-ncbi-loader
script/megatree-phylotree-loader
script/megatree-pruner
script/megatree-pruner.py
t/megatree.t
t/pod-coverage.t
t/pod.t
t/trivial.csv
t/trivial.db

Makefile.PL  view on Meta::CPAN

use strict;
use warnings;
use ExtUtils::MakeMaker;

my %parms = (
    'NAME'         => 'Bio::Phylo::Forest::DBTree',
    'AUTHOR'       => 'Rutger Vos',
    'EXE_FILES'    => [
        'script/megatree-loader',
        'script/megatree-ncbi-loader',
        'script/megatree-bold-loader',
        'script/megatree-phylotree-loader',
        'script/megatree-pruner'
    ],
    'VERSION_FROM' => 'lib/Bio/Phylo/Forest/DBTree.pm',
    'LICENSE'      => 'perl',
    'ABSTRACT'     => 'DBIx::Class-backed, Bio::Phylo-like API for large phylogenies',
    'PREREQ_PM'    => {
    	'DBIx::Class' => '0',
    	'Bio::Phylo'  => '0.52',
    	'DBD::SQLite' => '0',

script/megatree-bold-loader  view on Meta::CPAN

#!/usr/bin/perl
use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;
use Bio::Phylo::Util::Logger ':levels';
use Bio::Phylo::Forest::DBTree;

# Process command line arguments. Verbosity starts at WARN and is incremented
# once for every -verbose flag that is supplied.
my $verbosity = WARN;
my ( $boldtsv, $dbfile );
GetOptions(
	'bold=s'   => \$boldtsv,
	'dbfile=s' => \$dbfile,
	'verbose+' => \$verbosity,

	# -help prints the brief usage message and exits
	'help'     => sub { pod2usage() },

	# -man is documented as printing the manual page. A bare pod2usage(1) is
	# interpreted as "exit status 1" and only prints SYNOPSIS and OPTIONS, so
	# request the full manual explicitly while keeping the same exit status.
	'man'      => sub { pod2usage( '-exitval' => 1, '-verbose' => 2 ) },
);

# both the BOLD TSV input and the database file location are mandatory
if ( not $boldtsv or not $dbfile ) {
	pod2usage();
}

=head1 NAME

megatree-bold-loader - Loads the processid taxonomy tree implied by a BOLD BCDM file into a database

=head1 SYNOPSIS

    megatree-bold-loader -bold <file> -d <file> [-vhm]

=head1 OPTIONS

=over

=item B<< -b <file> >> or B<< -bold <file> >>

Location of the TSV file from a BOLD BCDM dump, e.g. as contained in one of the archives
that were available here as of 2023-03-06: L<https://bench.boldsystems.org/index.php/datapackages/Latest>

=item B<< -d <file> >> or B<< -dbfile <file> >>

Location of a database file, compatible with sqlite3, which will be produced. This file
must not already exist. If it does, an error message will be emitted and the program will quit.

=item B<-v> or B<-verbose>

Optional.

script/megatree-bold-loader  view on Meta::CPAN


Optional.

Prints help message / documentation.

=item B<-m> or B<-man>

Optional.

Prints manual page. Additional information is available in the documentation, i.e.
C<perldoc megatree-bold-loader>

=back

=head1 DESCRIPTION

This program produces a database file from a BOLD BCDM dump. Such a database
provides much quicker random access to the taxonomy tree than by processing the flat
file. It can be accessed by an API that is compatible with L<Bio::Phylo>, but much more
scalable. An example of such API usage is presented by the L<megatree-pruner> script.

script/megatree-bold-loader  view on Meta::CPAN

# Switch AutoCommit on for the handle and then open an explicit transaction.
# NOTE(review): presumably this is so the inserts that follow are committed
# as one batch; the matching commit is not visible in this excerpt - confirm.
$dbh->{'AutoCommit'} = 1;
$dbh->begin_work;

{

	# start primary key counter at 1, instantiate header list and taxon to ID map
	my ( $id, @header, %id_map, %parent_map ) = 1;
	my $line = 1;

	# open the BOLD TSV file
	open my $fh, '<', $boldtsv or die $!;
	LINE: while(<$fh>) {
		chomp;

		# read the header and move to the next line
		my @record = split /\t/, $_;
		if ( not @header ) {
			@header = @record;
			next LINE;
		}



( run in 1.478 second using v1.01-cache-2.11-cpan-5dc5da66d9d )