AlignDB-ToXLSX
view release on metacpan or search on metacpan
lib/AlignDB/ToXLSX.pm view on Meta::CPAN
num_format => 'yyyy-mm-dd hh:mm',
%font,
),
URL => $workbook->add_format( color => 'blue', underline => 1, %font, ),
URLHEADER => $workbook->add_format( color => 'blue', underline => 1, %header, %font, ),
};
$self->{format} = $format;
return;
}
sub increase_row {
my $self = shift;
my $step = shift || 1;
$self->{row} += $step;
}
sub increase_column {
my $self = shift;
my $step = shift || 1;
$self->{column} += $step;
}
#@returns Excel::Writer::XLSX::Worksheet
sub write_header {
my $self = shift;
my $sheet_name = shift;
my $opt = shift;
# init
#@type Excel::Writer::XLSX::Workbook
my $workbook = $self->{workbook};
#@type Excel::Writer::XLSX::Worksheet
my $sheet = $workbook->add_worksheet($sheet_name);
my $format = $self->{format};
my $header = $opt->{header};
my $query_name = $opt->{query_name};
# create table header
for ( my $i = 0; $i < $self->{column}; $i++ ) {
$sheet->write( $self->{row}, $i, $query_name, $format->{HEADER} );
}
for ( my $i = 0; $i < scalar @{$header}; $i++ ) {
$sheet->write( $self->{row}, $i + $self->{column}, $header->[$i], $format->{HEADER} );
}
$sheet->freeze_panes( 1, 0 ); # freeze table
$self->increase_row;
return $sheet;
}
sub sql2names {
my $self = shift;
my $sql = shift;
my $opt = shift;
# bind value
my $bind_value = $opt->{bind_value};
if ( !defined $bind_value ) {
$bind_value = [];
}
#@type DBI
my $dbh = $self->{dbh};
#@type DBI
my $sth = $dbh->prepare($sql);
$sth->execute( @{$bind_value} );
my @names = @{ $sth->{'NAME'} };
return @names;
}
sub write_row {
my $self = shift;
#@type Excel::Writer::XLSX::Worksheet
my $sheet = shift;
my $opt = shift;
# init
my $format = $self->{format};
# query name
my $query_name = $opt->{query_name};
if ( defined $query_name ) {
$sheet->write( $self->{row}, $self->{column} - 1, $query_name, $format->{NAME} );
}
# array_ref
my $row = $opt->{row};
# insert table
for ( my $i = 0; $i < scalar @$row; $i++ ) {
$sheet->write( $self->{row}, $i + $self->{column}, $row->[$i], $format->{NORMAL} );
}
$self->increase_row;
return;
}
sub write_column {
my $self = shift;
#@type Excel::Writer::XLSX::Worksheet
my $sheet = shift;
my $opt = shift;
# init
my $format = $self->{format};
# query name
my $query_name = $opt->{query_name};
if ( defined $query_name ) {
$sheet->write( $self->{row} - 1, $self->{column}, $query_name, $format->{NAME} );
}
# array_ref
my $column = $opt->{column};
# insert table
$sheet->write( $self->{row}, $self->{column}, [$column], $format->{NORMAL} );
$self->increase_column;
return;
}
sub write_sql {
my $self = shift;
#@type Excel::Writer::XLSX::Worksheet
my $sheet = shift;
my $opt = shift;
# init
my $format = $self->{format};
# query name
my $query_name = $opt->{query_name};
if ( defined $query_name ) {
$sheet->write( $self->{row}, $self->{column} - 1, $query_name, $format->{NAME} );
}
# bind value
my $bind_value = $opt->{bind_value};
if ( !defined $bind_value ) {
$bind_value = [];
}
# init DBI query
my $sql_query = $opt->{sql_query};
#@type DBI
my $dbh = $self->{dbh};
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute( @{$bind_value} );
# init $data
my $data;
if ( exists $opt->{data} ) {
if ( defined $opt->{data} and ref( $opt->{data} ) eq 'ARRAY' ) {
$data = $opt->{data};
}
else {
$data = [];
push @{$data}, [] for @{ $sth->{'NAME'} };
}
}
# insert table rows
while ( my @row = $sth->fetchrow_array ) {
for ( my $i = 0; $i < scalar @row; $i++ ) {
if ( exists $opt->{data} ) {
push @{ $data->[$i] }, $row[$i];
}
$sheet->write( $self->{row}, $i + $self->{column}, $row[$i], $format->{NORMAL} );
}
$self->increase_row;
}
return $data;
}
sub make_combine {
my $self = shift;
my $opt = shift;
# init parameters
my $sql_query = $opt->{sql_query};
my $threshold = $opt->{threshold};
my $standalone = $opt->{standalone};
# bind value
my $bind_value = $opt->{bind_value};
unless ( defined $bind_value ) {
$bind_value = [];
}
# merge_last
my $merge_last = $opt->{merge_last};
unless ( defined $merge_last ) {
$merge_last = 0;
}
# init DBI query
#@type DBI
my $dbh = $self->{dbh};
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute(@$bind_value);
my @row_count = ();
while ( my @row = $sth->fetchrow_array ) {
push @row_count, \@row;
}
my @combined; # return these
my @temp_combined = ();
my $temp_count = 0;
foreach my $row_ref (@row_count) {
if ( List::MoreUtils::PP::any { $_ eq $row_ref->[0] } @{$standalone} ) {
push @combined, [ $row_ref->[0] ];
}
elsif ( $temp_count < $threshold ) {
push @temp_combined, $row_ref->[0];
$temp_count += $row_ref->[1];
if ( $temp_count < $threshold ) {
next;
}
else {
push @combined, [@temp_combined];
@temp_combined = ();
$temp_count = 0;
}
}
else {
warn "Errors occured in calculating combined distance.\n";
}
}
# Write the last weighted row which COUNT might
# be smaller than $threshold
if ( $temp_count > 0 ) {
if ($merge_last) {
if ( @combined == 0 ) {
@combined = ( [] );
}
push @{ $combined[-1] }, @temp_combined;
}
else {
push @combined, [@temp_combined];
}
}
return \@combined;
}
sub make_combine_piece {
my ( $self, $opt ) = @_;
#@type DBI
my $dbh = $self->{dbh};
# init parameters
my $sql_query = $opt->{sql_query};
my $piece = $opt->{piece};
# bind value
my $bind_value = $opt->{bind_value};
unless ( defined $bind_value ) {
$bind_value = [];
}
# init DBI query
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute(@$bind_value);
my @row_count = ();
while ( my @row = $sth->fetchrow_array ) {
push @row_count, \@row;
}
my $sum;
$sum += $_->[1] for @row_count;
my $small_chunk = $sum / $piece;
my @combined; # return these
my @temp_combined = ();
my $temp_count = 0;
for my $row_ref (@row_count) {
if ( $temp_count < $small_chunk ) {
push @temp_combined, $row_ref->[0];
$temp_count += $row_ref->[1];
if ( $temp_count >= $small_chunk ) {
push @combined, [@temp_combined];
@temp_combined = ();
$temp_count = 0;
}
}
else {
warn "Errors occured in calculating combined distance.\n";
}
}
# Write the last weighted row which COUNT might
# be smaller than $threshold
if ( $temp_count > 0 ) {
push @combined, [@temp_combined];
}
return \@combined;
}
sub make_last_portion {
my ( $self, $opt ) = @_;
#@type DBI
my $dbh = $self->{dbh};
# init parameters
my $sql_query = $opt->{sql_query};
my $portion = $opt->{portion};
# init DBI query
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute;
my @row_count = ();
while ( my @row = $sth->fetchrow_array ) {
push @row_count, \@row;
}
my @last_portion; # return @last_portion
my $all_length = 0; # return $all_length
foreach (@row_count) {
$all_length += $_->[2];
}
my @rev_row_count = reverse @row_count;
my $temp_length = 0;
foreach (@rev_row_count) {
push @last_portion, $_->[0];
$temp_length += $_->[2];
if ( $temp_length >= $all_length * $portion ) {
last;
}
}
return ( $all_length, \@last_portion );
}
sub excute_sql {
my ( $self, $opt ) = @_;
# bind value
my $bind_value = $opt->{bind_value};
unless ( defined $bind_value ) {
$bind_value = [];
}
# init DBI query
my $sql_query = $opt->{sql_query};
#@type DBI
my $dbh = $self->{dbh};
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute( @{$bind_value} );
}
sub check_column {
my ( $self, $table, $column ) = @_;
# init
#@type DBI
my $dbh = $self->{dbh};
{ # check table existing
my @table_names = $dbh->tables( '', '', '' );
# table names are quoted by ` (back-quotes) which is the
# quote_identifier
my $table_name = "`$table`";
unless ( List::MoreUtils::PP::any { $_ =~ /$table_name/i } @table_names ) {
print " " x 4, "Table $table does not exist\n";
return 0;
}
}
{ # check column existing
my $sql_query = qq{
SHOW FIELDS
FROM $table
LIKE "$column"
};
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute();
my ($field) = $sth->fetchrow_array;
if ( not $field ) {
print " " x 4, "Column $column does not exist\n";
return 0;
}
}
{ # check values in column
my $sql_query = qq{
SELECT COUNT($column)
FROM $table
};
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute;
my ($count) = $sth->fetchrow_array;
if ( not $count ) {
print " " x 4, "Column $column has no records\n";
}
return $count;
}
}
sub quantile {
my ( $self, $data, $part_number ) = @_;
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(@$data);
my $min = $stat->min;
my @quantiles;
my $base = 100 / $part_number;
for ( 1 .. $part_number - 1 ) {
my $percentile = $stat->percentile( $_ * $base );
push @quantiles, $percentile;
}
my $max = $stat->max;
return [ $min, @quantiles, $max, ];
}
sub quantile_sql {
my ( $self, $opt, $part_number ) = @_;
#@type DBI
my $dbh = $self->{dbh};
# bind value
my $bind_value = $opt->{bind_value};
unless ( defined $bind_value ) {
$bind_value = [];
}
# init DBI query
my $sql_query = $opt->{sql_query};
#@type DBI
my $sth = $dbh->prepare($sql_query);
$sth->execute(@$bind_value);
my @data;
while ( my @row = $sth->fetchrow_array ) {
push @data, $row[0];
}
return $self->quantile( \@data, $part_number );
}
sub calc_threshold {
my $self = shift;
my ( $combine, $piece );
#@type DBI
my $dbh = $self->{dbh};
#@type DBI
my $sth = $dbh->prepare(
q{
SELECT SUM(FLOOR(align_comparables / 500) * 500)
FROM align
}
);
$sth->execute;
my ($total_length) = $sth->fetchrow_array;
if ( $total_length <= 5_000_000 ) {
$piece = 10;
}
elsif ( $total_length <= 10_000_000 ) {
$piece = 10;
}
elsif ( $total_length <= 100_000_000 ) {
$piece = 20;
}
elsif ( $total_length <= 1_000_000_000 ) {
$piece = 50;
}
else {
$piece = 100;
}
if ( $total_length <= 1_000_000 ) {
$combine = 100;
}
elsif ( $total_length <= 5_000_000 ) {
$combine = 500;
}
else {
$combine = 1000;
}
return ( $combine, $piece );
}
# See HACK #7 in OReilly.Excel.Hacks.2nd.Edition.
sub add_index_sheet {
my $self = shift;
( run in 0.819 second using v1.01-cache-2.11-cpan-2398b32b56e )