Spreadsheet-Compare
view release on metacpan or search on metacpan
lib/Spreadsheet/Compare/Reader/CSV.pm view on Meta::CPAN
rootdir => '.',
sep_auto => undef,
skip_before_head => 0,
skip_after_head => 0,
}, make_attributes => 1;
has filename => undef, ro => 1;
has filehandle => undef, ro => 1;
has _chunk_data => sub { {} }, ro => 1;
has csv => sub {
my $co = { $_[0]->csv_options->%* };
my $clear = delete $co->{__clear__};
unless ($clear) {
$co->{$_} //= $csv_options_default{$_} for keys %csv_options_default;
}
DEBUG "using Text::CSV with options:", Dump($co);
my $csv = Text::CSV->new( $co );
LOGDIE join( ',', Text::CSV->error_diag ) unless $csv;
return $csv;
}, ro => 1;
#>>>
my( $trace, $debug );
sub init ( $self, @args ) {
$self->{__ro__can_chunk} = 1;
return $self->SUPER::init(@args);
}
sub setup ($self) {
( $trace, $debug ) = get_log_settings();
my $proot = path( $self->rootdir // '.' );
my $fn = path( $self->files->[ $self->index ] );
my $pfull = $self->{__ro__filename} = $fn->is_absolute ? $fn : $proot->child($fn);
INFO "opening input file >>$pfull<<";
my $fh = $self->{__ro__filehandle} = $pfull->openr_raw;
<$fh> for 1 .. $self->skip_before_head;
$self->_set_header;
<$fh> for 1 .. $self->skip_after_head;
$self->_chunk_records() if $self->chunker;
$self->{_sln} = 0;
return $self;
}
sub _chunk_records ($self) {
$debug and DEBUG "chunking side $self->{index}";
my $skipper = $self->skipper;
while ( my $rec = $self->_read_record ) {
next if $skipper and $skipper->($rec);
my $cname = $self->chunker->($rec);
my $cdata = $self->_chunk_data->{$cname} //= [];
push @$cdata, delete( $rec->{__INFO__} );
}
$debug and DEBUG "found chunks:", sub { Dump( [ sort keys $self->_chunk_data->%* ] ) };
my $fh = $self->filehandle;
seek( $fh, 0, 0 );
return $self;
}
sub fetch ( $self, $size ) {
my $result = $self->result;
my $count = 0;
if ( $self->chunker ) {
my $cdata = $self->_chunk_data;
my $cname = ( sort keys %$cdata )[0];
my $chunk = delete $cdata->{$cname};
$self->{__ro__exhausted} = 1 unless keys %$cdata;
$debug and DEBUG "Fetching data for chunk $cname";
for my $rec_info (@$chunk) {
if ( my $rec = $self->_read_record($rec_info) ) {
push @$result, $rec;
$count++;
}
}
$debug and DEBUG "fetched $count records from chunk $cname";
}
else {
$debug and DEBUG "fetching max $size records";
my $i = 0;
my $fh = $self->filehandle;
my $skipper = $self->skipper;
while ( ++$i <= $size ) {
my $rec = $self->_read_record();
unless ($rec) {
$debug and DEBUG "EOF for $self->{__ro__filename}";
$self->{__ro__exhausted} = 1;
last;
}
next if $skipper and $skipper->($rec);
push @$result, $rec;
$count++;
}
if ( $size == ~0 ) {
@$result = sort { $a->id cmp $b->id } @$result;
}
}
$debug and DEBUG "fetched $count records";
return $count;
}
sub _set_header ($self) {
my $fh = $self->filehandle;
my $start_pos = tell($fh);
my $tcx = $self->csv_options;
my $csv = $self->csv;
my $sep = $tcx->{sep} // $tcx->{sep_char};
my $hd = $self->has_header;
my $sep_set = $sep ? [$sep] : $self->sep_auto;
my @rec;
if ( $sep and defined $hd and not $hd ) {
@rec = $csv->getline($fh)->@*;
}
else { # no separator defined and/or autodetect
try {
@rec = $csv->header(
$fh, {
$sep_set ? ( sep_set => $sep_set ) : (),
munge_column_names => sub ($hcol) {
state $count = 0;
( run in 1.102 second using v1.01-cache-2.11-cpan-f56aa216473 )