Spreadsheet-Compare

 view release on metacpan or  search on metacpan

lib/Spreadsheet/Compare/Reader/CSV.pm  view on Meta::CPAN

    rootdir            => '.',
    sep_auto           => undef,
    skip_before_head   => 0,
    skip_after_head    => 0,
}, make_attributes => 1;

has filename    => undef, ro => 1;
has filehandle  => undef, ro => 1;
has _chunk_data => sub { {} }, ro => 1;
has csv         => sub {
    my $co = { $_[0]->csv_options->%* };
    my $clear = delete $co->{__clear__};
    unless ($clear) {
        $co->{$_} //= $csv_options_default{$_} for keys %csv_options_default;
    }
    DEBUG "using Text::CSV with options:", Dump($co);
    my $csv = Text::CSV->new( $co );
    LOGDIE join( ',', Text::CSV->error_diag ) unless $csv;
    return $csv;
}, ro => 1;
#>>>

my( $trace, $debug );

sub init ( $self, @args ) {
    $self->{__ro__can_chunk} = 1;
    return $self->SUPER::init(@args);
}

sub setup ($self) {

    ( $trace, $debug ) = get_log_settings();

    my $proot = path( $self->rootdir // '.' );
    my $fn    = path( $self->files->[ $self->index ] );
    my $pfull = $self->{__ro__filename} = $fn->is_absolute ? $fn : $proot->child($fn);

    INFO "opening input file >>$pfull<<";
    my $fh = $self->{__ro__filehandle} = $pfull->openr_raw;

    <$fh> for 1 .. $self->skip_before_head;

    $self->_set_header;

    <$fh> for 1 .. $self->skip_after_head;

    $self->_chunk_records() if $self->chunker;

    $self->{_sln} = 0;

    return $self;
}


sub _chunk_records ($self) {
    $debug and DEBUG "chunking side $self->{index}";
    my $skipper = $self->skipper;
    while ( my $rec = $self->_read_record ) {
        next if $skipper and $skipper->($rec);
        my $cname = $self->chunker->($rec);
        my $cdata = $self->_chunk_data->{$cname} //= [];
        push @$cdata, delete( $rec->{__INFO__} );
    }

    $debug and DEBUG "found chunks:", sub { Dump( [ sort keys $self->_chunk_data->%* ] ) };

    my $fh = $self->filehandle;
    seek( $fh, 0, 0 );
    return $self;
}


sub fetch ( $self, $size ) {

    my $result = $self->result;
    my $count  = 0;

    if ( $self->chunker ) {
        my $cdata = $self->_chunk_data;
        my $cname = ( sort keys %$cdata )[0];
        my $chunk = delete $cdata->{$cname};
        $self->{__ro__exhausted} = 1 unless keys %$cdata;
        $debug and DEBUG "Fetching data for chunk $cname";
        for my $rec_info (@$chunk) {
            if ( my $rec = $self->_read_record($rec_info) ) {
                push @$result, $rec;
                $count++;
            }
        }
        $debug and DEBUG "fetched $count records from chunk $cname";
    }
    else {
        $debug and DEBUG "fetching max $size records";

        my $i       = 0;
        my $fh      = $self->filehandle;
        my $skipper = $self->skipper;
        while ( ++$i <= $size ) {
            my $rec = $self->_read_record();
            unless ($rec) {
                $debug and DEBUG "EOF for $self->{__ro__filename}";
                $self->{__ro__exhausted} = 1;
                last;
            }
            next if $skipper and $skipper->($rec);
            push @$result, $rec;
            $count++;
        }

        if ( $size == ~0 ) {
            @$result = sort { $a->id cmp $b->id } @$result;
        }
    }

    $debug and DEBUG "fetched $count records";

    return $count;
}


sub _set_header ($self) {
    my $fh        = $self->filehandle;
    my $start_pos = tell($fh);

    my $tcx = $self->csv_options;
    my $csv = $self->csv;
    my $sep = $tcx->{sep} // $tcx->{sep_char};
    my $hd  = $self->has_header;

    my $sep_set = $sep ? [$sep] : $self->sep_auto;
    my @rec;

    if ( $sep and defined $hd and not $hd ) {
        @rec = $csv->getline($fh)->@*;
    }
    else {    # no separator defined and/or autodetect
        try {
            @rec = $csv->header(
                $fh, {
                    $sep_set ? ( sep_set => $sep_set ) : (),
                    munge_column_names => sub ($hcol) {
                        state $count = 0;



( run in 1.102 second using v1.01-cache-2.11-cpan-f56aa216473 )