CSV-Reader
view release on metacpan or search on metacpan
lib/CSV/Reader.pm view on Meta::CPAN
package CSV::Reader;
use strict;
use Carp qw(carp croak);
use Text::CSV ();
use Tie::IxHash ();
our $VERSION = 1.12;
=head1 NAME
CSV::Reader - CSV reader class
=head1 DESCRIPTION
Simple CSV reader class that uses Text::CSV internally.
The CSV files are expected to have a header row of column names.
This was designed with the idea of using an iterator interface, but Perl does not support iterators (nor interfaces) yet :(
=head1 SYNOPSIS
use CSV::Reader ();
use open OUT => ':locale'; # optional; make perl aware of your terminal's encoding
# Create reader from file name:
my $reader = new CSV::Reader('/path/to/file.csv');
# Create reader from a file handle (GLOB):
open(my $h, '<', $filename) || die("Failed to open $filename: $!");
# or preferred method that can handle files having a UTF-8 BOM:
open(my $h, '<:via(File::BOM)', $filename) || die("Failed to open $filename: $!");
my $reader = new CSV::Reader($h);
# Create reader from an IO::Handle based object:
my $io = IO::File->new(); # subclass of IO::Handle
$io->open($filename, '<:via(File::BOM)') || die("Failed to open $filename: $!");
my $reader = new CSV::Reader($io);
# Create reader with advanced options:
my $reader = new CSV::Reader('/path/to/file.csv',
'delimiter' => ';',
'enclosure' => '',
'field_normalizer' => sub {
my $nameref = shift;
$$nameref = lc($$nameref); # lowercase
$$nameref =~ s/\s/_/g; # whitespace to underscore
},
'field_aliases' => {
'postal_code' => 'postcode', # applied after normalization
},
'mutators' => {
'postcode' => sub { # if postalcode is Dutch, then make sure it has no spaces and is in uppercase.
my $val_ref = shift;
my $row_ref = shift;
if (defined($$val_ref) && defined($row_ref->{'country'}) && ($row_ref->{'country'} eq 'NL')) {
$$val_ref =~ s/\s+//g;
$$val_ref = uc($$val_ref);
}
},
'has_fiber_internet' => sub { # set a default for an empty (undef) value
my $val_ref = shift;
$$val_ref //= 0;
},
},
);
# Show the field names found in the header row:
print 'Field names: ' . join("\n", $reader->fieldNames()) . "\n";
# Iterate over the data rows:
while (my $row = $reader->nextRow()) {
# It's recommended to validate the $row hashref first with something such as Params::Validate.
# Now do whatever you want with the (validated) row hashref...
require Data::Dumper; local $Data::Dumper::Terse = 1;
print Data::Dumper::Dumper($row);
}
=head1 PUBLIC STATIC METHODS
=head2 new($file, %options)
Constructor.
$file can be a string file name, an open file handle (GLOB), or an IO::Handle based object (e.g. IO::File or IO::Scalar).
If a string file name is given, then the file is opened via File::BOM if that module is installed; otherwise it is opened in plain read mode ('<').
The following %options are supported:
- debug: boolean, if true, then debug messages are emitted using warn().
- field_aliases: hashref of alias (as found in the file, matched case-insensitively) => real name (as expected in code) pairs.
- field_normalizer: callback that receives a field name by reference to normalize (e.g. make lowercase).
- include_fields: arrayref of field names to include. If given, then all other field names are excluded.
- delimiter: string, default ','
- enclosure: string, default '"'
- escape: string, default backslash
- mutators: hashref of field name => callback($value_ref, $row_ref) pairs.
Note: the option field_aliases is processed after the option field_normalizer if given.
Note: the callbacks given with the mutators option are called in the key order of the given hashref (which is unpredictable unless the hash is tied with Tie::IxHash).
=cut
sub new {
my $proto = shift;
my $file = shift;
my %options = @_;
my $self = {
'h' => undef, # File handle.
'own_h' => undef, # Does this class own the file handle.
'field_cols' => {}, # Hashref of fieldname => column index pairs.
'row' => undef, # Current ReaderRow object.
'linenum' => 0, # Data row index.
'text_csv' => undef, # The Text::CSV object
# Options:
'debug' => 0,
'delimiter' => ',',
'enclosure' => '"',
'escape' => '\\',
'mutators' => undef,
'skip_empty_lines' => 0, # TODO: implement this
};
tie(%{$self->{'field_cols'}}, 'Tie::IxHash');
unless (defined($file) && length($file)) {
croak('Missing $file argument');
}
if (ref($file)) {
unless ((ref($file) eq 'GLOB') || UNIVERSAL::isa($file, 'IO::Handle')) {
croak(ref($file) . ' is not a legal file argument type');
}
$self->{'h'} = $file;
$self->{'own_h'} = 0;
}
else {
my $h;
eval {
require File::BOM;
};
my $mode = $@ ? '<' : '<:via(File::BOM)';
$options{'debug'} && warn(__PACKAGE__ . "::new file open mode is $mode\n");
open($h, $mode, $file) || croak('Failed to open "' . $file . '" for reading using mode "' . $mode . '": ' . $!);
$self->{'h'} = $h;
$self->{'own_h'} = 1;
}
# Get the options.
my %opt_field_aliases;
my $opt_field_normalizer;
my %opt_include_fields;
my %text_csv_options; # undocumented experimental feature; text_csv_*: avoid if possible; options with this prefix are passed as is (but without prefix) to the internal Text::CSV object.
if (%options) {
foreach my $key (keys %options) {
my $value = $options{$key};
if (($key eq 'debug') || ($key eq 'skip_empty_lines')) {
$self->{$key} = $value;
}
elsif (($key eq 'enclosure') || ($key eq 'escape')) {
if (!defined($value) || ref($value)) {
croak("The '$key' option must be a string");
}
$self->{$key} = $value;
}
elsif ($key eq 'delimiter') {
if (!defined($value) || ref($value) || !length($value)) {
croak("The '$key' option must be a non-empty string");
}
$self->{$key} = $value;
}
elsif ($key eq 'include_fields') {
if (ref($value) ne 'ARRAY') {
croak("The '$key' option must be an arrayref");
}
%opt_include_fields = map { $_ => undef } @$value;
}
elsif ($key eq 'field_aliases') {
if (ref($value) ne 'HASH') {
croak("The '$key' option must be a hashref");
}
%opt_field_aliases = map { lc($_) => $value->{$_} } keys %$value;
}
elsif ($key eq 'field_normalizer') {
if (ref($value) ne 'CODE') {
croak("The '$key' option must be a code ref");
}
$opt_field_normalizer = $value;
}
elsif ($key eq 'mutators') {
if (ref($value) ne 'HASH') {
croak("The '$key' option must be a hashref of field name => code ref pairs");
}
foreach my $name (keys %$value) {
my $mutator = $options{$key}->{$name};
if (defined($mutator)) {
unless (ref($mutator) eq 'CODE') {
croak('The mutator for "' . $name . '" must be a CODE ref');
}
}
}
( run in 1.872 second using v1.01-cache-2.11-cpan-39bf76dae61 )