CSV-Reader

 view release on metacpan or  search on metacpan

lib/CSV/Reader.pm  view on Meta::CPAN

package CSV::Reader;
use strict;
use Carp qw(carp croak);
use Text::CSV ();
use Tie::IxHash ();
our $VERSION = 1.12;

=head1 NAME

CSV::Reader - CSV reader class

=head1 DESCRIPTION

Simple CSV reader class that uses Text::CSV internally.
The CSV files are expected to have a header row of column names.
This was designed with the idea of using an iterator interface, but Perl does not support iterators (nor interfaces) yet :(

=head1 SYNOPSIS

	use CSV::Reader ();
	use open OUT => ':locale'; # optional; make perl aware of your terminal's encoding

	# Create reader from file name:
	my $reader = new CSV::Reader('/path/to/file.csv');

	# Create reader from a file handle (GLOB):
	open(my $h, '<', $filename) || die("Failed to open $filename: $!");
	# or preferred method that can handle files having a UTF-8 BOM:
	open(my $h, '<:via(File::BOM)', $filename) || die("Failed to open $filename: $!");
	my $reader = new CSV::Reader($h);

	# Create reader from an IO::Handle based object:
	my $io = IO::File->new(); # subclass of IO::Handle
	$io->open($filename, '<:via(File::BOM)') || die("Failed to open $filename: $!");
	my $reader = new CSV::Reader($io);

	# Create reader with advanced options:
	my $reader = new CSV::Reader('/path/to/file.csv',
		'delimiter' => ';',
		'enclosure' => '',
		'field_normalizer' => sub {
			my $nameref = shift;
			$$nameref = lc($$nameref);	# lowercase
			$$nameref =~ s/\s/_/g;	# whitespace to underscore
		},
		'field_aliases'	=> {
			'postal_code' => 'postcode', # applied after normalization
		},
		'mutators' => {
			'postcode' => sub {	# if postalcode is Dutch, then make sure it has no spaces and is in uppercase.
				my $val_ref = shift;
				my $row_ref = shift;
				if (defined($$val_ref) && defined($row_ref->{'country'}) && ($row_ref->{'country'} eq 'NL')) {
					$$val_ref =~ s/\s+//g;
					$$val_ref = uc($$val_ref);
				}
			},
			'has_fiber_internet' => sub {	# set a default for an empty (undef) value
				my $val_ref = shift;
				$$val_ref //= 0;
			},
		},
	);

	# Show the field names found in the header row:
	print 'Field names: ' . join("\n", $reader->fieldNames()) . "\n";

	# Iterate over the data rows:
	while (my $row = $reader->nextRow()) {
		# It's recommended to validate the $row hashref first with something such as Params::Validate.
		# Now do whatever you want with the (validated) row hashref...
		require Data::Dumper; local $Data::Dumper::Terse = 1;
		print Data::Dumper::Dumper($row);
	}

=head1 PUBLIC STATIC METHODS

=head2 new($file, %options)

Constructor.

$file can be a string file name, an open file handle (GLOB), or an IO::Handle based object (e.g. IO::File or IO::Scalar).
If a string file name is given, then the file is opened via File::BOM if that module can be loaded; otherwise it is opened in plain read mode ('<').

The following %options are supported:

	- debug: boolean, if true, then debug messages are emitted using warn().
	- field_aliases: hashref of case insensitive alias (in file) => real name (as expected in code) pairs.
	- field_normalizer: callback that receives a field name by reference to normalize (e.g. make lowercase).
	- include_fields: arrayref of field names to include. If given, then all other field names are excluded.
	- delimiter: string, default ','
	- enclosure: string, default '"'
	- escape: string, default backslash
	- mutators: hashref of field name => callback($value_ref, $row_ref) pairs.

Note: the option field_aliases is processed after the option field_normalizer if given.

Note: the callbacks given with the mutators option are called in the key order of the mutators hash (which is unpredictable unless that hash is tied with Tie::IxHash).

=cut

sub new {
	my $proto = shift;
	my $file = shift;
	my %options = @_;
	my $self = {
		'h'				=> undef,	# File handle.
		'own_h'			=> undef,	# Does this class own the file handle.
		'field_cols'	=> {},		# Hashref of fieldname => column index pairs.
		'row'			=> undef,	# Current ReaderRow object.
		'linenum'		=> 0,		# Data row index.
		'text_csv'		=> undef,	# The Text::CSV object

		# Options:
		'debug'			=> 0,
		'delimiter'		=> ',',
		'enclosure'		=> '"',
		'escape'		=> '\\',
		'mutators'		=> undef,
		'skip_empty_lines'	=> 0, # TODO: implement this
	};
	tie(%{$self->{'field_cols'}}, 'Tie::IxHash');

	unless (defined($file) && length($file)) {
		croak('Missing $file argument');
	}
	if (ref($file)) {
		unless ((ref($file) eq 'GLOB') || UNIVERSAL::isa($file, 'IO::Handle')) {
			croak(ref($file) . ' is not a legal file argument type');
		}
		$self->{'h'} = $file;
		$self->{'own_h'} = 0;
	}
	else {
		my $h;
		eval {
			require File::BOM;
		};
		my $mode = $@ ? '<' : '<:via(File::BOM)';
		$options{'debug'} && warn(__PACKAGE__ . "::new file open mode is $mode\n");
		open($h, $mode, $file) || croak('Failed to open "' . $file . '" for reading using mode "' . $mode . '": ' . $!);
		$self->{'h'} = $h;
		$self->{'own_h'} = 1;
	}

	# Get the options.
	my %opt_field_aliases;
	my $opt_field_normalizer;
	my %opt_include_fields;
	my %text_csv_options;	# undocumented experimental feature; text_csv_*: avoid if possible; options with this prefix are passed as is (but without prefix) to the internal Text::CSV object.
	if (%options) {
		foreach my $key (keys %options) {
			my $value = $options{$key};
			if (($key eq 'debug') || ($key eq 'skip_empty_lines')) {
				$self->{$key} = $value;
			}
			elsif (($key eq 'enclosure') || ($key eq 'escape')) {
				if (!defined($value) || ref($value)) {
					croak("The '$key' option must be a string");
				}
				$self->{$key} = $value;
			}
			elsif ($key eq 'delimiter') {
				if (!defined($value) || ref($value) || !length($value)) {
					croak("The '$key' option must be a non-empty string");
				}
				$self->{$key} = $value;
			}

			elsif ($key eq 'include_fields') {
				if (ref($value) ne 'ARRAY') {
					croak("The '$key' option must be an arrayref");
				}
				%opt_include_fields = map { $_ => undef } @$value;
			}
			elsif ($key eq 'field_aliases') {
				if (ref($value) ne 'HASH') {
					croak("The '$key' option must be a hashref");
				}
				%opt_field_aliases = map { lc($_) => $value->{$_} } keys %$value;
			}
			elsif ($key eq 'field_normalizer') {
				if (ref($value) ne 'CODE') {
					croak("The '$key' option must be a code ref");
				}
				$opt_field_normalizer = $value;
			}
			elsif ($key eq 'mutators') {
				if (ref($value) ne 'HASH') {
					croak("The '$key' option must be a hashref of field name => code ref pairs");
				}
				foreach my $name (keys %$value) {
					my $mutator = $options{$key}->{$name};
					if (defined($mutator)) {
						unless (ref($mutator) eq 'CODE') {
							croak('The mutator for "' . $name . '" must be a CODE ref');
						}
					}
				}



( run in 1.872 second using v1.01-cache-2.11-cpan-39bf76dae61 )