CSV-Reader

 view release on metacpan or  search on metacpan

lib/CSV/Reader.pm  view on Meta::CPAN

	my $reader = new CSV::Reader('/path/to/file.csv',
		'delimiter' => ';',
		'enclosure' => '',
		'field_normalizer' => sub {
			my $nameref = shift;
			$$nameref = lc($$nameref);	# lowercase
			$$nameref =~ s/\s/_/g;	# whitespace to underscore
		},
		'field_aliases'	=> {
			'postal_code' => 'postcode', # applied after normalization
		},
		'mutators' => {
			'postcode' => sub {	# if postalcode is Dutch, then make sure it has no spaces and is in uppercase.
				my $val_ref = shift;
				my $row_ref = shift;
				if (defined($$val_ref) && defined($row_ref->{'country'}) && ($row_ref->{'country'} eq 'NL')) {
					$$val_ref =~ s/\s+//;
					$$val_ref = uc($$val_ref);
				}
			},
			'has_fiber_internet' => sub {	# set a default for an empty (undef) value
				my $val_ref = shift;
				$$val_ref //= 0;
			},
		},
	);

	# Show the field names found in the header row:
	print 'Field names: ' . join("\n", $reader->fieldNames()) . "\n";

	# Iterate over the data rows:
	while (my $row = $reader->nextRow()) {
		# It's recommended to validate the $row hashref first with something such as Params::Validate.
		# Now do whatever you want with the (validated) row hashref...
		require Data::Dumper; local $Data::Dumper::Terse = 1;
		print Data::Dumper::Dumper($row);
	}

=head1 PUBLIC STATIC METHODS

=head2 new($file, %options)

Constructor.

$file can be a string file name, an open file handle (GLOB), or an IO::Handle based object (e.g. IO::File or IO::Scalar).
If a string file name is given, then the file is opened via File::BOM.

The following %options are supported:

	- debug: boolean, if true, then debug messages are emitted using warn().
	- field_aliases: hashref of case insensitive alias (in file) => real name (as expected in code) pairs.
	- field_normalizer: callback that receives a field name by reference to normalize (e.g. make lowercase).
	- include_fields: arrayref of field names to include. If given, then all other field names are excluded.
	- delimiter: string, default ','
	- enclosure: string, default '"'
	- escape: string, default backslash
	- mutators: hashref of field name => callback($value_ref, $row_ref) pairs.

Note: the option field_aliases is processed after the option field_normalizer if given.

Note: the callbacks given with the mutators option are called in their key order (which is an unpredictable order unless they're tied with Tie::IxHash).

=cut

sub new {
	my $proto = shift;
	my $file = shift;
	my %options = @_;
	my $self = {
		'h'				=> undef,	# File handle.
		'own_h'			=> undef,	# Does this class own the file handle.
		'field_cols'	=> {},		# Hashref of fieldname => column index pairs.
		'row'			=> undef,	# Current ReaderRow object.
		'linenum'		=> 0,		# Data row index.
		'text_csv'		=> undef,	# The Text::CSV object

		# Options:
		'debug'			=> 0,
		'delimiter'		=> ',',
		'enclosure'		=> '"',
		'escape'		=> '\\',
		'mutators'		=> undef,
		'skip_empty_lines'	=> 0, # TODO: implement this
	};
	tie(%{$self->{'field_cols'}}, 'Tie::IxHash');

	unless (defined($file) && length($file)) {
		croak('Missing $file argument');
	}
	if (ref($file)) {
		unless ((ref($file) eq 'GLOB') || UNIVERSAL::isa($file, 'IO::Handle')) {
			croak(ref($file) . ' is not a legal file argument type');
		}
		$self->{'h'} = $file;
		$self->{'own_h'} = 0;
	}
	else {
		my $h;
		eval {
			require File::BOM;
		};
		my $mode = $@ ? '<' : '<:via(File::BOM)';
		$options{'debug'} && warn(__PACKAGE__ . "::new file open mode is $mode\n");
		open($h, $mode, $file) || croak('Failed to open "' . $file . '" for reading using mode "' . $mode . '": ' . $!);
		$self->{'h'} = $h;
		$self->{'own_h'} = 1;
	}

	# Get the options.
	my %opt_field_aliases;
	my $opt_field_normalizer;
	my %opt_include_fields;
	my %text_csv_options;	# undocumented experimental feature; text_csv_*: avoid if possible; options with this prefix are passed as is (but without prefix) to the internal Text::CSV object.
	if (%options) {
		foreach my $key (keys %options) {
			my $value = $options{$key};
			if (($key eq 'debug') || ($key eq 'skip_empty_lines')) {
				$self->{$key} = $value;
			}
			elsif (($key eq 'enclosure') || ($key eq 'escape')) {
				if (!defined($value) || ref($value)) {

lib/CSV/Reader.pm  view on Meta::CPAN

			elsif ($key eq 'delimiter') {
				if (!defined($value) || ref($value) || !length($value)) {
					croak("The '$key' option must be a non-empty string");
				}
				$self->{$key} = $value;
			}

			elsif ($key eq 'include_fields') {
				if (ref($value) ne 'ARRAY') {
					croak("The '$key' option must be an arrayref");
				}
				%opt_include_fields = map { $_ => undef } @$value;
			}
			elsif ($key eq 'field_aliases') {
				if (ref($value) ne 'HASH') {
					croak("The '$key' option must be a hashref");
				}
				%opt_field_aliases = map { lc($_) => $value->{$_} } keys %$value;
			}
			elsif ($key eq 'field_normalizer') {
				if (ref($value) ne 'CODE') {
					croak("The '$key' option must be a code ref");
				}
				$opt_field_normalizer = $value;
			}
			elsif ($key eq 'mutators') {
				if (ref($value) ne 'HASH') {
					croak("The '$key' option must be a hashref of field name => code ref pairs");
				}
				foreach my $name (keys %$value) {
					my $mutator = $options{$key}->{$name};
					if (defined($mutator)) {
						unless (ref($mutator) eq 'CODE') {
							croak('The mutator for "' . $name . '" must be a CODE ref');
						}
					}
				}
				$self->{$key} = $value;
			}
			elsif ($key =~ /^(?:Text(?::|_)CSV|text_csv)[\._:](.+)$/) {
				$text_csv_options{$1} = $value;
			}
			else {
				croak("Unknown option '$key'");
			}
		}
	}

	my $text_csv = $self->{'text_csv'} = $proto->_new_text_csv_object({
		'auto_diag'			=> 1,
		'binary'			=> 1,
		'blank_is_undef'	=> 1,
		'empty_is_undef'	=> 1,
		'sep_char'			=> $self->{'delimiter'},
		'escape_char'		=> $self->{'escape'},
		'quote_char'		=> $self->{'enclosure'},
		%text_csv_options,	# undocumented experimental feature; consider overriding _new_text_csv_object() instead.
	}) || die('Method _new_text_csv_object() did not return a Text::CSV object as expected');

	# Emulate the original Text::CSV error message format but without the LF and with the caller script/module.
	if (0 && $text_csv->can('callbacks')) {	# exists since Text::CSV_XS version 1.06
		$text_csv->callbacks(
			'error' => sub {
				my ($err, $msg, $pos, $recno, $fldno) = @_;	# This is dumb because the object itself is not given.
				if ($err eq '2012') { # EOF
					return;
				}
				#CSV_XS ERROR: 2021 - EIQ - NL char inside quotes, binary off @ rec 10 pos 51 field 6
				#die 'error args: ' . Data::Dumper::Dumper(\@_);
				local $Carp::CarpInternal{'Text::CSV'} = 1;
				local $Carp::CarpInternal{'Text::CSV_PP'} = 1;
				local $Carp::CarpInternal{'Text::CSV_XS'} = 1;
				carp(ref($text_csv) . " ERROR: $err - $msg \@ rec $recno pos $pos field $fldno");
				return;
			},
		);
	}

	# Read header row.
	if (my $row = $self->{'text_csv'}->getline($self->{'h'})) {
		# Get the fieldname => column indices
		for (my $x = 0; $x < @$row; $x++) {
			my $name = $row->[$x];
			unless(defined($name)) {
				next;
			}
			$name =~ s/^\s+|\s+$//g;
			unless(length($name)) {
				next;
			}
			if ($opt_field_normalizer) {
				&$opt_field_normalizer(\$name);
			}
			if (%opt_field_aliases) {
				my $key = lc($name);
				if (defined($opt_field_aliases{$key})) {
					$name = $opt_field_aliases{$key};
				}
			}
			if (%opt_include_fields && !exists($opt_include_fields{$name})) {
				next;
			}
			if (exists($self->{'field_cols'}->{$name})) {
				croak('Duplicate field "' . $name . '" detected');
			}
			$self->{'field_cols'}->{$name} = $x;
		}
		unless(%{$self->{'field_cols'}}) {
			croak(%opt_include_fields ? 'No fields found in header row to include' : 'No fields found in header row');
		}
		# If include_fields option was given, reorder keys of field_cols to match it.
		if (%opt_include_fields) {
			my %field_cols;
			#{$self->{'field_cols'}}
			tie(%field_cols, 'Tie::IxHash');
			foreach my $key (@{$options{'include_fields'}}) {
				if (exists($self->{'field_cols'}->{$key})) {
					$field_cols{$key} = $self->{'field_cols'}->{$key};
				}
			}
			$self->{'field_cols'} = \%field_cols;
		}



( run in 0.916 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )