Data-CTable
view release on metacpan or search on metacpan
## DoMacMapping is the actual setting for auto charset mapping
my $DoMacMapping =
((!defined($MacRomanMap) && ($LineEnding eq "\x0D")) || ## Auto
($MacRomanMap)); ## On
$this->progress("Will convert upper-ascii characters if any, from Mac Roman to ISO 8859-1.") if $DoMacMapping;
## FieldList is usable if it is a list and has at least one entry.
my $FieldListValid = ((ref($FieldList) eq 'ARRAY') && @$FieldList);
## Set <$File> to use the line ending sequence we now know we are looking for.
local $/ = $LineEnding;
## We use $_ explicitly, so must localize.
local $_;
my $IncomingFields;
if ($HeaderRow)
{
## Get the list of fields available in the file (first line of file).
$_ = <$File> or
$this->{_ErrorMsg} = "Could not find a first line with field names in $FileName.", goto done;
## Try to guess file delimiter from the header row if not yet specified.
$FDelimiter ||= guess_delimiter($_) or
$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done;
## Maybe convert entire line (all records) Mac to ISO before splitting.
&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping;
chomp;
s/^\"//; s/\"$//; ## remove possible leading, trailing quotes surrounding header row (rare)
## Split header row into field names, removing optional "" around each at the same time.
$IncomingFields = [split(/\"?$FDelimiter\"?/, $_)];
}
else
{
## Otherwise, require that the caller specifies it in _FieldList
$this->{_ErrorMsg} = "Must specify a _FieldList if _HeaderRow says no header row is present.", goto done
unless $FieldListValid;
$IncomingFields = [@$FieldList];
}
## Remove any leading underscores in the names of the incoming
## fields (not allowed because such field names are reserved for
## other object data). Note: this could result in
## duplicate/overwritten field names that were otherwise
## apparently unique in the incoming data file.
$IncomingFields = [map {(/^_*(.*)/)[0]} @$IncomingFields];
## Make a hash that can be used to map these fields' names to their numbers.
my $IncomingFieldNameToNum = {}; @$IncomingFieldNameToNum{@$IncomingFields} = ($[ .. $#$IncomingFields);
## Make a list of the fields we'll be importing (by taking the
## list the caller requested, and paring it down to only those
## fields that are actually available in the table.)
my $FieldsToGet =
[grep {exists($IncomingFieldNameToNum->{$_})}
($FieldListValid ? @$FieldList : @$IncomingFields)];
## Make a note of whether we're getting a subset of available
## fields because the caller requested such. If we are, we'll add
## a _Subset => 1 marker to the data for use later in ensuring the
## cache is OK.
my $GettingSubset = ($FieldListValid && ("@{[sort @$IncomingFields]}" ne
"@{[sort @$FieldList ]}"));
## Make an array of the incoming indices of these fields ($FieldNums).
## Then allocate a list of empty arrays ($FieldVectors, indexed by
## incoming field number) into which we can import the data.
## Initially each is pre-extended so that its last index is 100
## (i.e. 101 empty slots); after we have imported 100 records, we'll
## re-consider the size estimate. When we're all done, we'll prune
## them back.
my $FieldNums = [@$IncomingFieldNameToNum{@$FieldsToGet}];
my $FieldVectors = []; foreach (@$FieldNums) {$#{$FieldVectors->[$_] = []} = 100};
## We want to be cool and support any embedded NULL (ascii zero)
## characters should they exist in the data, even though we are
## going to use NULL chars to encode embedded delimiters before we
## split....
## First we create a sufficiently obscure placeholder for any
## ascii zero characters in the input text (a rare occurrence
## anyway).
my $ZeroMarker = "\001ASCII_ZERO" . time() . "\001";
## Now ready to go through the file line-by-line (record-by-record)
my $WroteProg;
my $RecordsRead = 0;
while (<$File>)
{
## Try to guess file delimiter from the current data record if not yet
## specified (e.g. when no header row was read).
$FDelimiter ||= guess_delimiter($_) or
$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done;
## Maybe convert entire line (all records) Mac to ISO before splitting.
&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping;
## Manipulate the single line of data fields into a splittable format.
chomp;
## Replace any delimiters inside quotes with ASCII 0.
## Split fields on delimiters.
## Delete leading or trailing quote marks from each field.
## Restore delimiters ASCII 0 back to delimiters.
( run in 2.075 seconds using v1.01-cache-2.11-cpan-5735350b133 )