Data-CTable

 view release on metacpan or  search on metacpan

CTable.pm  view on Meta::CPAN

	
	## DoMacMapping is the actual setting for auto charset mapping
	my $DoMacMapping	= 
		((!defined($MacRomanMap) && ($LineEnding eq "\x0D")) || ## Auto
		 ($MacRomanMap));									    ## On

	$this->progress("Will convert upper-ascii characters if any, from Mac Roman to ISO 8859-1.") if $DoMacMapping;

	## FieldList is usable if it is a list and has at least one entry.
	my $FieldListValid = ((ref($FieldList) eq 'ARRAY') && @$FieldList);
	
	## Set <$File> to use the line ending sequence we now know we are looking for.
	local $/ = $LineEnding;
	
	## We use $_ explicitly, so must localize.
	local $_;
	
	my $IncomingFields;

	if ($HeaderRow)
	{
		## Get the list of fields available in the file (first line of file).

		$_ = <$File> or
			$this->{_ErrorMsg} = "Could not find a first line with field names in $FileName.", goto done;

		## Try to guess file delimiter from the header row if not yet specified.
		$FDelimiter ||= guess_delimiter($_) or 
			$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done;
		
		## Maybe convert entire line (all records) Mac to ISO before splitting.
		&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping;

		chomp;
		
		s/^\"//; s/\"$//;  	  ## remove possible leading, trailing quotes surrounding header row (rare)

		## Split header row into field names, removing optional "" around each at the same time.
		$IncomingFields = [split(/\"?$FDelimiter\"?/, $_)];
		
	}
	else
	{
		## Otherwise, require that the caller specifies it in _FieldList

		$this->{_ErrorMsg} = "Must specify a _FieldList if _HeaderRow says no header row is present.", goto done 
			unless $FieldListValid;
		
		$IncomingFields = [@$FieldList];
	}

	## Remove any leading underscores in the names of the incoming
	## fields (not allowed because such field names are reserved for
	## other object data).  Note: this could result in
	## duplicate/overwritten field names that were otherwise
	## apparently unique in the incoming data file.

	$IncomingFields = [map {(/^_*(.*)/)[0]} @$IncomingFields];
		
	## Make a hash that can be used to map these fields' names to their numbers.
	my $IncomingFieldNameToNum = {}; @$IncomingFieldNameToNum{@$IncomingFields} = ($[ .. $#$IncomingFields);
		
	## Make a list of the fields we'll be importing (by taking the
	## list the caller requested, and paring it down to only those
	## fields that are actually available in the table.)

	my $FieldsToGet = 
		[grep {exists($IncomingFieldNameToNum->{$_})}
		 ($FieldListValid ? @$FieldList : @$IncomingFields)];

	## Make a note of whether we're getting a subset of available
	## fields because the caller requested such.  If we are, we'll add
	## a _Subset => 1 marker to the data for use later in ensuring the
	## cache is OK.
	
	my $GettingSubset = ($FieldListValid && ("@{[sort @$IncomingFields]}" ne 
											 "@{[sort @$FieldList  ]}"));
	
	## Make an array of the incoming indices of these fields.

	## Allocate a list of empty arrays into which we can import the
	## data.  Initially they'll each have 100 empty slots for data;
	## after we have imported 100 records, we'll re-consider the size
	## estimate.  When we're all done, we'll prune them back.

	my $FieldNums		= [@$IncomingFieldNameToNum{@$FieldsToGet}];
	my $FieldVectors	= []; foreach (@$FieldNums) {$#{$FieldVectors->[$_] = []} = 100};

	## We want to be cool and support any embedded NULL (ascii zero)
	## characters should they exist in the data, even though we are
	## going to use NULL chars to encode embedded delimiters before we
	## split....

	## First we create a sufficiently obscure placeholder for any
	## ascii zero characters in the input text (a rare occurrence
	## anyway).

	my $ZeroMarker = "\001ASCII_ZERO" . time() . "\001";
	
	## Now ready to go through the file line-by-line (record-by-record)

	my $WroteProg;
	my $RecordsRead = 0;
	while (<$File>)
	{
		## Try to guess file delimiter from this data line if not yet specified (e.g. when there was no header row).
		$FDelimiter ||= guess_delimiter($_) or 
			$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done;
		
		## Maybe convert entire line (all records) Mac to ISO before splitting.
		&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping;
		
		## Manipulate the single line of data fields into a splittable format.
		
		chomp;
		
		## Replace any delimiters inside quotes with ASCII 0.
		## Split fields on delimiters.
		## Delete leading or trailing quote marks from each field.
		## Restore delimiters ASCII 0 back to delimiters.
		



( run in 2.075 seconds using v1.01-cache-2.11-cpan-5735350b133 )