Image-ExifTool
view release on metacpan or search on metacpan
lib/Image/ExifTool/Import.pm view on Meta::CPAN
$file = 'CSV file';
} else {
open CSVFILE, $file or return "Error opening CSV file '${file}'";
binmode CSVFILE;
$openedFile = 1;
$raf = File::RandomAccess->new(\*CSVFILE);
}
$delim = ',' unless defined $delim;
# set input record separator by first newline found in the file
# (safe because first line should contain only tag names)
while ($raf->Read($buff, 65536)) {
$buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
}
$raf->Seek(0,0);
while ($raf->ReadLine($buff)) {
my (@vals, $v, $i, %fileInfo);
my @toks = split /\Q$delim/, $buff;
while (@toks) {
($v = shift @toks) =~ s/^ +//; # remove leading spaces
if ($v =~ s/^"//) {
# quoted value must end in an odd number of quotes
while ($v !~ /("+)\s*$/ or not length($1) & 1) {
if (@toks) {
$v .= $delim . shift @toks;
} else {
# read another line from the file
$raf->ReadLine($buff) or last;
@toks = split /\Q$delim/, $buff;
last unless @toks;
$v .= shift @toks;
}
}
$v =~ s/"\s*$//; # remove trailing quote and whitespace
$v =~ s/""/"/g; # un-escape quotes
} else {
$v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
}
push @vals, $v;
}
if (@tags) {
# save values for each tag
$fileInfo{_ordered_keys_} = [ ];
for ($i=0; $i<@vals and $i<@tags; ++$i) {
# ignore empty entries unless missingValue is empty too
next unless length $vals[$i] or defined $missingValue and $missingValue eq '';
# delete tag (set value to undef) if value is same as missing tag
$fileInfo{$tags[$i]} =
(defined $missingValue and $vals[$i] eq $missingValue) ? undef : $vals[$i];
push @{$fileInfo{_ordered_keys_}}, $tags[$i];
}
# figure out the file name to use
if ($fileInfo{SourceFile}) {
$$database{$fileInfo{SourceFile}} = \%fileInfo;
$found = 1;
}
} else {
# the first row should be the tag names
foreach (@vals) {
# terminate at first blank tag name (eg. extra comma at end of line)
last unless length $_;
@tags or s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
/^([-_0-9A-Z]+:)*[-_0-9A-Z]+#?$/i or $err = "Invalid tag name '${_}'", last;
push(@tags, $_);
}
last if $err;
@tags or $err = 'No tags found', last;
# fix "SourceFile" case if necessary
$tags[0] = 'SourceFile' if lc $tags[0] eq 'sourcefile';
}
}
close CSVFILE if $openedFile;
undef $raf;
$err = 'No SourceFile column' unless $found or $err;
return $err ? "$err in $file" : undef;
}
#------------------------------------------------------------------------------
# Convert unicode code point to UTF-8
# Inputs: 0) integer Unicode character
# Returns: UTF-8 bytes
sub ToUTF8($)
{
require Image::ExifTool::Charset;
return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
}
#------------------------------------------------------------------------------
# Read JSON object from file
# Inputs: 0) RAF reference or undef, 1) optional scalar reference for data
# to read before reading from file (ie. the file read buffer)
# Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
# empty object or array (and sets $$buffPt to empty string on EOF)
# Notes: position in buffer is significant
sub ReadJSONObject($;$)
{
my ($raf, $buffPt) = @_;
# initialize buffer if necessary
my ($pos, $readMore, $rtnVal, $tok, $key, $didBOM);
if ($buffPt) {
$pos = pos $$buffPt;
$pos = pos($$buffPt) = 0 unless defined $pos;
} else {
my $buff = '';
$buffPt = \$buff;
$pos = 0;
}
Tok: for (;;) {
# (didn't spend the time to understand how $pos could be undef, but
# put a test here to be safe because one user reported this problem)
last unless defined $pos;
if ($pos >= length $$buffPt or $readMore) {
last unless defined $raf;
# read another 64kB and add to unparsed data
my $offset = length($$buffPt) - $pos;
if ($offset) {
my $buff;
$raf->Read($buff, 65536) or $$buffPt = '', last;
$$buffPt = substr($$buffPt, $pos) . $buff;
} else {
$raf->Read($$buffPt, 65536) or $$buffPt = '', last;
}
unless ($didBOM) {
$$buffPt =~ s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
$didBOM = 1;
}
$pos = pos($$buffPt) = 0;
$readMore = 0;
}
unless ($tok) {
# skip white space and find next character
$$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
$tok = $1;
$pos = pos $$buffPt;
}
# see what type of object this is
if ($tok eq '{') { # object (hash)
$rtnVal = { _ordered_keys_ => [ ] } unless defined $rtnVal;
for (;;) {
# read "KEY":"VALUE" pairs
unless (defined $key) {
$key = ReadJSONObject($raf, $buffPt);
$pos = pos $$buffPt;
}
# ($key may be undef for empty JSON object)
if (defined $key) {
# scan to delimiting ':'
$$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
$1 eq ':' or return undef; # error if not a colon
my $val = ReadJSONObject($raf, $buffPt);
$pos = pos $$buffPt;
return undef unless defined $val;
$$rtnVal{$key} = $val;
push @{$$rtnVal{_ordered_keys_}}, $key;
undef $key;
}
# scan to delimiting ',' or bounding '}'
$$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
last if $1 eq '}'; # check for end of object
$1 eq ',' or return undef; # error if not a comma
}
} elsif ($tok eq '[') { # array
$rtnVal = [ ] unless defined $rtnVal;
for (;;) {
my $item = ReadJSONObject($raf, $buffPt);
$pos = pos $$buffPt;
# ($item may be undef for empty array)
push @$rtnVal, $item if defined $item;
# scan to delimiting ',' or bounding ']'
$$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
last if $1 eq ']'; # check for end of array
$1 eq ',' or return undef; # error if not a comma
}
} elsif ($tok eq '"') { # quoted string
for (;;) {
$$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
last unless length($1) & 1; # check for escaped quote
}
$rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
# unescape characters
$rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
$rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
# decode base64 (binary data) values
if ($rtnVal =~ /^base64:[A-Za-z0-9+\/]*={0,2}$/ and length($rtnVal) % 4 == 3) {
require Image::ExifTool::XMP;
( run in 0.687 second using v1.01-cache-2.11-cpan-39bf76dae61 )