Text-CSV_XS

 view release on metacpan or  search on metacpan

examples/csv-check  view on Meta::CPAN


my ($rows, %cols, $s_eol) = (0);
unless ($sep) { # No sep char passed, try to auto-detect;
    my ($first_line) = ($data =~ m/\A(.*?)(?:\r\n|\n|\r)/);
    $first_line ||= $data; # if no EOL at all, use whole set
    $sep = $first_line =~ m/["\d],["\d,]/ ? ","  :
	   $first_line =~ m/["\d];["\d;]/ ? ";"  :
	   $first_line =~ m/["\d]\t["\d]/ ? "\t" :
	   # If neither, then for unquoted strings
	   $first_line =~ m/\w,[\w,]/     ? ","  :
	   $first_line =~ m/\w;[\w;]/     ? ";"  :
	   $first_line =~ m/\w\t[\w]/     ? "\t" : ",";
    $data =~ m/([\r\n]+)\Z/ and $s_eol = DDisplay "$1";
    $csvarg{sep_char} = $sep;
    }

my $csv = $csvmod->new (\%csvarg);
$opt_v > 8 and DDumper $csv;

$bin = 0; # Assume ASCII only

sub done {
    my $file = $ARGV // "STDIN";
    (my $pv = "$]0000000") =~ s{^([0-9]+)\.([0-9]{3})([0-9]{3})[0-9]*}
			       {sprintf "%d.%d.%d",$1,$2,$3}e;
    my $uv = eval {
	no warnings;
	(my $cv = $]) =~ s/0+$//;
	eval { require Unicode::UCD;     Unicode::UCD::UnicodeVersion () } ||
	eval { require Module::CoreList; $Module::CoreList::version{$cv}{Unicode} };
	} || "unknown";
    say "Checked $file with $cmd $VERSION\nusing $csvmod @{[$csvmod->VERSION]} with perl $pv and Unicode $uv";
    my @diag = $csv->error_diag;
    my $line = $. // $csv->record_number // "?";
    if ($diag[0] == 2012 && $csv->eof) {
	my @coll = sort { $a <=> $b } keys %cols;
	local $" = ", ";
	my $cols = @coll == 1 ? $coll[0] : "(@coll)";
	$s_eol //= $csv->eol || "--unknown--";
	$s_eol =~ m/[\x00-\x1f]/ and $s_eol = DDisplay $s_eol;
	say "OK: rows: $rows, columns: $cols";
	say "    sep = <$sep>, quo = <$quo>, bin = <$bin>, eol = <$s_eol>";
	say "    encoding = $csv->{ENCODING}" if $csv->{ENCODING};
	if (@coll > 1) {
	    say "multiple column lengths:";
	    printf " %6d line%s with %4d field%s\n",
		$cols{$_}, $cols{$_} == 1 ? " " : "s",
		$_,        $_        == 1 ? ""  : "s"
		    for @coll;
	    }
	$diag[0] = 0;
	}
    elsif ($diag[2]) {
	say "$ARGV record $diag[3] at line $line/$diag[2] - $diag[0] - $diag[1]";
	my $ep  = $diag[2] - 1; # diag[2] is 1-based
	my $ei  = $csv->error_input;
	if (defined $ei) {
	    my $l = 0;
	    my $s = "";
	    eval { my $u = decode ("utf-8", $ei); $ei = $u };
	    for (split m/([^ -~])/ => $ei) {
		if (m/^[ -~]+$/) {
		    $s .= $_;
		    $l += length;
		    next;
		    }
		if ($_ eq "\t") {
		    $s .= "\\t";
		    $ep > $l and $ep++;
		    $l += 2;
		    next;
		    }
		if ($_ eq "\n") {
		    $s .= "\\n";
		    $ep > $l and $ep++;
		    $l += 2;
		    next;
		    }
		if ($_ eq "\r") {
		    $s .= "\\r";
		    $ep > $l and $ep++;
		    $l += 2;
		    next;
		    }
		$s .= sprintf "\\x{%05x}", ord;
		$ep > $l and $ep += 9 - length encode "utf-8", $_;
		$l += 9;
		}

	    say "    |$s|"; #           2b06
	    say "    |", " " x $ep, "\x{25b2}", " " x (length ($s) - $ep - 1), "|";
	    }
	}
    else {
	say "$ARGV line $line - $diag[1]";
	}
    print for @warn;
    exit $diag[0];
    } # done

sub show {
    say STDERR join ", " => map { "\x{231e}$_\x{231d}" } @_;
    } # show

sub stats {
    my $r = shift;
    $cols{scalar @$r}++;
    grep { $_ & 0x0002 } $csv->meta_info and $bin = 1;
    $opt_v > 2 and show (@$r);
    if ($opt_u) {
	my @r = @$r;
	foreach my $x (0 .. $#r) {
	    utf8::is_utf8 ($r[$x]) and next;

	    local $SIG{__WARN__} = sub {
		(my $msg = shift) =~ s{ at /\S+Encode.pm.*}{};
		my @h = $csv->column_names;
		push @warn, sprintf "Field %d%s in record %d - '%s'\t- %s",
		    $x + 1, @h ? " (column: '$h[$x]')" : "", $rows,
		    DPeek ($r[$x]), $msg;
		};



( run in 0.537 second using v1.01-cache-2.11-cpan-5511b514fd6 )