File-Raw-Separated
view release on metacpan or search on metacpan
t/36-plugin-each-line.t view on Meta::CPAN
#!/usr/bin/perl
use strict;
use warnings;
use Test::More;
use File::Temp qw(tempdir);
use File::Raw;
use File::Raw::Separated;
# File::Raw::each_line($p, $cb, plugin => 'csv') routes through
# sep_stream: File::Raw owns the file open + chunk-read loop, and
# sep_stream holds a separated_ctx_t* across calls in
# FilePluginContext::call_state. The user callback fires once per
# parsed record (arrayref under default mode, hashref under header).
# Scratch directory that holds every fixture file written by the subtests
# below; CLEANUP => 1 asks File::Temp to remove it when the script exits.
my $dir = tempdir(CLEANUP => 1);
# Smoke test: a three-line CSV fixture must reach the callback as three
# parsed records, delivered in file order.
subtest 'each_line streams parsed records via plugin' => sub {
    my $path = "$dir/stream.csv";
    File::Raw::spew($path, "a,1\nb,2\nc,3\n");

    my @seen;
    File::Raw::each_line(
        $path,
        sub {
            my ($record) = @_;
            # Keep a shallow copy instead of the reference we were handed.
            push @seen, [ @{$record} ];
        },
        plugin => 'csv',
    );

    is(scalar(@seen), 3, 'three records emitted');
    is_deeply($seen[0], [ 'a', '1' ], 'first record');
    is_deeply($seen[2], [ 'c', '3' ], 'last record');
};
# Equivalence check: the records streamed through each_line must be the same
# as what csv_parse_buf returns for the slurped contents of the same file.
subtest 'each_line via plugin matches in-memory parse_buf' => sub {
    my $path = "$dir/equiv.csv";

    # Fifty newline-terminated rows: "row1,1" through "row50,50".
    my $content = '';
    $content .= "row$_,$_" . "\n" for 1 .. 50;
    File::Raw::spew($path, $content);

    my @streamed;
    File::Raw::each_line(
        $path,
        sub {
            my ($record) = @_;
            push @streamed, [ @{$record} ];
        },
        plugin => 'csv',
    );

    # Reference result: parse the whole file in one go.
    my $parsed = File::Raw::Separated::csv_parse_buf(File::Raw::slurp($path));

    is(scalar(@streamed), scalar(@{$parsed}), 'same row count');
    is_deeply(\@streamed, $parsed, 'streaming output equivalent to in-memory');
};
# Quoting rules: a quoted field may contain the separator, and a doubled
# quote inside a quoted field stands for one literal quote character.
subtest 'each_line handles fields with embedded separator and quote' => sub {
    my $path = "$dir/tricky.csv";

    my @fixture_lines = (
        qq(plain,1\n),
        qq("has, comma",2\n),
        qq("has ""quote",3\n),
    );
    File::Raw::spew($path, join(q{}, @fixture_lines));

    my @seen;
    File::Raw::each_line(
        $path,
        sub {
            my ($record) = @_;
            push @seen, [ @{$record} ];
        },
        plugin => 'csv',
    );

    is(scalar(@seen), 3, 'three records');
    is_deeply($seen[0], [ 'plain', '1' ], 'plain field');
    is_deeply($seen[1], [ 'has, comma', '2' ], 'embedded comma unquoted');
    is_deeply($seen[2], [ 'has "quote', '3' ], 'doubled-quote escape collapsed');
};
# Same streaming path as the CSV tests, but selecting the 'tsv' plugin so
# that fields are split on tabs.
subtest 'each_line via tsv plugin' => sub {
    my $path = "$dir/stream.tsv";
    File::Raw::spew($path, "a\t1\nb\t2\nc\t3\n");

    my @seen;
    File::Raw::each_line(
        $path,
        sub {
            my ($record) = @_;
            push @seen, [ @{$record} ];
        },
        plugin => 'tsv',
    );

    is(scalar(@seen), 3, 'three tsv records');
    is_deeply($seen[1], [ 'b', '2' ], 'tab-separated row parsed');
};
# Header mode: with header => 1 the first line supplies the keys and the
# callback receives one hashref per remaining data line.
subtest 'header => 1 emits hashrefs' => sub {
    my $path = "$dir/header.csv";
    File::Raw::spew($path, "name,age\nalice,30\nbob,25\n");

    my @seen;
    File::Raw::each_line(
        $path,
        sub {
            my ($record) = @_;
            # Shallow-copy the hash rather than keeping the reference.
            push @seen, { %{$record} };
        },
        plugin => 'csv',
        header => 1,
    );

    is(scalar(@seen), 2, 'two data rows');
    is_deeply($seen[0], { name => 'alice', age => '30' }, 'alice hashref');
    is_deeply($seen[1], { name => 'bob', age => '25' }, 'bob hashref');
};
# Chunk-boundary coverage: 10,000 rows of 19 bytes each (190,000 bytes in
# total), which is well past the 64 KiB default chunk size of File::Raw, so
# the parser has to carry state across several reads.
subtest 'large file streams across multiple chunks' => sub {
    my $path = "$dir/big.csv";

    my @lines = map { sprintf('id%05d,name-%05d', $_, $_) } 1 .. 10_000;
    File::Raw::spew($path, join("\n", @lines) . "\n");

    my $seen = 0;
    my ($head, $tail);
    File::Raw::each_line(
        $path,
        sub {
            my ($record) = @_;
            $seen += 1;
            # Remember only the very first record ...
            $head = [ @{$record} ] unless $head;
            # ... and keep overwriting so the final record is left behind.
            $tail = [ @{$record} ];
        },
        plugin => 'csv',
    );

    is($seen, 10_000, 'all 10k rows seen across chunks');
    is_deeply($head, [ 'id00001', 'name-00001' ], 'first row intact');
    is_deeply($tail, [ 'id10000', 'name-10000' ], 'last row intact');
};
# Error propagation: an exception thrown inside the callback must surface to
# the caller of each_line instead of being swallowed by the streaming loop.
subtest 'callback dies propagate from streaming dispatch' => sub {
    my $path = "$dir/die.csv";
    File::Raw::spew($path, "a,1\nb,2\nc,3\n");

    eval {
        File::Raw::each_line(
            $path,
            sub {
                my ($record) = @_;
                # Abort as soon as the second row shows up.
                die "stop on row b\n" if $record->[0] eq 'b';
            },
            plugin => 'csv',
        );
    };
    # Snapshot $@ immediately, before anything else can overwrite it.
    my $error = $@;

    like($error, qr/stop on row b/, 'die in callback re-raised');
};
done_testing;
( run in 2.971 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )