File-Raw-Separated
view release on metacpan or search on metacpan
Revision history for File-Raw-Separated
0.04 2026-05-17
- Bumped File::Raw prereq to 0.13 to pick up the streaming
O_BINARY fix on Windows.
- Remove pax headers
0.03 2026-05-07
- C89 fixes
- Bumped File::Raw prereq to 0.12
0.02 Initial release on CPAN.
0.01 Date/time
include/separated_parser.h view on Meta::CPAN
*
* Plain C99, no Perl dependency. Same parser drives both CSV and TSV;
* defaults differ only in `sep` and `quote`.
*
* Two ways to use:
*
* 1. One-shot: separated_parse(buf, len, opts, row_cb, ud)
* — runs the whole input through the state machine in one call.
*
* 2. Incremental: separated_init() + separated_feed()* + separated_finish()
* — for streaming parsers (mmap chunks, network, etc.).
*
* The callback receives borrowed pointers into the parser's field buffer;
* if the caller needs to keep the field across calls it must copy.
*/
#ifndef SEPARATED_PARSER_H
#define SEPARATED_PARSER_H
#include <stddef.h>
include/separated_parser.h view on Meta::CPAN
*/
long separated_parse(const char *buf, size_t len,
const separated_options_t *opts,
separated_field_cb cb, void *ud,
size_t *err_offset);
/* ============================================================
* Incremental parse
* ============================================================
*
* For chunked / streaming parsers. Same parser core, just keeps state
* between calls. Use:
*
* separated_ctx_t *ctx = separated_init(opts, cb, ud);
* while (chunk = next chunk) {
* rc = separated_feed(ctx, chunk, chunk_len);
* if (rc < 0) { handle error; break; }
* }
* rc = separated_finish(ctx); // flushes a trailing field/row
* separated_free(ctx);
*
lib/File/Raw/Separated.pm view on Meta::CPAN
=back
The plugins register at module load and stay registered for the life
of the process. Per-call options arrive through File::Raw's variadic
XSUB plumbing; there is no global state to mutate. To opt out for a
particular call, just don't pass C<plugin =E<gt>>.
The WRITE / RECORD / STREAM phases are not yet wired - they will land
once the parser core grows a serialiser and File::Raw teaches
C<each_line>, C<grep_lines>, etc. the plugin pipeline. In the meantime
use C<parse_stream> for streaming directly.
=head1 SEE ALSO
L<File::Raw> - the underlying fast file IO layer.
=head1 AUTHOR
LNATION <email@lnation.org>
=head1 LICENSE AND COPYRIGHT
t/30-stream-basic.t view on Meta::CPAN
# Stream the fixture and keep a private copy of every row. Per the original
# author's note the array handed to the callback is reused between rows, so
# the reference itself must not be stored.
my @rows;
file_csv_parse_stream(
    $path,
    sub {
        my ($row) = @_;
        push @rows, [ @{$row} ];
    }
);
is(scalar(@rows), 100, 'streamed 100 rows');

# Spot-check three rows, in order: [ array index, row number, test name ].
for my $check (
    [ 0,  1,   'first row content' ],
    [ 49, 50,  'middle row content' ],
    [ 99, 100, 'last row content' ],
) {
    my ($index, $n, $label) = @{$check};
    is_deeply($rows[$index], [ "row$n", "col2_$n", "col3_$n" ], $label);
}
# Quoted CSV streaming: a separator, a doubled quote and a newline that sit
# inside quoted fields must come back as literal field content.
my ($fh2, $path2) = tempfile(SUFFIX => '.csv', UNLINK => 1);
print {$fh2} qq("a,b","c""d"\n);
print {$fh2} qq("line1\nline2",x\n);
close $fh2;

my @qrows;
file_csv_parse_stream(
    $path2,
    sub {
        my ($row) = @_;
        push @qrows, [ @{$row} ];    # copy the row before keeping it
    }
);

# Two records are expected even though the fixture spans three physical lines.
my @expected_quoted = (
    [ 'a,b', 'c"d' ],
    [ "line1\nline2", 'x' ],
);
is_deeply(\@qrows, \@expected_quoted, 'quoted + multiline streaming preserved');

done_testing;
t/31-stream-large.t view on Meta::CPAN
# Sanity-check the fixture size on disk before streaming it.
my $size = -s $path;
diag("fixture size: $size bytes");
ok($size > 5_000_000, "fixture is at least 5 MB (got $size)");

# First pass: count rows only, retaining nothing (so RSS doesn't balloon).
my $count = 0;
file_csv_parse_stream($path, sub { $count++ });
is($count, 50_000, 'streamed all 50 000 rows');

# Second pass: keep copies of just the first and last rows, selected by a
# running zero-based row index.
my (@keep_first, @keep_last);
my $i = 0;
file_csv_parse_stream(
    $path,
    sub {
        my ($row) = @_;
        if ($i == 0) {
            push @keep_first, [ @{$row} ];
        }
        if ($i == 49_999) {
            push @keep_last, [ @{$row} ];
        }
        $i++;
    }
);
is($keep_first[0][0], '1',     'first row index 1');
is($keep_last[0][0],  '50000', 'last row index 50000');
t/32-stream-equivalence.t view on Meta::CPAN
use strict;
use warnings;
use Test::More;
use File::Raw::Separated qw(import);
use File::Temp qw(tempfile);
# Same fixture parsed via slurp+file_csv_parse_buf and via file_csv_parse_stream
# must yield identical AoA. This is the canary that the streaming path
# uses the same parser core — no behavioural drift.
my @cases = (
"a,b,c\n",
"a,b,c\nd,e,f\n", # multi-row
qq("a,b","c""d",e\n), # quoted + escapes
qq("multi\nline",x\nplain,y\n), # embedded newlines
"a,b\r\nc,d\r\n", # CRLF
"no_trailing_nl,here", # missing final newline
"", # empty
t/36-plugin-each-line.t view on Meta::CPAN
# Rows delivered by File::Raw::each_line with the csv plugin must equal the
# rows csv_parse_buf returns for the slurped contents of the same file.
subtest 'each_line via plugin matches in-memory parse_buf' => sub {
    my $csv_path = "$dir/equiv.csv";

    # Fifty newline-terminated records of the form "rowN,N".
    my $content = join '', map { "row$_,$_\n" } 1 .. 50;
    File::Raw::spew($csv_path, $content);

    my @streamed_rows;
    File::Raw::each_line(
        $csv_path,
        sub {
            my ($row) = @_;
            push @streamed_rows, [ @{$row} ];    # keep a copy of the row
        },
        plugin => 'csv'
    );

    my $parsed_rows
        = File::Raw::Separated::csv_parse_buf(File::Raw::slurp($csv_path));

    is(scalar @streamed_rows, scalar @{$parsed_rows}, 'same row count');
    is_deeply(\@streamed_rows, $parsed_rows,
        'streaming output equivalent to in-memory');
};
subtest 'each_line handles fields with embedded separator and quote' => sub {
my $f = "$dir/tricky.csv";
File::Raw::spew(
$f,
qq(plain,1\n) .
qq("has, comma",2\n) .
qq("has ""quote",3\n),
);
t/36-plugin-each-line.t view on Meta::CPAN
$count++;
$first ||= [@{$_[0]}];
$last = [@{$_[0]}];
}, plugin => 'csv');
is($count, 10_000, 'all 10k rows seen across chunks');
is_deeply($first, ['id00001', 'name-00001'], 'first row intact');
is_deeply($last, ['id10000', 'name-10000'], 'last row intact');
};
# An exception thrown inside the row callback must reach the caller of
# File::Raw::each_line rather than being swallowed by the dispatch.
subtest 'callback dies propagate from streaming dispatch' => sub {
    my $csv_path = "$dir/die.csv";
    File::Raw::spew($csv_path, "a,1\nb,2\nc,3\n");

    # The callback throws when the first field of a row is 'b'.
    my $abort_on_b = sub {
        my ($row) = @_;
        die "stop on row b\n" if $row->[0] eq 'b';
    };

    eval { File::Raw::each_line($csv_path, $abort_on_b, plugin => 'csv'); };
    my $error = $@;    # capture immediately; $@ is a global

    like($error, qr/stop on row b/, 'die in callback re-raised');
};
( run in 3.317 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )