File-Raw-Separated

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

Revision history for File-Raw-Separated

0.04    2026-05-17
        - Bumped File::Raw prereq to 0.13 to pick up the streaming
          O_BINARY fix on Windows.
        - Removed pax headers

0.03    2026-05-07
        - C89 fixes
        - Bumped File::Raw prereq to 0.12

0.02    Initial release on CPAN.

0.01    Date/time

include/separated_parser.h  view on Meta::CPAN

 *
 * Plain C99, no Perl dependency. Same parser drives both CSV and TSV;
 * defaults differ only in `sep` and `quote`.
 *
 * Two ways to use:
 *
 *   1. One-shot: separated_parse(buf, len, opts, cb, ud, err_offset)
 *      — runs the whole input through the state machine in one call.
 *
 *   2. Incremental: separated_init() + separated_feed()* + separated_finish()
 *      — for streaming parsers (mmap chunks, network, etc.).
 *
 * The callback receives borrowed pointers into the parser's field buffer;
 * if the caller needs to keep the field across calls it must copy.
 */

#ifndef SEPARATED_PARSER_H
#define SEPARATED_PARSER_H

#include <stddef.h>

include/separated_parser.h  view on Meta::CPAN

 */
long separated_parse(const char *buf, size_t len,
                     const separated_options_t *opts,
                     separated_field_cb cb, void *ud,
                     size_t *err_offset);

/* ============================================================
 * Incremental parse
 * ============================================================
 *
 * For chunked / streaming parsers. Same parser core, just keeps state
 * between calls. Use:
 *
 *     separated_ctx_t *ctx = separated_init(opts, cb, ud);
 *     while (chunk = next chunk) {
 *         rc = separated_feed(ctx, chunk, chunk_len);
 *         if (rc < 0) { handle error; break; }
 *     }
 *     rc = separated_finish(ctx);    // flushes a trailing field/row
 *     separated_free(ctx);
 *

lib/File/Raw/Separated.pm  view on Meta::CPAN

=back

The plugins register at module load and stay registered for the life
of the process. Per-call options arrive through File::Raw's variadic
XSUB plumbing; there is no global state to mutate. To opt out for a
particular call, just don't pass C<plugin =E<gt>>.

The WRITE / RECORD / STREAM phases are not yet wired - they will land
once the parser core grows a serialiser and File::Raw teaches
C<each_line>, C<grep_lines>, etc. the plugin pipeline. In the meantime
use C<parse_stream> for streaming directly.

=head1 SEE ALSO

L<File::Raw> - the underlying fast file IO layer.

=head1 AUTHOR

LNATION <email@lnation.org>

=head1 LICENSE AND COPYRIGHT

t/30-stream-basic.t  view on Meta::CPAN

# Accumulates one array ref per row delivered by the streaming parser.
my @rows;
# $path is set up earlier in the test file (outside this excerpt).
file_csv_parse_stream($path, sub {
    push @rows, [@{$_[0]}];   # explicit copy (AV is reused)
});

# The assertions expect 100 three-column rows of the form
# rowN,col2_N,col3_N; first, middle and last rows are spot-checked.
is(scalar(@rows), 100, 'streamed 100 rows');
is_deeply($rows[0],  ['row1',   'col2_1',   'col3_1'],   'first row content');
is_deeply($rows[49], ['row50',  'col2_50',  'col3_50'],  'middle row content');
is_deeply($rows[99], ['row100', 'col2_100', 'col3_100'], 'last row content');

# Quoted CSV streaming: a two-record fixture covering the quoting cases.
#   record 1: a quoted field containing the separator, and a quoted field
#             containing a doubled quote ("") that must decode to one quote.
#   record 2: a quoted field containing a newline, followed by a plain field.
my ($fh2, $path2) = tempfile(SUFFIX => '.csv', UNLINK => 1);
print $fh2 qq("a,b","c""d"\n);
print $fh2 qq("line1\nline2",x\n);
close $fh2;

my @qrows;
# Copy each row inside the callback before storing it.
file_csv_parse_stream($path2, sub { push @qrows, [@{$_[0]}] });
# The embedded newline must stay inside its field, not start a new row.
is_deeply(\@qrows, [
    ['a,b', 'c"d'],
    ["line1\nline2", 'x'],
], 'quoted + multiline streaming preserved');

done_testing;

t/31-stream-large.t  view on Meta::CPAN


# -s yields the fixture's size in bytes; $path is set up earlier in the
# test file (outside this excerpt).
my $size = -s $path;
diag("fixture size: $size bytes");
ok($size > 5_000_000, "fixture is at least 5 MB (got $size)");

# Stream and count without retaining rows (so RSS doesn't balloon)
my $count = 0;
file_csv_parse_stream($path, sub { $count++ });
is($count, 50_000, 'streamed all 50 000 rows');

# Spot-check first/last via index-targeted streaming
# A second full pass: $i is the zero-based row index, and only rows 0 and
# 49_999 are copied and kept.
my @keep_first;
my @keep_last;
my $i = 0;
file_csv_parse_stream($path, sub {
    push @keep_first, [@{$_[0]}] if $i == 0;
    push @keep_last,  [@{$_[0]}] if $i == 49_999;
    $i++;
});
# The first column of the kept rows is expected to be '1' and '50000'.
is($keep_first[0][0], '1',     'first row index 1');
is($keep_last[0][0],  '50000', 'last row index 50000');

t/32-stream-equivalence.t  view on Meta::CPAN

use strict;
use warnings;
use Test::More;
use File::Raw::Separated qw(import);
use File::Temp qw(tempfile);

# Same fixture parsed via slurp+file_csv_parse_buf and via file_csv_parse_stream
# must yield identical AoA. This is the canary that the streaming path
# uses the same parser core — no behavioural drift.

my @cases = (
    "a,b,c\n",
    "a,b,c\nd,e,f\n",                   # multi-row
    qq("a,b","c""d",e\n),               # quoted + escapes
    qq("multi\nline",x\nplain,y\n),     # embedded newlines
    "a,b\r\nc,d\r\n",                   # CRLF
    "no_trailing_nl,here",              # missing final newline
    "",                                  # empty

t/36-plugin-each-line.t  view on Meta::CPAN

# Equivalence check: the rows delivered by File::Raw::each_line with
# plugin => 'csv' must equal what csv_parse_buf returns for the same file
# contents read back with File::Raw::slurp.
subtest 'each_line via plugin matches in-memory parse_buf' => sub {
    # $dir is created earlier in the test file (outside this excerpt).
    my $f = "$dir/equiv.csv";
    # Fixture: 50 two-column lines "row1,1" .. "row50,50", newline-terminated.
    File::Raw::spew($f, join("\n", map { "row$_,$_" } 1..50) . "\n");

    my @stream;
    # Store a copy of each row rather than the callback's own argument.
    File::Raw::each_line($f, sub { push @stream, [@{$_[0]}] }, plugin => 'csv');

    # Reference result: parse the whole file content in one call.
    my $buf = File::Raw::Separated::csv_parse_buf(File::Raw::slurp($f));

    is(scalar @stream, scalar @$buf, 'same row count');
    is_deeply(\@stream, $buf, 'streaming output equivalent to in-memory');
};

subtest 'each_line handles fields with embedded separator and quote' => sub {
    my $f = "$dir/tricky.csv";
    File::Raw::spew(
        $f,
        qq(plain,1\n) .
        qq("has, comma",2\n) .
        qq("has ""quote",3\n),
    );

t/36-plugin-each-line.t  view on Meta::CPAN

        $count++;
        $first ||= [@{$_[0]}];
        $last    = [@{$_[0]}];
    }, plugin => 'csv');

    is($count, 10_000, 'all 10k rows seen across chunks');
    is_deeply($first, ['id00001', 'name-00001'], 'first row intact');
    is_deeply($last,  ['id10000', 'name-10000'], 'last row intact');
};

# An exception thrown inside the per-row callback must reach the caller of
# File::Raw::each_line rather than being swallowed along the way.
subtest 'callback dies propagate from streaming dispatch' => sub {
    # $dir is created earlier in the test file (outside this excerpt).
    my $f = "$dir/die.csv";
    # Three rows; the callback below dies on the second one ('b').
    File::Raw::spew($f, "a,1\nb,2\nc,3\n");
    eval {
        File::Raw::each_line($f, sub {
            # The trailing "\n" stops Perl appending "at FILE line N" to the message.
            die "stop on row b\n" if $_[0][0] eq 'b';
        }, plugin => 'csv');
    };
    # $@ is inspected immediately after the eval, before anything can clobber it.
    like($@, qr/stop on row b/, 'die in callback re-raised');
};



( run in 3.317 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )