view release on metacpan or search on metacpan
Support autochomp and input_record_separator functions on arbitrary
filehandles, possibly tie-ing them to this package as a side-effect.
0.05 2018-08-28
Test fixes for MSWin32
0.06 2018-08-30
Test fixes for MSWin32 and this time I mean it.
0.07 2018-12-23
Custom binmode function which supports :irs(REGEXP) layer to
set record separator, and auto-tie-ing file handles to this package.
lib/Acme/InputRecordSeparatorIsRegexp.pm view on Meta::CPAN
package Acme::InputRecordSeparatorIsRegexp;
use 5.006;
use strict;
use warnings FATAL => 'all';
use Symbol;
use Carp;
use IO::Handle;
require Exporter;
our @ISA = 'Exporter';
our @EXPORT_OK = ('open','autochomp','input_record_separator','binmode');
our %EXPORT_TAGS = ( all => [ @EXPORT_OK ] );
BEGIN {
no strict 'refs';
*{ 'Acme::IRSRegexp' . "::" } = \*{ __PACKAGE__ . "::" };
}
our $VERSION = '0.07';
sub TIEHANDLE {
lib/Acme/InputRecordSeparatorIsRegexp.pm view on Meta::CPAN
return CORE::open(*$glob,$mode,$expr,@list);
} elsif ($expr) {
return CORE::open(*$glob,$mode,$expr);
} elsif ($mode) {
return CORE::open(*$glob,$mode);
} else {
return CORE::open(*$glob);
}
}
sub binmode (*;$) {
my ($glob,$mode) = @_;
$mode ||= ":raw";
if (index($mode,":irs(") >= 0) {
my $irs = _extract_irs($mode);
input_record_separator($glob,$irs);
return 1 unless $mode;
}
return CORE::binmode($glob,$mode);
}
sub _compile_rs {
my $self = shift;
my $rs = $self->{rs};
my $q = eval { my @q = split /(?<=${rs})/,""; 1 };
if ($q) {
$self->{rsc} = qr/(?<=${rs})/s;
if ($rs =~ /\?\^\w*m/) {
lib/Acme/InputRecordSeparatorIsRegexp.pm view on Meta::CPAN
} else {
# eof?
return undef;
}
}
sub BINMODE {
my $self = shift;
my $handle = $self->{handle};
if (@_) {
CORE::binmode $handle, @_;
} else {
CORE::binmode $handle;
}
}
sub SEEK {
my ($self, $pos, $whence) = @_;
if ($whence == 1) {
$whence = 0;
$pos += $self->TELL;
}
lib/Acme/InputRecordSeparatorIsRegexp.pm view on Meta::CPAN
# tie-then-open
tie *{$fh=Symbol::gensym}, 'Acme::IRSRegExp', qr/\r\n|[\r\n]/;
open $fh, '<', 'file-with-ambiguous-line-endings';
$line = <$fh>;
# import open function and use :irs pseudo-layer
use Acme::InputRecordSeparatorIsRegexp 'open';
open my $fh, '<:irs(\r\n|\r|\n)', 'ambiguous.txt';
$line = <$fh>;
# import binmode and use :irs pseudo-layer
use Acme::InputRecordSeparatorIsRegexp 'binmode';
open my $fh, '<', 'ambiguous.txt';
binmode $fh, ':irs(\r\n|\r|\n)';
$line = <$fh>;
=head1 DESCRIPTION
In the section about the L<"input record separator"|perlvar/"$/">,
C<perlvar> famously quips
=over 4
Remember: the value of $/ is a string, not a regex. B<awk>
lib/Acme/InputRecordSeparatorIsRegexp.pm view on Meta::CPAN
$result = open FILEHANDLE, "<:irs(REGEXP)", EXPR
$result = open FILEHANDLE, "<:irs(REGEXP)", EXPR, LIST
$result = open FILEHANDLE, "<:irs(REGEXP)", REFERENCE
$result = open my $fh, "<:irs(\r|\n|\r\n)", "ambiguous-line-endings.txt"
The C<:irs(...)> layer may be combined with other layers.
open my $fh, "<:encoding(UTF-16):irs(\R)", "ambiguous.txt"
See also: L<"binmode">
=head2 autochomp
Returns the current setting, or sets the C<autochomp> attribute
of a file handle associated with this package. When the
C<autochomp> attribute of the file handle is enabled, any lines
read from the file handle through the C<readline> function
or C<< <> >> operator will be returned with the (custom) line
endings automatically removed.
lib/Acme/InputRecordSeparatorIsRegexp.pm view on Meta::CPAN
Enabling C<autochomp> with this function on a regular file handle
will tie the file handle into this package using the current
value of C<$/> as the handle's record separator. If you are
just looking for autochomp functionality and don't care about
applying regular expressions to determine line endings, this
function provides an (inefficient) way to do that to
arbitrary file handles.
The default attribute value is false.
=head2 binmode FILEHANDLE, LAYER
Overrides Perl's builtin L<binmode|perlfunc/"binmode"> function.
If the I<pseudo-layer> C<:irs(...)> is specified, then apply the
given regular expression as the dynamic input record separator for
the given filehandle.
Any other layers specified are passed to Perl's builtin C<binmode>
function.
=head2 input_record_separator
Returns the current setting, or changes the setting, of a file handle's
input record separator, I<including file handles that have not
already been tied to this package>. This overcomes a limitation
in L<IO::Handle::input_record_separator|IO::Handle/"METHODS">
where input record separators are not supported on a per-filehandle
t/03b-seektell.t view on Meta::CPAN
use Test::More;
use Acme::InputRecordSeparatorIsRegexp 'binmode';
use strict;
use warnings;
if ($] < 5.010000) {
diag "readline on this package is sloooow for Perl $]. ",
"Skipping this set of tests which is a near duplicate of ",
"another set of tests.";
ok(1, "# skip $0 tests on Perl $]");
done_testing();
exit;
t/03b-seektell.t view on Meta::CPAN
print $xx $i++,":",$_;
}
close $xx; # t/test03b.txt is about 150K
$! = 0;
my $z;
$z = open my $fh, '<', "t/test03b.txt";
ok($z, 'builtin open ok');
ok(!tied(*$fh), 'file handle is not tied yet');
$z = binmode $fh, ":irs(1.3|T.4|E....D)";
ok($z, 'Acme::IRSasRegeexp binmode ok');
ok(tied(*$fh), 'handle is tied after binmode');
my (@tell, @seek);
push @tell, tell($fh);
while (<$fh>) {
push @seek, $_;
push @tell, tell($fh);
if (@seek > 1) {
ok( $seek[-2] =~ /1.3$/ || $seek[-2] =~ /T.4$/ ||
$seek[-2] =~ /E....D$/, 'correct line ending' )
t/04b-longline.t view on Meta::CPAN
use Test::More;
use Acme::InputRecordSeparatorIsRegexp 'binmode';
use strict;
use warnings;
# handle the case where the length of a record is much larger
# than the size of the read buffer
if ($] < 5.010000) {
diag "readline on this package is sloooow for Perl $]. ",
"Skipping this set of tests which is a near duplicate of ",
"another set of tests.";
t/04b-longline.t view on Meta::CPAN
$yy .= "\r\n";
}
}
open my $xx, '>:raw', 't/test04b.txt';
print $xx $yy;
close $xx;
my $z = open(my $fh, '<:raw', "t/test04b.txt");
ok(!tied(*$fh), 'builtin open does not tie filehandle');
ok($z, 'builtin open ok');
$z = binmode $fh, ':irs(\r\n|\r|\n)';
ok($z, 'package binmode ok');
ok(tied(*$fh), 'handle is tied after binmode');
(tied *$fh)->{maxrecsize} = 100;
my (@tell, @seek);
push @tell, tell($fh);
while (<$fh>) {
push @seek, $_;
push @tell, tell($fh);
if (@seek > 1) {
ok( $seek[-2] =~ /[\r\n]$/, 'correct line ending' )
t/05-readwrite.t view on Meta::CPAN
# read and write access to same file
my $fh = Symbol::gensym;
tie *$fh, 'Acme::InputRecordSeparatorIsRegexp', '\r\n|[\r\n]';
ok(tied(*$fh), 'tie ok');
my $z = open $fh, '+>', 't/test05.txt';
ok($z, 'open +> ok') or diag $!;
# binmode on this filehandle will make these tests work on MSWin32
$z = binmode $fh;
ok($z, 'binmode ok');
for (1..4) {
$z = print $fh "x" x 99, "\r";
ok($z, "print x $_ ok");
print $fh "y" x 99, "\n";
ok($z, "print y $_ ok");
print $fh "z" x 98, "\r\n";
ok($z, "print z $_ ok");
}
t/06b-DATA.t view on Meta::CPAN
use Test::More;
use Acme::InputRecordSeparatorIsRegexp 'binmode';
use strict;
use warnings;
binmode *DATA, ":irs(1.43|T.44|E...XD)";
ok(tied(*DATA), 'return tied handle');
my (@tell, @seek);
push @tell, tell(DATA);
while (<DATA>) {
push @seek, $_;
push @tell, tell(DATA);
if (@seek > 1) {
ok( $seek[-2] =~ /1.43$/ || $seek[-2] =~ /T.44$/ ||
t/07b-memory.t view on Meta::CPAN
use Test::More;
use Acme::InputRecordSeparatorIsRegexp 'binmode';
use strict;
use warnings;
if ($] < 5.010000) {
diag "readline on this package is sloooow for Perl $]. ",
"Skipping this set of tests which is a near duplicate of ",
"another set of tests.";
ok(1, "# skip $0 tests on Perl $]");
done_testing();
exit;
t/07b-memory.t view on Meta::CPAN
my $yy = "";
my $i = 0;
for ('AAA'..'ZZZ') {
$yy .= sprintf "%d:%s", $i++, $_;
}
my $z = open(my $fh, "<", \$yy);
ok($z && $fh, 'builtin open ok to memory handle');
ok(!tied($fh), 'builtin open does not tie filehandle');
$z = binmode $fh, ':irs(1.3|T.4|E....D)';
ok($z, 'package binmode ok');
ok(tied(*$fh), 'return tied handle');
my (@tell, @seek);
push @tell, tell($fh);
while (<$fh>) {
push @seek, $_;
push @tell, tell($fh);
if (@seek > 1) {
ok( $seek[-2] =~ /1.3$/ || $seek[-2] =~ /T.4$/ ||
t/08b-chomp.t view on Meta::CPAN
use Test::More;
use Acme::InputRecordSeparatorIsRegexp 'binmode';
use strict;
use warnings;
# handle the case where the length of a record is much larger
# than the size of the read buffer
my $yy = "";
for (1..20) {
$yy .= "x" x 9999;
if (rand() < 0.333333) {
t/08b-chomp.t view on Meta::CPAN
open my $xx, '>:raw', 't/test08b.txt';
print $xx $yy;
close $xx;
my $fh;
ok(!defined($fh), "\$fh undefined before open call");
my $z = open($fh, '<:raw', "t/test08b.txt");
ok(defined($fh), "\$fh updated in open call");
ok($z, 'builtin open ok');
ok(!tied(*$fh), 'builtin open does not tie filehandle');
$z = binmode $fh, ':irs(\r\n|\r|\n)';
ok($z, 'Acme::InputRecordSeparatorIsRegexp::binmode ok');
ok(tied(*$fh), 'handle is tied after binmode');
(tied *$fh)->{maxrecsize} = 100;
my (@tell, @seek);
push @tell, tell($fh);
while (<$fh>) {
push @seek, $_;
push @tell, tell($fh);
if (@seek > 1) {
ok( $seek[-2] =~ /[\r\n]$/, 'correct line ending' )
t/09b-autochomp.t view on Meta::CPAN
use Test::More;
use Acme::InputRecordSeparatorIsRegexp 'binmode','autochomp';
use strict;
use warnings;
# handle the case where the length of a record is much larger
# than the size of the read buffer
my $yy = "";
for (1..20) {
$yy .= "x" x 9999;
if (rand() < 0.333333) {
t/09b-autochomp.t view on Meta::CPAN
$yy .= "\r\n";
}
}
open my $xx, '>:raw', 't/test09b.txt';
print $xx $yy;
close $xx;
my $z = open my $fh, '<:raw', "t/test09b.txt";
ok($z && $fh, 'builtin open ok');
ok(!tied(*$fh), 'handle not tied yet');
$z = binmode $fh, ":irs(\r\n|\r|\n)";
ok($z, 'Acme::InputRecordSeparatorIsRegexp::binmode ok');
ok(tied(*$fh), 'handle tied after binmode');
ok(!(tied *$fh)->autochomp(), 'autochomp is off');
ok(!autochomp(tied *$fh), 'autochomp get function');
ok(!autochomp($fh), 'autochomp get function');
(tied *$fh)->autochomp(1);
ok((tied *$fh)->autochomp(), 'autochomp is on');
autochomp(tied *$fh, 0);
ok(!autochomp(tied $fh), 'autochomp set function');
t/10b-input-record-separator.t view on Meta::CPAN
open my $f3, '<', "t/test10b.txt";
ok(!tied *$f3, "regular file handle is not tied");
$f3->input_record_separator("op|qr|s");
$rs = input_record_separator($f3);
ok($rs eq "op|qr|s", "IO::Handle::input_record_separator monkey patched");
ok(tied *$f3, 'set input_record_separator ties file handle');
close $f3;
open my $f4, "<:raw", "t/test10b.txt";
ok(!tied *$f4, "regular file handle not tied");
binmode $f4, ":irs(\r\n|\r|\n)";
$rs = $f4->input_record_separator;
ok($rs eq "\r\n|\r|\n" || $rs eq '\r\n|\r|\n' ||
$rs eq '(?^:\r\n|\r|\n)' ||
$rs eq '(?-xism:\r\n|\r|\n)', "record separator set with binmode")
or diag $rs;
my (@tell, @seek);
push @tell, tell($f4);
my $correct = 0;
my $incorrect = 0;
while (<$f4>) {
push @seek, $_;
push @tell, tell($f4);