File-LoadLines
view release on metacpan or search on metacpan
lib/File/LoadLines.pm view on Meta::CPAN
#! perl
# Package declaration and compile-time setup for the module.
package File::LoadLines;
use warnings;
use strict;
use Exporter qw(import);
# loadlines() is exported by default; loadblob() must be requested explicitly.
our @EXPORT = qw( loadlines );
our @EXPORT_OK = qw( loadblob );
# Encode supplies decode(), encode() and decode_utf8() used when loading data.
use Encode;
# Carp supplies croak() so errors are reported from the caller's perspective.
use Carp;
use utf8;
=head1 NAME
File::LoadLines - Load lines from files and network
=cut
our $VERSION = '1.047';
=head1 SYNOPSIS
use File::LoadLines;
my @lines = loadlines("mydata.txt");
use File::LoadLines qw(loadblob);
my $img = loadblob("https://img.shields.io/badge/Language-Perl-blue");
=head1 DESCRIPTION
File::LoadLines provides an easy way to load the contents of a text
file into an array of lines. It is intended for small to moderate size files
like config files that are often produced by weird tools (and users).
It will transparently fetch data from the network if the provided file
name is a URL.
File::LoadLines automatically handles ASCII, Latin-1 and UTF-8 text.
When the file has a BOM, it handles UTF-8, UTF-16 LE and BE, and
UTF-32 LE and BE.
Recognized line terminators are NL (Unix, Linux), CRLF (DOS, Windows)
and CR (Mac).
Function loadblob(), exported on demand, fetches the content and
returns it without processing, equivalent to File::Slurp and its ilk.
=head1 EXPORT
By default the function loadlines() is exported.
=head1 FUNCTIONS
=head2 loadlines
@lines = loadlines("mydata.txt");
@lines = loadlines("mydata.txt", $options);
The file is opened, read, decoded and split into lines
that are returned in the result array. Line terminators are removed.
In scalar context, returns an array reference.
The first argument may be the name of a file, an opened file handle,
or a reference to a string that contains the data.
The name of a file on disk may start with C<"file://">; this prefix is ignored.
If the name starts with C<"http:"> or C<"https:"> the data will be
retrieved using LWP.
L<Data URLs|https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs> like C<"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ=="> are
also supported.
The second argument can be used to influence the behaviour.
It is a hash reference of option settings.
Note that loadlines() is a I<slurper>: it reads the whole file into
memory and, for splitting, temporarily requires memory for twice the
size of the file.
=over
=item split
Enabled by default.
The data is split into lines and returned as an array (in list
context) or as an array reference (in scalar context).
If set to zero, the data is not split into lines but returned as a
single string.
=item chomp
Enabled by default.
Line terminators are removed from the resultant lines.
If set to zero, the line terminators are not removed.
=item encoding
If specified, loadlines() will use this encoding to decode the file
data if it cannot automatically detect the encoding.
If you pass an options hash, File::LoadLines will set C<encoding> to
the encoding it detected and used for this file data.
=item blob
If specified, the data read is not touched but returned exactly as read.
C<blob> overrules C<split> and C<chomp>.
=item fail
If specified, it should be either C<"hard"> or C<"soft">.
If C<"hard">, read errors are signalled using croak exceptions.
This is the default.
If set to C<"soft">, loadlines() will return an empty result and set
the error message in the options hash with key C<"error">.
=back
=cut
sub loadlines {
my ( $filename, $options ) = @_;
croak("Missing filename.\n") unless defined $filename;
croak("Invalid options.\n") if (defined $options && (ref($options) ne "HASH"));
$options->{blob} //= 0;
$options->{split} //= !$options->{blob};
$options->{chomp} //= !$options->{blob};
$options->{fail} //= "hard";
my $data; # slurped file data
my $encoded; # already encoded
# Gather data from the input.
if ( ref($filename) ) {
if ( ref($filename) eq 'GLOB' || ref($filename) eq 'IO::File' ) {
binmode( $filename, ':raw' );
$data = do { local $/; <$filename> };
$filename = "__GLOB__";
}
else {
$data = $$filename;
$filename = "__STRING__";
$encoded++;
}
}
elsif ( $filename eq '-' ) {
$filename = "__STDIN__";
binmode( STDIN, ':raw' );
$data = do { local $/; <STDIN> };
}
elsif ( $filename =~ /^https?:/ ) {
require LWP::UserAgent;
my $ua = LWP::UserAgent->new( timeout => 20 );
my $res = $ua->get($filename);
if ( $res->is_success ) {
$data = $res->decoded_content;
}
elsif ( $options->{fail} eq "soft" ) {
$options->{error} = $res->status_line;
return;
}
else {
croak("$filename: ", $res->status_line);
}
}
elsif ( $filename =~ /^data:/ ) {
unless ( $filename =~ m! ^ data:
(?<mediatype> .*? )
,
(?<data> .* ) $
!sx ) {
if ( $options->{fail} eq "soft" ) {
$options->{error} = "Malformed inline data";
return;
}
else {
croak("Malformed inline data");
}
}
$data = $+{data};
$filename = "__DATA__";
my $mediatype = $+{mediatype};
my $enc = "";
if ( $mediatype && $mediatype =~ /^(.*);base64$/ ) {
$mediatype = $1;
$enc = "base64";
}
$options->{mediatype} = $mediatype if $mediatype;
if ( ! $enc ) {
# URL encoded.
$data = $+{data};
$data =~ s/\%([0-9a-f][0-9a-f])/chr(hex($1))/ige;
}
else {
# Base64.
require MIME::Base64;
$data = MIME::Base64::decode($data);
}
if ( $mediatype && $mediatype =~ /;charset=([^;]*)/ ) {
$data = decode( $1, $data );
$options->{encoding} = $1;
$encoded++;
}
}
else {
my $name = $filename;
$name =~ s;^file://;;;
$filename = decode_utf8($name);
# On MS Windows, non-latin (wide) filenames need special treatment.
if ( $filename ne $name && $^O =~ /mswin/i ) {
require Win32API::File;
my $fn = encode('UTF-16LE', "$filename").chr(0).chr(0);
my $fh = Win32API::File::CreateFileW
( $fn, Win32API::File::FILE_READ_DATA(), 0, [],
Win32API::File::OPEN_EXISTING(), 0, []);
croak("$filename: $^E (Win32)\n") if $^E;
( run in 0.489 second using v1.01-cache-2.11-cpan-71847e10f99 )