App-Sets
view release on metacpan or search on metacpan
lib/App/Sets/Sort.pm view on Meta::CPAN
package App::Sets::Sort;
$App::Sets::Sort::VERSION = '0.978';
use strict;
use warnings;
# ABSTRACT: sort handling
use English qw( -no_match_vars );
use 5.010;
use File::Temp qw< tempfile >;
use Fcntl qw< :seek >;
use Log::Log4perl::Tiny qw< :easy :dead_if_first >;
use base 'Exporter';
our @EXPORT_OK = qw< sort_filehandle internal_sort_filehandle >;
our @EXPORT = qw< sort_filehandle >;
our %EXPORT_TAGS = (
default => [ @EXPORT ],
all => [ @EXPORT_OK ],
);
sub _test_external_sort {
my $filename;
eval {
(my $fh, $filename) = tempfile(); # might croak
binmode $fh, ':raw';
print {$fh} "one\ntwo\nthree\nfour\n" or die 'whatever';
close $fh or die 'whatever';
} or return;
my $fh = eval {
open my $tfh, '-|', 'sort', '-u', $filename;
$tfh;
} or return;
my @lines = <$fh>;
return unless scalar(@lines) == 4;
return unless defined $lines[3];
$lines[3] =~ s{\s+}{}gmxs;
return unless $lines[3] eq 'two';
return 1;
}
sub sort_filehandle {
my ($filename, $config) = @_;
$config ||= {};
state $has_sort = (!$config->{internal_sort}) && _test_external_sort();
if ($has_sort) {
my $fh;
eval { open $fh, '-|', 'sort', '-u', $filename } and return $fh;
WARN 'cannot use system sort, falling back to internal implementation';
$has_sort = 0; # from now on, use internal sort
}
return internal_sort_filehandle($filename);
}
sub internal_sort_filehandle {
my ($filename) = @_;
# Open input stream
open my $ifh, '<', $filename
or LOGDIE "open('$filename'): $OS_ERROR";
# Maximum values hints taken from Perl Power Tools' sort
my $max_records = $ENV{SETS_MAX_RECORDS} || 200_000;
my $max_files = $ENV{SETS_MAX_FILES} || 40;
my (@records, @fhs);
while (<$ifh>) {
chomp;
push @records, $_;
if (@records >= $max_records) {
push @fhs, _flush_to_temp(\@records);
_compact(\@fhs) if @fhs >= $max_files - 1;
}
}
push @fhs, _flush_to_temp(\@records) if @records;
_compact(\@fhs);
return $fhs[0] if @fhs;
# seems like the file was empty... so it's sorted
seek $ifh, 0, SEEK_SET;
return $ifh;
}
( run in 1.431 second using v1.01-cache-2.11-cpan-39bf76dae61 )