App-Sets

 view release on metacpan or  search on metacpan

lib/App/Sets/Sort.pm  view on Meta::CPAN

package App::Sets::Sort;
$App::Sets::Sort::VERSION = '0.978';


use strict;
use warnings;

# ABSTRACT: sort handling

use English qw( -no_match_vars );
use 5.010;
use File::Temp qw< tempfile >;
use Fcntl qw< :seek >;
use Log::Log4perl::Tiny qw< :easy :dead_if_first >;
use base 'Exporter';

our @EXPORT_OK = qw< sort_filehandle internal_sort_filehandle >;
our @EXPORT = qw< sort_filehandle >;
our %EXPORT_TAGS = (
   default => [ @EXPORT ],
   all => [ @EXPORT_OK ],
);

sub _test_external_sort {
   my $filename;

   eval {
      (my $fh, $filename) = tempfile(); # might croak
      binmode $fh, ':raw';
      print {$fh} "one\ntwo\nthree\nfour\n" or die 'whatever';
      close $fh or die 'whatever';
   } or return;

   my $fh = eval {
      open my $tfh, '-|', 'sort', '-u', $filename;
      $tfh;
   } or return;
   my @lines = <$fh>;
   return unless scalar(@lines) == 4;
   return unless defined $lines[3];
   $lines[3] =~ s{\s+}{}gmxs;
   return unless $lines[3] eq 'two';

   return 1;
}

sub sort_filehandle {
   my ($filename, $config) = @_;
   $config ||= {};
   state $has_sort = (!$config->{internal_sort}) && _test_external_sort();

   if ($has_sort) {
      my $fh;
      eval { open $fh, '-|', 'sort', '-u', $filename } and return $fh;
      WARN 'cannot use system sort, falling back to internal implementation';
      $has_sort = 0; # from now on, use internal sort
   }

   return internal_sort_filehandle($filename);
}

sub internal_sort_filehandle {
   my ($filename) = @_;

   # Open input stream
   open my $ifh, '<', $filename
      or LOGDIE "open('$filename'): $OS_ERROR";

   # Maximum values hints taken from Perl Power Tools' sort
   my $max_records = $ENV{SETS_MAX_RECORDS} || 200_000;
   my $max_files = $ENV{SETS_MAX_FILES} || 40;
   my (@records, @fhs);
   while (<$ifh>) {
      chomp;
      push @records, $_;
      if (@records >= $max_records) {
         push @fhs, _flush_to_temp(\@records);
         _compact(\@fhs) if @fhs >= $max_files - 1;
      }
   }

   push @fhs, _flush_to_temp(\@records) if @records;
   _compact(\@fhs);
   return $fhs[0] if @fhs;

   # seems like the file was empty... so it's sorted
   seek $ifh, 0, SEEK_SET;
   return $ifh;
}



( run in 1.431 second using v1.01-cache-2.11-cpan-39bf76dae61 )