log results from the CPAN

Acme-Tools

view release on metacpan or search on metacpan

package Acme::Tools;

our $VERSION = '0.27';

use 5.008;     #Perl 5.8 was released July 18th 2002
use strict;
use warnings;
use Carp;      #todo: rid of deps, make own carp+croak here

require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( all => [ qw() ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
our @EXPORT = qw(
  min
  max
  mins
  maxs
  sum
  avg
  geomavg
  harmonicavg
  stddev
  rstddev
  median
  percentile
  $Resolve_iterations
  $Resolve_last_estimate
  $Resolve_time
  resolve
  resolve_equation
  conv
  rank
  rankstr
  egrep
  eqarr
  sorted
  sortedstr
  sortby
  subarrays
  pushsort
  pushsortstr
  binsearch
  binsearchstr
  random
  random_gauss
  big
  bigi
  bigf
  bigr
  bigscale
  nvl
  repl
  replace
  decode
  decode_num
  between
  btw
  curb
  bound
  log10
  log2
  logn
  distinct
  in
  in_num
  uniq
  union
  union_all
  minus
  minus_all
  intersect
  intersect_all
  not_intersect
  mix
  zip
  sim
  sim_perm
  subarr
  subhash
  hashtrans
  zipb64
  zipbin
  unzipb64
  unzipbin
  gzip
  gunzip
  bzip2
  bunzip2
  ipaddr
  ipnum
  ipnum_ok
  iprange_ok
  in_iprange
  webparams
  urlenc
  urldec
  ht2t
  chall
  makedir
  qrlist
  ansicolor
  ccn_ok
  KID_ok
  writefile
  readfile
  readdirectory
  basename
  dirname
  wipe
  username
  range
  globr
  permutations
  perm
  permute
  permute_continue
  trigram
  sliding
  chunks
  chars
  cart
  reduce

Tools.pm view on Meta::CPAN

  sys
  recursed
  md5sum
  pwgen
  which
  read_conf
  openstr
  printed
  ldist
  $Re_isnum
  isnum
  part
  parth
  parta
  a2h
  h2a
  refa
  refh
  refs
  refaa
  refah
  refha
  refhh
  pushr
  popr
  shiftr
  unshiftr
  splicer
  keysr
  valuesr
  eachr
  joinr
  pile
  aoh2sql
  aoh2xls
  base64
  unbase64
  opts
  ed
  changed
  $Edcursor
  brainfu
  brainfu2perl
  brainfu2perl_optimized
  bfinit
  bfsum
  bfaddbf
  bfadd
  bfcheck
  bfgrep
  bfgrepnot
  bfdelete
  bfstore
  bfretrieve
  bfclone
  bfdimensions
  $PI
  install_acme_command_tools

  $Dbh
  dlogin
  dlogout
  drow
  drows
  drowc
  drowsc
  dcols
  dpk
  dsel
  ddo
  dins
  dupd
  ddel
  dcommit
  drollback
);

our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';

=head1 NAME

Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace

=head1 SYNOPSIS

 use Acme::Tools;

 print sum(1,2,3);                   # 6
 print avg(2,3,4,6);                 # 3.75
 print median(2,3,4,6);              # 3.5
 print percentile(25, 101..199);     # 125

 my @list = minus(\@listA, \@listB); # set operation
 my @list = union(\@listA, \@listB); # set operation

 print length(gzip("abc" x 1000));   # far less than 3000

 writefile("/dir/filename",$string); # convenient
 my $s=readfile("/dir/filename");    # also convenient

 print "yes!" if between($PI,3,4);

 print percentile(0.05, @numbers);

 my @even = range(1000,2000,2);      # even numbers between 1000 and 2000
 my @odd  = range(1001,2001,2);

 my $dice = random(1,6);
 my $color = random(['red','green','blue','yellow','orange']);

 pushr $arrayref[$num], @stuff;      # push @{ $arrayref[$num] }, @stuff ... popr, shiftr, unshiftr

 print 2**200;       # 1.60693804425899e+60
 print big(2)**200;  # 1606938044258990275541962092341162602522202993782792835301376

 ...and much more.

=encoding utf8

=head1 ABSTRACT

About 120 more or less useful perl subroutines lumped together and exported into your namespace.

Tools.pm view on Meta::CPAN

 print bytes_readable(1001);                             # 0.98 kB
 print bytes_readable(1024);                             # 1.00 kB
 print bytes_readable(1153433.6);                        # 1.10 MB
 print bytes_readable(1181116006.4);                     # 1.10 GB
 print bytes_readable(1209462790553.6);                  # 1.10 TB
 print bytes_readable(1088516511498.24*1000);            # 990.00 TB
 print bytes_readable(1088516511498.24*1000, 3);         # 990.000 TB
 print bytes_readable(1088516511498.24*1000, 1);         # 990.0 TB

=cut

sub bytes_readable {
  my $bytes=shift();
  my $d=shift()||2; #decimals
  return undef if !defined $bytes;
  return "$bytes B"                         if abs($bytes) <= 2** 0*1000; #bytes
  return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) <  2**10*1000; #kilobyte
  return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) <  2**20*1000; #megabyte
  return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) <  2**30*1000; #gigabyte
  return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) <  2**40*1000; #terrabyte
  return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
}

=head2 sec_readable

Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.

 print sec_readable( 0 );           # 0s
 print sec_readable( 0.0123 );      # 0.0123s
 print sec_readable(-0.0123 );      # -0.0123s
 print sec_readable( 1.23 );        # 1.23s
 print sec_readable( 1 );           # 1s
 print sec_readable( 9.87 );        # 9.87s
 print sec_readable( 10 );          # 10s
 print sec_readable( 10.1 );        # 10.1s
 print sec_readable( 59 );          # 59s
 print sec_readable( 59.123 );      # 59.1s
 print sec_readable( 60 );          # 1m 0s
 print sec_readable( 60.1 );        # 1m 0s
 print sec_readable( 121 );         # 2m 1s
 print sec_readable( 131 );         # 2m 11s
 print sec_readable( 1331 );        # 22m 11s
 print sec_readable(-1331 );        # -22m 11s
 print sec_readable( 13331 );       # 3h 42m
 print sec_readable( 133331 );      # 1d 13h
 print sec_readable( 1333331 );     # 15d 10h
 print sec_readable( 13333331 );    # 154d 7h
 print sec_readable( 133333331 );   # 4yr 82d
 print sec_readable( 1333333331 );  # 42yr 91d

=cut

sub sec_readable {
  my $s=shift();
  my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
   !defined$s     ? undef
  :!length($s)    ? ''
  :$s<0           ? '-'.sec_readable(-$s)
  :$s<60 && int($s)==$s
                  ? $s."s"
  :$s<60          ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
  :$s<3600        ? int($s/60)."m " .($s%60)        ."s"
  :$s<24*3600     ? int($s/$h)."h " .int(($s%$h)/60)."m"
  :$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
  :                 int($s/$y)."yr ".int(($s%$y)/$d)."d";
}

=head2 int2roman

Converts integers to roman numbers.

B<Examples:>

 print int2roman(1234);   # prints MCCXXXIV
 print int2roman(1971);   # prints MCMLXXI

(Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)

 I = 1
 V = 5
 X = 10
 L = 50
 C = 100     (centum)
 D = 500
 M = 1000    (mille)

See also L<Roman>.

See L<http://en.wikipedia.org/wiki/Roman_numbers> for more.

=head2 roman2int

 roman2int("MCMLXXI") == 1971

=cut

#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
#see also t/17_roman.t sub int2roman_old
sub int2roman {
    my $n=shift;
    !defined$n  ? undef
  : !length($n) ? ""
  : $n<0        ? "-".int2roman(-$n)
  : int($n)!=$n ? croak"int2roman: $n is not an integer"
#  : $] >= 5.014 ?        #s///r modifier introduced in perl v5.14
#        ("I" x $n)
#        =~s,I{1000},M,gr #unnecessary, but speedup for n>1000
#        =~s,I{100},C,gr  #unnecessary, but speedup for n>100
#        =~s,I{10},X,gr   #unnecessary, but speedup for n>10
#        =~s,IIIII,V,gr
#        =~s,IIII,IV,gr
#        =~s,VV,X,gr
#        =~s,VIV,IX,gr
#        =~s,XXXXX,L,gr
#        =~s,XXXX,XL,gr
#        =~s,LL,C,gr
#        =~s,LXL,XC,gr
#        =~s,CCCCC,D,gr
#        =~s,CCCC,CD,gr
#        =~s,DD,M,gr
#        =~s,DCD,CM,gr

Tools.pm view on Meta::CPAN

  if (wantarray) { return (map Math::BigRat->new($_),@_)  }
  else           { return      Math::BigRat->new($_[0])   }
}
sub big {
  wantarray
  ? (map     /\./ ? bigf($_)    :        /\// ? bigr($_)    : bigi($_), @_)
  :   $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
}
sub bigscale {
  @_==1 or croak "bigscale requires one and only one argument";
  my $scale=shift();
  eval q(use Math::BigInt    try=>"GMP") if !$INC{'Math/BigInt.pm'};
  eval q(use Math::BigFloat  try=>"GMP") if !$INC{'Math/BigFloat.pm'};
  eval q(use Math::BigRat    try=>"GMP") if !$INC{'Math/BigRat.pm'};
  Math::BigInt->div_scale($scale);
  Math::BigFloat->div_scale($scale);
  Math::BigRat->div_scale($scale);
  return;
}

  #my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
#*)
         #    ( 6378157.5, 6356772.2 )  #hmm
    #my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
    #my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
    #warn "e=$e\n";
    #warn "t=".(1 - $e**2)."\n";
    #warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
    #my $t=1 - $e**2;
    #my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
    #warn "t=$t\n";
    #warn "n=$n\n";
    #$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
    #$R=$a * $t/$n;

#=head2 fractional
#=cut

sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
  carp "fractional: NOT FINISHED";
  my $n=shift;
  print "----fractional n=$n\n";
  my $nn=$n; my $dec;
  $nn=~s,\.(\d+)$,$dec=length($1);$1.,;
  my $l;
  my $max=0;
  my($te,$ne);
  for(1..length($nn)/2){
    if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
      print "_ = $_ ".length($2)."\n";
      if(length($2)>$max){
        $l=$_;
	$te="$1$3"-$1;
        $max=length($2);
      }
    }
  }
  return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
  print "l=$l max=$max\n";
  $ne="9" x $l;
  print log($n),"\n";
  my $st=sub{print "status: ".($te/$ne)."   n=$n   ".($n/$te*$ne)."\n"};
  while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
  while($te/$n/$ne<0.99){ &$st(); $te*=10 }
  &$st();
  while(1){
    my $d=gcd($te,$ne); print "gcd=$d\n";
    last if $d==1;
    $te/=$d; $ne/=$d;
  }
  &$st();
  wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
}

=head2 isnum

B<Input:> String to be tested on this regexp:

C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>

If no argument is given isnum checks C<< $_ >>.

B<Output:> True or false (1 or 0)

 use Acme::Tools;
 my @e=('   +32.12354E-21  ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
 print isnum()       ? 'num' : 'str' for @e;  #prints num for every element except the last two
 print $_=~$Re_isnum ? 'num' : 'str' for @e;  #same but slighhly faster

=cut

our $Re_isnum     =qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
our $Re_isnum_wolz=qr/^ \s* [\-\+]? (?: ([1-9]\d*|0)?\.\d+ | [1-9]\d* | 0 ) (?:[eE][\-\+]?\d+)?\s*$/x; #without leading zero
sub isnum {(@_?$_[0]:$_)=~$Re_isnum}

=head2 between

Input: Three arguments.

Returns: Something I<true> if the first argument is numerically between the two next. Uses Perls C<< < >>, C<< >= >> and C<< <= >> operators.

=head2 btw

Like L<between> but instead of assuming numbers it checks all three input args
and does alphanumerical comparisons (with Perl operators C<lt>, C<ge> and C<le>) if any of the
three input args don't look like a number or look like a number but with
one or more leading zeros.

 btw(1,1,10)             #true numeric order since all three looks like number according to =~$Re_isnum
 btw(1,'02',13)          #true leading zero in '02' leads to alphabetical order
 btw(10, 012,10)         #true leading zero here means oct number, 012 = 10 (8*1+2), so 10 is btw 10 and 10
 btw('003', '02', '09')  #false because '003' lt '02'
 btw('a', 'b', 'c')      #false because 'a' lt 'b'
 btw('a', 'B', 'c')      #true because upper case letters comes before lower case ones in the "ascii alphabet"
 btw('a', 'c', 'B')      #true, btw() and between switches from and to if the first is > the second
 btw( -1, -2, 1)         #true
 btw( -1, -2, 0)         #true

Both between and btw returns C<undef> if any of the three input args are C<undef> (not defined).
If you're doing only numerical comparisons, using C<between> is faster than C<btw>.

=cut

sub between {
  my($test ,$fom, $tom)=@_;
  return if !defined$test or !defined$fom or !defined$tom;
  $fom < $tom ? $test >= $fom && $test <= $tom : $test >= $tom && $test <= $fom;
}

sub btw {
  my($test,$fom,$tom)=@_;
  return if !defined$test or !defined$fom or !defined$tom;
  $fom =~ $Re_isnum_wolz &&
  $tom =~ $Re_isnum_wolz &&
  $test=~ $Re_isnum_wolz
  ? $fom <  $tom ? $test >= $fom && $test <= $tom : $test >= $tom && $test <= $fom
  : $fom lt $tom ? $test ge $fom && $test le $tom : $test ge $tom && $test le $fom 
}

=head2 curb

B<Input:> Three arguments: value, minumum, maximum.

B<Output:> Returns the value if its between the given minumum and maximum.
Returns minimum if the value is less or maximum if the value is more.
Changes the variable if 1st arg is a scalarref.

 my $enthusiasm = 11;
 print curb( $enthusiasm, 1, 20 );      # prints 11, within bounds
 print curb( $enthusiasm, 1, 10 );      # prints 10
 print curb( $enthusiasm, 20, 100 );    # prints 20
 print curb(\$enthusiasm, 1, 10 );      # prints 10 and sets $enthusiasm = 10
 print $enthusiasm;                     # prints 10

=cut

sub curb {
  my($val,$min,$max)=@_;
  # todo: undef min|max => dont curb min|max
  croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
  return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
  $val < $min ? $min :
  $val > $max ? $max :
                $val;
}
sub bound { curb(@_) }

=head2 log10

=head2 log2

=head2 logn

 print log10(1000);                  # prints 3
 print log10(10000*sqtr(10));        # prints 4.5
 print log2(16);                     # prints 4
 print logn(4096, 8);                # prints 4 (12/3=4)
 print logn($PI, 2.71828182845905);  # same as  print log($PI)  using perls builtin log()

=cut

sub log10 { log($_[0]) / log(10)    }
sub log2  { log($_[0]) / log(2)     }
sub logn  { log($_[0]) / log($_[1]) }

=head1 STRINGS

=head2 upper

=head2 lower

Returns input string as uppercase or lowercase.

Can be used if Perls build in C<uc()> and C<lc()> for some reason does not convert Ã¦Ã¸Ã¥ or other latin1 letters outsize a-z.

Converts C<< Ã¦Ã¸Ã¥Ã¤Ã«Ã¯Ã¶Ã¼Ã¿Ã¢ÃªÃ®Ã´Ã»Ã£ÃµÃ Ã¨Ã¬Ã²Ã¹Ã¡Ã©ÃÃ³ÃºÃ½Ã±Ã° >> to and from C<< Ã†Ã˜Ã…Ã„Ã‹ÃÃ–Ãœ?Ã‚ÃŠÃŽÃ”Ã›ÃƒÃ•Ã€ÃˆÃŒÃ’Ã™ÃÃ‰ÃÃ“ÃšÃÃ‘Ã >>

See also C<< perldoc -f uc >> and C<< perldoc -f lc >>

=head2 trim

Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:

 trim(" asdf \t\n    123 ")  eq "asdf 123"
 trim(" asdf\t\n    123\n")  eq "asdf\t123"

Works on C<< $_ >> if no argument i given:

 print join",", map trim, " please ", " remove ", " my ", " spaces ";   # please,remove,my,spaces
 print join",", trim(" please ", " remove ", " my ", " spaces ");       # works on arrays as well
 my $s=' please '; trim(\$s);                                           # now  $s eq 'please'
 trim(\@untrimmedstrings);                                              # trims array strings inplace
 @untrimmedstrings = map trim, @untrimmedstrings;                       # same, works on $_
 trim(\$_) for @untrimmedstrings;                                       # same, works on \$_

=head2 lpad

=head2 rpad

Left or right pads a string to the given length by adding one or more spaces at the end for  I<rpad> or at the start for I<lpad>.

B<Input:> First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
Default is space.

 rpad('gomle',9);         # 'gomle    '
 lpad('gomle',9);         # '    gomle'
 rpad('gomle',9,'-');     # 'gomle----'
 lpad('gomle',9,'+');     # '++++gomle'
 rpad('gomle',4);         # 'goml'
 lpad('gomle',4);         # 'goml'
 rpad('gomle',7,'xyz');   # 'gomlxy'
 lpad('gomle',10,'xyz');  # 'xyzxygoml'

=head2 cpad

Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.

 cpad('mat',5)            eq ' mat '
 cpad('mat',4)            eq 'mat '
 cpad('mat',6)            eq ' mat  '
 cpad('mat',9)            eq '   mat   '
 cpad('mat',5,'+')        eq '+mat+'
 cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'

Tools.pm view on Meta::CPAN

    for my $r (@$aoh){
	my $v=join",",map &$val($$r{$_},$t{$_}), @col;
	$sql.="insert into $conf{name} values ($v);\n";
    }
    $sql="drop table $conf{name};\n\n$sql" if $conf{drop}==1;
    $sql="drop table if exists $conf{name};\n\n$sql" if $conf{drop}>=2;
    $sql="$conf{begin}\n$sql" if $conf{begin};
    $sql.=$conf{end};
    $sql;
}

sub aoh2xls { croak "Not implemented yet: aoh2xls" }


=head1 STATISTICS

=head2 sum

Returns the sum of a list of numbers. Undef is ignored.

 print sum(1,3,undef,8);   # 12
 print sum(1..1000);       # 500500
 print sum(undef);         # undef

=cut

sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }

=head2 avg

Returns the I<average> number of a list of numbers. That is C<sum / count>

 print avg(  2, 4, 9);   # 5      (2+4+9) / 3 = 5
 print avg( [2, 4, 9] ); # 5      pass by reference, same result but faster for large arrays

Also known as I<arithmetic mean>.

Pass by reference: If one argument is given and it is a reference to an array,
this array is taken as the list of numbers. This mode is about twice as fast
for 10000 numbers or more. It most likely also saves memory.

=cut

sub avg {
  my($sum,$n,@a)=(0,0);
  no warnings;
  if( @_==0 )                          { return undef             }
  if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
  else                                 { @a=grep defined,@_       }
  if( @a==0 )                          { return undef             }
  $sum+=$_ for @a;
  return $sum/@a
}

=head2 geomavg

Returns the I<geometric average> (a.k.a I<geometric mean>) of a list of numbers.

 print geomavg(10,100,1000,10000,100000);               # 1000
 print 0+ (10*100*1000*10000*100000) ** (1/5);          # 1000 same thing
 print exp(avg(map log($_),10,100,1000,10000,100000));  # 1000 same thing, this is how geomavg() works internally

=cut

sub geomavg { exp(avg(map log($_), @_)) }

=head2 harmonicavg

Returns the I<harmonic average> (a.k.a I<geometric mean>) of a list of numbers. L<http://en.wikipedia.org/wiki/Harmonic_mean>

 print harmonicavg(10,11,12);               # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337

=cut

sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }

=head2 variance

C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>

=cut

sub variance {
  my $sumx2; $sumx2+=$_*$_ for @_;
  my $sumx; $sumx+=$_ for @_;
  (@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
}

=head2 stddev

C<< Standard_Deviation = sqrt(variance) >>

Standard deviation (stddev) is a measurement of the width of a normal
distribution where one stddev on each side of the mean covers 68% and
two stddevs 95%.  Normal distributions are sometimes called Gauss curves
or Bell shapes. L<https://en.wikipedia.org/wiki/Standard_deviation>

 stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4)         # = 1.0914103126635
 avg(@testscores) + stddev(@testscores)            # = the score for one stddev above avg, 115
 avg(@testscores) - stddev(@testscores)            # = the score for one stddev below avg, 85

=cut

sub stddev {
  return undef        if @_==0;
  return stddev(\@_)  if @_>0 and !ref($_[0]);
  my $ar=shift;
  return undef        if @$ar==0;
  return 0            if @$ar==1;
  my $sumx2; $sumx2 += $_*$_ for @$ar;
  my $sumx;  $sumx  += $_    for @$ar;
  sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
}

=head2 rstddev

Relative stddev = stddev / avg

=cut

sub rstddev { stddev(@_) / avg(@_) }

=head2 median

Returns the median value of a list of numbers. The list do not have to

Tools.pm view on Meta::CPAN

  70 -  71   8202 ========
  72 -  73  10577 ==========
  74 -  75  13319 =============
  76 -  77  16283 ================
  78 -  79  20076 ====================
  80 -  81  23742 =======================
  82 -  83  27726 ===========================
  84 -  85  32205 ================================
  86 -  87  36577 ====================================
  88 -  89  40684 ========================================
  90 -  91  44515 ============================================
  92 -  93  47575 ===============================================
  94 -  95  50098 ==================================================
  96 -  97  52062 ====================================================
  98 -  99  53338 =====================================================
 100 - 101  52834 ====================================================
 102 - 103  52185 ====================================================
 104 - 105  50472 ==================================================
 106 - 107  47551 ===============================================
 108 - 109  44471 ============================================
 110 - 111  40704 ========================================
 112 - 113  36642 ====================================
 114 - 115  32171 ================================
 116 - 117  28166 ============================
 118 - 119  23618 =======================
 120 - 121  19873 ===================
 122 - 123  16360 ================
 124 - 125  13452 =============
 126 - 127  10575 ==========
 128 - 129   8283 ========
 130 - 131   6224 ======
 132 - 133   4661 ====
 134 - 135   3527 ===
 136 - 137   2516 ==
 138 - 139   1833 =
 140 - 141   1327 =
 142 - 143    860
 144 - 145    604
 146 - 147    428
 148 - 149    275
 150 - 151    184
 152 - 153    111
 154 - 155     67

=cut

sub random_gauss {
  my($avg,$stddev,$num)=@_;
  $avg=0    if !defined $avg;
  $stddev=1 if !defined $stddev;
  $num=1    if !defined $num;
  croak "random_gauss should not have more than 3 arguments" if @_>3;
  my @r;
  while (@r<$num) {
    my($x1,$x2,$w);
    do {
      $x1=2.0*rand()-1.0;
      $x2=2.0*rand()-1.0;
      $w=$x1*$x1+$x2*$x2;
    } while $w>=1.0;
    $w=sqrt(-2.0*log($w)/$w) * $stddev;
    push @r,  $x1*$w + $avg,
              $x2*$w + $avg;
  }
  pop @r if @r > $num;
  return $r[0] if @_<3;
  return @r;
}

=head2 mix

Mixes an array in random order. In-place if given an array reference or not if given an array.

C<mix()> could also have been named C<shuffle()>, as in shuffling a deck of cards.

Example:

This:

 print mix("a".."z"),"\n" for 1..3;

...could write something like:

 trgoykzfqsduphlbcmxejivnwa
 qycatilmpgxbhrdezfwsovujkn
 ytogrjialbewcpvndhkxfzqsmu

B<Input:>

=over 4

=item 1.
Either a reference to an array as the only input. This array will then be mixed I<in-place>. The array will be changed:

This: C<< @a=mix(@a) >> is the same as:  C<< mix(\@a) >>.

=item 2.
Or an array of zero, one or more elements.

=back

Note that an input-array which COINCIDENTLY SOME TIMES has one element
(but more other times), and that element is an array-ref, you will
probably not get the expected result.

To check distribution:

 perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n

The letters a-z should occur around 1000 times each.

Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)

 perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'

(Uses L</cart>, which is not a typo, see further down here)

Note: C<List::Util::shuffle()> is approximately four times faster. Both respects the Perl built-in C<srand()>.

=cut

Tools.pm view on Meta::CPAN

C<gzip()> is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
distros. Except C<gzip()> does this in-memory. (Both using the C-library C<zlib>).

 writefile( "file.gz", gzip("some string") );

=head2 gunzip

B<Input:> A binary compressed string or a reference to such a string. I.e. something returned from
C<gzip()> earlier or read from a C<< .gz >> file.

B<Output:> The original larger non-compressed string. Text or binary.

C<gunzip()> is a wrapper for Compress::Zlib::memGunzip()

 print gunzip( gzip("some string") );   #some string

=head2 bzip2

Same as L</gzip> and L</gunzip> except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.

Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.

See also L<Compress::Bzip2>, C<man Compress::Bzip2>, C<man bzip2>, C<man bunzip2>.

 writefile( "file.bz2", bzip2("some string") );
 print bunzip2( bzip2("some string") );   #some string

=head2 bunzip2

Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L</bzip2>.

=cut

sub gzip    { my $s=shift; eval"require Compress::Zlib"  if !$INC{'Compress/Zlib.pm'};  croak "Compress::Zlib not found"  if $@; Compress::Zlib::memGzip(    ref($s)?$s:\$s ) }
sub gunzip  { my $s=shift; eval"require Compress::Zlib"  if !$INC{'Compress/Zlib.pm'};  croak "Compress::Zlib not found"  if $@; Compress::Zlib::memGunzip(  ref($s)?$s:\$s ) }
sub bzip2   { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip(   ref($s)?$s:\$s ) }
sub bunzip2 { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }

=head1 NET, WEB, CGI-STUFF

=head2 ipaddr

B<Input:> an IP-number

B<Output:> either an IP-address I<machine.sld.tld> or an empty string
if the DNS lookup didn't find anything.

Example:

 perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")'  # prints www.uio.no

Uses perls C<gethostbyaddr> internally.

C<ipaddr()> memoizes the results internally (using the
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
particular IP number might take some time.

Some few DNS loopups can take several seconds.
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
probably turn off hostname lookups in their logs and just log IP numbers by using
C<HostnameLookups Off> in Apache C<httpd.conf> and then use I<ipaddr> afterwards if necessary.

=cut

our %IPADDR_memo;
sub ipaddr {
  my $ipnr=shift;
  #hm, NOTE: The 2 parameter on the next code line is not 2 for all OSes,
  #but seems to work in Linux and HPUX. Den correct way is to use the
  #AF_INET constant in the Socket or the IO::Socket package.
  return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
}

=head2 ipnum

C<ipnum()> does the opposite of C<ipaddr()>

Does an attempt of converting an IP address (hostname) to an IP number.
Uses DNS name servers via perls internal C<gethostbyname()>.
Return empty string (undef) if unsuccessful.

 print ipnum("www.uio.no");   # prints 129.240.13.152

Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.

=cut

our %IPNUM_memo;
sub ipnum {
  my $ipaddr=shift;
  #croak "No $ipaddr" if !length($ipaddr);
  return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
  my $h=gethostbyname($ipaddr);
  #croak "No ipnum for $ipaddr" if !$h;
  return if !defined $h;
  my $ipnum = join(".",unpack("C4",$h));
  $IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
  return $IPNUM_memo{$ipaddr};
}

our $Ipnum_errmsg;
our $Ipnum;
sub ipnum_ok {
    my $ipnum=shift;
    $Ipnum=undef;
    eval{
      die "malformed ipnum $ipnum\n" if not $ipnum=~/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
      die "invalid ipnum $ipnum\n"   if grep$_>255,$1,$2,$3,$4;
      $Ipnum=$1*256**3 + $2*256**2 + $3*256 + $4;
    };
    my$r=($Ipnum_errmsg=$@) ? 0 : 1;
    $r
}
our $Iprange_errmsg;
our $Iprange_start;
sub iprange_ok {
    my $iprange=shift;
    $Iprange_start=undef;
    my($r,$m);
    eval{

Tools.pm view on Meta::CPAN

    elsif(ref($text) eq 'SCALAR'){
	print WRITEFILE $$text;
    }
    elsif(ref($text) eq 'ARRAY'){
	print WRITEFILE "$_\n" for @$text;
    }
    else {
	croak;
    }
    close(WRITEFILE);
    return;
}

=head2 readfile

Just as with L</writefile> you can read in a whole file in one operation with C<readfile()>. Instead of:

 open my $FILE,'<', $filename or die $!;
 my $data = join"",<$FILE>;
 close($FILE);

This is simpler:

 my $data = readfile($filename);

B<More examples:>

Reading the content of the file to a scalar variable: (Any content in C<$data> will be overwritten)

 my $data;
 readfile('filename.txt',\$data);

Reading the lines of a file into an array:

 my @lines;
 readfile('filnavn.txt',\@lines);
 for(@lines){
   ...
 }

Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
If C<@lines> is non-empty, this will be lost.

Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
The last example can be rewritten:

 for(readfile('filnavn.txt')){
   ...
 }

With two input arguments, nothing (undef) is returned from C<readfile()>.

Automatic decompression:

 my $txt = readfile('file.txt.gz');  #uses /bin/gunzip to decompress content

Extentions C<.gz>, C<.bz2> and C<.xz> are recognized for decompression. See also C<writefile()> and C<openstr()>.

=cut

#http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
#todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)

sub readfile {
  my($filename,$ref)=@_;
  if(@_==1){
    if(wantarray){ my @data; readfile($filename,\@data); return @data }
    else         { my $data; readfile($filename,\$data); return $data }
  }
  else {
    open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
    if   ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
    elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
    else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
    close($fh);
    return;#?
  }
}

=head2 readdirectory

B<Input:>

Name of a directory.

B<Output:>

A list of all files in it, except of  C<.> and C<..>  (on linux/unix systems, all directories have a C<.> and C<..> directory).

The names of all types of files are returned: normal files, directories, symbolic links,
pipes, semaphores. That is every thing shown by C<ls -la> except C<.> and C<..>

C<readdirectory> do not recurce down into subdirectories (but see example below).

B<Example:>

  my @files = readdirectory("/tmp");

B<Why readdirectory?>

Sometimes calling the built ins C<opendir>, C<readdir> and C<closedir> seems a tad tedious, since this:

 my $dir="/usr/bin";
 opendir(D,$dir);
 my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
 closedir(D);

Is the same as this:

 my @files=readdirectory("/usr/bin");

See also: L<File::Find>

B<Why not readdirectory?>

On huge directories with perhaps tens or houndreds of thousands of
files, readdirectory() will consume more memory than perls
opendir/readdir. This isn't usually a concern anymore for modern
computers with gigabytes of RAM, but might be the rationale behind
Perls more tedious way created in the 80s.  The same argument goes for
file slurping. On the other side it's also a good practice to never

Tools.pm view on Meta::CPAN


A more perl-ish and often faster way of doing the same:

 {123=>3, 214=>7}->{$a} || $a                       # (beware of 0)

=cut

sub decode {
  croak "Must have a mimimum of two arguments" if @_<2;
  my $uttrykk=shift;
  if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
  else               { !defined shift    and return shift or shift for 1..@_/2 }
  return shift;
}

sub decode_num {
  croak "Must have a mimimum of two arguments" if @_<2;
  my $uttrykk=shift;
  if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
  else               { !defined shift    and return shift or shift for 1..@_/2 }
  return shift;
}

=head2 qrlist

Input: An array of values to be used to test againts for existence.

Output: A reference to a regular expression. That is a C<qr//>

The regex sets $1 if it match.

Example:

  my @list=qw/ABc XY DEF DEFG XYZ/;
  my $filter=qrlist("ABC","DEF","XY.");         # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
  my @filtered= grep { $_ =~ $filter } @list;   # returns DEF and XYZ, but not XYZ because the . char is taken literally

Note: Filtering with hash lookups are WAY faster.

Source:

 sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }

=cut

sub qrlist (@) {
  my $str=join"|",map quotemeta,@_;
  return qr/^($str)$/;
}

=head2 ansicolor

Perhaps easier to use than L<Term::ANSIColor> ?

B<Input:> One argument. A string where the char C<Â¤> have special
meaning and is replaced by color codings depending on the letter
following the C<Â¤>.

B<Output:> The same string, but with C<Â¤letter> replaced by ANSI color
codes respected by many types terminal windows. (xterm, telnet, ssh,
telnet, rlog, vt100, cygwin, rxvt and such...).

B<Codes for ansicolor():>

 Â¤r red
 Â¤g green
 Â¤b blue
 Â¤y yellow
 Â¤m magenta
 Â¤B bold
 Â¤u underline
 Â¤c clear
 Â¤Â¤ reset, quits and returns to default text color.

B<Example:>

 print ansicolor("This is maybe Â¤ggreenÂ¤Â¤?");

Prints I<This is maybe green?> where the word I<green> is shown in green.

If L<Term::ANSIColor> is not installed or not found, returns the input
string with every C<Â¤> including the following code letters
removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
not installed, you just don't get the colors).

See also L<Term::ANSIColor>.

=cut

sub ansicolor {
  my $txt=shift;
  eval{require Term::ANSIColor} or return replace($txt,qr/Â¤./);
  my %h=qw/r red  g green  b blue  y yellow  m magenta  B bold  u underline  c clear  Â¤ reset/;
  my $re=join"|",keys%h;
  $txt=~s/Â¤($re)/Term::ANSIColor::color($h{$1})/ge;
  return $txt;
}

=head2 ccn_ok

Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
This method of control digits is used by MasterCard, Visa, American Express,
Discover, Diners Club / Carte Blanche, JCB and others.

B<Input:>

A credit card number. Can contain non-digits, but they are removed internally before checking.

B<Output:>

Something true or false.

Or more accurately:

Returns C<undef> (false) if the input argument is missing digits.

Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.

Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.

The name of the credit card company is returned like this (without the C<'> character)

Tools.pm view on Meta::CPAN


In algorithmic terms the number of bits needed is C<ln of ln of n>.  Thats why 4 bits (counters up
to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
(Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
check for that, perhaps in future versions).

Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
a counting bloom filter is used to be sent over slow networks because they are very compressable by
common compression tools like I<gzip> or L<Compress::Zlib> and such.

Deletion of non-existing keys makes C<bfdelete> die (croak).

=head2 bfdelete

Deletes from a counting bloom filter:

 bfdelete($bf, @keys);
 bfdelete($bf, \@keys);

Returns C<$bf> after deletion.

Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.

=head2 bfaddbf

Adds another bloom filter to a bloom filter.

Bloom filters has the proberty that bit-wise I<OR>-ing the bit-filters
of two filters with the same capacity and the same number and type of
hash functions, adds the filters:

  my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
  my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);

  bfaddbf($bf1,$bf2);

  print "Yes!" if bfgrep($bf1, 1..1000) == 1000;

Prints yes since C<bfgrep> now returns an array of all the 1000 elements.

Croaks if the filters are of different dimensions.

Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)

=head2 bfsum

Returns the number of 1's in the filter.

 my $percent=100*bfsum($bf)/$$bf{filterlength};
 printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity

Sums the counters for counting bloom filters (much slower than for non counting).

=head2 bfdimensions

Input, two numeric arguments: Capacity and error_rate.

Outputs an array of two numbers: m and k.

  m = - n * log(p) / log(2)**2   # n = capacity, m = bits in filter (divide by 8 to get bytes)
  k = log(1/p) / log(2)          # p = error_rate, uses perls internal log() with base e (2.718)

...that is: m = the best number of bits in the filter and k = the best
number of hash functions optimized for the given capacity (n) and
error_rate (p). Note that k is a dependent only of the error_rate.  At
about two percent error rate the bloom filter needs just the same
number of bytes as the number of keys.

 Storage (bytes):
 Capacity      Error-rate  Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
               0.000000001 0.00000001 0.0000001  0.000001   0.00001    0.0001     0.001      0.01       0.02141585 0.1        0.5        0.99
 ------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
            10 54.48       48.49      42.5       36.51      30.52      24.53      18.53      12.54      10.56      6.553      2.366      0.5886
           100 539.7       479.8      419.9      360        300.1      240.2      180.3      120.4      100.6      60.47      18.6       0.824
          1000 5392        4793       4194       3595       2996       2397       1798       1199       1001       599.6      180.9      3.177
         10000 5.392e+04   4.793e+04  4.194e+04  3.594e+04  2.995e+04  2.396e+04  1.797e+04  1.198e+04  1e+04      5991       1804       26.71
        100000 5.392e+05   4.793e+05  4.193e+05  3.594e+05  2.995e+05  2.396e+05  1.797e+05  1.198e+05  1e+05      5.991e+04  1.803e+04  262
       1000000 5.392e+06   4.793e+06  4.193e+06  3.594e+06  2.995e+06  2.396e+06  1.797e+06  1.198e+06  1e+06      5.991e+05  1.803e+05  2615
      10000000 5.392e+07   4.793e+07  4.193e+07  3.594e+07  2.995e+07  2.396e+07  1.797e+07  1.198e+07  1e+07      5.991e+06  1.803e+06  2.615e+04
     100000000 5.392e+08   4.793e+08  4.193e+08  3.594e+08  2.995e+08  2.396e+08  1.797e+08  1.198e+08  1e+08      5.991e+07  1.803e+07  2.615e+05
    1000000000 5.392e+09   4.793e+09  4.193e+09  3.594e+09  2.995e+09  2.396e+09  1.797e+09  1.198e+09  1e+09      5.991e+08  1.803e+08  2.615e+06
   10000000000 5.392e+10   4.793e+10  4.193e+10  3.594e+10  2.995e+10  2.396e+10  1.797e+10  1.198e+10  1e+10      5.991e+09  1.803e+09  2.615e+07
  100000000000 5.392e+11   4.793e+11  4.193e+11  3.594e+11  2.995e+11  2.396e+11  1.797e+11  1.198e+11  1e+11      5.991e+10  1.803e+10  2.615e+08
 1000000000000 5.392e+12   4.793e+12  4.193e+12  3.594e+12  2.995e+12  2.396e+12  1.797e+12  1.198e+12  1e+12      5.991e+11  1.803e+11  2.615e+09

 Error rate:               0.99   Hash functions:  1
 Error rate:                0.5   Hash functions:  1
 Error rate:                0.1   Hash functions:  3
 Error rate: 0.0214158522653385   Hash functions:  6
 Error rate:               0.01   Hash functions:  7
 Error rate:              0.001   Hash functions: 10
 Error rate:             0.0001   Hash functions: 13
 Error rate:            0.00001   Hash functions: 17
 Error rate:           0.000001   Hash functions: 20
 Error rate:          0.0000001   Hash functions: 23
 Error rate:         0.00000001   Hash functions: 27
 Error rate:        0.000000001   Hash functions: 30

=head2 bfstore

Storing and retrieving bloom filters to and from disk uses L<Storable>s C<store> and C<retrieve>. This:

 bfstore($bf,'filename.bf');

It the same as:

 use Storable qw(store retrieve);
 ...
 store($bf,'filename.bf');

=head2 bfretrieve

This:

 my $bf=bfretrieve('filename.bf');

Or this:

 my $bf=bfinit('filename.bf');

Is the same as:

Tools.pm view on Meta::CPAN


Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)

This:

 my $bfc = bfclone($bf);

Works just as:

 use Storable;
 my $bfc=Storable::dclone($bf);

=head2 Object oriented interface to bloom filters

 use Acme::Tools;
 my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
 print ref($bf),"\n";                           # prints Acme::Tools::BloomFilter
 $bf->add(@keys);
 $bf->check($keys[0]) and print "ok\n";         # prints ok
 $bf->grep(\@keys)==@keys and print "ok\n";     # prints ok
 $bf->store('filename.bf');
 my $bf2=bfretrieve('filename.bf');
 $bf2->check($keys[0]) and print "ok\n";        # still ok

 $bf2=$bf->clone();

To instantiate a previously stored bloom filter:

 my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );

The o.o. interface has the same methods as the C<bf...>-subs without the
C<bf>-prefix in the names. The C<bfretrieve> is not available as a
method, although C<bfretrieve>, C<Acme::Tools::bfretrieve> and
C<Acme::Tools::BloomFilter::retrieve> are synonyms.

=head2 Internals and speed

The internal hash-functions are C<< md5( "$key$salt" ) >> from L<Digest::MD5>.

Since C<md5> returns 128 bits and most medium to large sized bloom
filters need only a 32 bit hash function, the result from md5() are
split (C<unpack>-ed) into 4 parts 32 bits each and are treated as if 4
hash functions was called at once (speedup). Using different salts to
the key on each md5 results in different hash functions.

Digest::SHA512 would have been even better since it returns more bits,
if it werent for the fact that it's much slower than Digest::MD5.

String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:

 time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6'       #5.56 sec
 time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6'           #2.79 sec, faster but not per bit
 time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)

Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.

=head2 Theory and math behind bloom filters

L<http://www.internetmathematics.org/volumes/1/4/Broder.pdf>

L<http://blogs.sun.com/jrose/entry/bloom_filters_in_a_nutshell>

L<http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html>

See also Scaleable Bloom Filters: L<http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf> (not implemented in Acme::Tools)

...and perhaps L<http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf>

=cut

sub bfinit {
  return bfretrieve(@_)                             if @_==1;
  return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
  return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
  require Digest::MD5;
  @_%2&&croak "Arguments should be a hash of equal number of keys and values";
  my %arg=@_;
  my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
  my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
  croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
  croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
  croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
  croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
  my $bf={error_rate    => 0.001,  #default p
	  capacity      => 100000, #default n
          min_hashfuncs => 1,
          max_hashfuncs => 100,
	  counting_bits => 1,      #default: not counting filter
	  adaptive      => 0,
	  %arg,                    #arguments
	  key_count     => 0,
	  overflow      => {},
	  version       => $Acme::Tools::VERSION,
	 };
  croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
  @$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
  @$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
  $$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x   new empty filter
  $$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
               :$$bf{filterlength}<=2**32/4 ? "N*"
               :                              "Q*";
  bfadd($bf,@{$arg{keys}}) if $arg{keys};
  return $bf;
}
sub bfaddbf {
  my($bf,$bf2)=@_;
  my $differror=join"\n",
    map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
    grep $$bf{$_} ne $$bf2{$_},
    qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
  croak $differror if $differror;
  croak "Can not add adaptive bloom filters" if $$bf{adaptive};
  my $count=$$bf{key_count}+$$bf2{key_count};
  croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
    if $count > $$bf{capacity};
  $$bf{key_count}+=$$bf2{key_count};
  if($$bf{counting_bits}==1){
    $$bf{filter} |= $$bf2{filter};
    #$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
  }
  else {

Tools.pm view on Meta::CPAN

  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
  return grep {
    my $match = 1; # match if every bit is on
    my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
    vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
    !$match;
  } @$keysref;
}
sub bfdelete {
  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
  croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
  for my $key (@$keysref){
    my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
    $$bf{key_count}==0 and croak "Deleted all and then some"  or  $$bf{key_count}--;
    my($ones,$croak,@pos)=(0);
    for(0..$k-1){
      my $pos=$h[$_] % $m;
      my $c=
      vec($$bf{filter}, $pos, $cb);
      vec($$bf{filter}, $pos, $cb)=$c-1;
      $croak="Cannot delete a non-existing key $key" if $c==0;
      $croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
	if $c==1 and ++$ones and $$bf{overflow}{$pos};
    }
    if($croak){ #rollback
      vec($$bf{filter}, $h[$_] % $m, $cb)=
      vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
      croak $croak;
    }
  }
  return $bf;
}
sub bfstore {
  require Storable;
  Storable::store(@_);
}
sub bfretrieve {
  require Storable;
  my $bf=Storable::retrieve(@_);
  carp  "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
  return $bf;
}
sub bfclone {
  require Storable;
  return Storable::dclone(@_); #could be faster
}
sub bfdimensions_old {
  my($n,$p,$mink,$maxk, $k,$flen,$m)=
    @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
   :@_==2 ? (@_,1,100,1)
          : croak "Wrong number of arguments (".@_."), should be 2";
  croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
  $m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
  $flen = int(1+$flen);
  return ($flen,$k);
}
sub bfdimensions {
  my($n,$p,$mink,$maxk)=
    @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
   :@_==2 ? (@_,1,100)
          : croak "Wrong number of arguments (".@_."), should be 2";
  my $k=log(1/$p)/log(2);           # k hash funcs
  my $m=-$n*log($p)/log(2)**2;      # m bits in filter
  return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
}

#crontab -e
#01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1

sub _update_currency_file { #call from cron
  my $fn=shift()||'/var/www/html/currency-rates';
  my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or croak for values %exe;
  open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
  print $F "#-- Currency rates ".localtime()." (".time().")\n";
  print $F "#   File generated by Acme::Tools version $VERSION\n";
  print $F "#   Updated every 6th hour on http://calthis.com/currency-rates\n";
  print $F "NOK 1.000000000\n";
  my $amount=1000;
  my $data=qx($exe{curl} -s "https://www.x-rates.com/table/?from=NOK&amount=$amount");
  $data=~s,to=([A-Z]{3})(.)>,$2>$1</td><td>,g;
  my @data=ht2t($data,"Alphabetical order"); shift @data;
  @data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
  my %data=map split,@data;
  my $json=qx( $exe{curl} -s https://api.coinmarketcap.com/v1/ticker/ );
  eval "require JSON;"; croak if $@;
  my $arr=JSON::decode_json($json);
  for my $c (qw(BTC LTC XBT ETH XRP BCH ETC)) {
      my @a=grep$$_{symbol} eq $c,@$arr;
      next if @a != 1 or !$a[0]{price_usd};
      push @data, "$c ".($a[0]{price_usd}*$data{USD})."\n";
  }
  #die srlz(\@data,'data');
  print $F sort(@data);
  close($F);
  qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
}

sub ftype {
  my $f=shift;
  -e $f and
      -f$f ? 'file'         # -f  File is a plain file.
     :-d$f ? 'dir'          # -d  File is a directory.
     :-l$f ? 'symlink'      # -l  File is a symbolic link.
     :-p$f ? 'pipe'         # -p  File is a named pipe (FIFO), or Filehandle is a pipe.
     :-S$f ? 'socket'       # -S  File is a socket.
     :-b$f ? 'blockfile'    # -b  File is a block special file.
     :-c$f ? 'charfile'     # -c  File is a character special file.
     :-t$f ? 'ttyfile'      # -t  Filehandle is opened to a tty.
     :       ''
  or undef;
}

sub ext2mime {
  my $ext=shift(); #or filename
  #http://www.sitepoint.com/web-foundations/mime-types-complete-list/
  croak "todo: ext2mime not yet implemented";
  #return "application/json";#feks
}

sub base64 ($;$) { #
  if ($] >= 5.006) {
    require bytes;
    croak "base64 failed: only defined for bytes"

Tools.pm view on Meta::CPAN

    }
    $_=join",",@$_ for values %$hashref;
    (@r,@a)
}

#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp -
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp Tools.pm
sub cmd_zsize {
  my %o;
  my @argv=opts("heEpts",\%o,@_);
  my $stdin=!@argv || join(",",@argv) eq '-';
  @argv=("/tmp/acme-tools.$$.stdin") if $stdin;
  writefile($argv[0],join("",<STDIN>)) if $stdin;
  my @prog=grep qx(which $_), qw(gzip bzip2 xz zstd brotli);
  for my $f (@argv){
      my $sf=-s$f;
      print "--- $f does not exists\n" and next if !-e$f;
      print "--- $f is not a file\n" and next if !-f$f;
      print "--- $f ($sf b) is not readable\n" and next if !-r$f;
      print "--- $sf b  ".bytes_readable($sf)."  ".($stdin?"-":$f)."\n";
      next if !$sf;
      my(@t,@s);
      for my $prog (@prog){
          next if !qx(which $prog);
          my @l=1..9;
          push @l,map"e$_",1..9 if $prog eq 'xz' and $o{e};
          @l=map"e$_",1..9      if $prog eq 'xz' and $o{E};
          @l=map 10+$_,@l       if $prog eq 'zstd';
          @l=map"q $_",3..11    if $prog eq 'brotli';
          printf "%-6s",$prog;
          push @t, $prog, [] if $o{t};
          push @s, $prog, [] if $o{p} and $o{s};
          for my $l (@l){ #level
              my $t=time_fp();
              my $b=qx(cat $f|$prog -$l|wc -c);
              push@{$t[-1]},time_fp()-$t if $o{t};
              push@{$s[-1]},$b           if $o{p} and $o{s};
              $o{p} ? printf("%9.1f%% ",100*$b/$sf)
             :$o{h} ? printf("%10s ",bytes_readable($b))
             :        printf("%10d ",$b);
          }
          print "\n";
      }
      while(@s){
          printf "%-6s",shift@s;
          $o{h}?printf("%10s ",bytes_readable($_)):printf("%10d ",$_) for @{shift@s}; print "\n";
      }
      while(@t){
          printf "%-6s",shift@t;
          printf "%9.3fs ",$_ for @{shift@t}; print "\n";
      }
  }
  unlink $argv[0] if $stdin;
}

sub cmd_rttop   { die "rttop: not implemented here yet.\n" }
sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
sub cmd_catal   { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
#todo: cmd_tabdiff (fra sonyk)
#todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)

=head1 DATABASE STUFF - NOT IMPLEMENTED YET

Uses L<DBI>. Comming soon...

  $Dbh
  dlogin
  dlogout
  drow
  drows
  drowc
  drowsc
  dcols
  dpk
  dsel
  ddo
  dins
  dupd
  ddel
  dcommit
  drollback

=cut

#my$dummy=<<'SOON';
sub dtype {
  my $connstr=shift;
  return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
  return 'Oracle' if $connstr=~/\@/;
  return 'Pg' if 1==2;
  die;
}

our($Dbh,@Dbh,%Sth);
our %Dbattr=(RaiseError => 1, AutoCommit => 0); #defaults
sub dlogin {
  my $connstr=shift();
  my %attr=(%Dbattr,@_);
  my $type=dtype($connstr);
  my($dsn,$u,$p)=('','','');
  if($type eq 'SQLite'){
    $dsn=$connstr;
  }
  elsif($type eq 'Oracle'){
    ($u,$p,$dsn)=($connstr=~m,(.+?)(/.+?)?\@(.+),);
  }
  elsif($type eq 'Pg'){
    croak "todo";
  }
  else{
    croak "dblogin: unknown database type for connection string $connstr\n";
  }
  $dsn="dbi:$type:$dsn";
  push @Dbh, $Dbh if $Dbh; #local is better?
  require DBI;
  $Dbh=DBI->connect($dsn,$u,$p,\%attr); #connect_cached?
}
sub dlogout {
  $Dbh->disconnect;
  $Dbh=pop@Dbh if @Dbh;
}
sub drow {
  my($q,@b)=_dattrarg(@_);
  #my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
  my $sth=$Dbh->prepare_cached($q);
  $sth->execute(@b);
  my @r=$sth->fetchrow_array;
  $sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
  #$dbh->selectrow_array($statement);
  return @r==1?$r[0]:@r;
}
sub drows {
}
sub drowc {
}
sub drowsc {
}
sub dcols {
}
sub dpk {
}
sub dsel {
}
sub ddo {
  my @arg=_dattrarg(@_);
  #warn serialize(\@arg,'arg','',1);
  $Dbh->do(@arg); #hm cache?
}
sub dins {
}
sub dupd {
}
sub ddel {
}
sub dcommit { $Dbh->commit }
sub drollback { $Dbh->rollback }

sub _dattrarg {
  my @arg=@_;
  splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
  @arg;
}

=head2 self_update

Update Acme::Tools to newest version quick and dirty:

 function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}

 pmview Acme::Tools                                     #view date and version before
 sudo perl -MAcme::Tools -e Acme::Tools::self_update    #update to newest version
 pmview Acme::Tools                                     #view date and version after

Does C<cd> to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm

TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl

=cut

Tools.pm view on Meta::CPAN

  sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
}

1;

package Acme::Tools::BloomFilter;
use 5.008; use strict; use warnings; use Carp;
sub new      { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
sub add      { &Acme::Tools::bfadd      }
sub addbf    { &Acme::Tools::bfaddbf    }
sub check    { &Acme::Tools::bfcheck    }
sub grep     { &Acme::Tools::bfgrep     }
sub grepnot  { &Acme::Tools::bfgrepnot  }
sub delete   { &Acme::Tools::bfdelete   }
sub store    { &Acme::Tools::bfstore    }
sub retrieve { &Acme::Tools::bfretrieve }
sub clone    { &Acme::Tools::bfclone    }
sub sum      { &Acme::Tools::bfsum      }
1;

# Ny versjon:
# - git clone https://github.com/kjetillll/Acme-Tools.git
# - c-s todo
# - endre $VERSION
# - endre Release history under HISTORY
# - endre Ã¥rstall under =head1 COPYRIGHT
# - oppd default valutakurser inkl datoen
# - emacs Changes
# - emacs README versjon + aarstall
# - diff -byW200 <(grep -a ^sub Acme-Tools-0.22/Tools.pm|sort) <(grep -a ^sub Tools.pm|sort)|less
# - emacs MANIFEST legg til ev nye t/*.t
# - perl            Makefile.PL && make test
# - /usr/bin/perl   Makefile.PL && make test
# - perlbrew exec "perl Makefile.PL && time make test"
# - perlbrew exec "perl Makefile.PL && make test" | grep -P '^(perl-|All tests successful)'
# - perlbrew use perl-5.10.1; perl Makefile.PL && make test; perlbrew off
# - test evt i cygwin og mingw-perl
# - pod2html Tools.pm > Tools.html ; firefox Tools.html
# - https://metacpan.org/pod/Acme::Tools
# - http://cpants.cpanauthors.org/dist/Acme-Tools   #kvalitee
# - perl Makefile.PL && make test && make dist
# - cp -p *tar.gz /htdocs/
# - #ci -l -mversjon -d `cat MANIFEST` #no
# - git add `cat MANIFEST`
# - git status
# - git commit -am versjon
# - git push                    #eller:
# - git push origin master
# - http://pause.perl.org/
# - tegnsett/utf8-kroell
# - https://rt.cpan.org/Dist/Display.html?Queue=Acme-Tools
# http://en.wikipedia.org/wiki/Birthday_problem#Approximations

# memoize_expire()           http://perldoc.perl.org/Memoize/Expire.html
# memoize_file_expire()
# memoize_limit_size() #lru
# memoize_file_limit_size()
# memoize_memcached         http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
# hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root

# sub mycrc32 {  #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/  eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
#  my ($input, $init_value, $polynomial) = @_;
#  $init_value = 0 unless (defined $init_value);
#  $polynomial = 0xedb88320 unless (defined $polynomial);
#  my @lookup_table;
#  for (my $i=0; $i<256; $i++) {
#    my $x = $i;
#    for (my $j=0; $j<8; $j++) {
#      if ($x & 1) {
#        $x = ($x >> 1) ^ $polynomial;
#      } else {
#        $x = $x >> 1;
#      }
#    }
#    push @lookup_table, $x;
#  }
#  my $crc = $init_value ^ 0xffffffff;
#  foreach my $x (unpack ('C*', $input)) {
#    $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
#  }
#  $crc = $crc ^ 0xffffffff;
#  return $crc;
# }
#
# $maybe_valid_utf8 =~                   # https://stackoverflow.com/questions/11709410/regex-to-detect-invalid-utf-8-string
# m/\A(
#     [\x09\x0A\x0D\x20-\x7E]            # ASCII, or rather: [\x00-\x7F]
#   | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
#   |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
#   | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
#   |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
#   |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
#   | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
#   |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
#  )*\z/x;

=head1 HISTORY

Release history

 0.27  Feb 2020   Small fixes for some platforms

 0.26  Jan 2020   Convert subs: base bin2dec bin2hex bin2oct dec2bin dec2hex dec2oct
                  hex2bin hex2dec hex2oct oct2bin oct2dec oct2hex
                  Array subs: joinr perm permute permute_continue pile sortby subarrays
                  Other subs: btw in_iprange ipnum_ok iprange_ok opts s2t

 0.24  Feb 2019   fixed failes on perl 5.16 and older

 0.23  Jan 2019   Subs: logn, egrep, which. More UTF-8 "oriented" (lower, upper, ...)
                  Commands: zsize, finddup, due (improved), conv (improved, [MGT]?Wh
                  and many more units), due -M for stdin of filenames.

 0.22  Feb 2018   Subs: subarr, sim, sim_perm, aoh2sql. command: resubst

 0.21  Mar 2017   Improved nicenum() and its tests

 0.20  Mar 2017   Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
                  throttle timems refa refaa refah refh refha refhh refs
                  eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
                  Commands: 2bz2 2gz 2xz z2z

 0.172 Dec 2015   Subs: curb openstr pwgen sleepms sleepnm srlz tms username
                   self_update install_acme_command_tools
                  Commands: conv due freq wipe xcat (see "Commands")

 0.16  Feb 2015   bigr curb cpad isnum parta parth read_conf resolve_equation
                  roman2int trim. Improved: conv (numbers currency) range ("derivatives")

 0.15  Nov 2014   Improved doc
 0.14  Nov 2014   New subs, improved tests and doc
 0.13  Oct 2010   Non-linux test issue, resolve. improved: bloom filter, tests, doc
 0.12  Oct 2010   Improved tests, doc, bloom filter, random_gauss, bytes_readable
 0.11  Dec 2008   Improved doc
 0.10  Dec 2008

=head1 SEE ALSO

L<https://github.com/kjetillll/Acme-Tools>

=head1 AUTHOR

Kjetil Skotheim, E<lt>kjetil.skotheim@gmail.comE<gt>

=head1 COPYRIGHT

2008-2020, Kjetil Skotheim

=head1 LICENSE

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=cut

( run in 1.522 second using v1.01-cache-2.11-cpan-3fabe0161c3 )