Acme-Tools
view release on metacpan or search on metacpan
package Acme::Tools;
our $VERSION = '0.27';
use 5.008; #Perl 5.8 was released July 18th 2002
use strict;
use warnings;
use Carp; #todo: rid of deps, make own carp+croak here
require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( all => [ qw() ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
our @EXPORT = qw(
min
max
mins
maxs
sum
avg
geomavg
harmonicavg
stddev
rstddev
median
percentile
$Resolve_iterations
$Resolve_last_estimate
$Resolve_time
resolve
resolve_equation
conv
rank
rankstr
egrep
eqarr
sorted
sortedstr
sortby
subarrays
pushsort
pushsortstr
binsearch
binsearchstr
random
random_gauss
big
bigi
bigf
bigr
bigscale
nvl
repl
replace
decode
decode_num
between
btw
curb
bound
log10
log2
logn
distinct
in
in_num
uniq
union
union_all
minus
minus_all
intersect
intersect_all
not_intersect
mix
zip
sim
sim_perm
subarr
subhash
hashtrans
zipb64
zipbin
unzipb64
unzipbin
gzip
gunzip
bzip2
bunzip2
ipaddr
ipnum
ipnum_ok
iprange_ok
in_iprange
webparams
urlenc
urldec
ht2t
chall
makedir
qrlist
ansicolor
ccn_ok
KID_ok
writefile
readfile
readdirectory
basename
dirname
wipe
username
range
globr
permutations
perm
permute
permute_continue
trigram
sliding
chunks
chars
cart
reduce
sys
recursed
md5sum
pwgen
which
read_conf
openstr
printed
ldist
$Re_isnum
isnum
part
parth
parta
a2h
h2a
refa
refh
refs
refaa
refah
refha
refhh
pushr
popr
shiftr
unshiftr
splicer
keysr
valuesr
eachr
joinr
pile
aoh2sql
aoh2xls
base64
unbase64
opts
ed
changed
$Edcursor
brainfu
brainfu2perl
brainfu2perl_optimized
bfinit
bfsum
bfaddbf
bfadd
bfcheck
bfgrep
bfgrepnot
bfdelete
bfstore
bfretrieve
bfclone
bfdimensions
$PI
install_acme_command_tools
$Dbh
dlogin
dlogout
drow
drows
drowc
drowsc
dcols
dpk
dsel
ddo
dins
dupd
ddel
dcommit
drollback
);
our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';
=head1 NAME
Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace
=head1 SYNOPSIS
use Acme::Tools;
print sum(1,2,3); # 6
print avg(2,3,4,6); # 3.75
print median(2,3,4,6); # 3.5
print percentile(25, 101..199); # 125
my @list = minus(\@listA, \@listB); # set operation
my @list = union(\@listA, \@listB); # set operation
print length(gzip("abc" x 1000)); # far less than 3000
writefile("/dir/filename",$string); # convenient
my $s=readfile("/dir/filename"); # also convenient
print "yes!" if between($PI,3,4);
print percentile(0.05, @numbers);
my @even = range(1000,2000,2); # even numbers between 1000 and 2000
my @odd = range(1001,2001,2);
my $dice = random(1,6);
my $color = random(['red','green','blue','yellow','orange']);
pushr $arrayref[$num], @stuff; # push @{ $arrayref[$num] }, @stuff ... popr, shiftr, unshiftr
print 2**200; # 1.60693804425899e+60
print big(2)**200; # 1606938044258990275541962092341162602522202993782792835301376
...and much more.
=encoding utf8
=head1 ABSTRACT
About 120 more or less useful perl subroutines lumped together and exported into your namespace.
print bytes_readable(1001); # 0.98 kB
print bytes_readable(1024); # 1.00 kB
print bytes_readable(1153433.6); # 1.10 MB
print bytes_readable(1181116006.4); # 1.10 GB
print bytes_readable(1209462790553.6); # 1.10 TB
print bytes_readable(1088516511498.24*1000); # 990.00 TB
print bytes_readable(1088516511498.24*1000, 3); # 990.000 TB
print bytes_readable(1088516511498.24*1000, 1); # 990.0 TB
=cut
sub bytes_readable {
my $bytes=shift();
my $d=shift()||2; #decimals
return undef if !defined $bytes;
return "$bytes B" if abs($bytes) <= 2** 0*1000; #bytes
return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) < 2**10*1000; #kilobyte
return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) < 2**20*1000; #megabyte
return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) < 2**30*1000; #gigabyte
return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) < 2**40*1000; #terrabyte
return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
}
=head2 sec_readable
Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.
print sec_readable( 0 ); # 0s
print sec_readable( 0.0123 ); # 0.0123s
print sec_readable(-0.0123 ); # -0.0123s
print sec_readable( 1.23 ); # 1.23s
print sec_readable( 1 ); # 1s
print sec_readable( 9.87 ); # 9.87s
print sec_readable( 10 ); # 10s
print sec_readable( 10.1 ); # 10.1s
print sec_readable( 59 ); # 59s
print sec_readable( 59.123 ); # 59.1s
print sec_readable( 60 ); # 1m 0s
print sec_readable( 60.1 ); # 1m 0s
print sec_readable( 121 ); # 2m 1s
print sec_readable( 131 ); # 2m 11s
print sec_readable( 1331 ); # 22m 11s
print sec_readable(-1331 ); # -22m 11s
print sec_readable( 13331 ); # 3h 42m
print sec_readable( 133331 ); # 1d 13h
print sec_readable( 1333331 ); # 15d 10h
print sec_readable( 13333331 ); # 154d 7h
print sec_readable( 133333331 ); # 4yr 82d
print sec_readable( 1333333331 ); # 42yr 91d
=cut
sub sec_readable {
my $s=shift();
my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
!defined$s ? undef
:!length($s) ? ''
:$s<0 ? '-'.sec_readable(-$s)
:$s<60 && int($s)==$s
? $s."s"
:$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
:$s<3600 ? int($s/60)."m " .($s%60) ."s"
:$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
:$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
: int($s/$y)."yr ".int(($s%$y)/$d)."d";
}
=head2 int2roman
Converts integers to roman numbers.
B<Examples:>
print int2roman(1234); # prints MCCXXXIV
print int2roman(1971); # prints MCMLXXI
(Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)
I = 1
V = 5
X = 10
L = 50
C = 100 (centum)
D = 500
M = 1000 (mille)
See also L<Roman>.
See L<http://en.wikipedia.org/wiki/Roman_numbers> for more.
=head2 roman2int
roman2int("MCMLXXI") == 1971
=cut
#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
#see also t/17_roman.t sub int2roman_old
sub int2roman {
my $n=shift;
!defined$n ? undef
: !length($n) ? ""
: $n<0 ? "-".int2roman(-$n)
: int($n)!=$n ? croak"int2roman: $n is not an integer"
# : $] >= 5.014 ? #s///r modifier introduced in perl v5.14
# ("I" x $n)
# =~s,I{1000},M,gr #unnecessary, but speedup for n>1000
# =~s,I{100},C,gr #unnecessary, but speedup for n>100
# =~s,I{10},X,gr #unnecessary, but speedup for n>10
# =~s,IIIII,V,gr
# =~s,IIII,IV,gr
# =~s,VV,X,gr
# =~s,VIV,IX,gr
# =~s,XXXXX,L,gr
# =~s,XXXX,XL,gr
# =~s,LL,C,gr
# =~s,LXL,XC,gr
# =~s,CCCCC,D,gr
# =~s,CCCC,CD,gr
# =~s,DD,M,gr
# =~s,DCD,CM,gr
if (wantarray) { return (map Math::BigRat->new($_),@_) }
else { return Math::BigRat->new($_[0]) }
}
sub big {
wantarray
? (map /\./ ? bigf($_) : /\// ? bigr($_) : bigi($_), @_)
: $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
}
sub bigscale {
@_==1 or croak "bigscale requires one and only one argument";
my $scale=shift();
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
Math::BigInt->div_scale($scale);
Math::BigFloat->div_scale($scale);
Math::BigRat->div_scale($scale);
return;
}
#my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
#*)
# ( 6378157.5, 6356772.2 ) #hmm
#my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
#my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
#warn "e=$e\n";
#warn "t=".(1 - $e**2)."\n";
#warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
#my $t=1 - $e**2;
#my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
#warn "t=$t\n";
#warn "n=$n\n";
#$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
#$R=$a * $t/$n;
#=head2 fractional
#=cut
sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
carp "fractional: NOT FINISHED";
my $n=shift;
print "----fractional n=$n\n";
my $nn=$n; my $dec;
$nn=~s,\.(\d+)$,$dec=length($1);$1.,;
my $l;
my $max=0;
my($te,$ne);
for(1..length($nn)/2){
if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
print "_ = $_ ".length($2)."\n";
if(length($2)>$max){
$l=$_;
$te="$1$3"-$1;
$max=length($2);
}
}
}
return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
print "l=$l max=$max\n";
$ne="9" x $l;
print log($n),"\n";
my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
while($te/$n/$ne<0.99){ &$st(); $te*=10 }
&$st();
while(1){
my $d=gcd($te,$ne); print "gcd=$d\n";
last if $d==1;
$te/=$d; $ne/=$d;
}
&$st();
wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
}
=head2 isnum
B<Input:> String to be tested on this regexp:
C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>
If no argument is given isnum checks C<< $_ >>.
B<Output:> True or false (1 or 0)
use Acme::Tools;
my @e=(' +32.12354E-21 ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
print isnum() ? 'num' : 'str' for @e; #prints num for every element except the last two
print $_=~$Re_isnum ? 'num' : 'str' for @e; #same but slighhly faster
=cut
our $Re_isnum =qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
our $Re_isnum_wolz=qr/^ \s* [\-\+]? (?: ([1-9]\d*|0)?\.\d+ | [1-9]\d* | 0 ) (?:[eE][\-\+]?\d+)?\s*$/x; #without leading zero
sub isnum {(@_?$_[0]:$_)=~$Re_isnum}
=head2 between
Input: Three arguments.
Returns: Something I<true> if the first argument is numerically between the two next. Uses Perls C<< < >>, C<< >= >> and C<< <= >> operators.
=head2 btw
Like L<between> but instead of assuming numbers it checks all three input args
and does alphanumerical comparisons (with Perl operators C<lt>, C<ge> and C<le>) if any of the
three input args don't look like a number or look like a number but with
one or more leading zeros.
btw(1,1,10) #true numeric order since all three looks like number according to =~$Re_isnum
btw(1,'02',13) #true leading zero in '02' leads to alphabetical order
btw(10, 012,10) #true leading zero here means oct number, 012 = 10 (8*1+2), so 10 is btw 10 and 10
btw('003', '02', '09') #false because '003' lt '02'
btw('a', 'b', 'c') #false because 'a' lt 'b'
btw('a', 'B', 'c') #true because upper case letters comes before lower case ones in the "ascii alphabet"
btw('a', 'c', 'B') #true, btw() and between switches from and to if the first is > the second
btw( -1, -2, 1) #true
btw( -1, -2, 0) #true
Both between and btw returns C<undef> if any of the three input args are C<undef> (not defined).
If you're doing only numerical comparisons, using C<between> is faster than C<btw>.
=cut
sub between {
my($test ,$fom, $tom)=@_;
return if !defined$test or !defined$fom or !defined$tom;
$fom < $tom ? $test >= $fom && $test <= $tom : $test >= $tom && $test <= $fom;
}
sub btw {
my($test,$fom,$tom)=@_;
return if !defined$test or !defined$fom or !defined$tom;
$fom =~ $Re_isnum_wolz &&
$tom =~ $Re_isnum_wolz &&
$test=~ $Re_isnum_wolz
? $fom < $tom ? $test >= $fom && $test <= $tom : $test >= $tom && $test <= $fom
: $fom lt $tom ? $test ge $fom && $test le $tom : $test ge $tom && $test le $fom
}
=head2 curb
B<Input:> Three arguments: value, minumum, maximum.
B<Output:> Returns the value if its between the given minumum and maximum.
Returns minimum if the value is less or maximum if the value is more.
Changes the variable if 1st arg is a scalarref.
my $enthusiasm = 11;
print curb( $enthusiasm, 1, 20 ); # prints 11, within bounds
print curb( $enthusiasm, 1, 10 ); # prints 10
print curb( $enthusiasm, 20, 100 ); # prints 20
print curb(\$enthusiasm, 1, 10 ); # prints 10 and sets $enthusiasm = 10
print $enthusiasm; # prints 10
=cut
sub curb {
my($val,$min,$max)=@_;
# todo: undef min|max => dont curb min|max
croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
$val < $min ? $min :
$val > $max ? $max :
$val;
}
sub bound { curb(@_) }
=head2 log10
=head2 log2
=head2 logn
print log10(1000); # prints 3
print log10(10000*sqtr(10)); # prints 4.5
print log2(16); # prints 4
print logn(4096, 8); # prints 4 (12/3=4)
print logn($PI, 2.71828182845905); # same as print log($PI) using perls builtin log()
=cut
sub log10 { log($_[0]) / log(10) }
sub log2 { log($_[0]) / log(2) }
sub logn { log($_[0]) / log($_[1]) }
=head1 STRINGS
=head2 upper
=head2 lower
Returns input string as uppercase or lowercase.
Can be used if Perls build in C<uc()> and C<lc()> for some reason does not convert æøå or other latin1 letters outsize a-z.
Converts C<< æøåäëïöüÿâêîôûãõà èìòùáéÃóúýñð >> to and from C<< ÃÃÃ
ÃÃÃÃÃ?ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ >>
See also C<< perldoc -f uc >> and C<< perldoc -f lc >>
=head2 trim
Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:
trim(" asdf \t\n 123 ") eq "asdf 123"
trim(" asdf\t\n 123\n") eq "asdf\t123"
Works on C<< $_ >> if no argument i given:
print join",", map trim, " please ", " remove ", " my ", " spaces "; # please,remove,my,spaces
print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
my $s=' please '; trim(\$s); # now $s eq 'please'
trim(\@untrimmedstrings); # trims array strings inplace
@untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
trim(\$_) for @untrimmedstrings; # same, works on \$_
=head2 lpad
=head2 rpad
Left or right pads a string to the given length by adding one or more spaces at the end for I<rpad> or at the start for I<lpad>.
B<Input:> First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
Default is space.
rpad('gomle',9); # 'gomle '
lpad('gomle',9); # ' gomle'
rpad('gomle',9,'-'); # 'gomle----'
lpad('gomle',9,'+'); # '++++gomle'
rpad('gomle',4); # 'goml'
lpad('gomle',4); # 'goml'
rpad('gomle',7,'xyz'); # 'gomlxy'
lpad('gomle',10,'xyz'); # 'xyzxygoml'
=head2 cpad
Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
cpad('mat',5) eq ' mat '
cpad('mat',4) eq 'mat '
cpad('mat',6) eq ' mat '
cpad('mat',9) eq ' mat '
cpad('mat',5,'+') eq '+mat+'
cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
for my $r (@$aoh){
my $v=join",",map &$val($$r{$_},$t{$_}), @col;
$sql.="insert into $conf{name} values ($v);\n";
}
$sql="drop table $conf{name};\n\n$sql" if $conf{drop}==1;
$sql="drop table if exists $conf{name};\n\n$sql" if $conf{drop}>=2;
$sql="$conf{begin}\n$sql" if $conf{begin};
$sql.=$conf{end};
$sql;
}
sub aoh2xls { croak "Not implemented yet: aoh2xls" }
=head1 STATISTICS
=head2 sum
Returns the sum of a list of numbers. Undef is ignored.
print sum(1,3,undef,8); # 12
print sum(1..1000); # 500500
print sum(undef); # undef
=cut
sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }
=head2 avg
Returns the I<average> number of a list of numbers. That is C<sum / count>
print avg( 2, 4, 9); # 5 (2+4+9) / 3 = 5
print avg( [2, 4, 9] ); # 5 pass by reference, same result but faster for large arrays
Also known as I<arithmetic mean>.
Pass by reference: If one argument is given and it is a reference to an array,
this array is taken as the list of numbers. This mode is about twice as fast
for 10000 numbers or more. It most likely also saves memory.
=cut
sub avg {
my($sum,$n,@a)=(0,0);
no warnings;
if( @_==0 ) { return undef }
if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
else { @a=grep defined,@_ }
if( @a==0 ) { return undef }
$sum+=$_ for @a;
return $sum/@a
}
=head2 geomavg
Returns the I<geometric average> (a.k.a I<geometric mean>) of a list of numbers.
print geomavg(10,100,1000,10000,100000); # 1000
print 0+ (10*100*1000*10000*100000) ** (1/5); # 1000 same thing
print exp(avg(map log($_),10,100,1000,10000,100000)); # 1000 same thing, this is how geomavg() works internally
=cut
sub geomavg { exp(avg(map log($_), @_)) }
=head2 harmonicavg
Returns the I<harmonic average> (a.k.a I<geometric mean>) of a list of numbers. L<http://en.wikipedia.org/wiki/Harmonic_mean>
print harmonicavg(10,11,12); # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337
=cut
sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }
=head2 variance
C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>
=cut
sub variance {
my $sumx2; $sumx2+=$_*$_ for @_;
my $sumx; $sumx+=$_ for @_;
(@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
}
=head2 stddev
C<< Standard_Deviation = sqrt(variance) >>
Standard deviation (stddev) is a measurement of the width of a normal
distribution where one stddev on each side of the mean covers 68% and
two stddevs 95%. Normal distributions are sometimes called Gauss curves
or Bell shapes. L<https://en.wikipedia.org/wiki/Standard_deviation>
stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4) # = 1.0914103126635
avg(@testscores) + stddev(@testscores) # = the score for one stddev above avg, 115
avg(@testscores) - stddev(@testscores) # = the score for one stddev below avg, 85
=cut
sub stddev {
return undef if @_==0;
return stddev(\@_) if @_>0 and !ref($_[0]);
my $ar=shift;
return undef if @$ar==0;
return 0 if @$ar==1;
my $sumx2; $sumx2 += $_*$_ for @$ar;
my $sumx; $sumx += $_ for @$ar;
sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
}
=head2 rstddev
Relative stddev = stddev / avg
=cut
sub rstddev { stddev(@_) / avg(@_) }
=head2 median
Returns the median value of a list of numbers. The list do not have to
70 - 71 8202 ========
72 - 73 10577 ==========
74 - 75 13319 =============
76 - 77 16283 ================
78 - 79 20076 ====================
80 - 81 23742 =======================
82 - 83 27726 ===========================
84 - 85 32205 ================================
86 - 87 36577 ====================================
88 - 89 40684 ========================================
90 - 91 44515 ============================================
92 - 93 47575 ===============================================
94 - 95 50098 ==================================================
96 - 97 52062 ====================================================
98 - 99 53338 =====================================================
100 - 101 52834 ====================================================
102 - 103 52185 ====================================================
104 - 105 50472 ==================================================
106 - 107 47551 ===============================================
108 - 109 44471 ============================================
110 - 111 40704 ========================================
112 - 113 36642 ====================================
114 - 115 32171 ================================
116 - 117 28166 ============================
118 - 119 23618 =======================
120 - 121 19873 ===================
122 - 123 16360 ================
124 - 125 13452 =============
126 - 127 10575 ==========
128 - 129 8283 ========
130 - 131 6224 ======
132 - 133 4661 ====
134 - 135 3527 ===
136 - 137 2516 ==
138 - 139 1833 =
140 - 141 1327 =
142 - 143 860
144 - 145 604
146 - 147 428
148 - 149 275
150 - 151 184
152 - 153 111
154 - 155 67
=cut
sub random_gauss {
my($avg,$stddev,$num)=@_;
$avg=0 if !defined $avg;
$stddev=1 if !defined $stddev;
$num=1 if !defined $num;
croak "random_gauss should not have more than 3 arguments" if @_>3;
my @r;
while (@r<$num) {
my($x1,$x2,$w);
do {
$x1=2.0*rand()-1.0;
$x2=2.0*rand()-1.0;
$w=$x1*$x1+$x2*$x2;
} while $w>=1.0;
$w=sqrt(-2.0*log($w)/$w) * $stddev;
push @r, $x1*$w + $avg,
$x2*$w + $avg;
}
pop @r if @r > $num;
return $r[0] if @_<3;
return @r;
}
=head2 mix
Mixes an array in random order. In-place if given an array reference or not if given an array.
C<mix()> could also have been named C<shuffle()>, as in shuffling a deck of cards.
Example:
This:
print mix("a".."z"),"\n" for 1..3;
...could write something like:
trgoykzfqsduphlbcmxejivnwa
qycatilmpgxbhrdezfwsovujkn
ytogrjialbewcpvndhkxfzqsmu
B<Input:>
=over 4
=item 1.
Either a reference to an array as the only input. This array will then be mixed I<in-place>. The array will be changed:
This: C<< @a=mix(@a) >> is the same as: C<< mix(\@a) >>.
=item 2.
Or an array of zero, one or more elements.
=back
Note that an input-array which COINCIDENTLY SOME TIMES has one element
(but more other times), and that element is an array-ref, you will
probably not get the expected result.
To check distribution:
perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n
The letters a-z should occur around 1000 times each.
Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)
perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'
(Uses L</cart>, which is not a typo, see further down here)
Note: C<List::Util::shuffle()> is approximately four times faster. Both respects the Perl built-in C<srand()>.
=cut
C<gzip()> is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
distros. Except C<gzip()> does this in-memory. (Both using the C-library C<zlib>).
writefile( "file.gz", gzip("some string") );
=head2 gunzip
B<Input:> A binary compressed string or a reference to such a string. I.e. something returned from
C<gzip()> earlier or read from a C<< .gz >> file.
B<Output:> The original larger non-compressed string. Text or binary.
C<gunzip()> is a wrapper for Compress::Zlib::memGunzip()
print gunzip( gzip("some string") ); #some string
=head2 bzip2
Same as L</gzip> and L</gunzip> except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.
Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.
See also L<Compress::Bzip2>, C<man Compress::Bzip2>, C<man bzip2>, C<man bunzip2>.
writefile( "file.bz2", bzip2("some string") );
print bunzip2( bzip2("some string") ); #some string
=head2 bunzip2
Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L</bzip2>.
=cut
sub gzip { my $s=shift; eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGzip( ref($s)?$s:\$s ) }
sub gunzip { my $s=shift; eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGunzip( ref($s)?$s:\$s ) }
sub bzip2 { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip( ref($s)?$s:\$s ) }
sub bunzip2 { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }
=head1 NET, WEB, CGI-STUFF
=head2 ipaddr
B<Input:> an IP-number
B<Output:> either an IP-address I<machine.sld.tld> or an empty string
if the DNS lookup didn't find anything.
Example:
perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")' # prints www.uio.no
Uses perls C<gethostbyaddr> internally.
C<ipaddr()> memoizes the results internally (using the
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
particular IP number might take some time.
Some few DNS loopups can take several seconds.
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
probably turn off hostname lookups in their logs and just log IP numbers by using
C<HostnameLookups Off> in Apache C<httpd.conf> and then use I<ipaddr> afterwards if necessary.
=cut
our %IPADDR_memo;
sub ipaddr {
my $ipnr=shift;
#hm, NOTE: The 2 parameter on the next code line is not 2 for all OSes,
#but seems to work in Linux and HPUX. Den correct way is to use the
#AF_INET constant in the Socket or the IO::Socket package.
return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
}
=head2 ipnum
C<ipnum()> does the opposite of C<ipaddr()>
Does an attempt of converting an IP address (hostname) to an IP number.
Uses DNS name servers via perls internal C<gethostbyname()>.
Return empty string (undef) if unsuccessful.
print ipnum("www.uio.no"); # prints 129.240.13.152
Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.
=cut
our %IPNUM_memo;
sub ipnum {
my $ipaddr=shift;
#croak "No $ipaddr" if !length($ipaddr);
return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
my $h=gethostbyname($ipaddr);
#croak "No ipnum for $ipaddr" if !$h;
return if !defined $h;
my $ipnum = join(".",unpack("C4",$h));
$IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
return $IPNUM_memo{$ipaddr};
}
our $Ipnum_errmsg;
our $Ipnum;
sub ipnum_ok {
my $ipnum=shift;
$Ipnum=undef;
eval{
die "malformed ipnum $ipnum\n" if not $ipnum=~/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
die "invalid ipnum $ipnum\n" if grep$_>255,$1,$2,$3,$4;
$Ipnum=$1*256**3 + $2*256**2 + $3*256 + $4;
};
my$r=($Ipnum_errmsg=$@) ? 0 : 1;
$r
}
our $Iprange_errmsg;
our $Iprange_start;
sub iprange_ok {
my $iprange=shift;
$Iprange_start=undef;
my($r,$m);
eval{
elsif(ref($text) eq 'SCALAR'){
print WRITEFILE $$text;
}
elsif(ref($text) eq 'ARRAY'){
print WRITEFILE "$_\n" for @$text;
}
else {
croak;
}
close(WRITEFILE);
return;
}
=head2 readfile
Just as with L</writefile> you can read in a whole file in one operation with C<readfile()>. Instead of:
open my $FILE,'<', $filename or die $!;
my $data = join"",<$FILE>;
close($FILE);
This is simpler:
my $data = readfile($filename);
B<More examples:>
Reading the content of the file to a scalar variable: (Any content in C<$data> will be overwritten)
my $data;
readfile('filename.txt',\$data);
Reading the lines of a file into an array:
my @lines;
readfile('filnavn.txt',\@lines);
for(@lines){
...
}
Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
If C<@lines> is non-empty, this will be lost.
Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
The last example can be rewritten:
for(readfile('filnavn.txt')){
...
}
With two input arguments, nothing (undef) is returned from C<readfile()>.
Automatic decompression:
my $txt = readfile('file.txt.gz'); #uses /bin/gunzip to decompress content
Extentions C<.gz>, C<.bz2> and C<.xz> are recognized for decompression. See also C<writefile()> and C<openstr()>.
=cut
#http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
#todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)
sub readfile {
my($filename,$ref)=@_;
if(@_==1){
if(wantarray){ my @data; readfile($filename,\@data); return @data }
else { my $data; readfile($filename,\$data); return $data }
}
else {
open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
if ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
close($fh);
return;#?
}
}
=head2 readdirectory
B<Input:>
Name of a directory.
B<Output:>
A list of all files in it, except of C<.> and C<..> (on linux/unix systems, all directories have a C<.> and C<..> directory).
The names of all types of files are returned: normal files, directories, symbolic links,
pipes, semaphores. That is every thing shown by C<ls -la> except C<.> and C<..>
C<readdirectory> do not recurce down into subdirectories (but see example below).
B<Example:>
my @files = readdirectory("/tmp");
B<Why readdirectory?>
Sometimes calling the built ins C<opendir>, C<readdir> and C<closedir> seems a tad tedious, since this:
my $dir="/usr/bin";
opendir(D,$dir);
my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
closedir(D);
Is the same as this:
my @files=readdirectory("/usr/bin");
See also: L<File::Find>
B<Why not readdirectory?>
On huge directories with perhaps tens or houndreds of thousands of
files, readdirectory() will consume more memory than perls
opendir/readdir. This isn't usually a concern anymore for modern
computers with gigabytes of RAM, but might be the rationale behind
Perls more tedious way created in the 80s. The same argument goes for
file slurping. On the other side it's also a good practice to never
A more perl-ish and often faster way of doing the same:
{123=>3, 214=>7}->{$a} || $a # (beware of 0)
=cut
sub decode {
croak "Must have a mimimum of two arguments" if @_<2;
my $uttrykk=shift;
if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
else { !defined shift and return shift or shift for 1..@_/2 }
return shift;
}
sub decode_num {
croak "Must have a mimimum of two arguments" if @_<2;
my $uttrykk=shift;
if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
else { !defined shift and return shift or shift for 1..@_/2 }
return shift;
}
=head2 qrlist
Input: An array of values to be used to test againts for existence.
Output: A reference to a regular expression. That is a C<qr//>
The regex sets $1 if it match.
Example:
my @list=qw/ABc XY DEF DEFG XYZ/;
my $filter=qrlist("ABC","DEF","XY."); # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
my @filtered= grep { $_ =~ $filter } @list; # returns DEF and XYZ, but not XYZ because the . char is taken literally
Note: Filtering with hash lookups are WAY faster.
Source:
sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }
=cut
sub qrlist (@) {
my $str=join"|",map quotemeta,@_;
return qr/^($str)$/;
}
=head2 ansicolor
Perhaps easier to use than L<Term::ANSIColor> ?
B<Input:> One argument. A string where the char C<¤> have special
meaning and is replaced by color codings depending on the letter
following the C<¤>.
B<Output:> The same string, but with C<¤letter> replaced by ANSI color
codes respected by many types terminal windows. (xterm, telnet, ssh,
telnet, rlog, vt100, cygwin, rxvt and such...).
B<Codes for ansicolor():>
¤r red
¤g green
¤b blue
¤y yellow
¤m magenta
¤B bold
¤u underline
¤c clear
¤¤ reset, quits and returns to default text color.
B<Example:>
print ansicolor("This is maybe ¤ggreen¤¤?");
Prints I<This is maybe green?> where the word I<green> is shown in green.
If L<Term::ANSIColor> is not installed or not found, returns the input
string with every C<¤> including the following code letters
removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
not installed, you just don't get the colors).
See also L<Term::ANSIColor>.
=cut
sub ansicolor {
my $txt=shift;
eval{require Term::ANSIColor} or return replace($txt,qr/¤./);
my %h=qw/r red g green b blue y yellow m magenta B bold u underline c clear ¤ reset/;
my $re=join"|",keys%h;
$txt=~s/¤($re)/Term::ANSIColor::color($h{$1})/ge;
return $txt;
}
=head2 ccn_ok
Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
This method of control digits is used by MasterCard, Visa, American Express,
Discover, Diners Club / Carte Blanche, JCB and others.
B<Input:>
A credit card number. Can contain non-digits, but they are removed internally before checking.
B<Output:>
Something true or false.
Or more accurately:
Returns C<undef> (false) if the input argument is missing digits.
Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.
Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.
The name of the credit card company is returned like this (without the C<'> character)
In algorithmic terms the number of bits needed is C<ln of ln of n>. Thats why 4 bits (counters up
to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
(Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
check for that, perhaps in future versions).
Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
a counting bloom filter is used to be sent over slow networks because they are very compressable by
common compression tools like I<gzip> or L<Compress::Zlib> and such.
Deletion of non-existing keys makes C<bfdelete> die (croak).
=head2 bfdelete
Deletes from a counting bloom filter:
bfdelete($bf, @keys);
bfdelete($bf, \@keys);
Returns C<$bf> after deletion.
Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.
=head2 bfaddbf
Adds another bloom filter to a bloom filter.
Bloom filters has the proberty that bit-wise I<OR>-ing the bit-filters
of two filters with the same capacity and the same number and type of
hash functions, adds the filters:
my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);
bfaddbf($bf1,$bf2);
print "Yes!" if bfgrep($bf1, 1..1000) == 1000;
Prints yes since C<bfgrep> now returns an array of all the 1000 elements.
Croaks if the filters are of different dimensions.
Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)
=head2 bfsum
Returns the number of 1's in the filter.
my $percent=100*bfsum($bf)/$$bf{filterlength};
printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
Sums the counters for counting bloom filters (much slower than for non counting).
=head2 bfdimensions
Input, two numeric arguments: Capacity and error_rate.
Outputs an array of two numbers: m and k.
m = - n * log(p) / log(2)**2 # n = capacity, m = bits in filter (divide by 8 to get bytes)
k = log(1/p) / log(2) # p = error_rate, uses perls internal log() with base e (2.718)
...that is: m = the best number of bits in the filter and k = the best
number of hash functions optimized for the given capacity (n) and
error_rate (p). Note that k is a dependent only of the error_rate. At
about two percent error rate the bloom filter needs just the same
number of bytes as the number of keys.
Storage (bytes):
Capacity Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
0.000000001 0.00000001 0.0000001 0.000001 0.00001 0.0001 0.001 0.01 0.02141585 0.1 0.5 0.99
------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
10 54.48 48.49 42.5 36.51 30.52 24.53 18.53 12.54 10.56 6.553 2.366 0.5886
100 539.7 479.8 419.9 360 300.1 240.2 180.3 120.4 100.6 60.47 18.6 0.824
1000 5392 4793 4194 3595 2996 2397 1798 1199 1001 599.6 180.9 3.177
10000 5.392e+04 4.793e+04 4.194e+04 3.594e+04 2.995e+04 2.396e+04 1.797e+04 1.198e+04 1e+04 5991 1804 26.71
100000 5.392e+05 4.793e+05 4.193e+05 3.594e+05 2.995e+05 2.396e+05 1.797e+05 1.198e+05 1e+05 5.991e+04 1.803e+04 262
1000000 5.392e+06 4.793e+06 4.193e+06 3.594e+06 2.995e+06 2.396e+06 1.797e+06 1.198e+06 1e+06 5.991e+05 1.803e+05 2615
10000000 5.392e+07 4.793e+07 4.193e+07 3.594e+07 2.995e+07 2.396e+07 1.797e+07 1.198e+07 1e+07 5.991e+06 1.803e+06 2.615e+04
100000000 5.392e+08 4.793e+08 4.193e+08 3.594e+08 2.995e+08 2.396e+08 1.797e+08 1.198e+08 1e+08 5.991e+07 1.803e+07 2.615e+05
1000000000 5.392e+09 4.793e+09 4.193e+09 3.594e+09 2.995e+09 2.396e+09 1.797e+09 1.198e+09 1e+09 5.991e+08 1.803e+08 2.615e+06
10000000000 5.392e+10 4.793e+10 4.193e+10 3.594e+10 2.995e+10 2.396e+10 1.797e+10 1.198e+10 1e+10 5.991e+09 1.803e+09 2.615e+07
100000000000 5.392e+11 4.793e+11 4.193e+11 3.594e+11 2.995e+11 2.396e+11 1.797e+11 1.198e+11 1e+11 5.991e+10 1.803e+10 2.615e+08
1000000000000 5.392e+12 4.793e+12 4.193e+12 3.594e+12 2.995e+12 2.396e+12 1.797e+12 1.198e+12 1e+12 5.991e+11 1.803e+11 2.615e+09
Error rate: 0.99 Hash functions: 1
Error rate: 0.5 Hash functions: 1
Error rate: 0.1 Hash functions: 3
Error rate: 0.0214158522653385 Hash functions: 6
Error rate: 0.01 Hash functions: 7
Error rate: 0.001 Hash functions: 10
Error rate: 0.0001 Hash functions: 13
Error rate: 0.00001 Hash functions: 17
Error rate: 0.000001 Hash functions: 20
Error rate: 0.0000001 Hash functions: 23
Error rate: 0.00000001 Hash functions: 27
Error rate: 0.000000001 Hash functions: 30
=head2 bfstore
Storing and retrieving bloom filters to and from disk uses L<Storable>s C<store> and C<retrieve>. This:
bfstore($bf,'filename.bf');
It the same as:
use Storable qw(store retrieve);
...
store($bf,'filename.bf');
=head2 bfretrieve
This:
my $bf=bfretrieve('filename.bf');
Or this:
my $bf=bfinit('filename.bf');
Is the same as:
Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)
This:
my $bfc = bfclone($bf);
Works just as:
use Storable;
my $bfc=Storable::dclone($bf);
=head2 Object oriented interface to bloom filters
use Acme::Tools;
my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
print ref($bf),"\n"; # prints Acme::Tools::BloomFilter
$bf->add(@keys);
$bf->check($keys[0]) and print "ok\n"; # prints ok
$bf->grep(\@keys)==@keys and print "ok\n"; # prints ok
$bf->store('filename.bf');
my $bf2=bfretrieve('filename.bf');
$bf2->check($keys[0]) and print "ok\n"; # still ok
$bf2=$bf->clone();
To instantiate a previously stored bloom filter:
my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );
The o.o. interface has the same methods as the C<bf...>-subs without the
C<bf>-prefix in the names. The C<bfretrieve> is not available as a
method, although C<bfretrieve>, C<Acme::Tools::bfretrieve> and
C<Acme::Tools::BloomFilter::retrieve> are synonyms.
=head2 Internals and speed
The internal hash-functions are C<< md5( "$key$salt" ) >> from L<Digest::MD5>.
Since C<md5> returns 128 bits and most medium to large sized bloom
filters need only a 32 bit hash function, the result from md5() are
split (C<unpack>-ed) into 4 parts 32 bits each and are treated as if 4
hash functions was called at once (speedup). Using different salts to
the key on each md5 results in different hash functions.
Digest::SHA512 would have been even better since it returns more bits,
if it werent for the fact that it's much slower than Digest::MD5.
String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
=head2 Theory and math behind bloom filters
L<http://www.internetmathematics.org/volumes/1/4/Broder.pdf>
L<http://blogs.sun.com/jrose/entry/bloom_filters_in_a_nutshell>
L<http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html>
See also Scaleable Bloom Filters: L<http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf> (not implemented in Acme::Tools)
...and perhaps L<http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf>
=cut
sub bfinit {
return bfretrieve(@_) if @_==1;
return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
require Digest::MD5;
@_%2&&croak "Arguments should be a hash of equal number of keys and values";
my %arg=@_;
my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
my $bf={error_rate => 0.001, #default p
capacity => 100000, #default n
min_hashfuncs => 1,
max_hashfuncs => 100,
counting_bits => 1, #default: not counting filter
adaptive => 0,
%arg, #arguments
key_count => 0,
overflow => {},
version => $Acme::Tools::VERSION,
};
croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
@$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
@$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
$$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
$$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
:$$bf{filterlength}<=2**32/4 ? "N*"
: "Q*";
bfadd($bf,@{$arg{keys}}) if $arg{keys};
return $bf;
}
sub bfaddbf {
my($bf,$bf2)=@_;
my $differror=join"\n",
map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
grep $$bf{$_} ne $$bf2{$_},
qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
croak $differror if $differror;
croak "Can not add adaptive bloom filters" if $$bf{adaptive};
my $count=$$bf{key_count}+$$bf2{key_count};
croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
if $count > $$bf{capacity};
$$bf{key_count}+=$$bf2{key_count};
if($$bf{counting_bits}==1){
$$bf{filter} |= $$bf2{filter};
#$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
}
else {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
return grep {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
!$match;
} @$keysref;
}
sub bfdelete {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
for my $key (@$keysref){
my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
$$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
my($ones,$croak,@pos)=(0);
for(0..$k-1){
my $pos=$h[$_] % $m;
my $c=
vec($$bf{filter}, $pos, $cb);
vec($$bf{filter}, $pos, $cb)=$c-1;
$croak="Cannot delete a non-existing key $key" if $c==0;
$croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
if $c==1 and ++$ones and $$bf{overflow}{$pos};
}
if($croak){ #rollback
vec($$bf{filter}, $h[$_] % $m, $cb)=
vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
croak $croak;
}
}
return $bf;
}
sub bfstore {
require Storable;
Storable::store(@_);
}
sub bfretrieve {
require Storable;
my $bf=Storable::retrieve(@_);
carp "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
return $bf;
}
sub bfclone {
require Storable;
return Storable::dclone(@_); #could be faster
}
sub bfdimensions_old {
my($n,$p,$mink,$maxk, $k,$flen,$m)=
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
:@_==2 ? (@_,1,100,1)
: croak "Wrong number of arguments (".@_."), should be 2";
croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
$m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
$flen = int(1+$flen);
return ($flen,$k);
}
sub bfdimensions {
my($n,$p,$mink,$maxk)=
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
:@_==2 ? (@_,1,100)
: croak "Wrong number of arguments (".@_."), should be 2";
my $k=log(1/$p)/log(2); # k hash funcs
my $m=-$n*log($p)/log(2)**2; # m bits in filter
return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
}
#crontab -e
#01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1
sub _update_currency_file { #call from cron
my $fn=shift()||'/var/www/html/currency-rates';
my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or croak for values %exe;
open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
print $F "#-- Currency rates ".localtime()." (".time().")\n";
print $F "# File generated by Acme::Tools version $VERSION\n";
print $F "# Updated every 6th hour on http://calthis.com/currency-rates\n";
print $F "NOK 1.000000000\n";
my $amount=1000;
my $data=qx($exe{curl} -s "https://www.x-rates.com/table/?from=NOK&amount=$amount");
$data=~s,to=([A-Z]{3})(.)>,$2>$1</td><td>,g;
my @data=ht2t($data,"Alphabetical order"); shift @data;
@data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
my %data=map split,@data;
my $json=qx( $exe{curl} -s https://api.coinmarketcap.com/v1/ticker/ );
eval "require JSON;"; croak if $@;
my $arr=JSON::decode_json($json);
for my $c (qw(BTC LTC XBT ETH XRP BCH ETC)) {
my @a=grep$$_{symbol} eq $c,@$arr;
next if @a != 1 or !$a[0]{price_usd};
push @data, "$c ".($a[0]{price_usd}*$data{USD})."\n";
}
#die srlz(\@data,'data');
print $F sort(@data);
close($F);
qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
}
sub ftype {
my $f=shift;
-e $f and
-f$f ? 'file' # -f File is a plain file.
:-d$f ? 'dir' # -d File is a directory.
:-l$f ? 'symlink' # -l File is a symbolic link.
:-p$f ? 'pipe' # -p File is a named pipe (FIFO), or Filehandle is a pipe.
:-S$f ? 'socket' # -S File is a socket.
:-b$f ? 'blockfile' # -b File is a block special file.
:-c$f ? 'charfile' # -c File is a character special file.
:-t$f ? 'ttyfile' # -t Filehandle is opened to a tty.
: ''
or undef;
}
sub ext2mime {
my $ext=shift(); #or filename
#http://www.sitepoint.com/web-foundations/mime-types-complete-list/
croak "todo: ext2mime not yet implemented";
#return "application/json";#feks
}
sub base64 ($;$) { #
if ($] >= 5.006) {
require bytes;
croak "base64 failed: only defined for bytes"
}
$_=join",",@$_ for values %$hashref;
(@r,@a)
}
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp -
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp Tools.pm
sub cmd_zsize {
my %o;
my @argv=opts("heEpts",\%o,@_);
my $stdin=!@argv || join(",",@argv) eq '-';
@argv=("/tmp/acme-tools.$$.stdin") if $stdin;
writefile($argv[0],join("",<STDIN>)) if $stdin;
my @prog=grep qx(which $_), qw(gzip bzip2 xz zstd brotli);
for my $f (@argv){
my $sf=-s$f;
print "--- $f does not exists\n" and next if !-e$f;
print "--- $f is not a file\n" and next if !-f$f;
print "--- $f ($sf b) is not readable\n" and next if !-r$f;
print "--- $sf b ".bytes_readable($sf)." ".($stdin?"-":$f)."\n";
next if !$sf;
my(@t,@s);
for my $prog (@prog){
next if !qx(which $prog);
my @l=1..9;
push @l,map"e$_",1..9 if $prog eq 'xz' and $o{e};
@l=map"e$_",1..9 if $prog eq 'xz' and $o{E};
@l=map 10+$_,@l if $prog eq 'zstd';
@l=map"q $_",3..11 if $prog eq 'brotli';
printf "%-6s",$prog;
push @t, $prog, [] if $o{t};
push @s, $prog, [] if $o{p} and $o{s};
for my $l (@l){ #level
my $t=time_fp();
my $b=qx(cat $f|$prog -$l|wc -c);
push@{$t[-1]},time_fp()-$t if $o{t};
push@{$s[-1]},$b if $o{p} and $o{s};
$o{p} ? printf("%9.1f%% ",100*$b/$sf)
:$o{h} ? printf("%10s ",bytes_readable($b))
: printf("%10d ",$b);
}
print "\n";
}
while(@s){
printf "%-6s",shift@s;
$o{h}?printf("%10s ",bytes_readable($_)):printf("%10d ",$_) for @{shift@s}; print "\n";
}
while(@t){
printf "%-6s",shift@t;
printf "%9.3fs ",$_ for @{shift@t}; print "\n";
}
}
unlink $argv[0] if $stdin;
}
sub cmd_rttop { die "rttop: not implemented here yet.\n" }
sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
sub cmd_catal { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
#todo: cmd_tabdiff (fra sonyk)
#todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)
=head1 DATABASE STUFF - NOT IMPLEMENTED YET
Uses L<DBI>. Comming soon...
$Dbh
dlogin
dlogout
drow
drows
drowc
drowsc
dcols
dpk
dsel
ddo
dins
dupd
ddel
dcommit
drollback
=cut
#my$dummy=<<'SOON';
sub dtype {
my $connstr=shift;
return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
return 'Oracle' if $connstr=~/\@/;
return 'Pg' if 1==2;
die;
}
our($Dbh,@Dbh,%Sth);
our %Dbattr=(RaiseError => 1, AutoCommit => 0); #defaults
sub dlogin {
my $connstr=shift();
my %attr=(%Dbattr,@_);
my $type=dtype($connstr);
my($dsn,$u,$p)=('','','');
if($type eq 'SQLite'){
$dsn=$connstr;
}
elsif($type eq 'Oracle'){
($u,$p,$dsn)=($connstr=~m,(.+?)(/.+?)?\@(.+),);
}
elsif($type eq 'Pg'){
croak "todo";
}
else{
croak "dblogin: unknown database type for connection string $connstr\n";
}
$dsn="dbi:$type:$dsn";
push @Dbh, $Dbh if $Dbh; #local is better?
require DBI;
$Dbh=DBI->connect($dsn,$u,$p,\%attr); #connect_cached?
}
sub dlogout {
$Dbh->disconnect;
$Dbh=pop@Dbh if @Dbh;
}
sub drow {
my($q,@b)=_dattrarg(@_);
#my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
my $sth=$Dbh->prepare_cached($q);
$sth->execute(@b);
my @r=$sth->fetchrow_array;
$sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
#$dbh->selectrow_array($statement);
return @r==1?$r[0]:@r;
}
sub drows {
}
sub drowc {
}
sub drowsc {
}
sub dcols {
}
sub dpk {
}
sub dsel {
}
sub ddo {
my @arg=_dattrarg(@_);
#warn serialize(\@arg,'arg','',1);
$Dbh->do(@arg); #hm cache?
}
sub dins {
}
sub dupd {
}
sub ddel {
}
sub dcommit { $Dbh->commit }
sub drollback { $Dbh->rollback }
sub _dattrarg {
my @arg=@_;
splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
@arg;
}
=head2 self_update
Update Acme::Tools to newest version quick and dirty:
function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}
pmview Acme::Tools #view date and version before
sudo perl -MAcme::Tools -e Acme::Tools::self_update #update to newest version
pmview Acme::Tools #view date and version after
Does C<cd> to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm
TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl
=cut
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
}
1;
package Acme::Tools::BloomFilter;
use 5.008; use strict; use warnings; use Carp;
sub new { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
sub add { &Acme::Tools::bfadd }
sub addbf { &Acme::Tools::bfaddbf }
sub check { &Acme::Tools::bfcheck }
sub grep { &Acme::Tools::bfgrep }
sub grepnot { &Acme::Tools::bfgrepnot }
sub delete { &Acme::Tools::bfdelete }
sub store { &Acme::Tools::bfstore }
sub retrieve { &Acme::Tools::bfretrieve }
sub clone { &Acme::Tools::bfclone }
sub sum { &Acme::Tools::bfsum }
1;
# Ny versjon:
# - git clone https://github.com/kjetillll/Acme-Tools.git
# - c-s todo
# - endre $VERSION
# - endre Release history under HISTORY
# - endre årstall under =head1 COPYRIGHT
# - oppd default valutakurser inkl datoen
# - emacs Changes
# - emacs README versjon + aarstall
# - diff -byW200 <(grep -a ^sub Acme-Tools-0.22/Tools.pm|sort) <(grep -a ^sub Tools.pm|sort)|less
# - emacs MANIFEST legg til ev nye t/*.t
# - perl Makefile.PL && make test
# - /usr/bin/perl Makefile.PL && make test
# - perlbrew exec "perl Makefile.PL && time make test"
# - perlbrew exec "perl Makefile.PL && make test" | grep -P '^(perl-|All tests successful)'
# - perlbrew use perl-5.10.1; perl Makefile.PL && make test; perlbrew off
# - test evt i cygwin og mingw-perl
# - pod2html Tools.pm > Tools.html ; firefox Tools.html
# - https://metacpan.org/pod/Acme::Tools
# - http://cpants.cpanauthors.org/dist/Acme-Tools #kvalitee
# - perl Makefile.PL && make test && make dist
# - cp -p *tar.gz /htdocs/
# - #ci -l -mversjon -d `cat MANIFEST` #no
# - git add `cat MANIFEST`
# - git status
# - git commit -am versjon
# - git push #eller:
# - git push origin master
# - http://pause.perl.org/
# - tegnsett/utf8-kroell
# - https://rt.cpan.org/Dist/Display.html?Queue=Acme-Tools
# http://en.wikipedia.org/wiki/Birthday_problem#Approximations
# memoize_expire() http://perldoc.perl.org/Memoize/Expire.html
# memoize_file_expire()
# memoize_limit_size() #lru
# memoize_file_limit_size()
# memoize_memcached http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
# hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root
# sub mycrc32 { #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/ eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
# my ($input, $init_value, $polynomial) = @_;
# $init_value = 0 unless (defined $init_value);
# $polynomial = 0xedb88320 unless (defined $polynomial);
# my @lookup_table;
# for (my $i=0; $i<256; $i++) {
# my $x = $i;
# for (my $j=0; $j<8; $j++) {
# if ($x & 1) {
# $x = ($x >> 1) ^ $polynomial;
# } else {
# $x = $x >> 1;
# }
# }
# push @lookup_table, $x;
# }
# my $crc = $init_value ^ 0xffffffff;
# foreach my $x (unpack ('C*', $input)) {
# $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
# }
# $crc = $crc ^ 0xffffffff;
# return $crc;
# }
#
# $maybe_valid_utf8 =~ # https://stackoverflow.com/questions/11709410/regex-to-detect-invalid-utf-8-string
# m/\A(
# [\x09\x0A\x0D\x20-\x7E] # ASCII, or rather: [\x00-\x7F]
# | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
# | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
# | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
# | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
# | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
# | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
# | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
# )*\z/x;
=head1 HISTORY
Release history
0.27 Feb 2020 Small fixes for some platforms
0.26 Jan 2020 Convert subs: base bin2dec bin2hex bin2oct dec2bin dec2hex dec2oct
hex2bin hex2dec hex2oct oct2bin oct2dec oct2hex
Array subs: joinr perm permute permute_continue pile sortby subarrays
Other subs: btw in_iprange ipnum_ok iprange_ok opts s2t
0.24 Feb 2019 fixed failes on perl 5.16 and older
0.23 Jan 2019 Subs: logn, egrep, which. More UTF-8 "oriented" (lower, upper, ...)
Commands: zsize, finddup, due (improved), conv (improved, [MGT]?Wh
and many more units), due -M for stdin of filenames.
0.22 Feb 2018 Subs: subarr, sim, sim_perm, aoh2sql. command: resubst
0.21 Mar 2017 Improved nicenum() and its tests
0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
throttle timems refa refaa refah refh refha refhh refs
eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
Commands: 2bz2 2gz 2xz z2z
0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
self_update install_acme_command_tools
Commands: conv due freq wipe xcat (see "Commands")
0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
roman2int trim. Improved: conv (numbers currency) range ("derivatives")
0.15 Nov 2014 Improved doc
0.14 Nov 2014 New subs, improved tests and doc
0.13 Oct 2010 Non-linux test issue, resolve. improved: bloom filter, tests, doc
0.12 Oct 2010 Improved tests, doc, bloom filter, random_gauss, bytes_readable
0.11 Dec 2008 Improved doc
0.10 Dec 2008
=head1 SEE ALSO
L<https://github.com/kjetillll/Acme-Tools>
=head1 AUTHOR
Kjetil Skotheim, E<lt>kjetil.skotheim@gmail.comE<gt>
=head1 COPYRIGHT
2008-2020, Kjetil Skotheim
=head1 LICENSE
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
=cut
( run in 3.141 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )