Acme-Tools
view release on metacpan or search on metacpan
use Acme::Tools;
print sum(1,2,3); # 6
print avg(2,3,4,6); # 3.75
print median(2,3,4,6); # 3.5
print percentile(25, 101..199); # 125
my @list = minus(\@listA, \@listB); # set operation
my @list = union(\@listA, \@listB); # set operation
print length(gzip("abc" x 1000)); # far less than 3000
writefile("/dir/filename",$string); # convenient
my $s=readfile("/dir/filename"); # also convenient
print "yes!" if between($PI,3,4);
print percentile(0.05, @numbers);
my @even = range(1000,2000,2); # even numbers between 1000 and 2000
my @odd = range(1001,2001,2);
=head2 code2num
C<num2code()> convert numbers (integers) from the normal decimal system to some arbitrary other number system.
That can be binary (2), oct (8), hex (16) or others.
Example:
print num2code(255,2,"0123456789ABCDEF"); # prints FF
print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
...and 14 are converted to 0E, with leading 0 because of the second argument 2.
Example:
print num2code(1234,16,"01")
Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
To convert back:
print num2code("241274432",5,$chars); # prints EOOv0
print code2num("EOOv0",$chars); # prints 241274432
=cut
#Math::BaseCnv
sub num2code {
return num2code($_[0],0,$_[1]) if @_==2;
my($num,$digits,$validchars,$start)=@_;
my $l=length($validchars);
my $key;
$digits||=9e9;
no warnings;
croak if $num<$start;
$num-=$start;
for(1..$digits){
$key=substr($validchars,$num%$l,1).$key;
$num=int($num/$l);
last if $digits==9e9 and !$num;
}
croak if $num>0;
return $key;
}
sub code2num {
my($code,$validchars,$start)=@_; $start=0 if!defined$start;
my $l=length($validchars);
my $num=0;
$num=$num*$l+index($validchars,$_) for split//,$code;
return $num+$start;
}
=head2 base
Numbers in any number system of base between 2 and 36. Using capital letters A-Z for base higher than 10.
base(2,15) # 1111 2 --> binary
current: A, _A, N/m2
energy: BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
cal, calorie, calories, eV, electronvolt, BeV,
erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh, MWh, GWh, TWh
kcal, kilocalorie, kilocalories,
newtonmeter, newtonmeters, th, thermie
force: N, _N, dyn, dyne, dynes, lb, newton
length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
in, inch, inches, km, league, lightyear, ls, ly,
m, meter, meters, mi, mil, mile, miles,
nautical mile, nautical miles, nmi,
parsec, pc, planck, yard, yard_imperical, yd, Ã
, ångstrøm, angstrom
mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
See: L<http://en.wikipedia.org/wiki/Units_of_measurement>
=cut
#TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
#TODO: @arr2=map conv($_,"from","to"),@arr1
#TODO: conv(123456789,'b','h'); # h converts to something human-readable
our %conv=(
length=>{
m => 1,
_m => 1,
meter => 1,
meters => 1,
metre => 1,
metres => 1,
km => 1000,
mil => 10000, #scandinavian #also: inch/1000!
in => 0.0254,
inch => 0.0254,
lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
ls => 299792458, #light-second
au => 149597870700, # by def: meters earth to sun
astronomical_unit => 149597870700,
'astronomical unit' => 149597870700,
pc => 149597870700*648000/$PI, #3.0857e16 = 3.26156 ly
_pc => 149597870700*648000/$PI,
parsec => 149597870700*648000/$PI,
attoparsec => 149597870700*648000/$PI/1e18,
apc => 149597870700*648000/$PI/1e18,
planck => 1.61619997e-35, #planck length
#Norwegian (old) lengths:
tomme => 0.0254,
tommer => 0.0254,
fot => 0.0254*12, #0.3048m
alen => 0.0254*12*2, #0.6096m
favn => 0.0254*12*2*3, #1.8288m
kvart => 0.0254*12*2/4, #0.1524m a quarter alen
#--https://upload.wikimedia.org/wikipedia/commons/e/eb/English_length_units_graph.svg
twip => 0.0254 / 6 / 12 / 20,
point => 0.0254 / 6 / 12,
pica => 0.0254 / 6,
line => 0.0254 / 12,
thou => 0.0254 / 1000,
barleycorn => 0.0254 / 3,
poppyseed => 0.0254 / 3 / 4,
finger => 0.0254 / 6 / 12 * 63,
palm => 0.0254 * 3,
digit => 0.0254 * 3 / 4,
olympiad => 4 * 60*60*24*365.2425,
lustrum => 5 * 60*60*24*365.2425,
indiction => 15 * 60*60*24*365.2425,
jubilee => 50 * 60*60*24*365.2425,
century => 100 * 60*60*24*365.2425,
millennium => 1000 * 60*60*24*365.2425,
shake => 1e-8,
moment => 3600/40, #1/40th of an hour, used by Medieval Western European computists
ke => 864, #1/100th of a day, trad Chinese, 14m24s
fortnight => 14*24*3600,
tp => 5.3910632e-44, #planck time, time for ligth to travel 1 planck length
nanocentury => 100 * 60*60*24*365.2425 / 1e9, #3.156 ~ pi seconds, response time limit (usability)
warhol => 15*60, #"fifteen minutes of fame"
},
speed=>{
'm/s' => 1,
'_m/s' => 1,
mps => 1,
mph => 1609.344/3600,
'mi/h' => 1609.344/3600,
kmh => 1/3.6,
print sec_readable( 13333331 ); # 154d 7h
print sec_readable( 133333331 ); # 4yr 82d
print sec_readable( 1333333331 ); # 42yr 91d
=cut
sub sec_readable {
my $s=shift();
my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
!defined$s ? undef
:!length($s) ? ''
:$s<0 ? '-'.sec_readable(-$s)
:$s<60 && int($s)==$s
? $s."s"
:$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
:$s<3600 ? int($s/60)."m " .($s%60) ."s"
:$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
:$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
: int($s/$y)."yr ".int(($s%$y)/$d)."d";
}
roman2int("MCMLXXI") == 1971
=cut
#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
#see also t/17_roman.t sub int2roman_old
sub int2roman {
my $n=shift;
!defined$n ? undef
: !length($n) ? ""
: $n<0 ? "-".int2roman(-$n)
: int($n)!=$n ? croak"int2roman: $n is not an integer"
# : $] >= 5.014 ? #s///r modifier introduced in perl v5.14
# ("I" x $n)
# =~s,I{1000},M,gr #unnecessary, but speedup for n>1000
# =~s,I{100},C,gr #unnecessary, but speedup for n>100
# =~s,I{10},X,gr #unnecessary, but speedup for n>10
# =~s,IIIII,V,gr
# =~s,IIII,IV,gr
# =~s,VV,X,gr
# : $r=~s,^CD,,i ? 400+roman2int($r)
# : $r=~s,^C,,i ? 100+roman2int($r)
# : $r=~s,^XC,,i ? 90+roman2int($r)
# : $r=~s,^L,,i ? 50+roman2int($r)
# : $r=~s,^XL,,i ? 40+roman2int($r)
# : $r=~s,^X,,i ? 10+roman2int($r)
# : $r=~s,^IX,,i ? 9+roman2int($r)
# : $r=~s,^V,,i ? 5+roman2int($r)
# : $r=~s,^IV,,i ? 4+roman2int($r)
# : $r=~s,^I,,i ? 1+roman2int($r)
# : !length($r) ? 0
# : croak "Invalid roman number $r";
#}
=head2 distance
B<Input:> the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
B<Output:> the air distance in meters between the two points
Calculation is done using the Haversine Formula for spherical distance:
#$R=$a * $t/$n;
#=head2 fractional
#=cut
sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
carp "fractional: NOT FINISHED";
my $n=shift;
print "----fractional n=$n\n";
my $nn=$n; my $dec;
$nn=~s,\.(\d+)$,$dec=length($1);$1.,;
my $l;
my $max=0;
my($te,$ne);
for(1..length($nn)/2){
if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
print "_ = $_ ".length($2)."\n";
if(length($2)>$max){
$l=$_;
$te="$1$3"-$1;
$max=length($2);
}
}
}
return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
print "l=$l max=$max\n";
$ne="9" x $l;
print log($n),"\n";
my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
while($te/$n/$ne<0.99){ &$st(); $te*=10 }
print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
my $s=' please '; trim(\$s); # now $s eq 'please'
trim(\@untrimmedstrings); # trims array strings inplace
@untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
trim(\$_) for @untrimmedstrings; # same, works on \$_
=head2 lpad
=head2 rpad
Left or right pads a string to the given length by adding one or more spaces at the end for I<rpad> or at the start for I<lpad>.
B<Input:> First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
Default is space.
rpad('gomle',9); # 'gomle '
lpad('gomle',9); # ' gomle'
rpad('gomle',9,'-'); # 'gomle----'
lpad('gomle',9,'+'); # '++++gomle'
rpad('gomle',4); # 'goml'
lpad('gomle',4); # 'goml'
rpad('gomle',7,'xyz'); # 'gomlxy'
lpad('gomle',10,'xyz'); # 'xyzxygoml'
=head2 cpad
Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
cpad('mat',5) eq ' mat '
cpad('mat',4) eq 'mat '
cpad('mat',6) eq ' mat '
cpad('mat',9) eq ' mat '
cpad('mat',5,'+') eq '+mat+'
cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
=cut
return map trim($_), @_ if @_>1;
my $s=shift;
if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
$s=~s,^\s+|(?<=\s)\s+|\s+$,,g if defined $s;
$s;
}
sub rpad {
my($s,$l,$p)=@_;
$p=' ' if @_<3 or !length($p);
$s.=$p while length($s)<$l;
substr($s,0,$l);
}
sub lpad {
my($s,$l,$p)=@_;
$p=' ' if @_<3 or !length($p);
$l<length($s)
? substr($s,0,$l)
: substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
}
sub cpad {
my($s,$l,$p)=@_;
$p=' ' if @_<3 or !length($p);
my $ls=length($s);
return substr($s,0,$l) if $l<$ls;
$p=$p x (($l-$ls+2)/length($p));
substr($p, 0, ($l-$ls )/2) . $s .
substr($p, 0, ($l-$ls+1)/2);
}
sub cpad_old {
my($s,$l,$p)=@_;
$p=' ' if !length($p);
return substr($s,0,$l) if $l<length($s);
my $i=0;
while($l>length($s)){
my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
$i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
$i+=1/2;
}
$s;
}
=head2 trigram
B<Input:> A string (i.e. a name). And an optional x (see example 2)
=head2 chars
chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
=cut
sub trigram { sliding($_[0],$_[1]||3) }
sub sliding {
my($s,$w)=@_;
return map substr($s,$_,$w), 0..length($s)-$w if !ref($s);
return map [@$s[$_..$_+$w-1]], 0..@$s-$w if ref($s) eq 'ARRAY';
}
sub chunks {
my($s,$w)=@_;
return $s=~/(.{1,$w})/g if !ref($s);
return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w if ref($s) eq 'ARRAY';
}
sub chars { split//, shift }
defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
}
return $str;
}
=head1 ARRAYS
=head2 subarr
The equivalent of C<substr> on arrays or C<splice> without changing the array.
Input: 1) array or arrayref, 2) offset and optionally 3) length. Without a
third argument, subarr returns the rest of the array.
@top10 = subarr( @array, 0, 10); # first 10
@last_two = subarr( @array, -2, 2); # last 2
@last_two = subarr( $array_ref, -2); # also last 2
@last_six = subarr $array_ref, -6; # parens are optional
The same can be obtained from C<< @array[$from..$to] >> but that dont work the
same way with negative offsets and boundary control of length.
=cut
#Todo: sjekk paastand over
#sub subarr(+$;$) { #perl>=5.14 # t/35_subarr.t
sub subarr { #perl<5.14
my($a,$o,$l)=@_;
$o=@$a+$o if $o<0;
$l=@$a-$o if @_<3;
croak if $l<0;
$l=@$a-$o if $l>@$a-$o;
@$a[$o..$o+$l-1];
}
=head2 min
Returns the smallest number in a list. Undef is ignored.
@lengths=(2,3,5,2,10,undef,5,4);
$shortest = min(@lengths); # returns 2
Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C<max()>, except of course C<< > >> is used instead.
min(3,4,5) # 3
min(3,4,5,undef) # 3
min(3,4,5,'') # returns the empty string
=head2 max
Returns the largest number in a list. Undef is ignored.
either the same (or similar) email or phone number or zip code and similar enough
names are going on the list of probable doubles.
*) Todo: deal with initials better, should be higher than 0.78
=cut
sub sim_perm {
require String::Similarity;
my($s1,$s2)=map {s/^\s*(.+?)\s*$/$1/;$_} map upper($_), @_; #/r v5.14
croak if !length($s1) or !length($s2);
my $max;
for(cart([permutations(split(/[\s,]+/,$s1))],
[permutations(split(/[\s,]+/,$s2))])) {
my($n1,$n2)=@$_;
if(@$n1>@$n2){ pop@$n1 while @$n1>@$n2 }
else { pop@$n2 while @$n1<@$n2 }
my($str1,$str2)=map join(" ",@$_),($n1,$n2);
if(defined $max){
my $sim=String::Similarity::similarity($str1,$str2,$max);
$max=$sim if $sim>$max;
return wantarray ? @sort : $sort[$rank-1];
}
sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}
=head2 egrep
Extended grep.
Works like L<grep> but with more insight: local vars $i, $n, $prev, $next, $prevr and $nextr are available:
$i is the current index, starts with 0, ends with the length of the input array minus one
$n is the current element number, starts with 1, $n = $i + 1
$prev is the previous value (undef if current is first)
$next is the next value (undef if current is last)
$prevr is the previous value, rotated so that the previous of the first element is the last element
$nextr is the next value, rotated so that the next of the last element is the first element
%hash = ( T=>['These','the','this'],
A=>['are','array'],
O=>['of'],
W=>['words'] )
=head2 parta
Like L<parth> but returns an array of lists where the predicate returns an index number.
my @a = parta { length } qw/These are the words of this array/;
Result:
@a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
=cut
sub part (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
sub parth (&@) { my($c,%r)=(shift); push @{ $r{ &$c } }, $_ for @_; %r }
sub parta (&@) { my($c,@r)=(shift); push @{ $r[ &$c ] }, $_ for @_; @r }
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
=head2 refa
: croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
sub joinr {join(shift(),@{shift()})}
#sub mapr # som scala: hvis map faar subref se kalles den sub paa hvert elem og resultatet returneres
#sub eachr { each(%{shift()}) }
=head2 pile
B<Input:> a pile size s and a list
B<Output:> A list of lists of length s or the length of the remainer in
the last list. Piles together the input list in lists of the given size.
my @list=(1,2,3,4,5,6,7,8,9,10);
my @piles = pile(3, @list ); # ([1,2,3], [4,5,6], [7,8,9], [10])
my $i=0;
my @piles = parta {$i++/3} @list; # same as above pile(3, @list)
=cut
sub pile { my $size=shift; my @r; for (@_){ push@r,[] if !@r or 0+@{$r[-1]}>=$size; push @{$r[-1]}, $_ } @r }
my @col=sort keys %col;
my @colerr=grep!/^[a-z]\w+$/i,@col;
croak "Invalid column name(s): @colerr" if @colerr and !$conf{fix_colnames};
my(%t,%tdb);
for my $c (@col){
my($l,$s,$p,$nn,%ant,$t)=(0,0,0,0);
for my $r (@$aoh){
my $v=$$r{$c};
next if !defined$v or $v!~/\S/;
$nn++;
$l=length($v) if length($v)>$l;
no warnings 'uninitialized';
if($v=~/^(18|19|20)\d\d(0[1-9]|1[0-2])(0[1-9]|1\d|2\d|3[01])-?\d\d:?\d\d:?\d\d$/ and $conf{date}){
$ant{date}++;
next;
}
elsif($v=~/^\s*[-+]?(\d*)(\.\d+)?([Ee]\-?\d+)?\s*$/ and length("$1$2") and $conf{number}){
$ant{number}++;
$s=length("$1.$2") if length("$1.$2")>$s;#hm
$p=length($2)-1 if $2 and length($2)-1>$p;
next;
}
else {
$ant{varchar}++;
}
}
$t||='varchar' if $ant{varchar} or $ant{number} and $ant{date};
$t||='number' if $ant{number};
$t||='date' if $ant{date};
$t||='varchar'; #hm
$tdb="$conf{$t}($l)" if $t eq 'varchar';
$tdb="$conf{$t}($s)" if $t eq 'number' and $p==0;
$tdb="$conf{$t}($s,$p)" if $t eq 'number' and $p>0 and ++$s;
$tdb.=" not null" if $nn == 0+@$aoh;
$t{$c}=$t;
$tdb{$c}=$tdb;
}
my $sql;
$sql="create table $conf{name} (".
join(",",map sprintf("\n %-30s %s",do{s/\W+//g;$_},$tdb{$_}), @col). "\n);\n\n" if $conf{create};
my $val=sub{my($v,$t)=@_;defined$v or $v="";!length($v)?'null':$t eq 'number' ? $v : "'".repl($v,"\'","''")."'"};
for my $r (@$aoh){
my $v=join",",map &$val($$r{$_},$t{$_}), @col;
$sql.="insert into $conf{name} values ($v);\n";
}
$sql="drop table $conf{name};\n\n$sql" if $conf{drop}==1;
$sql="drop table if exists $conf{name};\n\n$sql" if $conf{drop}>=2;
$sql="$conf{begin}\n$sql" if $conf{begin};
$sql.=$conf{end};
$sql;
}
return @e;
}
}
=head2 pwgen
Generates random passwords.
B<Input:> 0-n args
* First arg: length of password(s), default 8
* Second arg: number of passwords, default 1
* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
...meaning the password should:
To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
own C<rand()> internally.
C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
Whenever one of the two limits is reached, a first generates a croak.
Examples:
my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
pwgen(3); # dies, defaults require chars in each of 4 group (see above)
pwgen(5,1,'A-C0-9', qr/^\D{3}\d{2}$/); # a 5 char string starting with three A, B or Cs and endring with two digits
pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
Examples of adding additional requirements to the default ones:
our $Pwgen_sec=0; #seconds used in last call to pwgen()
our $Pwgen_trials=0; #trials in last call to pwgen()
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
sub pwgen {
my($len,$num,$chars,@req)=@_;
$len||=8;
$num||=1;
$chars||='A-Za-z0-9,-./&%_!';
$req[0]||=\&pwgendefreq if !$_[2];
$chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
my($c,$t,@pw,$d)=(length($chars),time_fp());
($Pwgen_trials,$Pwgen_sec)=(0,0);
TRIAL:
while(@pw<$num){
croak "pwgen timeout after $Pwgen_trials trials"
if ++$Pwgen_trials >= $Pwgen_max_trials
or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
and $d!~/^\d+$/; #jic int from time_fp
my $pw=join"",map substr($chars,rand($c),1),1..$len;
for my $r (@req){
if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
=head1 COMPRESSION
L</zipb64>, L</unzipb64>, L</zipbin>, L</unzipbin>, L</gzip>, and L</gunzip>
compresses and uncompresses strings to save space in disk, memory,
database or network transfer. Trades time for space. (Beware of wormholes)
=head2 zipb64
Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
No known limit on input length, several MB has been tested, as long as you've got the RAM...
B<Input:> One or two strings.
First argument: The string to be compressed.
Second argument is optional: A I<dictionary> string.
B<Output:> a base64-kodet string of the compressed input.
The use of an optional I<dictionary> string will result in an even
The returned string is base64 encoded. That is, the output is 33%
larger than it has to be. The advantage is that this string more
easily can be stored in a database (without the hassles of CLOB/BLOB)
or perhaps easier transfer in http POST requests (it still needs some
url-encoding, normally). See L</zipbin> and L</unzipbin> for the
same without base 64 encoding.
Example 1, normal compression without dictionary:
$txt = "Test av komprimering, hva skjer? " x 10; # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
print length($txt)," bytes input!\n"; # prints 330
$zip = zipb64($txt); # compresses
print length($zip)," bytes output!\n"; # prints 65
print $zip; # prints the base64 string ("noise")
$output=unzipb64($zip); # decompresses
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
print length($output),"\n"; # prints 330
Example 2, same compression, now with dictionary:
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
$dict = "Testing av kompresjon, hva vil skje?"; # dictionary with certain similarities
# of the text to be compressed
$zip2 = zipb64($txt,$dict); # compressing with $dict as dictionary
print length($zip2)," bytes output!\n"; # prints 49, which is less than 65 in ex. 1 above
$output=unzipb64($zip2,$dict); # uses $dict in the decompressions too
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
Example 3, dictionary = string to be compressed: (out of curiosity)
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
$zip3 = zipb64($txt,$txt); # hmm
print length($zip3)," bytes output!\n"; # prints 25
print "Hurra\n" if unzipb64($zip3,$txt) eq $txt; # hipp hipp ...
zipb64() and zipbin() is really just wrappers around L<Compress::Zlib> and C<inflate()> & co there.
=cut
sub zipb64 {
require MIME::Base64;
return MIME::Base64::encode_base64(zipbin(@_));
}
print ipnum("www.uio.no"); # prints 129.240.13.152
Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.
=cut
our %IPNUM_memo;
sub ipnum {
my $ipaddr=shift;
#croak "No $ipaddr" if !length($ipaddr);
return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
my $h=gethostbyname($ipaddr);
#croak "No ipnum for $ipaddr" if !$h;
return if !defined $h;
my $ipnum = join(".",unpack("C4",$h));
$IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
return $IPNUM_memo{$ipaddr};
}
our $Ipnum_errmsg;
sub webparams {
my $query=shift();
$query=$ENV{QUERY_STRING} if !defined $query;
if(!defined $query and $ENV{REQUEST_METHOD} eq "POST"){
read(STDIN,$query , $ENV{CONTENT_LENGTH});
$ENV{QUERY_STRING}=$query;
}
my %R;
for(split("&",$query)){
next if !length($_);
my($nkl,$verdi)=map urldec($_),split("=",$_,2);
$R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
}
return %R;
}
=head2 urlenc
Input: a string
=head2 username
Returns the current linux/unix username, for example the string root
print username(); #just (getpwuid($<))[0] but more readable perhaps
=cut
sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
sub dirname { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
sub username { (getpwuid($<))[0] }
=head2 wipe
Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
B<Input:>
* Arg 1: A filename
* Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
* Optional arg 3: keep (true/false), wipe() but no delete of file
my($section,@l)=('',split"\n",$conf);
while(@l) {
my $l=shift@l;
if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
$section=$1;
$$hr{$1}||={};
}
elsif( $l=~/^\s*([^\:\=]+?)\s*[:=]\s*(.*?)\s*$/ ) {
my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
my $v=&$ml($2);
$$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
$$hr{$1}=$v if !length($section);
}
}
%$hr;
}
# my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
# s,<INCLUDE ([^>]+)>,"".readfile(&$incfn($1)),eg; #todo
=head2 openstr
'Dg' => [6, 'SÃn','Man','Tir','Ons','Tor','Fre','Lør'],
'dg' => [6, 'søn','man','tir','ons','tor','fre','lør'],
);
my $_tms_inited=0;
sub tms_init {
return if $_tms_inited++;
for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
$Tms_str{$_}=$Tms_str{replace($_,"aa","Ã¥","AA","Ã
")};
}
$Tms_pattern=join("|",map{quotemeta($_)}
sort{length($b)<=>length($a)}
keys %Tms_str);
#without sort "måned" could be "mared" because "mån"=>"mar"
}
sub totime {
}
=head2 s2t
if it can be divided by 400.
=cut
sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool
#http://rosettacode.org/wiki/Levenshtein_distance#Perl
our %ldist_cache;
sub ldist {
my($s,$t,$l) = @_;
return length($t) if !$s;
return length($s) if !$t;
%ldist_cache=() if !$l and 1000<0+%ldist_cache;
$ldist_cache{$s,$t} ||=
do {
my($s1,$t1) = ( substr($s,1), substr($t,1) );
substr($s,0,1) eq substr($t,0,1)
? ldist($s1,$t1)
: 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
};
}
=head1 OTHER
=head2 nvl
The I<no value> function (or I<null value> function)
C<nvl()> takes two or more arguments. (Oracles nvl-function take just two)
Returns the value of the first input argument with length() > 0.
Return I<undef> if there is no such input argument.
In perl 5.10 and perl 6 this will most often be easier with the C< //
> operator, although C<nvl()> and C<< // >> treats empty strings C<"">
differently. Sub nvl here considers empty strings and undef the same.
=cut
sub nvl {
return $_[0] if defined $_[0] and length($_[0]) or @_==1;
return $_[1] if @_==2;
return nvl(@_[1..$#_]) if @_>2;
return undef;
}
=head2 decode_num
See L</decode>.
=head2 decode
5 Banking and financial
6 Merchandizing and banking
7 Petroleum
8 Telecommunications and other industry assignments
9 National assignment
...although this has no meaning to C<Acme::Tools::ccn_ok()>.
The first six digits is I<Issuer Identifier>, that is the bank
(probably). The rest in the "account number", except the last digits,
which is the control digit. Max length on credit card numbers are 19
digits.
=cut
sub ccn_ok {
my $ccn=shift(); #credit card number
$ccn=~s/\D+//g;
if(KID_ok($ccn)){
return "MasterCard" if $ccn=~/^5[1-5]\d{14}$/;
return "Visa" if $ccn=~/^4\d{12}(?:\d{3})?$/;
#print serialize(\%opt,'opt');
#print serialize(\$opt_pro,'opt_pro');
my $antned=0+@vertikalefelt;
my $bakerst=-1+@{$$tabref[0]};
my(%h,%feltfinnes,%sum);
#print "Bakerst<$bakerst>\n";
for(@$tabref){
my $rad=join($;,@$_[0..($antned-1)]);
my $felt=join($;,@$_[$antned..($bakerst-1)]);
my $verdi=$$_[$bakerst];
length($rad) or $rad=' ';
length($felt) or $felt=' ';
$h{$rad}{$felt}=$verdi;
$h{$rad}{"%$felt"}=$verdi;
if($opt_sum or defined $opt_pro){
$h{$rad}{Sum}+=$verdi;
$sum{$felt}+=$verdi;
$sum{Sum}+=$verdi;
}
$feltfinnes{$felt}++;
$feltfinnes{"%$felt"}++ if $opt_pro;
}
my $cell=$_;
$width[$j]||=0;
if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
$cell=$nodup==1?"":$nodup;
$nodup[$i][$j]=1;
}
else{
my $height=0;
my $wider;
no warnings;
$not_empty[$j]=1 if !$head && length($cell)>0;
for(split("\n",$cell)){
$wider=/<input.+type=text.+size=(\d+)/i?$1:0; #hm
s/<[^>]+>//g;
$height++;
s/>/>/g;
s/</</g;
$width[$j]=length($_)+1+$wider if length($_)+1+$wider>$width[$j];
$left[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$head;
}
if( $height>1 && !$no_multiline_space){
$height++ if !$head;
$height[$i-1]++ if $i>1 && $height[$i-1]==1;
}
$height[$i]=$height if $height>$height[$i];
}
$j++;
}
for my $x (0..$i){
if($$tab[$x] eq '-'){
my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
$tabout[$row_start_line]=join(" ",map {$tegn[$_] x ($width[$_]-1)} (0..$j));
}
else{
for my $y (0..$j){
next if $remove_empty && !$not_empty[$y];
no warnings;
my @cell = !$header_last&&$nodup&&$nodup[$x][$y]
? ($nodup>0?():((" " x (($width[$y]-length($nodup))/2)).$nodup))
: split("\n",$$tab[$x][$y]);
for(0..($height[$x]-1)){
my $line=$row_start_line+$_;
my $txt=shift(@cell);
$txt='' if !defined$txt;
$txt=sprintf("%*s",$width[$y]-1,$txt) if length($txt)>0 && !$left[$y] && ($x>0 || $no_header_line);
$tabout[$line].=$txt;
if($y==$j){
$tabout[$line]=~s/\s+$//;
}
else{
my $wider;
$wider = $txt=~/<input.+type=text.+size=(\d+)/i?1+$1:0;
$txt=~s/<[^>]+>//g;
$txt=~s/>/>/g;
$txt=~s/</</g;
$tabout[$line].= ' ' x ($width[$y]-length($txt)-$wider);
}
}
}
}
$row_start_line+=$height[$x];
#--lage streker?
if(not $no_header_line){
if($x==0){
for my $y (0..$j){
Toyota SUM 56
Volvo SUM 18
Nissan SUM 36
Tesla SUM 8
SUM SUM 56 100%
=cut
sub cnttbl {
my $hr=shift;
my $maxlen=max(3,map length($_),keys%$hr);
join"",ref((values%$hr)[0])
?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
map [$_,cnttbl($$hr{$_})],
sort keys%$hr }
:do{ my $sum=sum(values%$hr);
my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
map sprintf($fmt,@$_,100*$$_[1]/$sum),
(map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
(['SUM',$sum]) }
}
=head2 ref_deep
NOT IMPLEMENTED
Same as ref, but goes deeper.
(TODO: alfa...and more docs needed)
=cut
our $Edcursor;
sub ed {
my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
return $$s=ed($$s,$cs,$p,$buf) if ref($s);
my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
while(length($cs)){
my $n = 0;
my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
$p = curb($p||0,0,length($s));
if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
if ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
elsif($c =~ /^"(.+)"$/) { &$add($1) }
elsif($c =~ /^\\(.)/) { &$add($1) }
elsif($c =~ /^S(.+)R/) { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
elsif($c =~ /^M(\d+)/) { $t=$1; next }
elsif($c eq 'F') { $p++ }
elsif($c eq 'B') { $p-- }
elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
elsif($c eq 'D') { substr($s,$p,1)='' }
elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
elsif($c eq '-') { substr($s,--$p,1)='' if $p }
elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
elsif($c eq 'Y') { &$add($buf) }
elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
elsif($c eq '<') { $p=0 }
elsif($c eq '>') { $p=length($s) }
elsif($c eq 'T') { $sh=1 }
elsif($c eq 'C') { $cl^=1 }
elsif($c eq '{') { $m=1; @m=() }
elsif($c eq '}') { $m=0 }
elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
elsif($c eq '""'){ &$add('"') }
else { croak "ed: Unknown cmd '$c'\n" }
push @m, $c if $m and $c ne '{';
#warn serialize([$c,$m,$cs],'d');
}
=head1 BLOOM FILTER SUBROUTINES
Bloom filters can be used to check whether an element (a string) is a
member of a large set using much less memory or disk space than other
data structures. Trading speed and accuracy for memory usage. While
risking false positives, Bloom filters have a very strong space
advantage over other data structures for representing sets.
In the example below, a set of 100000 phone numbers (or any string of
any length) can be "stored" in just 91230 bytes if you accept that you
can only check the data structure for existence of a string and accept
false positives with an error rate of 0.03 (that is three percent, error
rates are given in numbers larger than 0 and smaller than 1).
You can not retrieve the strings in the set without using "brute
force" methods and even then you would get slightly more strings than
you put in because of the error rate inaccuracy.
Bloom Filters have many uses.
capacity => 10000000,
counting_bits => 4 # power of 2, that is 2, 4, 8, 16 or 32
);
bfadd( $bf, @unique_phone_numbers);
bfdelete($bf, @unique_phone_numbers);
Example: examine the frequency of the counters with 4 bit counters and 4 million keys:
my $bf=bfinit( error_rate=>0.001, capacity=>4e6, counting_bits=>4 );
bfadd($bf,[1e3*$_+1 .. 1e3*($_+1)]) for 0..4000-1; # adding 4 million keys one thousand at a time
my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
printf "%8d counters = %d\n",$c{$_},$_ for sort{$a<=>$b}keys%c;
The output:
28689562 counters = 0
19947673 counters = 1
6941082 counters = 2
1608250 counters = 3
280107 counters = 4
38859 counters = 5
Prints yes since C<bfgrep> now returns an array of all the 1000 elements.
Croaks if the filters are of different dimensions.
Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)
=head2 bfsum
Returns the number of 1's in the filter.
my $percent=100*bfsum($bf)/$$bf{filterlength};
printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
Sums the counters for counting bloom filters (much slower than for non counting).
=head2 bfdimensions
Input, two numeric arguments: Capacity and error_rate.
Outputs an array of two numbers: m and k.
max_hashfuncs => 100,
counting_bits => 1, #default: not counting filter
adaptive => 0,
%arg, #arguments
key_count => 0,
overflow => {},
version => $Acme::Tools::VERSION,
};
croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
@$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
@$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
$$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
$$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
:$$bf{filterlength}<=2**32/4 ? "N*"
: "Q*";
bfadd($bf,@{$arg{keys}}) if $arg{keys};
return $bf;
}
sub bfaddbf {
my($bf,$bf2)=@_;
my $differror=join"\n",
map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
grep $$bf{$_} ne $$bf2{$_},
qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
croak $differror if $differror;
croak "Can not add adaptive bloom filters" if $$bf{adaptive};
my $count=$$bf{key_count}+$$bf2{key_count};
croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
if $count > $$bf{capacity};
$$bf{key_count}+=$$bf2{key_count};
if($$bf{counting_bits}==1){
$$bf{filter} |= $$bf2{filter};
#$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
}
else {
my $cb=$$bf{counting_bits};
for(0..$$bf{filterlength}-1){
my $sum=
vec($$bf{filter}, $_,$cb)+
vec($$bf2{filter},$_,$cb);
if( $sum>2**$cb-1 ){
$sum=2**$cb-1;
$$bf{overflow}{$_}++;
}
vec($$bf{filter}, $_,$cb)=$sum;
no warnings;
$$bf{overflow}{$_}+=$$bf2{overflow}{$_}
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
if exists $$bf2{overflow}{$_};
}
}
return $bf; #for convenience
}
sub bfsum {
my($bf)=@_;
return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
my($sum,$cb)=(0,$$bf{counting_bits});
$sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
return $sum;
}
sub bfadd {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
for(@$keysref){
#croak "Key should be scalar" if ref($_);
$$bf{key_count} >= $n and croak "Exceeded filter capacity $n" or $$bf{key_count}++;
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
if ($cb==1 and !$adaptive) { # normal bloom filter
vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
}
elsif ($cb>1) { # counting bloom filter
for(0..$k-1){
my $pos=$h[$_] % $m;
if($c==0){
vec($$bf{filter}, $pos, $cb) = -1;
$$bf{overflow}{$pos}++
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
}
}
}
elsif ($adaptive) { # adaptive bloom filter
my($i,$key,$bit)=(0+@h,$_);
for(0..$$bf{filterlength}-1){
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
my $pos=shift(@h) % $m;
$bit=vec($$bf{filter}, $pos, 1);
vec($$bf{filter}, $pos, 1)=1;
last if $_>=$k-1 and $bit==0;
}
}
else {croak}
}
return 1;
}
sub bfcheck {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
my $wa=wantarray();
if(!$adaptive){ # normal bloom filter or counting bloom filter
return map {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
return $match if !$wa;
$match;
} @$keysref;
}
else { # adaptive bloom filter
return map {
my($match,$i,$key,$bit,@h)=(1,0,$_);
for(0..$$bf{filterlength}-1){
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
my $pos=shift(@h) % $m;
$bit=vec($$bf{filter}, $pos, 1);
$match++ if $_ > $k-1 and $bit==1;
$match=0 if $_ <= $k-1 and $bit==0;
last if $bit==0;
}
return $match if !$wa;
$match;
} @$keysref;
}
}
sub bfgrep { # just a copy of bfcheck with map replaced by grep
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
return grep {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
$match;
} @$keysref;
}
sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
return grep {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
!$match;
} @$keysref;
}
sub bfdelete {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
for my $key (@$keysref){
my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
$$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
my($ones,$croak,@pos)=(0);
for(0..$k-1){
my $pos=$h[$_] % $m;
my $c=
vec($$bf{filter}, $pos, $cb);
vec($$bf{filter}, $pos, $cb)=$c-1;
my $ext=shift(); #or filename
#http://www.sitepoint.com/web-foundations/mime-types-complete-list/
croak "todo: ext2mime not yet implemented";
#return "application/json";#feks
}
sub base64 ($;$) { #
if ($] >= 5.006) {
require bytes;
croak "base64 failed: only defined for bytes"
if bytes::length($_[0]) > length($_[0])
or $] >= 5.008 && $_[0] =~ /[^\0-\xFF]/
}
my $eol=defined$_[1]?$_[1]:"\n";
my $res=pack("u",$_[0]);
$res=~s/^.//mg;
$res=~s/\n//g;
$res=~tr|` -_|AA-Za-z0-9+/|;
my $pad=(3-length($_[0])%3)%3;
$res=~s/.{$pad}$/'=' x $pad/e if $pad;
$res=~s/(.{1,76})/$1$eol/g if length($eol); #todo !=76
$res;
}
our $Fix_unbase64=0;
sub unbase64 ($) {
my $s=shift;
$s=~tr,0-9a-zA-Z+=/,,cd;
if($Fix_unbase64){ $s.='=' while length($s)%4 }
croak "unbase64 failed: length ".length($s)." not multiple of 4" if length($s)%4;
$s=~s/=+$//;
$s=~tr|A-Za-z0-9+/| -_|;
length($s) ? unpack("u",join'',map(chr(32+length($_)*3/4).$_,$s=~/(.{1,60})/gs)) : "";
}
=head1 COMMANDS
=head2 install_acme_command_tools
sudo perl -MAcme::Tools -e install_acme_command_tools
Wrote executable /usr/local/bin/conv
my @p=$o{P}?(10,50,90):(50);
my @m=@_>0 ? do {grep$_, split",", $xtime{$_[0]}}
: do {grep$_, map {split","} values %xtime};
my @r=percentile(\@p,@m);
@r=(min(@m),@r,max(@m)) if $o{M}||$o{C}||$o{A};
@r=map int($_), @r;
my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
@r=map tms($_,$fmt), @r;
" ".join(" ",@r);
};
my $width=max( 10, grep $_, map length($_), @e );
@e=@e[-10..-1] if $o{t} and @e>10; #-t tail
printf("%-*s %8d $f %7.2f%%%s\n",$width,$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
printf("%-*s %8d $f %7.2f%%%s\n",$width,"Sum",$cnt,&$s($bts),100,&$perc());
}
sub cmd_resubst {
my %o;
my $zo="123456789e";
my @argv=opts("f:t:vno:gi$zo",\%o,@_);
if(exists$o{t}){ $o{t}=~s,\\,\$, } else { $o{t}='' }
my($i,$tc,$tbfr,$tbto)=(0,0,0,0);
$tc+=$c;
close($I);close($O);
chall($file,"$file.tmp$$") or croak"ERR: chall $file\n" if !$o{n};
my($bfr,$bto)=(-s$file,-s"$file.tmp$$");
unlink $file or croak"ERR: cant rm $file\n";
my $newfile=$o{o}?repl($file,qr/\.(gz|bz2|xz)$/i,".$oext"):$file;
rename("$file.tmp$$",$newfile) or croak"ERR: rename $file.tmp$$ -> $newfile failed\n";
if($o{v}){
my $pr=$bfr?100*$bto/$bfr:0;
printf "%*d/%d %*s %7d =>%8d b (%2d%%) %s\n",
length(0+@argv), ++$i, 0+@argv, -15, "$tc/$c", $bfr, $bto, $pr, $file;
$tbfr+=$bfr;
$tbto+=$bto;
}
}
if($o{v} and @argv>1){
printf "Replaces: %d Bytes before: %d After: %d Change: %.1f%%\n",
$tc, $tbfr, $tbto, $tbfr?100*($tbto-$tbfr)/$tbfr:0
}
$tc;
}
my $o1=join"",grep$def{$_}==1,sort keys%def;
my $o= join"", sort keys%def;
my @r;
while(@a){
my $a=shift(@a);
if($a=~/^-([$o1])([$o].*)$/){
unshift@a,"-$1","-$2";
}
elsif($a=~/^-(\w)(.*)$/){
my $d=$def{$1}//0;
push@{$$hashref{$1}},$d==1 && length($2) ? croak"opt -$1 has no arg (is $2 here)"
:$d==1 ? 1
:$d==2 && length($2) ? $2
:$d==2 ? shift(@a)
:croak"unknown opt -$1";
}
elsif($a eq '--'){
last;
}
else {
push @r, $a;
}
}
t/03_bloomfilter.t view on Meta::CPAN
my $capacity=10000;
my $bf=bfinit($error_rate, $capacity);
my $t=time_fp();
bfadd($bf, map $_*2,0..$capacity-1);
#deb "Adds pr sec: ".int($capacity/(time_fp()-$t))."\n";
#bfadd($bf, $_) for map $_*2,0..$capacity-1;
deb serialize({%$bf,filter=>''},'bf','',1);
deb "Filter has capacity $$bf{capacity}\n";
deb "Filter has $$bf{key_count} keys\n";
deb "Filter has ".length($$bf{filter})." bytes\n";
deb "Filter has $$bf{filterlength} bits of which ".bfsum($bf)." (".int(100*bfsum($bf)/$$bf{filterlength})."%) are on\n";
deb "Filter has $$bf{hashfuncs} hash functions\n";
my @c=bfcheck($bf,0..$capacity*2); #test next ok: $c[2000]=0;
#deb "$_->".bfcheck($bf,$_)."\n" for 0..200;
my $sum; $sum+=$c[ $_*2+1 ], for 0..$capacity-1;
deb "Filter has $sum false positives\n";
ok(!(grep $c[$_]!=1, map $_*2, 0..$capacity-1), 'no false negatives');
ok(
$sum >= $capacity*$error_rate*80/100
&& $sum <= $capacity*$error_rate*120/100
t/03_bloomfilter.t view on Meta::CPAN
ok(0+grep($_,bfcheck($cbf,1..$cap)) == $cap, 'cbf no false negatives');
ok(bfgrepnot($cbf,[1..$cap]) == 0, 'cbf grepnot');
my $errs=grep($_,bfcheck($cbf,$cap+1..$cap*2));
deb "Errs $errs\n";
ok(between($errs/$cap/$er,0.7,1.3),'error rate rating '.($errs/$cap/$er).' within ok range 0.7-1.3');
#---------- see doc about this example:
#do{
# my $bf=bfinit( error_rate=>0.00001, capacity=>4e6, counting_bits=>4 );
# bfadd($bf,[1000*$_+1 .. 1000*($_+1)]),deb"." for 0..4000-1; # adding 4 million keys one thousand at a time
# my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
# deb sprintf("%8d counters is %2d\n",$c{$_},$_) for sort{$a<=>$b}keys%c;
#};
my %c; $c{vec($$cbf{filter},$_,$cb)}++ for 0..$$cbf{filterlength}-1;
ok(sum(map$c{$_}*$_,keys%c)/$$cbf{key_count} == $$cbf{hashfuncs}, 'counter check');
#deb sprintf("%8d counters is %2d\n",$c{$_},$_) for sort{$a<=>$b}keys%c;
#---------- counting bloom filter, test delete
do{
my($er,$cap,$cb)=(0.1,500,4);
my $bf=bfinit(error_rate=>$er,capacity=>$cap*2,counting_bits=>$cb,keys=>[1..$cap*2]);
bfdelete($bf, $cap+1 .. $cap*1.5);
bfdelete($bf,[$cap*1.5+1 .. $cap*2]);
ok(bfgrep($bf,[1..$cap]) == $cap, 'cbf, delete test, no false negatives');
my $err=bfgrep($bf,[$cap+1..$cap*2]);
deb "Err $err\n";
ok($err/$cap/$er<1.3,"cbf, delete test, after delete ($err)");
my %c=(); $c{vec($$bf{filter},$_,$cb)}++ for 0..$$bf{filterlength}-1;
ok(sum(map$c{$_}*$_,keys%c)/$$bf{key_count} == $$bf{hashfuncs}, 'cbf, delete test, counter check after delete');
eval{ok(bfdelete($bf,'x'))};ok($@=~/Cannot delete a non-existing key x/,'delete non-existing key');
};
#---------- test filter lengths
my $r;
ok(between($r=
length(bfinit(counting_bits=>$_,error_rate=>0.01,capacity=>100)->{filter}) /
length(bfinit(counting_bits=>1, error_rate=>0.01,capacity=>100)->{filter}) / $_, 0.95, 1.05), "filter length ($r), cb $_") for qw/2 4 8 16/;
eval{bfinit(counting_bits=>2,error_rate=>0.1,capacity=>1000,keys=>[1..1000])};ok($@=~/Too many overflows/,'overflow check');
#----------storing and retrieving
my $tmp=tmp();
if(-w$tmp){
my $file="$tmp/cbf.bf";
bfstore($cbf,$file);
deb "Stored size of $file: ".(-s$file)." bytes\n";
my $cbfr=bfretrieve($file);
t/03_bloomfilter.t view on Meta::CPAN
}
else{
ok(1,'skipped, not linux') for 1..3;
}
#----------adaptive bloom filter, not implemented/tested, see http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf
# $cap=100;
# $bf=bfinit(adaptive=>0,error_rate=>0.001,capacity=>$cap,keys=>[1..$cap]);
# @c=bfcheck($bf,[1..$cap]);
# %c=(); $c{$_}++ for @c;
# deb "Filter has $$bf{filterlength} bits of which ".bfsum($bf)." (".int(100*bfsum($bf)/$$bf{filterlength})."%) are on\n";
# deb "Filter has ".int(1+$$bf{filterlength}/8)." bytes (".sprintf("%.1f",int(1+$$bf{filterlength}/8)/1024)." kb)\n";
# deb "Filter has $$bf{hashfuncs} hash functions\n";
# deb "Number of $_: $c{$_}\n" for sort{$a<=>$b}keys%c;
# deb "Sum bits ".sum(map $$bf{hashfuncs}+$_-1,bfcheck($bf,1..$cap))."\n";
# deb "False negatives: ".grep(!$_,@c)."\n";
# deb "Error rate: ".(($errs=grep($_,bfcheck($bf,$cap+1..$cap*2)))/$cap)."\n";
# deb "Errors: $errs\n";
#---------- bfaddbf, adding two bloom filters
do{
my $cap=100;
t/11_part.t view on Meta::CPAN
my @words=qw/These are the words of this array/;
my %h=parth { uc(substr($_,0,1)) } @words;
#warn serialize(\%h);
ok_ref( \%h,
{ T=>[qw/These the this/],
A=>[qw/are array/],
W=>[qw/words/],
O=>[qw/of/] }, 'parth');
my @a=parta { length } @words;
#warn serialize(\@a);
ok_ref( \@a, [undef,undef,['of'],['are','the'],['this'],['These','words','array']], 'parta' );
ok_ref( [pile(2, 1..9)], [[1,2],[3,4],[5,6],[7,8],[9]], 'pile 2' );
ok_ref( [pile(4, 1..9)], [[1,2,3,4],[5,6,7,8],[9]], 'pile 4' );
ok_ref( [pile(2)], [], 'pile empty' );
ok_ref( [pile2(4, 1..9)], [[1,2,3,4],[5,6,7,8],[9]], 'pile parta' );
sub pile2 {
ok_ref( [zip([1,3,5],[2,4,6])], [1..6], 'zip 2' );
ok_ref( [zip([1,4,7],[2,5,8],[3,6,9])], [1..9], 'zip 3' );
sub ziperr{eval{zip(@_)};$@=~/ERROR.*zip/}
ok( ziperr([1,2],[3,4],5), 'zip err 1');
ok( ziperr([1,2],[3,4,5]), 'zip err 2');
ok( ziperr([1,2],[3,4],5), 'zip err 1');
ok( ziperr([1,2],[3,4,5]), 'zip err 2');
#--zipb64, zipbin, unzipb64, unzipbin, gzip, gunzip
my $s=join"",map random([qw/hip hop and you dont stop/]), 1..1000;
ok( length(zipb64($s)) / length($s) < 0.5, 'zipb64');
ok( between(length(zipbin($s)) / length(zipb64($s)), 0.7, 0.8), 'zipbin zipb64');
ok( between(length(zipbin($s)) / length(zipb64($s)), 0.7, 0.8), 'zipbin zipb64');
ok( length(zipbin($s)) / length($s) < 0.4, 'zipbin');
ok( $s eq unzipb64(zipb64($s)), 'unzipb64');
ok( $s eq unzipbin(zipbin($s)), 'unzipbin');
my $d=substr($s,1,1000);
ok( length(zipb64($s,$d)) / length(zipb64($s)) < 0.8 );
my $f;
ok( ($f=length(zipb64($s,$d)) / length(zipb64($s))) < 0.73 , "0.73 > $f");
#for(1..10){
# my $s=join"",map random([qw/hip hop and you dont stop/]), 1..1000;
# my $d=substr($s,1,1000);
# my $f= length(zipbin($s,$d)) / length(zipbin($s));
# print $f,"\n";
#}
#--gzip, gunzip
$s=join"",map random([qw/hip hop and you do not everever stop/]), 1..10000;
ok(length(gzip($s))/length($s) < 1/5);
ok($s eq gunzip(gzip($s)));
ok($s eq unzipbin(gunzip(gzip(zipbin($s)))));
ok($s eq unzipb64(unzipbin(gunzip(gzip(zipbin(zipb64($s)))))));
print length($s),"\n";
print length(gzip($s)),"\n";
print length(zipbin($s)),"\n";
print length(zipbin($s,$d)),"\n";
t/17_roman.t view on Meta::CPAN
use lib '.'; BEGIN{require 't/common.pl'}
use Test::More tests => 31;
use Carp;
my %rom=(MCCXXXIV=>1234,MCMLXXI=>1971,IV=>4,VI=>6,I=>1,V=>5,X=>10,L=>50,C=>100,D=>500,M=>1000,CDXCVII=>497);
my$rom;ok( ($rom=int2roman($rom{$_})) eq $_, sprintf"int2roman %8d => %-10s %-10s",$rom{$_},$_,"($rom)") for sort keys%rom;
my$int;ok( ($int=roman2int($_)) eq $rom{$_}, sprintf"roman2int %-8s => %10d %10d",$_,$rom{$_},$int) for sort keys%rom;
ok( do{eval{roman2int("a")};$@=~/invalid/i}, "croaks ok" );
ok( roman2int("-MCCXXXIV")==-1234, 'negative ok');
ok( int2roman(0) eq '', 'zero');
ok( !defined(int2roman(undef)), 'undef');
ok( defined(int2roman("")) && !length(int2roman("")), 'empty');
my @n=(-100..4999);
my @err=grep roman2int(int2roman($_))!=$_, grep $_>100?$_%7==0:1, @n;
ok( @err==0, "all, not ok: ".(join(", ",@err)||'none') );
my @t=([time_fp(),join(" ",map int2roman($_) ,@n),time_fp()],
[time_fp(),join(" ",map int2roman_old($_),@n),time_fp()]);
ok( $t[0][1] eq $t[1][1] );
if($ENV{ATDEBUG}){
printf "Acme::Tools::int2roman - %.6fs\n",$t[0][2]-$t[0][0];
printf "17_roman.t/int2roman_old - %.6fs\n",$t[1][2]-$t[1][0];
}
sub int2roman_old {
my($n,@p)=(shift,[],[1],[1,1],[1,1,1],[1,2],[2],[2,1],[2,1,1],[2,1,1,1],[1,3],[3]);
!defined($n)? undef
: !length($n) ? ""
: int($n)!=$n ? croak"int2roman: $n is not an integer"
: $n==0 ? ""
: $n<0 ? "-".int2roman(-$n)
: $n>3999 ? "M".int2roman($n-1000)
: join'',@{[qw/I V X L C D M/]}[map{my$i=$_;map($_+5-$i*2,@{$p[$n/10**(3-$i)%10]})}(0..3)];
}
t/21_read_conf.t view on Meta::CPAN
'hei'=>'fds1 312321 123321',
'sykkel'=>'sdfkdsa'
},
'section3'=>{}
);
my $t;
sub rc {$t=time_fp();my%c=read_conf(@_);$t=time_fp()-$t;%c}
sub sjekk {
my $f=serialize(\%fasit,'c','',1);
my $s=serialize(\%c,'c','',1);
ok($s eq $f, sprintf("read_conf %10.6f sek (".length($s)." bytes)",$t)) or warn"s=$s\nf=$f\n";
}
sjekk(); #1
my $f=tmp()."/acme-tools.read_conf.tmp";
eval{writefile($f,$c)};$@&&ok(1)&&exit;
%c=(); rc($f,\%c);
sjekk(); #2
$Acme::Tools::Read_conf_empty_section=1; #default 0
$fasit{''}=\%s0;
t/25_pwgen.t view on Meta::CPAN
sub tstr{sprintf(" (%d trials, %.5f sec)",$Acme::Tools::Pwgen_trials, $Acme::Tools::Pwgen_sec)}
SKIP: {
skip "- strangely pwgen-croak-test fails on windows sometime", 2 if $^O ne 'linux';
local $Acme::Tools::Pwgen_max_sec=0.001;
eval{pwgen(3)}; ok($@=~/pwgen.*25_pwgen.t/,"pwgen croak works: ".trim($@));
local $Acme::Tools::Pwgen_max_trials=3;
eval{pwgen(3)}; ok($@=~/pwgen.*after 3 .*25_pwgen.t/,"pwgen croak works: ".trim($@));
};
ok(length(pwgen())==8, 'default len 8');
my $n=300;
$Acme::Tools::Pwgen_max_sec=1;
sub test{/^[a-z0-9]/i and /[A-Z]/ and /[a-z]/ and /\d/ and /[\,\-\.\/\&\%\_\!]/};
my @pw=grep test(), pwgen(0,$n);
ok(@pw==$n, "pwgen ok ".@pw.tstr());
$n=50;
@pw=grep/^[A-Z]{20}$/,pwgen(20,$n,'A-Z');
ok(@pw==$n, "pwgen ok ".@pw);
t/28_wipe.t view on Meta::CPAN
# make test
# perl Makefile.PL; make; perl -Iblib/lib t/28_wipe.t
use lib '.'; BEGIN{require 't/common.pl'}
use Test::More tests => 3;
if($^O eq 'linux'){
my $f=tmp().'/acme-tools.wipe.tmp';
writefile($f,join(" ",map rand(),1..1000)); #system("ls -l $f");
my $ntrp=sub{length(gz(readfile($f).""))};
my $n=&$ntrp;
wipe($f,undef,1);
my $ratio=$n/&$ntrp;
ok($ratio>50 || !$INC{'Compress/Zlib.pm'}, "ratio $ratio > 50");
ok(-s$f>5e3);
wipe($f,1);
ok(!-e$f);
}
else{ ok(1) for 1..3 }
t/38_base64.t view on Meta::CPAN
use lib '.'; BEGIN{require 't/common.pl'}
use Test::More tests => 24;
use MIME::Base64;
my($s,$b64,$b64_2)=("");
for(0..1000){
if($_%100==0){
$b64=encode_base64($s);
$b64_2=base64($s);
my $s2=unbase64($b64);
is($s,$s2,'yes '.length($s));
is($b64,$b64_2,'yes b '.length($s));
}
$s.=$_;
}
if($^O eq 'linux' and -x '/usr/bin/base64'){
$s=qx(base64 -w 1000 Tools.pm);
$b64=encode_base64($s);
$b64_2=base64($s);
my $s2=unbase64($b64);
is($s,$s2,'yes ps '.length($s));
is($b64,$b64_2,'yes b ps '.length($s));
}
else {
is(1,1,'skips on non-linux') for 1..2;
}
#print "$s\n\n$b64\n";
t/test_fork_bloom.pl view on Meta::CPAN
}
1 while wait() != -1;
print "building finished\n";
my $bf=bfinit(error_rate=>$error_rate,capacity=>$cap);
for my $job (0..$jobs-1){
print "Adding bloom filter $job...";
my $t=time_fp();
bfaddbf($bf,bfretrieve("/tmp/bf$job.bf"));
print "took ".(time_fp()-$t)." sec\n";
}
print int($$bf{filterlength}/8)," bytes\n";
printf "%.1f%%\n",100*bfsum($bf)/$$bf{filterlength};
print "keys: $$bf{key_count}\n";
print "found: ".bfgrep($bf,[1..$cap/10])."\n";
my $tests=10000;
my $errs=bfgrep($bf,[$cap+1..$cap+1+$tests]);
print "Error rate: $errs/$tests = ".($errs/$tests)."\n";
bfstore($bf,"/tmp/bfall.bf");
$$bf{filter}="gone";
print serialize($bf,'bf','',2);
( run in 0.743 second using v1.01-cache-2.11-cpan-65fba6d93b7 )