Acme-Tools
view release on metacpan or search on metacpan
256257258259260261262263264265266267268269270271272273274275276use
Acme::Tools;
sum(1,2,3);
# 6
avg(2,3,4,6);
# 3.75
median(2,3,4,6);
# 3.5
percentile(25, 101..199);
# 125
my
@list
= minus(\
@listA
, \
@listB
);
# set operation
my
@list
= union(\
@listA
, \
@listB
);
# set operation
length
(gzip(
"abc"
x 1000));
# far less than 3000
writefile(
"/dir/filename"
,
$string
);
# convenient
my
$s
=readfile(
"/dir/filename"
);
# also convenient
"yes!"
if
between(
$PI
,3,4);
percentile(0.05,
@numbers
);
my
@even
= range(1000,2000,2);
# even numbers between 1000 and 2000
my
@odd
= range(1001,2001,2);
323324325326327328329330331332333334335336337338339340341342=head2 code2num
C<num2code()> convert numbers (integers) from the normal decimal system to some arbitrary other number system.
That can be binary (2), oct (8), hex (16) or others.
Example:
print num2code(255,2,"0123456789ABCDEF"); # prints FF
print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
...and 14 are converted to 0E, with leading 0 because of the second argument 2.
Example:
print num2code(1234,16,"01")
Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
To convert back:
349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386num2code(
"241274432"
,5,
$chars
);
# prints EOOv0
code2num(
"EOOv0"
,
$chars
);
# prints 241274432
=cut
#Math::BaseCnv
sub num2code {
return num2code($_[0],0,$_[1]) if @_==2;
my($num,$digits,$validchars,$start)=@_;
my $l=length($validchars);
my $key;
$digits||=9e9;
no warnings;
croak if $num<$start;
$num-=$start;
for(1..$digits){
$key=substr($validchars,$num%$l,1).$key;
$num=int($num/$l);
last if $digits==9e9 and !$num;
}
croak if $num>0;
return $key;
}
sub code2num {
my($code,$validchars,$start)=@_; $start=0 if!defined$start;
my $l=length($validchars);
my $num=0;
$num=$num*$l+index($validchars,$_) for split//,$code;
return $num+$start;
}
=head2 base
Numbers in any number system of base between 2 and 36. Using capital letters A-Z for base higher than 10.
base(2,15) # 1111 2 --> binary
732733734735736737738739740741742743744745746747748749750751752current: A, _A, N/m2
energy: BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
cal, calorie, calories, eV, electronvolt, BeV,
erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh, MWh, GWh, TWh
kcal, kilocalorie, kilocalories,
newtonmeter, newtonmeters, th, thermie
force: N, _N, dyn, dyne, dynes, lb, newton
length
: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
in, inch, inches, km, league, lightyear, ls, ly,
m, meter, meters, mi, mil, mile, miles,
nautical mile, nautical miles, nmi,
parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm, angstrom
mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
791792793794795796797798799800801802803804805806807808809810811=cut
#TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
#TODO: @arr2=map conv($_,"from","to"),@arr1
#TODO: conv(123456789,'b','h'); # h converts to something human-readable
our %conv=(
length=>{
m => 1,
_m => 1,
meter => 1,
meters => 1,
metre => 1,
metres => 1,
km => 1000,
mil => 10000, #scandinavian #also: inch/1000!
in => 0.0254,
inch => 0.0254,
854855856857858859860861862863864865866867868869870871872873874875876877878879880881882lightyear
=> 299792458*3600*24*365.25,
# = 9460730472580800 by def
ls
=> 299792458,
#light-second
au
=> 149597870700,
# by def: meters earth to sun
astronomical_unit
=> 149597870700,
'astronomical unit'
=> 149597870700,
pc
=> 149597870700*648000/
$PI
,
#3.0857e16 = 3.26156 ly
_pc
=> 149597870700*648000/
$PI
,
parsec
=> 149597870700*648000/
$PI
,
attoparsec
=> 149597870700*648000/
$PI
/1e18,
apc
=> 149597870700*648000/
$PI
/1e18,
planck
=> 1.61619997e-35,
#planck length
#Norwegian (old) lengths:
tomme
=> 0.0254,
tommer
=> 0.0254,
fot
=> 0.0254*12,
#0.3048m
alen
=> 0.0254*12*2,
#0.6096m
favn
=> 0.0254*12*2*3,
#1.8288m
kvart
=> 0.0254*12*2/4,
#0.1524m a quarter alen
twip
=> 0.0254 / 6 / 12 / 20,
point
=> 0.0254 / 6 / 12,
pica
=> 0.0254 / 6,
line
=> 0.0254 / 12,
thou
=> 0.0254 / 1000,
barleycorn
=> 0.0254 / 3,
poppyseed
=> 0.0254 / 3 / 4,
finger
=> 0.0254 / 6 / 12 * 63,
palm
=> 0.0254 * 3,
digit
=> 0.0254 * 3 / 4,
113011311132113311341135113611371138113911401141114211431144114511461147114811491150
olympiad
=> 4 * 60*60*24*365.2425,
lustrum
=> 5 * 60*60*24*365.2425,
indiction
=> 15 * 60*60*24*365.2425,
jubilee
=> 50 * 60*60*24*365.2425,
century
=> 100 * 60*60*24*365.2425,
millennium
=> 1000 * 60*60*24*365.2425,
shake
=> 1e-8,
moment
=> 3600/40,
#1/40th of an hour, used by Medieval Western European computists
ke
=> 864,
#1/100th of a day, trad Chinese, 14m24s
fortnight
=> 14*24*3600,
tp
=> 5.3910632e-44,
#planck time, time for ligth to travel 1 planck length
nanocentury
=> 100 * 60*60*24*365.2425 / 1e9,
#3.156 ~ pi seconds, response time limit (usability)
warhol
=> 15*60,
#"fifteen minutes of fame"
},
speed
=>{
'm/s'
=> 1,
'_m/s'
=> 1,
mps
=> 1,
mph
=> 1609.344/3600,
'mi/h'
=> 1609.344/3600,
kmh
=> 1/3.6,
16211622162316241625162616271628162916301631163216331634163516361637163816391640sec_readable( 13333331 );
# 154d 7h
sec_readable( 133333331 );
# 4yr 82d
sec_readable( 1333333331 );
# 42yr 91d
=cut
sub sec_readable {
my $s=shift();
my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
!defined$s ? undef
:!length($s) ? ''
:$s<0 ? '-'.sec_readable(-$s)
:$s<60 && int($s)==$s
? $s."s"
:$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
:$s<3600 ? int($s/60)."m " .($s%60) ."s"
:$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
:$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
: int($s/$y)."yr ".int(($s%$y)/$d)."d";
}
166616671668166916701671167216731674167516761677167816791680168116821683168416851686
roman2int(
"MCMLXXI"
) == 1971
=cut
#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
#see also t/17_roman.t sub int2roman_old
sub int2roman {
my $n=shift;
!defined$n ? undef
: !length($n) ? ""
: $n<0 ? "-".int2roman(-$n)
: int($n)!=$n ? croak"int2roman: $n is not an integer"
# : $] >= 5.014 ? #s///r modifier introduced in perl v5.14
# ("I" x $n)
# =~s,I{1000},M,gr #unnecessary, but speedup for n>1000
# =~s,I{100},C,gr #unnecessary, but speedup for n>100
# =~s,I{10},X,gr #unnecessary, but speedup for n>10
# =~s,IIIII,V,gr
# =~s,IIII,IV,gr
# =~s,VV,X,gr
173217331734173517361737173817391740174117421743174417451746174717481749175017511752# : $r=~s,^CD,,i ? 400+roman2int($r)
# : $r=~s,^C,,i ? 100+roman2int($r)
# : $r=~s,^XC,,i ? 90+roman2int($r)
# : $r=~s,^L,,i ? 50+roman2int($r)
# : $r=~s,^XL,,i ? 40+roman2int($r)
# : $r=~s,^X,,i ? 10+roman2int($r)
# : $r=~s,^IX,,i ? 9+roman2int($r)
# : $r=~s,^V,,i ? 5+roman2int($r)
# : $r=~s,^IV,,i ? 4+roman2int($r)
# : $r=~s,^I,,i ? 1+roman2int($r)
# : !length($r) ? 0
# : croak "Invalid roman number $r";
#}
=head2 distance
B<Input:> the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
B<Output:> the air distance in meters between the two points
Calculation is done using the Haversine Formula for spherical distance:
1914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944
#$R=$a * $t/$n;
#=head2 fractional
#=cut
carp
"fractional: NOT FINISHED"
;
my
$n
=
shift
;
"----fractional n=$n\n"
;
my
$nn
=
$n
;
my
$dec
;
$nn
=~s,\.(\d+)$,
$dec
=
length
($1);$1.,;
my
$l
;
my
$max
=0;
my
(
$te
,
$ne
);
for
(1..
length
(
$nn
)/2){
if
(
$nn
=~/^(\d*?)((.{
$_
})(\3)+)$/ ){
"_ = $_ "
.
length
($2).
"\n"
;
if
(
length
($2)>
$max
){
$l
=
$_
;
$te
=
"$1$3"
-$1;
$max
=
length
($2);
}
}
}
return
fractional(
$n
)
if
!
$l
and !recursed() and
$dec
>6 and
substr
(
$n
,-1) and
substr
(
$n
,-1)--;
"l=$l max=$max\n"
;
$ne
=
"9"
x
$l
;
log
(
$n
),
"\n"
;
my
$st
=
sub
{
"status: "
.(
$te
/
$ne
).
" n=$n "
.(
$n
/
$te
*$ne
).
"\n"
};
while
(
$n
/
$te
*$ne
<0.99){
&$st
();
$ne
*=10 }
while
(
$te
/
$n
/
$ne
<0.99){
&$st
();
$te
*=10 }
209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126join
","
, trim(
" please "
,
" remove "
,
" my "
,
" spaces "
);
# works on arrays as well
my
$s
=
' please '
; trim(\
$s
);
# now $s eq 'please'
trim(\
@untrimmedstrings
);
# trims array strings inplace
@untrimmedstrings
=
map
trim,
@untrimmedstrings
;
# same, works on $_
trim(\
$_
)
for
@untrimmedstrings
;
# same, works on \$_
=head2 lpad
=head2 rpad
Left or right pads a string to the given length by adding one or more spaces at the end for I<rpad> or at the start for I<lpad>.
B<Input:> First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
Default is space.
rpad('gomle',9); # 'gomle '
lpad('gomle',9); # ' gomle'
rpad('gomle',9,'-'); # 'gomle----'
lpad('gomle',9,'+'); # '++++gomle'
rpad('gomle',4); # 'goml'
lpad('gomle',4); # 'goml'
rpad('gomle',7,'xyz'); # 'gomlxy'
lpad('gomle',10,'xyz'); # 'xyzxygoml'
=head2 cpad
Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
cpad('mat',5) eq ' mat '
cpad('mat',4) eq 'mat '
cpad('mat',6) eq ' mat '
cpad('mat',9) eq ' mat '
cpad('mat',5,'+') eq '+mat+'
cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
=cut
2133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181
return
map
trim(
$_
),
@_
if
@_
>1;
my
$s
=
shift
;
if
(
ref
(
$s
) eq
'SCALAR'
){
$$s
=~s,^\s+|(?<=\s)\s+|\s+$,,g;
return
$$s
}
if
(
ref
(
$s
) eq
'ARRAY'
) { trim(\
$_
)
for
@$s
;
return
$s
}
$s
=~s,^\s+|(?<=\s)\s+|\s+$,,g
if
defined
$s
;
$s
;
}
sub
rpad {
my
(
$s
,
$l
,
$p
)=
@_
;
$p
=
' '
if
@_
<3 or !
length
(
$p
);
$s
.=
$p
while
length
(
$s
)<
$l
;
substr
(
$s
,0,
$l
);
}
sub
lpad {
my
(
$s
,
$l
,
$p
)=
@_
;
$p
=
' '
if
@_
<3 or !
length
(
$p
);
$l
<
length
(
$s
)
?
substr
(
$s
,0,
$l
)
:
substr
(
$p
x (1+
$l
/
length
(
$p
)), 0,
$l
-
length
(
$s
)).
$s
;
}
sub
cpad {
my
(
$s
,
$l
,
$p
)=
@_
;
$p
=
' '
if
@_
<3 or !
length
(
$p
);
my
$ls
=
length
(
$s
);
return
substr
(
$s
,0,
$l
)
if
$l
<
$ls
;
$p
=
$p
x ((
$l
-
$ls
+2)/
length
(
$p
));
substr
(
$p
, 0, (
$l
-
$ls
)/2) .
$s
.
substr
(
$p
, 0, (
$l
-
$ls
+1)/2);
}
sub
cpad_old {
my
(
$s
,
$l
,
$p
)=
@_
;
$p
=
' '
if
!
length
(
$p
);
return
substr
(
$s
,0,
$l
)
if
$l
<
length
(
$s
);
my
$i
=0;
while
(
$l
>
length
(
$s
)){
my
$pc
=
substr
(
$p
,(
$i
==
int
(
$i
)?1:-1)*(
$i
%length
(
$p
)),1);
$i
==
int
(
$i
) ? (
$s
.=
$pc
) : (
$s
=
$pc
.
$s
);
$i
+=1/2;
}
$s
;
}
=head2 trigram
B<Input:> A string (i.e. a name). And an optional x (see example 2)
224422452246224722482249225022512252225322542255225622572258225922602261226222632264=head2 chars
chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
=cut
sub
trigram { sliding(
$_
[0],
$_
[1]||3) }
sub
sliding {
my
(
$s
,
$w
)=
@_
;
return
map
substr
(
$s
,
$_
,
$w
), 0..
length
(
$s
)-
$w
if
!
ref
(
$s
);
return
map
[
@$s
[
$_
..
$_
+
$w
-1]], 0..
@$s
-
$w
if
ref
(
$s
) eq
'ARRAY'
;
}
sub
chunks {
my
(
$s
,
$w
)=
@_
;
return
$s
=~/(.{1,
$w
})/g
if
!
ref
(
$s
);
return
map
[
@$s
[
$_
*$w
.. min(
$_
*$w
+
$w
-1,
$#$s
)]], 0..
$#$s
/
$w
if
ref
(
$s
) eq
'ARRAY'
;
}
sub
chars {
split
//,
shift
}
231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344
defined
$til
?
$str
=~s/
$fra
/
$til
/g :
$str
=~s/
$fra
//g;
}
return
$str
;
}
=head1 ARRAYS
=head2 subarr
The equivalent of C<substr> on arrays or C<splice> without changing the array.
Input: 1) array or arrayref, 2) offset and optionally 3) length. Without a
third argument, subarr returns the rest of the array.
@top10 = subarr( @array, 0, 10); # first 10
@last_two = subarr( @array, -2, 2); # last 2
@last_two = subarr( $array_ref, -2); # also last 2
@last_six = subarr $array_ref, -6; # parens are optional
The same can be obtained from C<< @array[$from..$to] >> but that dont work the
same way with negative offsets and boundary control of length.
=cut
#Todo: sjekk paastand over
#sub subarr(+$;$) { #perl>=5.14 # t/35_subarr.t
sub
subarr {
#perl<5.14
my
(
$a
,
$o
,
$l
)=
@_
;
$o
=
@$a
+
$o
if
$o
<0;
2347234823492350235123522353235423552356235723582359236023612362236323642365236623672368
$l
=
@$a
-
$o
if
@_
<3;
croak
if
$l
<0;
$l
=
@$a
-
$o
if
$l
>
@$a
-
$o
;
@$a
[
$o
..
$o
+
$l
-1];
}
=head2 min
Returns the smallest number in a list. Undef is ignored.
@lengths=(2,3,5,2,10,undef,5,4);
$shortest = min(@lengths); # returns 2
Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C<max()>, except of course C<< > >> is used instead.
min(3,4,5) # 3
min(3,4,5,undef) # 3
min(3,4,5,'') # returns the empty string
=head2 max
Returns the largest number in a list. Undef is ignored.
251025112512251325142515251625172518251925202521252225232524252525262527252825292530either the same (or similar) email or phone number or zip code and similar enough
names are going on the list of probable doubles.
*) Todo: deal
with
initials better, should be higher than 0.78
=cut
sub sim_perm {
require String::Similarity;
my($s1,$s2)=map {s/^\s*(.+?)\s*$/$1/;$_} map upper($_), @_; #/r v5.14
croak if !length($s1) or !length($s2);
my $max;
for(cart([permutations(split(/[\s,]+/,$s1))],
[permutations(split(/[\s,]+/,$s2))])) {
my($n1,$n2)=@$_;
if(@$n1>@$n2){ pop@$n1 while @$n1>@$n2 }
else { pop@$n2 while @$n1<@$n2 }
my($str1,$str2)=map join(" ",@$_),($n1,$n2);
if(defined $max){
my $sim=String::Similarity::similarity($str1,$str2,$max);
$max=$sim if $sim>$max;
272927302731273227332734273527362737273827392740274127422743274427452746274727482749
return
wantarray
?
@sort
:
$sort
[
$rank
-1];
}
sub
rankstr {
wantarray
?(rank(
@_
,
sub
{
$_
[0]cmp
$_
[1]})):rank(
@_
,
sub
{
$_
[0]cmp
$_
[1]})}
=head2 egrep
Extended grep.
Works like L<grep> but with more insight: local vars $i, $n, $prev, $next, $prevr and $nextr are available:
$i is the current index, starts with 0, ends with the length of the input array minus one
$n is the current element number, starts with 1, $n = $i + 1
$prev is the previous value (undef if current is first)
$next is the next value (undef if current is last)
$prevr is the previous value, rotated so that the previous of the first element is the last element
$nextr is the next value, rotated so that the next of the last element is the first element
29052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930
%hash
= (
T
=>[
'These'
,
'the'
,
'this'
],
A
=>[
'are'
,
'array'
],
O
=>[
'of'
],
W
=>[
'words'
] )
=head2 parta
Like L<parth> but returns an array of lists where the predicate returns an index number.
my @a = parta { length } qw/These are the words of this array/;
Result:
@a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
=cut
sub
part (&@) {
my
(
$c
,
@r
)=(
shift
,[],[]);
push
@{
$r
[
&$c
?0:1 ] },
$_
for
@_
;
@r
}
sub
parth (&@) {
my
(
$c
,
%r
)=(
shift
);
push
@{
$r
{
&$c
} },
$_
for
@_
;
%r
}
sub
parta (&@) {
my
(
$c
,
@r
)=(
shift
);
push
@{
$r
[
&$c
] },
$_
for
@_
;
@r
}
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
=head2 refa
302030213022302330243025302630273028302930303031303230333034303530363037303830393040
: croak(
"eachr needs hashref or arrayref got '"
.
ref
(
$_
[0]).
"'"
) }
sub
joinr {
join
(
shift
(),@{
shift
()})}
#sub mapr # som scala: hvis map faar subref se kalles den sub paa hvert elem og resultatet returneres
#sub eachr { each(%{shift()}) }
=head2 pile
B<Input:> a pile size s and a list
B<Output:> A list of lists of length s or the length of the remainer in
the last list. Piles together the input list in lists of the given size.
my @list=(1,2,3,4,5,6,7,8,9,10);
my @piles = pile(3, @list ); # ([1,2,3], [4,5,6], [7,8,9], [10])
my $i=0;
my @piles = parta {$i++/3} @list; # same as above pile(3, @list)
=cut
sub
pile {
my
$size
=
shift
;
my
@r
;
for
(
@_
){
push
@r
,[]
if
!
@r
or 0+@{
$r
[-1]}>=
$size
;
push
@{
$r
[-1]},
$_
}
@r
}
314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177my
@col
=
sort
keys
%col
;
my
@colerr
=
grep
!/^[a-z]\w+$/i,
@col
;
croak
"Invalid column name(s): @colerr"
if
@colerr
and !
$conf
{fix_colnames};
my
(
%t
,
%tdb
);
for
my
$c
(
@col
){
my
(
$l
,
$s
,
$p
,
$nn
,
%ant
,
$t
)=(0,0,0,0);
for
my
$r
(
@$aoh
){
my
$v
=
$$r
{
$c
};
next
if
!
defined
$v
or
$v
!~/\S/;
$nn
++;
$l
=
length
(
$v
)
if
length
(
$v
)>
$l
;
no
warnings
'uninitialized'
;
if
(
$v
=~/^(18|19|20)\d\d(0[1-9]|1[0-2])(0[1-9]|1\d|2\d|3[01])-?\d\d:?\d\d:?\d\d$/ and
$conf
{date}){
$ant
{date}++;
next
;
}
elsif
(
$v
=~/^\s*[-+]?(\d*)(\.\d+)?([Ee]\-?\d+)?\s*$/ and
length
(
"$1$2"
) and
$conf
{number}){
$ant
{number}++;
$s
=
length
(
"$1.$2"
)
if
length
(
"$1.$2"
)>
$s
;
#hm
$p
=
length
($2)-1
if
$2 and
length
($2)-1>
$p
;
next
;
}
else
{
$ant
{varchar}++;
}
}
$t
||=
'varchar'
if
$ant
{varchar} or
$ant
{number} and
$ant
{date};
$t
||=
'number'
if
$ant
{number};
$t
||=
'date'
if
$ant
{date};
$t
||=
'varchar'
;
#hm
318131823183318431853186318731883189319031913192319331943195319631973198319932003201
$tdb
=
"$conf{$t}($l)"
if
$t
eq
'varchar'
;
$tdb
=
"$conf{$t}($s)"
if
$t
eq
'number'
and
$p
==0;
$tdb
=
"$conf{$t}($s,$p)"
if
$t
eq
'number'
and
$p
>0 and ++
$s
;
$tdb
.=
" not null"
if
$nn
== 0+
@$aoh
;
$t
{
$c
}=
$t
;
$tdb
{
$c
}=
$tdb
;
}
my
$sql
;
$sql
=
"create table $conf{name} ("
.
join
(
","
,
map
sprintf
(
"\n %-30s %s"
,
do
{s/\W+//g;
$_
},
$tdb
{
$_
}),
@col
).
"\n);\n\n"
if
$conf
{create};
my
$val
=
sub
{
my
(
$v
,
$t
)=
@_
;
defined
$v
or
$v
=
""
;!
length
(
$v
)?
'null'
:
$t
eq
'number'
?
$v
:
"'"
.repl(
$v
,
"\'"
,
"''"
).
"'"
};
for
my
$r
(
@$aoh
){
my
$v
=
join
","
,
map
&$val
(
$$r
{
$_
},
$t
{
$_
}),
@col
;
$sql
.=
"insert into $conf{name} values ($v);\n"
;
}
$sql
=
"drop table $conf{name};\n\n$sql"
if
$conf
{drop}==1;
$sql
=
"drop table if exists $conf{name};\n\n$sql"
if
$conf
{drop}>=2;
$sql
=
"$conf{begin}\n$sql"
if
$conf
{begin};
$sql
.=
$conf
{end};
$sql
;
}
374037413742374337443745374637473748374937503751375237533754375537563757375837593760
return
@e
;
}
}
=head2 pwgen
Generates random passwords.
B<Input:> 0-n args
* First arg: length of password(s), default 8
* Second arg: number of passwords, default 1
* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
...meaning the password should:
37643765376637673768376937703771377237733774377537763777377837793780378137823783To keep the
default
requirement-
sub
but add additional ones just set the fourth arg to false/
undef
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
own C<
rand
()> internally.
C<<
$Acme::Tools::Pwgen_max_sec
>> and C<<
$Acme::Tools::Pwgen_max_trials
>> can be set to adjust
for
how long
pwgen tries to find a password. Defaults
for
those are 0.01 and 10000.
Whenever one of the two limits is reached, a first generates a croak.
Examples:
my
$pw
=pwgen();
# a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
my
$pw
=pwgen(12);
# a random 12 chars password A-Z a-z 0-9 ,-./&%!_
my
@pw
=pwgen(0,10);
# 10 random 8 chars passwords, containing the same possible chars
my
@pw
=pwgen(0,1000,
'A-Z'
);
# 1000 random 8 chars passwords containing just uppercase letters from A to Z
pwgen(3);
# dies, defaults require chars in each of 4 group (see above)
pwgen(5,1,
'A-C0-9'
,
qr/^\D{3}\d{2}$/
);
# a 5 char string starting with three A, B or Cs and endring with two digits
pwgen(5,1,
'ABC0-9'
,
sub
{/^\D{3}\d{2}$/});
# same as above
Examples of adding additional requirements to the
default
ones:
379737983799380038013802380338043805380638073808380938103811381238133814381538163817our
$Pwgen_sec
=0;
#seconds used in last call to pwgen()
our
$Pwgen_trials
=0;
#trials in last call to pwgen()
sub
pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&
%_
!]/}
sub
pwgen {
my
(
$len
,
$num
,
$chars
,
@req
)=
@_
;
$len
||=8;
$num
||=1;
$chars
||=
'A-Za-z0-9,-./&%_!'
;
$req
[0]||=\
&pwgendefreq
if
!
$_
[2];
$chars
=~s/([
$_
])-([
$_
])/
join
(
""
,
"$1"
..
"$2"
)/eg
for
(
'a-z'
,
'A-Z'
,
'0-9'
);
my
(
$c
,
$t
,
@pw
,
$d
)=(
length
(
$chars
),time_fp());
(
$Pwgen_trials
,
$Pwgen_sec
)=(0,0);
TRIAL:
while
(
@pw
<
$num
){
croak
"pwgen timeout after $Pwgen_trials trials"
if
++
$Pwgen_trials
>=
$Pwgen_max_trials
or (
$d
=time_fp()-
$t
) >
$Pwgen_max_sec
*$num
and
$d
!~/^\d+$/;
#jic int from time_fp
my
$pw
=
join
""
,
map
substr
(
$chars
,
rand
(
$c
),1),1..
$len
;
for
my
$r
(
@req
){
if
(
ref
(
$r
) eq
'CODE'
){
local
$_
=
$pw
;
&$r
() or
next
TRIAL }
410941104111411241134114411541164117411841194120412141224123412441254126412741284129=head1 COMPRESSION
L</zipb64>, L</unzipb64>, L</zipbin>, L</unzipbin>, L</gzip>, and L</gunzip>
compresses and uncompresses strings to save space in disk, memory,
database or network transfer. Trades time for space. (Beware of wormholes)
=head2 zipb64
Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
No known limit on input length, several MB has been tested, as long as you've got the RAM...
B<Input:> One or two strings.
First argument: The string to be compressed.
Second argument is optional: A I<dictionary> string.
B<Output:> a base64-kodet string of the compressed input.
The use of an optional I<dictionary> string will result in an even
413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183The returned string is base64 encoded. That is, the output is 33%
larger than it
has
to be. The advantage is that this string more
easily can be stored in a database (without the hassles of CLOB/BLOB)
or perhaps easier transfer in http POST requests (it still needs some
url-encoding, normally). See L</zipbin> and L</unzipbin>
for
the
same without base 64 encoding.
Example 1, normal compression without dictionary:
$txt
=
"Test av komprimering, hva skjer? "
x 10;
# ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
length
(
$txt
),
" bytes input!\n"
;
# prints 330
$zip
= zipb64(
$txt
);
# compresses
length
(
$zip
),
" bytes output!\n"
;
# prints 65
$zip
;
# prints the base64 string ("noise")
$output
=unzipb64(
$zip
);
# decompresses
"Hurra\n"
if
$output
eq
$txt
;
# prints Hurra if everything went well
length
(
$output
),
"\n"
;
# prints 330
Example 2, same compression, now
with
dictionary:
$txt
=
"Test av komprimering, hva skjer? "
x 10;
# Same original string as above
$dict
=
"Testing av kompresjon, hva vil skje?"
;
# dictionary with certain similarities
# of the text to be compressed
$zip2
= zipb64(
$txt
,
$dict
);
# compressing with $dict as dictionary
length
(
$zip2
),
" bytes output!\n"
;
# prints 49, which is less than 65 in ex. 1 above
$output
=unzipb64(
$zip2
,
$dict
);
# uses $dict in the decompressions too
"Hurra\n"
if
$output
eq
$txt
;
# prints Hurra if everything went well
Example 3, dictionary = string to be compressed: (out of curiosity)
$txt
=
"Test av komprimering, hva skjer? "
x 10;
# Same original string as above
$zip3
= zipb64(
$txt
,
$txt
);
# hmm
length
(
$zip3
),
" bytes output!\n"
;
# prints 25
"Hurra\n"
if
unzipb64(
$zip3
,
$txt
) eq
$txt
;
# hipp hipp ...
zipb64() and zipbin() is really just wrappers
around
L<Compress::Zlib> and C<inflate()> & co there.
=cut
sub zipb64 {
require MIME::Base64;
return MIME::Base64::encode_base64(zipbin(@_));
}
433243334334433543364337433843394340434143424343434443454346434743484349435043514352
ipnum(
"www.uio.no"
);
# prints 129.240.13.152
Does internal memoization via the hash C<
%Acme::Tools::IPNUM_memo
>.
=cut
our %IPNUM_memo;
sub ipnum {
my $ipaddr=shift;
#croak "No $ipaddr" if !length($ipaddr);
return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
my $h=gethostbyname($ipaddr);
#croak "No ipnum for $ipaddr" if !$h;
return if !defined $h;
my $ipnum = join(".",unpack("C4",$h));
$IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
return $IPNUM_memo{$ipaddr};
}
our $Ipnum_errmsg;
44384439444044414442444344444445444644474448444944504451445244534454445544564457sub
webparams {
my
$query
=
shift
();
$query
=
$ENV
{QUERY_STRING}
if
!
defined
$query
;
if
(!
defined
$query
and
$ENV
{REQUEST_METHOD} eq
"POST"
){
read
(STDIN,
$query
,
$ENV
{CONTENT_LENGTH});
$ENV
{QUERY_STRING}=
$query
;
}
my
%R
;
for
(
split
(
"&"
,
$query
)){
next
if
!
length
(
$_
);
my
(
$nkl
,
$verdi
)=
map
urldec(
$_
),
split
(
"="
,
$_
,2);
$R
{
$nkl
}=
exists
$R
{
$nkl
}?
"$R{$nkl},$verdi"
:
$verdi
;
}
return
%R
;
}
=head2 urlenc
Input: a string
480448054806480748084809481048114812481348144815481648174818481948204821482248234824=head2 username
Returns the current linux/unix username, for example the string root
print username(); #just (getpwuid($<))[0] but more readable perhaps
=cut
sub
basename {
my
(
$f
,
$s
)=(
@_
,
''
);
$s
=
quotemeta
(
$s
)
if
!
ref
(
$s
);
$f
=~m,^(.*/)?([^/]*?)(
$s
)?$,;$2 }
sub
dirname {
$_
[0]=~m,^(.*)/,;
defined
($1) &&
length
($1) ? $1 :
'.'
}
sub
username { (
getpwuid
($<))[0] }
=head2 wipe
Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
B<Input:>
* Arg 1: A filename
* Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
* Optional arg 3: keep (true/false), wipe() but no delete of file
501250135014501550165017501850195020502150225023502450255026502750285029503050315032
my
(
$section
,
@l
)=(
''
,
split
"\n"
,
$conf
);
while
(
@l
) {
my
$l
=
shift
@l
;
if
(
$l
=~/^\s*\[\s*(.*?)\s*\]/ ) {
$section
=$1;
$$hr
{$1}||={};
}
elsif
(
$l
=~/^\s*([^\:\=]+?)\s*[:=]\s*(.*?)\s*$/ ) {
my
$ml
=
sub
{
my
$v
=
shift
;
$v
.=
"\n"
.
shift
@l
while
$v
=~/^\{[^\}]*$/&&
@l
;
$v
=~s/^\{(.*)\}\s*$/$1/s;
$v
=~s,\\
#,#,g;$v};
my
$v
=
&$ml
($2);
$$hr
{
$section
}{$1}=
$v
if
length
(
$section
) or
$Read_conf_empty_section
;
$$hr
{$1}=
$v
if
!
length
(
$section
);
}
}
%$hr
;
}
# my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
# s,<INCLUDE ([^>]+)>,"".readfile(&$incfn($1)),eg; #todo
=head2 openstr
52215222522352245225522652275228522952305231523252335234523552365237523852395240
'Dg'
=> [6,
'SØn'
,
'Man'
,
'Tir'
,
'Ons'
,
'Tor'
,
'Fre'
,
'Lør'
],
'dg'
=> [6,
'søn'
,
'man'
,
'tir'
,
'ons'
,
'tor'
,
'fre'
,
'lør'
],
);
my
$_tms_inited
=0;
sub
tms_init {
return
if
$_tms_inited
++;
for
(
qw(MAANED Maaned maaned MAAN Maan maan)
,
'MAANE.'
,
'Maane.'
,
'maane.'
){
$Tms_str
{
$_
}=
$Tms_str
{replace(
$_
,
"aa"
,
"Ã¥"
,
"AA"
,
"Ã…"
)};
}
$Tms_pattern
=
join
(
"|"
,
map
{
quotemeta
(
$_
)}
sort
{
length
(
$b
)<=>
length
(
$a
)}
keys
%Tms_str
);
#without sort "måned" could be "mared" because "mån"=>"mar"
}
sub
totime {
}
=head2 s2t
5626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677if
it can be divided by 400.
=cut
sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool
our %ldist_cache;
sub ldist {
my($s,$t,$l) = @_;
return length($t) if !$s;
return length($s) if !$t;
%ldist_cache=() if !$l and 1000<0+%ldist_cache;
$ldist_cache{$s,$t} ||=
do {
my($s1,$t1) = ( substr($s,1), substr($t,1) );
substr($s,0,1) eq substr($t,0,1)
? ldist($s1,$t1)
: 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
};
}
=head1 OTHER
=head2 nvl
The I<no value> function (or I<null value> function)
C<nvl()> takes two or more arguments. (Oracles nvl-function take just two)
Returns the value of the first input argument with length() > 0.
Return I<undef> if there is no such input argument.
In perl 5.10 and perl 6 this will most often be easier with the C< //
> operator, although C<nvl()> and C<< // >> treats empty strings C<"">
differently. Sub nvl here considers empty strings and undef the same.
=cut
sub
nvl {
return
$_
[0]
if
defined
$_
[0] and
length
(
$_
[0]) or
@_
==1;
return
$_
[1]
if
@_
==2;
return
nvl(
@_
[1..
$#_
])
if
@_
>2;
return
undef
;
}
=head2 decode_num
See L</decode>.
=head2 decode
5861586258635864586558665867586858695870587158725873587458755876587758785879588058815 Banking and financial
6 Merchandizing and banking
7 Petroleum
8 Telecommunications and other industry assignments
9 National assignment
...although this
has
no
meaning to C<Acme::Tools::ccn_ok()>.
The first six digits is I<Issuer Identifier>, that is the bank
(probably). The rest in the
"account number"
, except the
last
digits,
which is the control digit. Max
length
on credit card numbers are 19
digits.
=cut
sub ccn_ok {
my $ccn=shift(); #credit card number
$ccn=~s/\D+//g;
if(KID_ok($ccn)){
return "MasterCard" if $ccn=~/^5[1-5]\d{14}$/;
return "Visa" if $ccn=~/^4\d{12}(?:\d{3})?$/;
6559656065616562656365646565656665676568656965706571657265736574657565766577657865796580#print serialize(\%opt,'opt');
#print serialize(\$opt_pro,'opt_pro');
my
$antned
=0+
@vertikalefelt
;
my
$bakerst
=-1+@{
$$tabref
[0]};
my
(
%h
,
%feltfinnes
,
%sum
);
#print "Bakerst<$bakerst>\n";
for
(
@$tabref
){
my
$rad
=
join
($;,
@$_
[0..(
$antned
-1)]);
my
$felt
=
join
($;,
@$_
[
$antned
..(
$bakerst
-1)]);
my
$verdi
=
$$_
[
$bakerst
];
length
(
$rad
) or
$rad
=
' '
;
length
(
$felt
) or
$felt
=
' '
;
$h
{
$rad
}{
$felt
}=
$verdi
;
$h
{
$rad
}{
"%$felt"
}=
$verdi
;
if
(
$opt_sum
or
defined
$opt_pro
){
$h
{
$rad
}{Sum}+=
$verdi
;
$sum
{
$felt
}+=
$verdi
;
$sum
{Sum}+=
$verdi
;
}
$feltfinnes
{
$felt
}++;
$feltfinnes
{
"%$felt"
}++
if
$opt_pro
;
}
6698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725
my
$cell
=
$_
;
$width
[
$j
]||=0;
if
(
$nodup_rad
and
$i
>0 and
$$tab
[
$i
][
$j
] eq
$$tab
[
$i
-1][
$j
] || (
$nodup_rad
=0)){
$cell
=
$nodup
==1?
""
:
$nodup
;
$nodup
[
$i
][
$j
]=1;
}
else
{
my
$height
=0;
my
$wider
;
no
warnings;
$not_empty
[
$j
]=1
if
!
$head
&&
length
(
$cell
)>0;
for
(
split
(
"\n"
,
$cell
)){
$wider
=/<input.+type=text.+size=(\d+)/i?$1:0;
#hm
s/<[^>]+>//g;
$height
++;
s/
>
;/>/g;
s/
<
;/</g;
$width
[
$j
]=
length
(
$_
)+1+
$wider
if
length
(
$_
)+1+
$wider
>
$width
[
$j
];
$left
[
$j
]=1
if
$_
&& !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !
$head
;
}
if
(
$height
>1 && !
$no_multiline_space
){
$height
++
if
!
$head
;
$height
[
$i
-1]++
if
$i
>1 &&
$height
[
$i
-1]==1;
}
$height
[
$i
]=
$height
if
$height
>
$height
[
$i
];
}
$j
++;
}
67426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677767786779for
my
$x
(0..
$i
){
if
(
$$tab
[
$x
] eq
'-'
){
my
@tegn
=
map
{
$$tab
[
$x
-1][
$_
]=~/\S/?
"-"
:
" "
} (0..
$j
);
$tabout
[
$row_start_line
]=
join
(
" "
,
map
{
$tegn
[
$_
] x (
$width
[
$_
]-1)} (0..
$j
));
}
else
{
for
my
$y
(0..
$j
){
next
if
$remove_empty
&& !
$not_empty
[
$y
];
no
warnings;
my
@cell
= !
$header_last
&
&$nodup
&
&$nodup
[
$x
][
$y
]
? (
$nodup
>0?():((
" "
x ((
$width
[
$y
]-
length
(
$nodup
))/2)).
$nodup
))
:
split
(
"\n"
,
$$tab
[
$x
][
$y
]);
for
(0..(
$height
[
$x
]-1)){
my
$line
=
$row_start_line
+
$_
;
my
$txt
=
shift
(
@cell
);
$txt
=
''
if
!
defined
$txt
;
$txt
=
sprintf
(
"%*s"
,
$width
[
$y
]-1,
$txt
)
if
length
(
$txt
)>0 && !
$left
[
$y
] && (
$x
>0 ||
$no_header_line
);
$tabout
[
$line
].=
$txt
;
if
(
$y
==
$j
){
$tabout
[
$line
]=~s/\s+$//;
}
else
{
my
$wider
;
$wider
=
$txt
=~/<input.+type=text.+size=(\d+)/i?1+$1:0;
$txt
=~s/<[^>]+>//g;
$txt
=~s/
>
;/>/g;
$txt
=~s/
<
;/</g;
$tabout
[
$line
].=
' '
x (
$width
[
$y
]-
length
(
$txt
)-
$wider
);
}
}
}
}
$row_start_line
+=
$height
[
$x
];
#--lage streker?
if
(not
$no_header_line
){
if
(
$x
==0){
for
my
$y
(0..
$j
){
703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060Toyota SUM 56
Volvo SUM 18
Nissan SUM 36
Tesla SUM 8
SUM SUM 56 100%
=cut
sub cnttbl {
my $hr=shift;
my $maxlen=max(3,map length($_),keys%$hr);
join"",ref((values%$hr)[0])
?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
map [$_,cnttbl($$hr{$_})],
sort keys%$hr }
:do{ my $sum=sum(values%$hr);
my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
map sprintf($fmt,@$_,100*$$_[1]/$sum),
(map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
(['SUM',$sum]) }
}
=head2 ref_deep
NOT IMPLEMENTED
Same as ref, but goes deeper.
714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189(TODO: alfa...and more docs needed)
=cut
our $Edcursor;
sub ed {
my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
return $$s=ed($$s,$cs,$p,$buf) if ref($s);
my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
while(length($cs)){
my $n = 0;
my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
$p = curb($p||0,0,length($s));
if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
if ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
elsif($c =~ /^"(.+)"$/) { &$add($1) }
elsif($c =~ /^\\(.)/) { &$add($1) }
elsif($c =~ /^S(.+)R/) { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
elsif($c =~ /^M(\d+)/) { $t=$1; next }
elsif($c eq 'F') { $p++ }
elsif($c eq 'B') { $p-- }
elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
elsif($c eq 'D') { substr($s,$p,1)='' }
elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
elsif($c eq '-') { substr($s,--$p,1)='' if $p }
elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
elsif($c eq 'Y') { &$add($buf) }
elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
elsif($c eq '<') { $p=0 }
elsif($c eq '>') { $p=length($s) }
elsif($c eq 'T') { $sh=1 }
elsif($c eq 'C') { $cl^=1 }
elsif($c eq '{') { $m=1; @m=() }
elsif($c eq '}') { $m=0 }
elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
elsif($c eq '""'){ &$add('"') }
else { croak "ed: Unknown cmd '$c'\n" }
push @m, $c if $m and $c ne '{';
#warn serialize([$c,$m,$cs],'d');
}
73087309731073117312731373147315731673177318731973207321732273237324732573267327=head1 BLOOM FILTER SUBROUTINES
Bloom filters can be used to check whether an element (a string) is a
member of a large set using much less memory or disk space than other
data structures. Trading speed and accuracy for memory usage. While
risking false positives, Bloom filters have a very strong space
advantage over other data structures for representing sets.
In the example below, a set of 100000 phone numbers (or any string of
any length) can be "stored" in just 91230 bytes if you accept that you
can only check the data structure for existence of a string and accept
false positives with an error rate of 0.03 (that is three percent, error
rates are given in numbers larger than 0 and smaller than 1).
You can not retrieve the strings in the set without using "brute
force" methods and even then you would get slightly more strings than
you put in because of the error rate inaccuracy.
Bloom Filters have many uses.
740874097410741174127413741474157416741774187419742074217422742374247425742674277428
capacity
=> 10000000,
counting_bits
=> 4
# power of 2, that is 2, 4, 8, 16 or 32
);
bfadd(
$bf
,
@unique_phone_numbers
);
bfdelete(
$bf
,
@unique_phone_numbers
);
Example: examine the frequency of the counters
with
4 bit counters and 4 million
keys
:
my
$bf
=bfinit(
error_rate
=>0.001,
capacity
=>4e6,
counting_bits
=>4 );
bfadd(
$bf
,[1e3
*$_
+1 .. 1e3*(
$_
+1)])
for
0..4000-1;
# adding 4 million keys one thousand at a time
my
%c
;
$c
{
vec
(
$$bf
{filter},
$_
,
$$bf
{counting_bits})}++
for
0..
$$bf
{filterlength}-1;
printf
"%8d counters = %d\n"
,
$c
{
$_
},
$_
for
sort
{
$a
<=>
$b
}
keys
%c
;
The output:
28689562 counters = 0
19947673 counters = 1
6941082 counters = 2
1608250 counters = 3
280107 counters = 4
38859 counters = 5
74857486748774887489749074917492749374947495749674977498749975007501750275037504Prints yes since C<bfgrep> now returns an array of all the 1000 elements.
Croaks
if
the filters are of different dimensions.
Works
for
counting bloom filters as well (C<<
counting_bits
=>4 >> e.g.)
=head2 bfsum
Returns the number of 1's in the filter.
my $percent=100*bfsum($bf)/$$bf{filterlength};
printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
Sums the counters for counting bloom filters (much slower than for non counting).
=head2 bfdimensions
Input, two numeric arguments: Capacity and error_rate.
Outputs an array of two numbers: m and k.
76597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705
max_hashfuncs
=> 100,
counting_bits
=> 1,
#default: not counting filter
adaptive
=> 0,
%arg
,
#arguments
key_count
=> 0,
overflow
=> {},
version
=>
$Acme::Tools::VERSION
,
};
croak
"Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1"
if
$$bf
{error_rate}<=0 or
$$bf
{error_rate}>=1;
@$bf
{
'min_hashfuncs'
,
'max_hashfuncs'
}=(
map
$arg
{hashfuncs},1..2)
if
$arg
{hashfuncs};
@$bf
{
'filterlength'
,
'hashfuncs'
}=bfdimensions(
$bf
);
#m and k
$$bf
{filter}=
pack
(
"b*"
,
'0'
x (
$$bf
{filterlength}*
$$bf
{counting_bits}) );
#hm x new empty filter
$$bf
{
unpack
}=
$$bf
{filterlength}<=2**16/4 ?
"n*"
# /4 alleviates skewing if m just slightly < 2**x
:
$$bf
{filterlength}<=2**32/4 ?
"N*"
:
"Q*"
;
bfadd(
$bf
,@{
$arg
{
keys
}})
if
$arg
{
keys
};
return
$bf
;
}
sub
bfaddbf {
my
(
$bf
,
$bf2
)=
@_
;
my
$differror
=
join
"\n"
,
map
"Property $_ differs ($$bf{$_} vs $$bf2{$_})"
,
grep
$$bf
{
$_
} ne
$$bf2
{
$_
},
qw/capacity counting_bits adaptive hashfuncs filterlength/
;
#not error_rate
croak
$differror
if
$differror
;
croak
"Can not add adaptive bloom filters"
if
$$bf
{adaptive};
my
$count
=
$$bf
{key_count}+
$$bf2
{key_count};
croak
"Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
if
$count
>
$$bf
{capacity};
$$bf
{key_count}+=
$$bf2
{key_count};
if
(
$$bf
{counting_bits}==1){
$$bf
{filter} |=
$$bf2
{filter};
#$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
}
else
{
my
$cb
=
$$bf
{counting_bits};
for
(0..
$$bf
{filterlength}-1){
my
$sum
=
vec
(
$$bf
{filter},
$_
,
$cb
)+
vec
(
$$bf2
{filter},
$_
,
$cb
);
if
(
$sum
>2*
*$cb
-1 ){
$sum
=2*
*$cb
-1;
$$bf
{overflow}{
$_
}++;
}
vec
(
$$bf
{filter},
$_
,
$cb
)=
$sum
;
no
warnings;
$$bf
{overflow}{
$_
}+=
$$bf2
{overflow}{
$_
}
77077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735
and croak
"Too many overflows, concider doubling counting_bits from $cb to "
.(2
*$cb
)
if
exists
$$bf2
{overflow}{
$_
};
}
}
return
$bf
;
#for convenience
}
sub
bfsum {
my
(
$bf
)=
@_
;
return
unpack
(
"%32b*"
,
$$bf
{filter})
if
$$bf
{counting_bits}==1;
my
(
$sum
,
$cb
)=(0,
$$bf
{counting_bits});
$sum
+=
vec
(
$$bf
{filter},
$_
,
$cb
)
for
0..
$$bf
{filterlength}-1;
return
$sum
;
}
sub
bfadd {
my
(
$bf
,
@keys
)=
@_
;
return
if
!
@keys
;
my
$keysref
=
@keys
==1 &&
ref
(
$keys
[0]) eq
'ARRAY'
?
$keys
[0] : \
@keys
;
my
(
$m
,
$k
,
$up
,
$n
,
$cb
,
$adaptive
)=
@$bf
{
'filterlength'
,
'hashfuncs'
,
'unpack'
,
'capacity'
,
'counting_bits'
,
'adaptive'
};
for
(
@$keysref
){
#croak "Key should be scalar" if ref($_);
$$bf
{key_count} >=
$n
and croak
"Exceeded filter capacity $n"
or
$$bf
{key_count}++;
my
@h
;
push
@h
,
unpack
$up
, Digest::MD5::md5(
$_
,0+
@h
)
while
@h
<
$k
;
if
(
$cb
==1 and !
$adaptive
) {
# normal bloom filter
vec
(
$$bf
{filter},
$h
[
$_
] %
$m
, 1) = 1
for
0..
$k
-1;
}
elsif
(
$cb
>1) {
# counting bloom filter
for
(0..
$k
-1){
my
$pos
=
$h
[
$_
] %
$m
;
773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767777777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834
if
(
$c
==0){
vec
(
$$bf
{filter},
$pos
,
$cb
) = -1;
$$bf
{overflow}{
$pos
}++
and
keys
(%{
$$bf
{overflow}})>10
#hmm, arbitrary limit
and croak
"Too many overflows, concider doubling counting_bits from $cb to "
.(2
*$cb
);
}
}
}
elsif
(
$adaptive
) {
# adaptive bloom filter
my
(
$i
,
$key
,
$bit
)=(0+
@h
,
$_
);
for
(0..
$$bf
{filterlength}-1){
$i
+=
push
(
@h
,
unpack
$up
, Digest::MD5::md5(
$key
,
$i
))
if
!
@h
;
my
$pos
=
shift
(
@h
) %
$m
;
$bit
=
vec
(
$$bf
{filter},
$pos
, 1);
vec
(
$$bf
{filter},
$pos
, 1)=1;
last
if
$_
>=
$k
-1 and
$bit
==0;
}
}
else
{croak}
}
return
1;
}
sub
bfcheck {
my
(
$bf
,
@keys
)=
@_
;
return
if
!
@keys
;
my
$keysref
=
@keys
==1 &&
ref
(
$keys
[0]) eq
'ARRAY'
?
$keys
[0] : \
@keys
;
my
(
$m
,
$k
,
$up
,
$cb
,
$adaptive
)=
@$bf
{
'filterlength'
,
'hashfuncs'
,
'unpack'
,
'counting_bits'
,
'adaptive'
};
my
$wa
=
wantarray
();
if
(!
$adaptive
){
# normal bloom filter or counting bloom filter
return
map
{
my
$match
= 1;
# match if every bit is on
my
@h
;
push
@h
,
unpack
$up
, Digest::MD5::md5(
$_
,0+
@h
)
while
@h
<
$k
;
vec
(
$$bf
{filter},
$h
[
$_
] %
$m
,
$cb
) or
$match
=0 or
last
for
0..
$k
-1;
return
$match
if
!
$wa
;
$match
;
}
@$keysref
;
}
else
{
# adaptive bloom filter
return
map
{
my
(
$match
,
$i
,
$key
,
$bit
,
@h
)=(1,0,
$_
);
for
(0..
$$bf
{filterlength}-1){
$i
+=
push
(
@h
,
unpack
$up
, Digest::MD5::md5(
$key
,
$i
))
if
!
@h
;
my
$pos
=
shift
(
@h
) %
$m
;
$bit
=
vec
(
$$bf
{filter},
$pos
, 1);
$match
++
if
$_
>
$k
-1 and
$bit
==1;
$match
=0
if
$_
<=
$k
-1 and
$bit
==0;
last
if
$bit
==0;
}
return
$match
if
!
$wa
;
$match
;
}
@$keysref
;
}
}
sub
bfgrep {
# just a copy of bfcheck with map replaced by grep
my
(
$bf
,
@keys
)=
@_
;
return
if
!
@keys
;
my
$keysref
=
@keys
==1 &&
ref
(
$keys
[0]) eq
'ARRAY'
?
$keys
[0] : \
@keys
;
my
(
$m
,
$k
,
$up
,
$cb
)=
@$bf
{
'filterlength'
,
'hashfuncs'
,
'unpack'
,
'counting_bits'
};
return
grep
{
my
$match
= 1;
# match if every bit is on
my
@h
;
push
@h
,
unpack
$up
, Digest::MD5::md5(
$_
,0+
@h
)
while
@h
<
$k
;
vec
(
$$bf
{filter},
$h
[
$_
] %
$m
,
$cb
) or
$match
=0 or
last
for
0..
$k
-1;
$match
;
}
@$keysref
;
}
sub
bfgrepnot {
# just a copy of bfgrep with $match replaced by not $match
my
(
$bf
,
@keys
)=
@_
;
return
if
!
@keys
;
my
$keysref
=
@keys
==1 &&
ref
(
$keys
[0]) eq
'ARRAY'
?
$keys
[0] : \
@keys
;
my
(
$m
,
$k
,
$up
,
$cb
)=
@$bf
{
'filterlength'
,
'hashfuncs'
,
'unpack'
,
'counting_bits'
};
return
grep
{
my
$match
= 1;
# match if every bit is on
my
@h
;
push
@h
,
unpack
$up
, Digest::MD5::md5(
$_
,0+
@h
)
while
@h
<
$k
;
vec
(
$$bf
{filter},
$h
[
$_
] %
$m
,
$cb
) or
$match
=0 or
last
for
0..
$k
-1;
!
$match
;
}
@$keysref
;
}
sub
bfdelete {
my
(
$bf
,
@keys
)=
@_
;
return
if
!
@keys
;
my
$keysref
=
@keys
==1 &&
ref
(
$keys
[0]) eq
'ARRAY'
?
$keys
[0] : \
@keys
;
my
(
$m
,
$k
,
$up
,
$cb
)=
@$bf
{
'filterlength'
,
'hashfuncs'
,
'unpack'
,
'counting_bits'
};
croak
"Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)"
if
$cb
==1;
for
my
$key
(
@$keysref
){
my
@h
;
push
@h
,
unpack
$up
, Digest::MD5::md5(
$key
,0+
@h
)
while
@h
<
$k
;
$$bf
{key_count}==0 and croak
"Deleted all and then some"
or
$$bf
{key_count}--;
my
(
$ones
,
$croak
,
@pos
)=(0);
for
(0..
$k
-1){
my
$pos
=
$h
[
$_
] %
$m
;
my
$c
=
vec
(
$$bf
{filter},
$pos
,
$cb
);
vec
(
$$bf
{filter},
$pos
,
$cb
)=
$c
-1;
7928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970
my
$ext
=
shift
();
#or filename
croak
"todo: ext2mime not yet implemented"
;
#return "application/json";#feks
}
sub
base64 ($;$) {
#
if
($] >= 5.006) {
croak
"base64 failed: only defined for bytes"
if
bytes::
length
(
$_
[0]) >
length
(
$_
[0])
or $] >= 5.008 &&
$_
[0] =~ /[^\0-\xFF]/
}
my
$eol
=
defined
$_
[1]?
$_
[1]:
"\n"
;
my
$res
=
pack
(
"u"
,
$_
[0]);
$res
=~s/^.//mg;
$res
=~s/\n//g;
$res
=~
tr
|` -_|AA-Za-z0-9+/|;
my
$pad
=(3-
length
(
$_
[0])%3)%3;
$res
=~s/.{
$pad
}$/
'='
x
$pad
/e
if
$pad
;
$res
=~s/(.{1,76})/$1
$eol
/g
if
length
(
$eol
);
#todo !=76
$res
;
}
our
$Fix_unbase64
=0;
sub
unbase64 ($) {
my
$s
=
shift
;
$s
=~
tr
,0-9a-zA-Z+=/,,cd;
if
(
$Fix_unbase64
){
$s
.=
'='
while
length
(
$s
)%4 }
croak
"unbase64 failed: length "
.
length
(
$s
).
" not multiple of 4"
if
length
(
$s
)%4;
$s
=~s/=+$//;
$s
=~
tr
|A-Za-z0-9+/| -_|;
length
(
$s
) ?
unpack
(
"u"
,
join
''
,
map
(
chr
(32+
length
(
$_
)*3/4).
$_
,
$s
=~/(.{1,60})/gs)) :
""
;
}
=head1 COMMANDS
=head2 install_acme_command_tools
sudo perl -MAcme::Tools -e install_acme_command_tools
Wrote executable /usr/local/bin/conv
816881698170817181728173817481758176817781788179818081818182818381848185818681878188
my
@p
=
$o
{P}?(10,50,90):(50);
my
@m
=
@_
>0 ?
do
{
grep
$_
,
split
","
,
$xtime
{
$_
[0]}}
:
do
{
grep
$_
,
map
{
split
","
}
values
%xtime
};
my
@r
=percentile(\
@p
,
@m
);
@r
=(min(
@m
),
@r
,max(
@m
))
if
$o
{M}||
$o
{C}||
$o
{A};
@r
=
map
int
(
$_
),
@r
;
my
$fmt
=
$o
{t}?
'YYYY/MM/DD-MM:MI:SS'
:
'YYYY/MM/DD'
;
@r
=
map
tms(
$_
,
$fmt
),
@r
;
" "
.
join
(
" "
,
@r
);
};
my
$width
=max( 10,
grep
$_
,
map
length
(
$_
),
@e
);
@e
=
@e
[-10..-1]
if
$o
{t} and
@e
>10;
#-t tail
printf
(
"%-*s %8d $f %7.2f%%%s\n"
,
$width
,
$_
,
$c
{
$_
},
&$s
(
$b
{
$_
}),100
*$b
{
$_
}/
$bts
,
&$perc
(
$_
))
for
@e
;
printf
(
"%-*s %8d $f %7.2f%%%s\n"
,
$width
,
"Sum"
,
$cnt
,
&$s
(
$bts
),100,
&$perc
());
}
sub
cmd_resubst {
my
%o
;
my
$zo
=
"123456789e"
;
my
@argv
=opts(
"f:t:vno:gi$zo"
,\
%o
,
@_
);
if
(
exists
$o
{t}){
$o
{t}=~s,\\,\$, }
else
{
$o
{t}=
''
}
my
(
$i
,
$tc
,
$tbfr
,
$tbto
)=(0,0,0,0);
820182028203820482058206820782088209821082118212821382148215821682178218821982208221
$tc
+=
$c
;
close
(
$I
);
close
(
$O
);
chall(
$file
,
"$file.tmp$$"
) or croak
"ERR: chall $file\n"
if
!
$o
{n};
my
(
$bfr
,
$bto
)=(-s
$file
,-s
"$file.tmp$$"
);
unlink
$file
or croak
"ERR: cant rm $file\n"
;
my
$newfile
=
$o
{o}?repl(
$file
,
qr/\.(gz|bz2|xz)$/
i,
".$oext"
):
$file
;
rename
(
"$file.tmp$$"
,
$newfile
) or croak
"ERR: rename $file.tmp$$ -> $newfile failed\n"
;
if
(
$o
{v}){
my
$pr
=
$bfr
?100
*$bto
/
$bfr
:0;
printf
"%*d/%d %*s %7d =>%8d b (%2d%%) %s\n"
,
length
(0+
@argv
), ++
$i
, 0+
@argv
, -15,
"$tc/$c"
,
$bfr
,
$bto
,
$pr
,
$file
;
$tbfr
+=
$bfr
;
$tbto
+=
$bto
;
}
}
if
(
$o
{v} and
@argv
>1){
printf
"Replaces: %d Bytes before: %d After: %d Change: %.1f%%\n"
,
$tc
,
$tbfr
,
$tbto
,
$tbfr
?100*(
$tbto
-
$tbfr
)/
$tbfr
:0
}
$tc
;
}
84628463846484658466846784688469847084718472847384748475847684778478847984808481848284838484my
$o1
=
join
""
,
grep
$def
{
$_
}==1,
sort
keys
%def
;
my
$o
=
join
""
,
sort
keys
%def
;
my
@r
;
while
(
@a
){
my
$a
=
shift
(
@a
);
if
(
$a
=~/^-([
$o1
])([
$o
].*)$/){
unshift
@a
,
"-$1"
,
"-$2"
;
}
elsif
(
$a
=~/^-(\w)(.*)$/){
my
$d
=
$def
{$1}//0;
push
@{
$$hashref
{$1}},
$d
==1 &&
length
($2) ? croak
"opt -$1 has no arg (is $2 here)"
:
$d
==1 ? 1
:
$d
==2 &&
length
($2) ? $2
:
$d
==2 ?
shift
(
@a
)
:croak
"unknown opt -$1"
;
}
elsif
(
$a
eq
'--'
){
last
;
}
else
{
push
@r
,
$a
;
}
}
t/03_bloomfilter.t view on Meta::CPAN
10111213141516171819202122232425262728293031my
$capacity
=10000;
my
$bf
=bfinit(
$error_rate
,
$capacity
);
my
$t
=time_fp();
bfadd(
$bf
,
map
$_
*2,0..
$capacity
-1);
#deb "Adds pr sec: ".int($capacity/(time_fp()-$t))."\n";
#bfadd($bf, $_) for map $_*2,0..$capacity-1;
deb serialize({
%$bf
,
filter
=>
''
},
'bf'
,
''
,1);
deb
"Filter has capacity $$bf{capacity}\n"
;
deb
"Filter has $$bf{key_count} keys\n"
;
deb
"Filter has "
.
length
(
$$bf
{filter}).
" bytes\n"
;
deb
"Filter has $$bf{filterlength} bits of which "
.bfsum(
$bf
).
" ("
.
int
(100
*bfsum
(
$bf
)/
$$bf
{filterlength}).
"%) are on\n"
;
deb
"Filter has $$bf{hashfuncs} hash functions\n"
;
my
@c
=bfcheck(
$bf
,0..
$capacity
*2);
#test next ok: $c[2000]=0;
#deb "$_->".bfcheck($bf,$_)."\n" for 0..200;
my
$sum
;
$sum
+=
$c
[
$_
*2+1 ],
for
0..
$capacity
-1;
deb
"Filter has $sum false positives\n"
;
ok(!(
grep
$c
[
$_
]!=1,
map
$_
*2, 0..
$capacity
-1),
'no false negatives'
);
ok(
$sum
>=
$capacity
*$error_rate
*80/100
&&
$sum
<=
$capacity
*$error_rate
*120/100
t/03_bloomfilter.t view on Meta::CPAN
585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105ok(0+
grep
(
$_
,bfcheck(
$cbf
,1..
$cap
)) ==
$cap
,
'cbf no false negatives'
);
ok(bfgrepnot(
$cbf
,[1..
$cap
]) == 0,
'cbf grepnot'
);
my
$errs
=
grep
(
$_
,bfcheck(
$cbf
,
$cap
+1..
$cap
*2));
deb
"Errs $errs\n"
;
ok(between(
$errs
/
$cap
/
$er
,0.7,1.3),
'error rate rating '
.(
$errs
/
$cap
/
$er
).
' within ok range 0.7-1.3'
);
#---------- see doc about this example:
#do{
# my $bf=bfinit( error_rate=>0.00001, capacity=>4e6, counting_bits=>4 );
# bfadd($bf,[1000*$_+1 .. 1000*($_+1)]),deb"." for 0..4000-1; # adding 4 million keys one thousand at a time
# my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
# deb sprintf("%8d counters is %2d\n",$c{$_},$_) for sort{$a<=>$b}keys%c;
#};
my
%c
;
$c
{
vec
(
$$cbf
{filter},
$_
,
$cb
)}++
for
0..
$$cbf
{filterlength}-1;
ok(sum(
map
$c
{
$_
}
*$_
,
keys
%c
)/
$$cbf
{key_count} ==
$$cbf
{hashfuncs},
'counter check'
);
#deb sprintf("%8d counters is %2d\n",$c{$_},$_) for sort{$a<=>$b}keys%c;
#---------- counting bloom filter, test delete
do
{
my
(
$er
,
$cap
,
$cb
)=(0.1,500,4);
my
$bf
=bfinit(
error_rate
=>
$er
,
capacity
=>
$cap
*2,
counting_bits
=>
$cb
,
keys
=>[1..
$cap
*2]);
bfdelete(
$bf
,
$cap
+1 ..
$cap
*1.5);
bfdelete(
$bf
,[
$cap
*1.5+1 ..
$cap
*2]);
ok(bfgrep(
$bf
,[1..
$cap
]) ==
$cap
,
'cbf, delete test, no false negatives'
);
my
$err
=bfgrep(
$bf
,[
$cap
+1..
$cap
*2]);
deb
"Err $err\n"
;
ok(
$err
/
$cap
/
$er
<1.3,
"cbf, delete test, after delete ($err)"
);
my
%c
=();
$c
{
vec
(
$$bf
{filter},
$_
,
$cb
)}++
for
0..
$$bf
{filterlength}-1;
ok(sum(
map
$c
{
$_
}
*$_
,
keys
%c
)/
$$bf
{key_count} ==
$$bf
{hashfuncs},
'cbf, delete test, counter check after delete'
);
eval
{ok(bfdelete(
$bf
,
'x'
))};ok($@=~/Cannot
delete
a non-existing key x/,
'delete non-existing key'
);
};
#---------- test filter lengths
my
$r
;
ok(between(
$r
=
length
(bfinit(
counting_bits
=>
$_
,
error_rate
=>0.01,
capacity
=>100)->{filter}) /
length
(bfinit(
counting_bits
=>1,
error_rate
=>0.01,
capacity
=>100)->{filter}) /
$_
, 0.95, 1.05),
"filter length ($r), cb $_"
)
for
qw/2 4 8 16/
;
eval
{bfinit(
counting_bits
=>2,
error_rate
=>0.1,
capacity
=>1000,
keys
=>[1..1000])};ok($@=~/Too many overflows/,
'overflow check'
);
#----------storing and retrieving
my
$tmp
=tmp();
if
(-w
$tmp
){
my
$file
=
"$tmp/cbf.bf"
;
bfstore(
$cbf
,
$file
);
deb
"Stored size of $file: "
.(-s
$file
).
" bytes\n"
;
my
$cbfr
=bfretrieve(
$file
);
t/03_bloomfilter.t view on Meta::CPAN
113114115116117118119120121122123124125126127128129130131132133134}
else
{
ok(1,
'skipped, not linux'
)
for
1..3;
}
#----------adaptive bloom filter, not implemented/tested, see http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf
# $cap=100;
# $bf=bfinit(adaptive=>0,error_rate=>0.001,capacity=>$cap,keys=>[1..$cap]);
# @c=bfcheck($bf,[1..$cap]);
# %c=(); $c{$_}++ for @c;
# deb "Filter has $$bf{filterlength} bits of which ".bfsum($bf)." (".int(100*bfsum($bf)/$$bf{filterlength})."%) are on\n";
# deb "Filter has ".int(1+$$bf{filterlength}/8)." bytes (".sprintf("%.1f",int(1+$$bf{filterlength}/8)/1024)." kb)\n";
# deb "Filter has $$bf{hashfuncs} hash functions\n";
# deb "Number of $_: $c{$_}\n" for sort{$a<=>$b}keys%c;
# deb "Sum bits ".sum(map $$bf{hashfuncs}+$_-1,bfcheck($bf,1..$cap))."\n";
# deb "False negatives: ".grep(!$_,@c)."\n";
# deb "Error rate: ".(($errs=grep($_,bfcheck($bf,$cap+1..$cap*2)))/$cap)."\n";
# deb "Errors: $errs\n";
#---------- bfaddbf, adding two bloom filters
do
{
my
$cap
=100;
t/11_part.t view on Meta::CPAN
91011121314151617181920212223242526272829my
@words
=
qw/These are the words of this array/
;
my
%h
=parth {
uc
(
substr
(
$_
,0,1)) }
@words
;
#warn serialize(\%h);
ok_ref( \
%h
,
{
T
=>[
qw/These the this/
],
A
=>[
qw/are array/
],
W
=>[
qw/words/
],
O
=>[
qw/of/
] },
'parth'
);
my
@a
=parta {
length
}
@words
;
#warn serialize(\@a);
ok_ref( \
@a
, [
undef
,
undef
,[
'of'
],[
'are'
,
'the'
],[
'this'
],[
'These'
,
'words'
,
'array'
]],
'parta'
);
ok_ref( [pile(2, 1..9)], [[1,2],[3,4],[5,6],[7,8],[9]],
'pile 2'
);
ok_ref( [pile(4, 1..9)], [[1,2,3,4],[5,6,7,8],[9]],
'pile 4'
);
ok_ref( [pile(2)], [],
'pile empty'
);
ok_ref( [pile2(4, 1..9)], [[1,2,3,4],[5,6,7,8],[9]],
'pile parta'
);
sub
pile2 {
1011121314151617181920212223242526272829303132333435363738394041424344454647ok_ref( [zip([1,3,5],[2,4,6])], [1..6],
'zip 2'
);
ok_ref( [zip([1,4,7],[2,5,8],[3,6,9])], [1..9],
'zip 3'
);
sub
ziperr{
eval
{zip(
@_
)};$@=~/ERROR.
*zip
/}
ok( ziperr([1,2],[3,4],5),
'zip err 1'
);
ok( ziperr([1,2],[3,4,5]),
'zip err 2'
);
ok( ziperr([1,2],[3,4],5),
'zip err 1'
);
ok( ziperr([1,2],[3,4,5]),
'zip err 2'
);
#--zipb64, zipbin, unzipb64, unzipbin, gzip, gunzip
my
$s
=
join
""
,
map
random([
qw/hip hop and you dont stop/
]), 1..1000;
ok(
length
(zipb64(
$s
)) /
length
(
$s
) < 0.5,
'zipb64'
);
ok( between(
length
(zipbin(
$s
)) /
length
(zipb64(
$s
)), 0.7, 0.8),
'zipbin zipb64'
);
ok( between(
length
(zipbin(
$s
)) /
length
(zipb64(
$s
)), 0.7, 0.8),
'zipbin zipb64'
);
ok(
length
(zipbin(
$s
)) /
length
(
$s
) < 0.4,
'zipbin'
);
ok(
$s
eq unzipb64(zipb64(
$s
)),
'unzipb64'
);
ok(
$s
eq unzipbin(zipbin(
$s
)),
'unzipbin'
);
my
$d
=
substr
(
$s
,1,1000);
ok(
length
(zipb64(
$s
,
$d
)) /
length
(zipb64(
$s
)) < 0.8 );
my
$f
;
ok( (
$f
=
length
(zipb64(
$s
,
$d
)) /
length
(zipb64(
$s
))) < 0.73 ,
"0.73 > $f"
);
#for(1..10){
# my $s=join"",map random([qw/hip hop and you dont stop/]), 1..1000;
# my $d=substr($s,1,1000);
# my $f= length(zipbin($s,$d)) / length(zipbin($s));
# print $f,"\n";
#}
#--gzip, gunzip
$s
=
join
""
,
map
random([
qw/hip hop and you do not everever stop/
]), 1..10000;
ok(
length
(gzip(
$s
))/
length
(
$s
) < 1/5);
ok(
$s
eq gunzip(gzip(
$s
)));
ok(
$s
eq unzipbin(gunzip(gzip(zipbin(
$s
)))));
ok(
$s
eq unzipb64(unzipbin(gunzip(gzip(zipbin(zipb64(
$s
)))))));
length
(
$s
),
"\n"
;
length
(gzip(
$s
)),
"\n"
;
length
(zipbin(
$s
)),
"\n"
;
length
(zipbin(
$s
,
$d
)),
"\n"
;
t/17_roman.t view on Meta::CPAN
456789101112131415161718192021222324252627282930313233343536use
Carp;
my
%rom
=(
MCCXXXIV
=>1234,
MCMLXXI
=>1971,
IV
=>4,
VI
=>6,
I
=>1,
V
=>5,
X
=>10,
L
=>50,
C
=>100,
D
=>500,
M
=>1000,
CDXCVII
=>497);
my
$rom
;ok( (
$rom
=int2roman(
$rom
{
$_
})) eq
$_
,
sprintf
"int2roman %8d => %-10s %-10s"
,
$rom
{
$_
},
$_
,
"($rom)"
)
for
sort
keys
%rom
;
my
$int
;ok( (
$int
=roman2int(
$_
)) eq
$rom
{
$_
},
sprintf
"roman2int %-8s => %10d %10d"
,
$_
,
$rom
{
$_
},
$int
)
for
sort
keys
%rom
;
ok(
do
{
eval
{roman2int(
"a"
)};$@=~/invalid/i},
"croaks ok"
);
ok( roman2int(
"-MCCXXXIV"
)==-1234,
'negative ok'
);
ok( int2roman(0) eq
''
,
'zero'
);
ok( !
defined
(int2roman(
undef
)),
'undef'
);
ok(
defined
(int2roman(
""
)) && !
length
(int2roman(
""
)),
'empty'
);
my
@n
=(-100..4999);
my
@err
=
grep
roman2int(int2roman(
$_
))!=
$_
,
grep
$_
>100?
$_
%7==0:1,
@n
;
ok(
@err
==0,
"all, not ok: "
.(
join
(
", "
,
@err
)||
'none'
) );
my
@t
=([time_fp(),
join
(
" "
,
map
int2roman(
$_
) ,
@n
),time_fp()],
[time_fp(),
join
(
" "
,
map
int2roman_old(
$_
),
@n
),time_fp()]);
ok(
$t
[0][1] eq
$t
[1][1] );
if
(
$ENV
{ATDEBUG}){
printf
"Acme::Tools::int2roman - %.6fs\n"
,
$t
[0][2]-
$t
[0][0];
printf
"17_roman.t/int2roman_old - %.6fs\n"
,
$t
[1][2]-
$t
[1][0];
}
sub
int2roman_old {
my
(
$n
,
@p
)=(
shift
,[],[1],[1,1],[1,1,1],[1,2],[2],[2,1],[2,1,1],[2,1,1,1],[1,3],[3]);
!
defined
(
$n
)?
undef
: !
length
(
$n
) ?
""
:
int
(
$n
)!=
$n
? croak
"int2roman: $n is not an integer"
:
$n
==0 ?
""
:
$n
<0 ?
"-"
.int2roman(-
$n
)
:
$n
>3999 ?
"M"
.int2roman(
$n
-1000)
:
join
''
,@{[
qw/I V X L C D M/
]}[
map
{
my
$i
=
$_
;
map
(
$_
+5-
$i
*2,@{
$p
[
$n
/10**(3-
$i
)%10]})}(0..3)];
}
t/21_read_conf.t view on Meta::CPAN
495051525354555657585960616263646566676869
'hei'
=>
'fds1 312321 123321'
,
'sykkel'
=>
'sdfkdsa'
},
'section3'
=>{}
);
my
$t
;
sub
rc {
$t
=time_fp();
my
%c
=read_conf(
@_
);
$t
=time_fp()-
$t
;
%c
}
sub
sjekk {
my
$f
=serialize(\
%fasit
,
'c'
,
''
,1);
my
$s
=serialize(\
%c
,
'c'
,
''
,1);
ok(
$s
eq
$f
,
sprintf
(
"read_conf %10.6f sek ("
.
length
(
$s
).
" bytes)"
,
$t
)) or
warn
"s=$s\nf=$f\n"
;
}
sjekk();
#1
my
$f
=tmp().
"/acme-tools.read_conf.tmp"
;
eval
{writefile(
$f
,
$c
)};$@&
&ok
(1)&
&exit
;
%c
=(); rc(
$f
,\
%c
);
sjekk();
#2
$Acme::Tools::Read_conf_empty_section
=1;
#default 0
$fasit
{
''
}=\
%s0
;
t/25_pwgen.t view on Meta::CPAN
789101112131415161718192021222324252627sub
tstr{
sprintf
(
" (%d trials, %.5f sec)"
,
$Acme::Tools::Pwgen_trials
,
$Acme::Tools::Pwgen_sec
)}
SKIP: {
skip
"- strangely pwgen-croak-test fails on windows sometime"
, 2
if
$^O ne
'linux'
;
local
$Acme::Tools::Pwgen_max_sec
=0.001;
eval
{pwgen(3)}; ok($@=~/pwgen.*25_pwgen.t/,
"pwgen croak works: "
.trim($@));
local
$Acme::Tools::Pwgen_max_trials
=3;
eval
{pwgen(3)}; ok($@=~/pwgen.
*after
3 .*25_pwgen.t/,
"pwgen croak works: "
.trim($@));
};
ok(
length
(pwgen())==8,
'default len 8'
);
my
$n
=300;
$Acme::Tools::Pwgen_max_sec
=1;
sub
test{/^[a-z0-9]/i and /[A-Z]/ and /[a-z]/ and /\d/ and /[\,\-\.\/\&\%\_\!]/};
my
@pw
=
grep
test(), pwgen(0,
$n
);
ok(
@pw
==
$n
,
"pwgen ok "
.
@pw
.tstr());
$n
=50;
@pw
=
grep
/^[A-Z]{20}$/,pwgen(20,
$n
,
'A-Z'
);
ok(
@pw
==
$n
,
"pwgen ok "
.
@pw
);
t/28_wipe.t view on Meta::CPAN
1234567891011121314151617# make test
# perl Makefile.PL; make; perl -Iblib/lib t/28_wipe.t
if
($^O eq
'linux'
){
my
$f
=tmp().
'/acme-tools.wipe.tmp'
;
writefile(
$f
,
join
(
" "
,
map
rand
(),1..1000));
#system("ls -l $f");
my
$ntrp
=
sub
{
length
(gz(readfile(
$f
).
""
))};
my
$n
=
&$ntrp
;
wipe(
$f
,
undef
,1);
my
$ratio
=
$n
/
&$ntrp
;
ok(
$ratio
>50 || !
$INC
{
'Compress/Zlib.pm'
},
"ratio $ratio > 50"
);
ok(-s
$f
>5e3);
wipe(
$f
,1);
ok(!-e
$f
);
}
else
{ ok(1)
for
1..3 }
t/38_base64.t view on Meta::CPAN
45678910111213141516171819202122232425262728293031use
MIME::Base64;
my
(
$s
,
$b64
,
$b64_2
)=(
""
);
for
(0..1000){
if
(
$_
%100==0){
$b64
=encode_base64(
$s
);
$b64_2
=base64(
$s
);
my
$s2
=unbase64(
$b64
);
is(
$s
,
$s2
,
'yes '
.
length
(
$s
));
is(
$b64
,
$b64_2
,
'yes b '
.
length
(
$s
));
}
$s
.=
$_
;
}
if
($^O eq
'linux'
and -x
'/usr/bin/base64'
){
$s
=
qx(base64 -w 1000 Tools.pm)
;
$b64
=encode_base64(
$s
);
$b64_2
=base64(
$s
);
my
$s2
=unbase64(
$b64
);
is(
$s
,
$s2
,
'yes ps '
.
length
(
$s
));
is(
$b64
,
$b64_2
,
'yes b ps '
.
length
(
$s
));
}
else
{
is(1,1,
'skips on non-linux'
)
for
1..2;
}
#print "$s\n\n$b64\n";
t/test_fork_bloom.pl view on Meta::CPAN
17181920212223242526272829303132333435363738}
1
while
wait
() != -1;
"building finished\n"
;
my
$bf
=bfinit(
error_rate
=>
$error_rate
,
capacity
=>
$cap
);
for
my
$job
(0..
$jobs
-1){
"Adding bloom filter $job..."
;
my
$t
=time_fp();
bfaddbf(
$bf
,bfretrieve(
"/tmp/bf$job.bf"
));
"took "
.(time_fp()-
$t
).
" sec\n"
;
}
int
(
$$bf
{filterlength}/8),
" bytes\n"
;
printf
"%.1f%%\n"
,100
*bfsum
(
$bf
)/
$$bf
{filterlength};
"keys: $$bf{key_count}\n"
;
"found: "
.bfgrep(
$bf
,[1..
$cap
/10]).
"\n"
;
my
$tests
=10000;
my
$errs
=bfgrep(
$bf
,[
$cap
+1..
$cap
+1+
$tests
]);
"Error rate: $errs/$tests = "
.(
$errs
/
$tests
).
"\n"
;
bfstore(
$bf
,
"/tmp/bfall.bf"
);
$$bf
{filter}=
"gone"
;
serialize(
$bf
,
'bf'
,
''
,2);
( run in 0.496 second using v1.01-cache-2.11-cpan-26ccb49234f )