Acme-Tools
view release on metacpan or search on metacpan
0.24 Feb 2019 Fixed failes on perl 5.16 and older
0.23 Jan 2019 Subs: logn, egrep, which. More UTF-8 "oriented" (lower, upper, ...)
Commands: zsize, finddup, due (improved), conv (improved, [MGT]?Wh
and many more units), due -M for stdin of filenames.
0.22 Feb 2018 Subs: subarr, sim, sim_perm, aoh2sql. command: resubst
0.21 Mar 2017 Improved nicenum() and its tests
0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
throttle timems refa refaa refah refh refha refhh refs
eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
Commands: 2bz2 2gz 2xz z2z
0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
self_update install_acme_command_tools
Commands: conv due freq wipe xcat (see "Commands")
0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
roman2int trim. Improved: conv (numbers currency) range ("derivatives")
log2
logn
distinct
in
in_num
uniq
union
union_all
minus
minus_all
intersect
intersect_all
not_intersect
mix
zip
sim
sim_perm
subarr
subhash
hashtrans
zipb64
zipbin
unzipb64
trim
rpad
lpad
cpad
dserialize
serialize
srlz
cnttbl
nicenum
bytes_readable
sec_readable
distance
tms
s2t
easter
time_fp
timems
sleep_fp
sleeps
sleepms
sleepus
C<num2code()> convert numbers (integers) from the normal decimal system to some arbitrary other number system.
That can be binary (2), oct (8), hex (16) or others.
Example:
print num2code(255,2,"0123456789ABCDEF"); # prints FF
print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
...and 14 are converted to 0E, with leading 0 because of the second argument 2.
Example:
print num2code(1234,16,"01")
Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
To convert back:
print code2num("0000010011010010","01"); #prints 1234
Third argument: a start position for x. Default 0.
Fourth argument: a small delta value. Default 1e-4 (0.0001).
Fifth argument: a maximum number of iterations before resolve gives up
and carps. Default 100 (if fifth argument is not given or is
undef). The number 0 means infinite here. If the derivative of the
start position is zero or close to zero more iterations are typically
needed.
Sixth argument: A number of seconds to run before giving up. If both
fifth and sixth argument is given and > 0, C<resolve> stops at
whichever comes first.
B<Output:> returns the number C<x> for C<f(x)> = 0
...or equal to the second input argument if present.
B<Example:>
The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.
The result of C<resolve> will depend on the start position:
print resolve(sub{ $_**2 - 4*$_ - 21 }); # -3 with $_ as your x
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 }); # -3 more elaborate call
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3); # 7 with start position 3
The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
to the last number of iterations C<resolve> used. Also if C<resolve> dies (carps).
The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
set to the last estimate. This number will often be close to the solution and can
be used even if C<resolve> dies (carps).
B<BigFloat-example:>
If either second, third or fourth argument is an instance of L<Math::BigFloat>, so will the result be:
use Acme::Tools;
my $equation = sub{ $_ - 1 - 1/$_ };
my $gr1 = resolve( $equation, 0, 1 ); #
my $gr2 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
bigscale(50);
my $gr3 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
print 1/2 + sqrt(5)/2, "\n";
print "Golden ratio 1: $gr1\n";
our $Resolve_time;
#sub resolve(\[&$]@) {
#sub resolve(&@) { <=0.17
#todo: perl -MAcme::Tools -le'print resolve(sub{$_[0]**2-9431**2});print$Acme::Tools::Resolve_iterations'
#todo: perl -MAcme::Tools -le'sub d{5.3*1.0094**$_[0]-10.2*1.0072**$_[0]} print resolve(\&d)' #err, pop norway vs sweden
#todo: perl -MAcme::Tools -le' print resolve(sub{5.3*1.0094**$_[0]-10.2*1.0072**$_[0]})' #err, pop norway vs sweden
# =>Div by zero: df(x) = 0 at n'th iteration, n=0, delta=0.0001, fx=CODE(0xc81d470) at -e line 1
#todo: ren solve?
sub resolve {
my($f,$goal,$start,$delta,$iters,$sec)=@_;
$goal=0 if!defined$goal;
$start=0 if!defined$start;
$delta=1e-4 if!defined$delta;
$iters=100 if!defined$iters;
$sec=0 if!defined$sec;
$iters=13e13 if $iters==0;
croak "Iterations ($iters) or seconds ($sec) can not be a negative number" if $iters<0 or $sec<0;
$Resolve_iterations=undef;
$Resolve_last_estimate=undef;
croak "Should have at least 1 argument, a coderef" if !@_;
croak "First argument should be a coderef" if ref($f) ne 'CODE';
my @x=($start);
my $time_start=$sec>0?time_fp():undef;
my $ds=ref($start) eq 'Math::BigFloat' ? Math::BigFloat->div_scale() : undef;
my $fx=sub{
local$_=$_[0];
my $fx=&$f($_);
if($fx=~/x/ and $fx=~/^[ \(\)\.\d\+\-\*\/x\=\^]+$/){
$fx=~s/(\d)x/$1*x/g;
$fx=~s/\^/**/g;
$fx=~s/^(.*)=(.*)$/($1)-($2)/;
$fx=~s,x,\$_,g;
$f=eval"sub{$fx}";
$fd = &$fx($x[$n]+$delta*0.7) - &$fx($x[$n]-$delta*0.3) if $fd==0;# and warn"wigle 1\n";
$fd = &$fx($x[$n]+$delta*0.2) - &$fx($x[$n]-$delta*0.8) if $fd==0;# and warn"wigle 2\n";
croak "Div by zero: df(x) = $x[$n] at n'th iteration, n=$n, delta=$delta, fx=$fx" if $fd==0;
$x[$n+1]=$x[$n]-(&$fx($x[$n])-$goal)/($fd/$delta);
$Resolve_last_estimate=$x[$n+1];
#warn "n=$n fd=$fd x=$x[$n+1]\n";
$Resolve_iterations=$n;
last if $n>3 and $x[$n+1]==$x[$n] and $x[$n]==$x[$n-1];
last if $n>4 and $x[$n]!=0 and abs(1-$x[$n+1]/$x[$n])<1e-13; #sub{3*$_+$_**4-12}
last if $n>3 and ref($x[$n+1]) eq 'Math::BigFloat' and substr($x[$n+1],0,$ds) eq substr($x[$n],0,$ds); #hm
croak "Could not resolve, perhaps too little time given ($sec), iteratons=$n"
if $sec>0 and ($Resolve_time=time_fp()-$time_start)>$sec;
#warn "$n: ".$x[$n+1]."\n";
$n++;
}
croak "Could not resolve, perhaps too few iterations ($iters)" if @x>=$iters;
return $x[-1];
}
=head2 resolve_equation
This prints 2:
erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh, MWh, GWh, TWh
kcal, kilocalorie, kilocalories,
newtonmeter, newtonmeters, th, thermie
force: N, _N, dyn, dyne, dynes, lb, newton
length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
in, inch, inches, km, league, lightyear, ls, ly,
m, meter, meters, mi, mil, mile, miles,
nautical mile, nautical miles, nmi,
parsec, pc, planck, yard, yard_imperical, yd, Ã
, ångstrøm, angstrom
mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
mileage: mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km
money: AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
radioactivity: Bq, becquerel, curie
speed: _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
kn, knot, knots, kt, kts, mach, machs, c, fps, ft/s, ftps
temperature: C, F, K, celsius, fahrenheit, kelvin
time: _s, biennium, century, d, day, days, decade, dy, fortnight,
h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
millennium, min, minute, minutes, mo, moment, mon, month,
olympiad, quarter, s, season, sec, second, seconds, shake,
tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
year, years, ygregorian, yjulian, ysideral, ytropical
volume: l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
container (or container20), container40, container40HC, container45HC
See: L<http://en.wikipedia.org/wiki/Units_of_measurement>
femtometer => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
femtometre => 1e-15,
femtometers => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
femtometres => 1e-15,
attometer => 1e-18, #derived from "atten/atton" (=eighteen)
attometre => 1e-18,
attometers => 1e-18, #derived from "atten/atton" (=eighteen)
attometres => 1e-18,
ly => 299792458*3600*24*365.25,
lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
ls => 299792458, #light-second
au => 149597870700, # by def: meters earth to sun
astronomical_unit => 149597870700,
'astronomical unit' => 149597870700,
pc => 149597870700*648000/$PI, #3.0857e16 = 3.26156 ly
_pc => 149597870700*648000/$PI,
parsec => 149597870700*648000/$PI,
attoparsec => 149597870700*648000/$PI/1e18,
apc => 149597870700*648000/$PI/1e18,
planck => 1.61619997e-35, #planck length
#Norwegian (old) lengths:
tomme => 0.0254,
tommer => 0.0254,
fot => 0.0254*12, #0.3048m
alen => 0.0254*12*2, #0.6096m
favn => 0.0254*12*2*3, #1.8288m
kvart => 0.0254*12*2/4, #0.1524m a quarter alen
#--https://upload.wikimedia.org/wikipedia/commons/e/eb/English_length_units_graph.svg
container40HC => 75.3e3,
container45HC => 86.1e3,
firkin => 282*0.0254**3 * 8, #8 gallon_ale
#Norwegian:
meterfavn => 2 * 2 * 0.6, #fire wood/ved 2.4 m3
storfavn => 2 * 2 * 3, #fire wood/ved 12 m3
},
time =>{
s => 1,
_s => 1,
sec => 1,
second => 1,
seconds => 1,
m => 60,
min => 60,
minute => 60,
minutes => 60,
h => 60*60,
hr => 60*60,
hour => 60*60,
hours => 60*60,
d => 60*60*24,
dy => 60*60*24,
lustrum => 5 * 60*60*24*365.2425,
indiction => 15 * 60*60*24*365.2425,
jubilee => 50 * 60*60*24*365.2425,
century => 100 * 60*60*24*365.2425,
millennium => 1000 * 60*60*24*365.2425,
shake => 1e-8,
moment => 3600/40, #1/40th of an hour, used by Medieval Western European computists
ke => 864, #1/100th of a day, trad Chinese, 14m24s
fortnight => 14*24*3600,
tp => 5.3910632e-44, #planck time, time for ligth to travel 1 planck length
nanocentury => 100 * 60*60*24*365.2425 / 1e9, #3.156 ~ pi seconds, response time limit (usability)
warhol => 15*60, #"fifteen minutes of fame"
},
speed=>{
'm/s' => 1,
'_m/s' => 1,
mps => 1,
mph => 1609.344/3600,
'mi/h' => 1609.344/3600,
kmh => 1/3.6,
kmph => 1/3.6,
machs => 340.3,
fps => 0.3048, #0.0254*12
ftps => 0.3048,
'ft/s' => 0.3048,
},
acceleration=>{
'm/s2' => 1,
'mps2' => 1,
g => 9.80665,
g0 => 9.80665,
#0-100kmh or ca 0-60 mph x seconds...
},
temperature=>{ #http://en.wikipedia.org/wiki/Temperature#Conversion
C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
},
radioactivity=>{
Bq => 1,
becquerel => 1,
curie => 3.7e10,
},
current=> {
1; #not yet
}
sub conv {
my($num,$from,$to)=@_;
croak "conf requires 3 args" if @_!=3;
conv_prepare() if !$conv_prepare_time;
my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
my @type=intersect(@types);
push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
."to $to (" .(join(",",@{$types[1]})||'?').") has no known common dimension (unit type).\n" if @type<1;
croak join"\n",map"conv: $_",@err if @err;
my $type=$type[0];
conv_prepare_money() if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
return conv_temperature(@_) if $type eq 'temperature';
return conv_numbers(@_) if $type eq 'numbers';
my $c=$conv{$type};
my($cf,$ct)=@{$conv{$type}}{$from,$to};
my $d=shift()||2; #decimals
return undef if !defined $bytes;
return "$bytes B" if abs($bytes) <= 2** 0*1000; #bytes
return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) < 2**10*1000; #kilobyte
return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) < 2**20*1000; #megabyte
return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) < 2**30*1000; #gigabyte
return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) < 2**40*1000; #terrabyte
return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
}
=head2 sec_readable
Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.
print sec_readable( 0 ); # 0s
print sec_readable( 0.0123 ); # 0.0123s
print sec_readable(-0.0123 ); # -0.0123s
print sec_readable( 1.23 ); # 1.23s
print sec_readable( 1 ); # 1s
print sec_readable( 9.87 ); # 9.87s
print sec_readable( 10 ); # 10s
print sec_readable( 10.1 ); # 10.1s
print sec_readable( 59 ); # 59s
print sec_readable( 59.123 ); # 59.1s
print sec_readable( 60 ); # 1m 0s
print sec_readable( 60.1 ); # 1m 0s
print sec_readable( 121 ); # 2m 1s
print sec_readable( 131 ); # 2m 11s
print sec_readable( 1331 ); # 22m 11s
print sec_readable(-1331 ); # -22m 11s
print sec_readable( 13331 ); # 3h 42m
print sec_readable( 133331 ); # 1d 13h
print sec_readable( 1333331 ); # 15d 10h
print sec_readable( 13333331 ); # 154d 7h
print sec_readable( 133333331 ); # 4yr 82d
print sec_readable( 1333333331 ); # 42yr 91d
=cut
sub sec_readable {
my $s=shift();
my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
!defined$s ? undef
:!length($s) ? ''
:$s<0 ? '-'.sec_readable(-$s)
:$s<60 && int($s)==$s
? $s."s"
:$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
:$s<3600 ? int($s/60)."m " .($s%60) ."s"
:$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
:$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
: int($s/$y)."yr ".int(($s%$y)/$d)."d";
}
=head2 int2roman
and does alphanumerical comparisons (with Perl operators C<lt>, C<ge> and C<le>) if any of the
three input args don't look like a number or look like a number but with
one or more leading zeros.
btw(1,1,10) #true numeric order since all three looks like number according to =~$Re_isnum
btw(1,'02',13) #true leading zero in '02' leads to alphabetical order
btw(10, 012,10) #true leading zero here means oct number, 012 = 10 (8*1+2), so 10 is btw 10 and 10
btw('003', '02', '09') #false because '003' lt '02'
btw('a', 'b', 'c') #false because 'a' lt 'b'
btw('a', 'B', 'c') #true because upper case letters comes before lower case ones in the "ascii alphabet"
btw('a', 'c', 'B') #true, btw() and between switches from and to if the first is > the second
btw( -1, -2, 1) #true
btw( -1, -2, 0) #true
Both between and btw returns C<undef> if any of the three input args are C<undef> (not defined).
If you're doing only numerical comparisons, using C<between> is faster than C<btw>.
=cut
sub between {
my($test ,$fom, $tom)=@_;
B<Example 1:>
print join ", ", trigram("Kjetil Skotheim");
Prints:
Kje, jet, eti, til, il , l S, Sk, Sko, kot, oth, the, hei, eim
B<Example 2:>
Default is 3, but here 4 is used instead in the second optional input argument:
print join ", ", trigram("Kjetil Skotheim", 4);
And this prints:
Kjet, jeti, etil, til , il S, l Sk, Sko, Skot, koth, othe, thei, heim
C<trigram()> was created for "fuzzy" name searching. If you have a database of many names,
addresses, phone numbers, customer numbers etc. You can use trigram() to search
among all of those at the same time. If the search form only has one input field.
Examples:
print replace("water","ater","ine"); # Turns water into wine
print replace("water","ater"); # w
print replace("water","at","eath"); # weather
print replace("water","wa","ju",
"te","ic",
"x","y", # No x is found, no y is returned
'r$',"e"); # Turns water into juice. 'r$' says that the r it wants
# to change should be the last letters. This reveals that
# second, fourth, sixth and so on argument is really regexs,
# not normal strings. So use \ (or \\ inside "") to protect
# the special characters of regexes. You probably also
# should write qr/regexp/ instead of 'regexp' if you make
# use of regexps here, just to make it more clear that
# these are really regexps, not strings.
print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
print replace('JACK and JUE','J'); # prints ACK and UE
print replace("abc","a","b","b","c"); # prints ccc (not bcc)
B<Input:> Two or more arrayrefs. A number of equal sized arrays
containing numbers, strings or anything really.
B<Output:> An array of those input arrays zipped (interlocked, merged) into each other.
print join " ", zip( [1,3,5], [2,4,6] ); # 1 2 3 4 5 6
print join " ", zip( [1,4,7], [2,5,8], [3,6,9] ); # 1 2 3 4 5 6 7 8 9
Example:
zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:
my @media = qw/CD DVD VHS LP Blueray/;
my @count = qw/20 12 2 4 3/;
my %count = zip(\@media,\@count); # or zip( [@media], [@count] )
print "I got $count{DVD} DVDs\n"; # I got 12 DVDs
Dies (croaks) if the two lists are of different sizes
...or any input argument is not an array ref.
Same as binsearch except that the arrays is sorted alphanumerically
(cmp) instead of numerically (<=>) and the searched element is a
string, not a number. See L</binsearch>.
=cut
our $Binsearch_steps;
our $Binsearch_maxsteps=100;
sub binsearch {
my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
if(defined $cmpsub and !ref($cmpsub)){
my $key=$cmpsub;
$cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
}
return $insertpos ? -0.5 : undef if !@$aref;
my($min,$max)=(0,$#$aref);
$Binsearch_steps=0;
while (++$Binsearch_steps <= $Binsearch_maxsteps) {
my $middle=int(($min+$max+0.5)/2);
In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.
In array context: Returns the n smallest numbers in an array.
To return the n(th) largest number(s) instead of smallest, just negate n.
An optional third argument can be a sub that is used to compare the elements of the input array.
Examples:
my $second_smallest = rank(2, [11,12,13,14]); # 12
my @top10 = rank(-10, [1..100]); # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
my $max = rank(-1, [101,102,103,102,101]); #103
my @contest = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
my $second = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob
=head2 rankstr
Just as C<rank> but sorts alphanumerically (strings, cmp) instead of numerically.
=cut
sub rank {
my($rank,$aref,$cmpsub)=@_;
if($rank<0){
sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 }
=head2 part
B<Input:> A code-ref and a list
B<Output:> Two array-refs
Like C<grep> but returns the false list as well. Partitions a list
into two lists where each element goes into the first or second list
whether the predicate (a code-ref) is true or false for that element.
my( $odd, $even ) = part {$_%2} (1..8);
print for @$odd; #prints 1 3 5 7
print for @$even; #prints 2 4 6 8
(Works like C< partition() > in the Scala programming language)
=head2 parth
my $ref_to_hash = {1,100,2,200,3,300};
my $ref_to_scalar = \"String";
print "arrayref" if ref($ref_to_array) eq 'ARRAY'; #true
print "hashref" if ref($ref_to_hash) eq 'HASH'; #true
print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
print "arrayref" if refa($ref_to_array); #also true, without: eq 'ARRAY'
print "hashref" if refh($ref_to_hash); #also true, without: eq 'HASH'
print "scalarref" if refs($ref_to_scalar); #also true, without: eq 'SCALAR'
my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
print "aa" if refaa($ref_to_array_of_arrays); #true
print "ah" if refah($ref_to_array_of_hashes); #true
print "ha" if refha($ref_to_hash_of_arrays); #true
print "hh" if refhh($ref_to_hash_of_hashes); #true
=cut
place the highest temperature on the 100-percentile you are sort of
saying that there can never be a higher temperatures in future measurements.
A quick non-exhaustive Google survey suggests that method 1 here is most used.
The larger the data sets, the less difference there is between the two methods.
B<Extrapolation:>
In method one, when you want a percentile outside of any possible
interpolation, you use the smallest and second smallest to extrapolate
from. For instance in the data set C<5, 6, 7>, if you want an
x-percentile of x < 25, this is below 5.
If you feel tempted to go below 0 or above 100, C<percentile()> will
I<die> (or I<croak> to be more precise)
Another method could be to use "soft curves" instead of "straight
lines" in interpolation. Maybe B-splines or Bezier curves. This is not
used here.
=head2 random
B<Input:> One or two arguments.
B<Output:>
If two integer arguments: returns a random integer between the integers in argument one and two.
If the first argument is an arrayref: returns a random member of that array without changing the array.
If the first argument is an arrayref and there is a second arg: return that many random members of that array
If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash
If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash
If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.
B<Examples:>
$dice=random(1,6); # 1, 2, 3, 4, 5 or 6
$dice=random([1..6]); # same as previous
@dice=random([1..6],10); # 10 dice tosses
$dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2}); # weighted dice with 6 being twice as likely as the others
@dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10); # 10 weighted dice tosses
print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
print random(2); # prints 0, 1 or 2
sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
...meaning the password should:
* start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
* should contain at least one char from each of the groups lower, upper, digit and special char
To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
own C<rand()> internally.
C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
Whenever one of the two limits is reached, a first generates a croak.
Examples:
my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
Examples of adding additional requirements to the default ones:
my @pwreq = ( qr/^[A-C]/ );
pwgen(8,1,'','',@pwreq); # use defaults for allowed chars and the standard requirements
# but also demand that the password must start with A, B or C
push @pwreq, sub{ not /[a-z]{3}/i };
pwgen(8,1,'','',@pwreq); # as above and in addition the password should not contain three
# or more consecutive letters (to avoid "offensive" words perhaps)
=cut
our $Pwgen_max_sec=0.01; #max seconds/password before croak (for hard to find requirements)
our $Pwgen_max_trials=10000; #max trials/password before croak (for hard to find requirements)
our $Pwgen_sec=0; #seconds used in last call to pwgen()
our $Pwgen_trials=0; #trials in last call to pwgen()
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
sub pwgen {
my($len,$num,$chars,@req)=@_;
$len||=8;
$num||=1;
$chars||='A-Za-z0-9,-./&%_!';
$req[0]||=\&pwgendefreq if !$_[2];
$chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
my($c,$t,@pw,$d)=(length($chars),time_fp());
($Pwgen_trials,$Pwgen_sec)=(0,0);
TRIAL:
while(@pw<$num){
croak "pwgen timeout after $Pwgen_trials trials"
if ++$Pwgen_trials >= $Pwgen_max_trials
or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
and $d!~/^\d+$/; #jic int from time_fp
my $pw=join"",map substr($chars,rand($c),1),1..$len;
for my $r (@req){
if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
else { croak "pwgen: invalid req type $r ".ref($r) }
}
push@pw,$pw;
}
$Pwgen_sec=time_fp()-$t;
return $pw[0] if $num==1;
return @pw;
}
# =head1 veci
#
# Perls C<vec> takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
#
# This limitation is removed with C<veci> (vec improved, but much slower)
#
perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])' # 1,2,3,4
=cut
sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
=head2 minus
Input: Two arrayrefs.
Output: An array containing all elements in the first input array but not in the second.
Example:
perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'
Output is C<< five 1 2 >>.
=cut
sub minus {
my %seen;
my %notme=map{($_=>1)}@{$_[1]};
grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
}
=head2 intersect
Input: Two arrayrefs
Output: An array containing all elements which exists in both input arrays.
Example:
perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )' # 4 3 five
Output: C<< 4 3 five >>
=cut
sub intersect {
my %first=map{($_=>1)}@{$_[0]};
my %seen;
return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
}
=head2 not_intersect
Input: Two arrayrefs
Output: An array containing all elements member of just one of the input arrays (not both).
Example:
perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
The output is C<< 1 2 >>.
=cut
sub not_intersect {
my %code;
my %seen;
for(@{$_[0]}){$code{$_}|=1}
for(@{$_[1]}){$code{$_}|=2}
return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
}
=head2 uniq
Input: An array of strings (or numbers)
Example:
perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")' # prints www.uio.no
Uses perls C<gethostbyaddr> internally.
C<ipaddr()> memoizes the results internally (using the
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
particular IP number might take some time.
Some few DNS loopups can take several seconds.
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
probably turn off hostname lookups in their logs and just log IP numbers by using
C<HostnameLookups Off> in Apache C<httpd.conf> and then use I<ipaddr> afterwards if necessary.
=cut
our %IPADDR_memo;
sub ipaddr {
my $ipnr=shift;
#hm, NOTE: The 2 parameter on the next code line is not 2 for all OSes,
#but seems to work in Linux and HPUX. Den correct way is to use the
and C<< <td>s >> as an array of arrayrefs. And strips away any html
inside the C<< <td>s >> as well.
my @table = ht2t($html,'some string occuring before the <table> you want');
Input: One or two arguments.
First argument: the html where a C<< <table> >> is to be found and converted.
Second argument: (optional) If the html contains more than one C<<
<table> >>, and you do not want the first one, applying a second
argument is a way of telling C<ht2t> which to capture: the one with this word
or string occurring before it.
Output: An array of arrayrefs.
C<ht2t()> is a quick and dirty way of scraping (or harvesting as it is
also called) data from a web page. Look too L<HTML::Parse> to do this
more accurate.
Example:
}
=head2 makedir
Input: One or two arguments.
Works like perls C<mkdir()> except that C<makedir()> will create nesessary parent directories if they dont exists.
First input argument: A directory name (absolute, starting with C< / > or relative).
Second input argument: (optional) permission bits. Using the normal C<< 0777^umask() >> as the default if no second input argument is provided.
Example:
makedir("dirB/dirC")
...will create directory C<dirB> if it does not already exists, to be able to create C<dirC> inside C<dirB>.
Returns true on success, otherwise false.
C<makedir()> memoizes directories it has checked for existence before (trading memory and for speed).
=head2 read_conf
B<First argument:> A file name or a reference to a string with settings in the format described below.
B<Second argument, optional:> A reference to a hash. This hash will have the settings from the file (or stringref).
The hash do not have to be empty beforehand.
Returns a hash with the settings as in this examples:
my %conf = read_conf('/etc/your/thing.conf');
print $conf{sectionA}{knobble}; #prints ABC if the file is as shown below
print $conf{sectionA}{gobble}; #prints ZZZ, the last gobble
print $conf{switch}; #prints OK here as well, unsectioned value
print $conf{part2}{password}; #prints oh:no= x
File use for the above example:
switch: OK #before first section, the '' (empty) section
[sectionA]
knobble: ABC
gobble: XYZ #this gobble is overwritten by the gobble on the next line
gobble: ZZZ
[part2]
password: oh:no= x #should be better
text: { values starting with { continues
until reaching a line with }
Everything from # and behind is regarded comments and ignored. Comments can be on any line.
To keep a # char, put a \ in front of it.
A C< : > or C< = > separates keys and values. Spaces at the beginning or end of lines are
ignored (after removal of #comments), as are any spaces before and after : and = separators.
Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
internal spaces and tabs, but not at the beginning or end.
Multi-line values must start and end with { and }. Using { and } keep spaces at the start
or end in both one-line and multi-line values.
Sections are marked with C<< [sectionname] >>. Section names, keys and values is case
sensitive. C<Key:values> above the first section or below and empty C<< [] >> is placed
both in the empty section in the returned hash and as top level key/values.
C<read_conf> can be a simpler alternative to the core module L<Config::Std> which has
its own hassles.
$Acme::Tools::Read_conf_empty_section=1; #default 0 (was 1 in version 0.16)
my %conf = read_conf('/etc/your/thing.conf');
print $conf{''}{switch}; #prints OK with the file above
print $conf{switch}; #prints OK here as well
=cut
our $Read_conf_empty_section=0;
sub read_conf {
my($fn,$hr)=(@_,{});
my $conf=ref($fn)?$$fn:readfile($fn);
$conf=~s,\s*(?<!\\)#.*,,g;
my($section,@l)=('',split"\n",$conf);
while(@l) {
my $l=shift@l;
if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
$section=$1;
$$hr{$1}||={};
}
elsif( $l=~/^\s*([^\:\=]+?)\s*[:=]\s*(.*?)\s*$/ ) {
my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
my $v=&$ml($2);
$$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
$$hr{$1}=$v if !length($section);
}
}
%$hr;
}
# my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
# s,<INCLUDE ([^>]+)>,"".readfile(&$incfn($1)),eg; #todo
=head2 openstr
#todo: sub stdouterr{}
=head1 TIME FUNCTIONS
=head2 tms
Timestring, works somewhat like the Gnu/Linux C<date> command and Oracle's C<to_char()>
Converts timestamps to more readable forms of time strings.
Converts seconds since I<epoch> and time strings on the form C<YYYYMMDD-HH24:MI:SS> to other forms.
B<Input:> One, two or three arguments.
B<First argument:> A format string.
B<Second argument: (optional)> An epock C<time()> number or a time
string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
version of hours) or YYYYMMDD. Uses the current C<time()> if the
second argument is missing.
TODO: Formats with % as in C<man date> (C<%Y%m%d> and so on)
B<Third argument: (optional> True or false. If true and first argument
is eight digits: Its interpreted as a date like YYYYMMDD time string,
not an epoch time. If true and first argument is six digits its
interpreted as a date like DDMMYY (not YYMMDD!).
B<Output:> a date or clock string on the wanted form.
Dy Three letters: Sun Mon Tue Wed Thu Fri Sat
DAY Upper case
DY Upper case
Dth 1st 2nd 3rd 4th 5th ... 11th 12th ... 20th 21st 22nd 23rd 24th ... 30th 31st
WW Week number of the year 01-53 according to the ISO8601-definition (which most countries uses)
WWUS Week number of the year 01-53 according to the most used definition in the USA.
Other definitions also exists.
epoch Converts a time string from YYYYMMDD-HH24:MI:SS, YYYYMMDD-HH24:MI:SS, YYYYMMDDTHH:MI:SS,
YYYY-MM-DDTHH:MI:SS or YYYYMMDD to the number of seconds since January 1st 1970.
Commonly known as the Unix epoch.
JDN Julian day number. Integer. The number of days since the day starting at noon on January 1 4713 BC
JD Same as JDN but a float accounting for the time of day
B<Third argument:> (optional) Is_date. False|true, default false. If true, the second argument is
interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).
=cut
#Se også L</tidstrk> og L</tidstr>
our $Tms_pattern;
our %Tms_str=
('MÃ
NED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
'MÃ¥ned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
if(@_==1){
my @lt=localtime();
$_[0] eq 'YYYY' and return 1900+$lt[5];
$_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
$_[0] =~ $Re_isnum and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
}
my($format,$time,$is_date)=@_;
$time=time_fp() if !defined$time;
($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
my @lt=localtime($time);
#todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
#todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
# Fri Nov 13 18:00:00 CET 2015
#date --date="next Friday" #--date or -d
#date --date="last friday"
#date --date="2 days ago"
#date --date="yesterday" #or tomorrow
#date --date="-1 day" #date --date='10 week'
if( $is_date ){
my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
=cut
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
=head2 time_fp
No input arguments.
Return the same number as perls C<time()> except with decimals (fractions of a second, _fp as in floating point number).
print time_fp(),"\n";
print time(),"\n";
Could write:
1116776232.38632
...if that is the time now.
1116776232
...from perl's internal C<time()> if C<Time::HiRes> isn't installed and available.
=cut
sub time_fp { # {return 0+gettimeofday} is just as well?
eval{ require Time::HiRes } or return time();
my($sec,$mic)=Time::HiRes::gettimeofday();
return $sec+$mic/1e6; #1e6 not portable?
}
sub timems {
eval{ require Time::HiRes } or return time();
my($sec,$mic)=Time::HiRes::gettimeofday();
return $sec*1000+$mic/1e3;
}
=head2 sleep_fp
sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:
sleep_fp(0.020); # sleeps for 20 milliseconds
Sub sleep_fp do a C<require Time::HiRes>, thus it might take some
extra time the first call. To avoid that, add C<< use Time::HiRes >>
to your code. Sleep_fp should not be trusted for accuracy to more than
a tenth of a second. Virtual machines tend to be less accurate (sleep
longer) than physical ones. This was tested on VMware and RHEL
(Linux). See also L<Time::HiRes>.
=head2 sleeps
=head2 sleepms
=head2 sleepus
=head2 sleepns
sleep_fp(0.020); #sleeps for 20 milliseconds
sleeps(0.020); #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
sleepms(20); #sleeps for 20 milliseconds
sleepus(20000); #sleeps for 20000 microseconds = 20 milliseconds
sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds
=cut
sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
sub sleeps { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
sub sleepms { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
sub sleepus { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
sub sleepns { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }
=head2 eta
}
my $avg=$sum/$sumw;
return $avg;
# return avg(@eta);
#return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
1;
}
=head2 sleep_until
sleep_until(0.5) sleeps until half a second has passed since the last
call to sleep_until. This example starts the next job excactly ten
seconds after the last job started even if the last job lasted for a
while (but not more than ten seconds):
for(@jobs){
sleep_until(10);
print localtime()."\n";
...heavy job....
}
Might print:
Thu Jan 12 16:00:00 2012
Thu Jan 12 16:00:10 2012
Thu Jan 12 16:00:20 2012
...and so on even if the C<< ...heavy job... >>-part takes more than a
second to complete. Whereas if sleep(10) was used, each job would
spend more than ten seconds in average since the work time would be
added to sleep(10).
Note: sleep_until() will remember the time of ANY last call of this sub,
not just the one on the same line in the source code (this might change
in the future). The first call to sleep_until() will be the same as
sleep_fp() or Perl's own sleep() if the argument is an integer.
=cut
our $Time_last_sleep_until;
C<decode()> and C<decode_num()> works just as Oracles C<decode()>.
C<decode()> and C<decode_num()> accordingly uses perl operators C<eq> and C<==> for comparison.
Examples:
my $a=123;
print decode($a, 123,3, 214,4, $a); # prints 3
print decode($a, 123=>3, 214=>4, $a); # prints 3, same thing since => is synonymous to comma in Perl
The first argument is tested against the second, fourth, sixth and so on,
and then the third, fifth, seventh and so on is
returned if decode() finds an equal string or number.
In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.
More examples:
my $a=123;
print decode($a, 123=>3, 214=>7, $a); # also 3, note that => is synonym for , (comma) in perl
print decode($a, 122=>3, 214=>7, $a); # prints 123
return 0;
}
=head2 KID_ok
Checks if a norwegian KID number has an ok control digit.
To check if a customer has typed the number correctly.
This uses the LUHN algorithm (also known as mod-10) from 1960 which is also used
internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.
The algorithm, as described in Phrack (47-8) (a long time hacker online publication):
"For a card with an even number of digits, double every odd numbered
digit and subtract 9 if the product is greater than 9. Add up all the
even digits as well as the doubled-odd digits, and the result must be
a multiple of 10 or it's not a valid card. If the card has an odd
number of digits, perform the same addition doubling the even numbered
digits instead."
B<Input:>
One or more numeric arguments:
First: x (first returned element)
Second: y (up to y but not including y)
Third: step, default 1. The step between each returned element
If a fourth, fifth and so on arguments are given, they change the step for each returned element. As first derivative, second derivative.
B<Output:>
If one argument: returns the array C<(0 .. x-1)>
If two arguments: returns the array C<(x .. y-1)>
If three arguments: The default step is 1. Use a third argument to use a different step.
B<Examples:>
permutations('a','b'); #returns (['a','b'],['b','a'])
permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
# ['b','a','c'],['b','c','a'],
# ['c','a','b'],['c','b','a'])
Up to five input arguments C<permutations()> is probably as fast as it
can be in this pure perl implementation (see source). For more than
five, it could be faster. How fast is it now: Running with different
n, this many time took that many seconds:
n times seconds
-- ------- ---------
2 100000 0.32
3 10000 0.09
4 10000 0.33
5 1000 0.18
6 100 0.27
7 10 0.21
8 1 0.17
9 1 1.63
10 1 17.00
Returns a data structure as a string. See also C<Data::Dumper>
(serialize was created long time ago before Data::Dumper appeared on
CPAN, before CPAN even...)
B<Input:> One to four arguments.
First argument: A reference to the structure you want.
Second argument: (optional) The name the structure will get in the output string.
If second argument is missing or is undef or '', it will get no name in the output.
Third argument: (optional) The string that is returned is also put
into a created file with the name given in this argument. Putting a
C<< > >> char in from of the filename will append that file
instead. Use C<''> or C<undef> to not write to a file if you want to
use a fourth argument.
Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
The default is infinite (some big number) so no extra newlines are output.
'7'=>'x');
Areas of use:
- Debugging (first and foremost)
- Storing arrays and hashes and data structures of those on file, database or sending them over the net
- eval earlier stored string to get back the data structure
Be aware of the security implications of C<eval>ing a perl code string
stored somewhere that unauthorized users can change them! You are
probably better of using L<YAML::Syck> or L<Storable> without
enabling the CODE-options if you have such security issues.
More on decompiling Perl-code: L<Storable> or L<B::Deparse>.
=head2 dserialize
Debug-serialize, dumping data structures for you to look at.
Same as C<serialize()> but the output is given a newline every 80th character.
(Every 80th or whatever C<$Acme::Tools::Dserialize_width> contains)
=cut
=head1 JUST FOR FUN
=head2 brainfu
B<Input:> one or two arguments
First argument: a string, source code of the brainfu
language. String containing the eight charachters + - < > [ ] . ,
Every other char is ignored silently.
Second argument: if the source code contains commas (,) the second
argument is the input characters in a string.
B<Output:> The resulting output from the program.
Example:
print brainfu(<<""); #prints "Hallo Verden!\n"
++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
.>----------.+++++++++++++.--------------.+.+++++++++.>+.>.
filters need only a 32 bit hash function, the result from md5() are
split (C<unpack>-ed) into 4 parts 32 bits each and are treated as if 4
hash functions was called at once (speedup). Using different salts to
the key on each md5 results in different hash functions.
Digest::SHA512 would have been even better since it returns more bits,
if it werent for the fact that it's much slower than Digest::MD5.
String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
=head2 Theory and math behind bloom filters
L<http://www.internetmathematics.org/volumes/1/4/Broder.pdf>
L<http://blogs.sun.com/jrose/entry/bloom_filters_in_a_nutshell>
L<http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html>
Keeps uid, gid, mode (chmod) and mtime.
-p Show a progress meter using the pv program if installed
-k Keeps original file
-v Verbose, shows info on degree of compression and file
number if more than one file is being converted
-o Overwrites existing result file, otherwise stop with error msg
-1 .. -9 Degree of compression, -1 fastest .. -9 best
-e With -t xz (or 2xz) passes -e to xz (-9e = extreme compression)
-L rate With -p. Slow down, ex: -L 200K means 200 kilobytes per second
-D sec With -p. Only turn on progress meter (pv) after x seconds
-i sec With -p. Info update rate
-l With -p. Line mode
-I With -p. Show ETA as time of arrival as well as time left
-q With -p. Quiet. Useful with -L to limit rate, but no output
The options -L -D -i -l -I -q implicitly turns on -p. Those options are passed
through to pv. See: man pv.
=head3 due
Like C<du> command but views space used by file extentions instead of dirs. Options:
my %s=map{($_=>++$i)}@argv; #sort
my %f=map{($_=>[$_])}@argv; #also weeds out dupl params
for my $c (@checks){
my @f=map @{$f{$_}}, sort keys %f;
if($o{p} and $c eq $checks[-1]){ #view progress for last check, todo: eta() is wacky here! everywhere?
my $sum=@f?sum(map -s$_,@f):0;
my($corg,$cnt,$cntmb,$mb)=($c,0,0,$sum/1e6);
$c=sub{
$cntmb+=(-s$_[0])/1e6;
my $eol=++$cnt==@f?"\n":"\r";
print STDERR sprintf("%d/%d files checked (%d%%), %d/%d MB (%d%%), ETA in %d sec $eol",
$cnt, 0+@f, 100*$cnt/@f, $cntmb, $mb, 100*$cntmb/$mb,
curb(nvl(eta($cnt,0+@f),time)-time(),0,1e7));
&$corg(@_)
};
}
my %n; push @{$n{&$c($_)}}, $_ for @f;
delete @n{grep@{$n{$_}}<2,keys%n};
%f=%n;
}
return %f if $o{F};
my $pr=sprintf"%0.1f%%",$szold?100*$sznew/$szold:0;
#todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
#todo: $o{h} ? printf("%6.1f%% %9s => %9s => %9s %s\n", $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
#todo: : printf("%6.1f%% %11d b => %11d b => %11 b %s\n",$pr,$szold,$szuncmp,$sznew,$_)
my $str= $o{h}
? sprintf("%-7s %9s => %9s", $pr,(map bytes_readable($_),$szold,$sznew))
: sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
if(@argv>1){
$i++;
$str=$i<@argv
? " ETA:".sprintf("%-8s",sec_readable(eta('z2z',$bsf,$sum)-time_fp()))." $str"
: " TA: 0s $str"
if $sum>1e6;
$str="$i/".@argv." $str";
}
print "$str $new\n";
}
}
if($o{v} and @argv>1){
my $bytes=$o{h}?'':'bytes ';
my $str=
sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
0+@argv,
time_fp()-$start,
(map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
100*$sumnew/$sum;
$str=~s,\((\d),(+$1,;
print $str;
}
}
=head2 args
options and an optional C<:> behind the letter or digit marks that the
switch takes an argument.
=cut
sub args {
my $switches=shift;
my $hashref=shift;
my $re_sw='^([a-z0-9]:?)+$';
croak "ERR: args: first arg $switches dont match $re_sw\n" if $switches !~ /$re_sw/i;
croak "ERR: second arg to args() not hashref\n" if ref($hashref) ne 'HASH';
local @ARGV=@_;
require Getopt::Std;
Getopt::Std::getopts($switches => $hashref);
(@ARGV);
}
sub opts {
my($def, $hashref, @a)=@_;
@a=@ARGV if @_<=2;
my %def=map{/(\w)(:?)/;($1=>$2?2:1)}$def=~/(\w:?)/g;
0.24 Feb 2019 fixed failes on perl 5.16 and older
0.23 Jan 2019 Subs: logn, egrep, which. More UTF-8 "oriented" (lower, upper, ...)
Commands: zsize, finddup, due (improved), conv (improved, [MGT]?Wh
and many more units), due -M for stdin of filenames.
0.22 Feb 2018 Subs: subarr, sim, sim_perm, aoh2sql. command: resubst
0.21 Mar 2017 Improved nicenum() and its tests
0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
throttle timems refa refaa refah refh refha refhh refs
eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
Commands: 2bz2 2gz 2xz z2z
0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
self_update install_acme_command_tools
Commands: conv due freq wipe xcat (see "Commands")
0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
roman2int trim. Improved: conv (numbers currency) range ("derivatives")
t/02_general.t view on Meta::CPAN
#--uniq
my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
ok( join( " ", uniq @t ) eq '7 2 3 4 1 5 x xx 07' );
#--union
ok( join( ",", union([1,2,3],[2,3,3,4,4]) ) eq '1,2,3,4' );
#--minus
ok( join( " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] ) ) eq 'five 1 2' );
#--intersect
ok( join(" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )) eq '4 3 five' );
#--not_intersect
ok( join( " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )) eq '1 2' );
#--subhash
my %pop = ( Norway=>4800000, Sweeden=>8900000, Finland=>5000000,
Denmark=>5100000, Iceland=>260000, India => 1e9 );
ok_ref({subhash(\%pop,qw/Norway Sweeden Denmark/)},
{Denmark=>5100000,Norway=>4800000,Sweeden=>8900000}, 'subhash');
#--hashtrans
my%h = ( 1 => {a=>33,b=>55},
2 => {a=>11,b=>22},
t/03_bloomfilter.t view on Meta::CPAN
# perl Makefile.PL;make;perl -Iblib/lib t/03_bloomfilter.t
use lib '.'; BEGIN{require 't/common.pl'}
use Test::More tests => 28;
my $error_rate=0.02;
my $capacity=10000;
my $bf=bfinit($error_rate, $capacity);
my $t=time_fp();
bfadd($bf, map $_*2,0..$capacity-1);
#deb "Adds pr sec: ".int($capacity/(time_fp()-$t))."\n";
#bfadd($bf, $_) for map $_*2,0..$capacity-1;
deb serialize({%$bf,filter=>''},'bf','',1);
deb "Filter has capacity $$bf{capacity}\n";
deb "Filter has $$bf{key_count} keys\n";
deb "Filter has ".length($$bf{filter})." bytes\n";
deb "Filter has $$bf{filterlength} bits of which ".bfsum($bf)." (".int(100*bfsum($bf)/$$bf{filterlength})."%) are on\n";
deb "Filter has $$bf{hashfuncs} hash functions\n";
my @c=bfcheck($bf,0..$capacity*2); #test next ok: $c[2000]=0;
#deb "$_->".bfcheck($bf,$_)."\n" for 0..200;
t/04_resolve.t view on Meta::CPAN
if($ENV{ATDEBUG}){
deb "Resolve: ".resolve(sub{my($x)=(@_); $x**2 - 4*$x -1},20,2)."\n";
deb "Resolve: ".resolve(sub{my($x)=@_; $x**log($x)-$x},0,3)."\n";
deb "Resolve: ".resolve(sub{$_[0]})." iters=$Acme::Tools::Resolve_iterations\n";
}
my $e;
ok(resolve(sub{my($x)=@_; $x**2 - 4*$x -21}) == -3 ,'first solution');
ok(($e=resolve(sub{ $_**2 - 4*$_ - 21 })) == -3 ,"first solution with \$_ (=$e)");
ok(resolve(sub{$_**2 - 4*$_ -21},0,3) == 7 ,'second solution, start 3');
ok(resolve(sub{my($x)=@_; $x**2 - 4*$x -21},0,2) == 7 ,'second solution, start 2');
my $f=sub{ $_**2 - 4*$_ - 21 };
ok(do{my$r=resolve($f,0,2); $r== 7} ,'second solution, start 2');
ok(resolve($f,0,2) == 7 ,'second solution, start 2');
ok(resolve($f,0,2) == 7 ,'second solution, start 2');
ok($Resolve_iterations > 1 ,"iterations=$Resolve_iterations");
ok($Resolve_last_estimate == 7 ,"last_estimate=$Resolve_last_estimate (should be 7)");
eval{ resolve(sub{1}) }; # 1=0
ok($@=~/Div by zero/);
ok(!defined $Resolve_iterations);
ok(!defined $Resolve_last_estimate);
my $c;
eval{$e=resolve(sub{$c++; sleep_fp(0.02); $_**2 - 4*$_ -21},0,.02,undef,undef,0.05)};
deb "x=$e, est=$Resolve_last_estimate, iters=$Resolve_iterations, time=$Resolve_time, c=$c -- $@\n";
t/21_read_conf.t view on Meta::CPAN
use Test::More tests => 3;
my $c=<<'END';
#tester
hei: fdas #heihei
hopp: and {u dont stoppp #
#dfsa
dfsa
dsa
[ section1] #
hei : { fds1
312321
123321}
båt: 4231\#3
bil: 213+123
sykkel : { x }
ski: {
staver
}
[section2]
hei: fds1 312321 123321
bil= 213+123:2=1 #: and = are ok in values
sykkel: sdfkdsa
[section3]
[ section2 ]
båt: 4231
END
my %c=rc(\$c);
my %s0=( hei =>'fdas',
hopp =>'and {u dont stoppp' );
my %fasit=(
%s0,#''=>\%s0,
'section1'=>{'bil'=>'213+123',
'båt'=>'4231#3',
'hei'=>" fds1\cJ312321\cJ123321",
'sykkel'=>' x ',
'ski'=>"\n staver\n",
},
'section2'=>{'bil'=>'213+123:2=1',
'båt'=>'4231',
'hei'=>'fds1 312321 123321',
'sykkel'=>'sdfkdsa'
},
'section3'=>{}
);
my $t;
sub rc {$t=time_fp();my%c=read_conf(@_);$t=time_fp()-$t;%c}
sub sjekk {
my $f=serialize(\%fasit,'c','',1);
my $s=serialize(\%c,'c','',1);
ok($s eq $f, sprintf("read_conf %10.6f sek (".length($s)." bytes)",$t)) or warn"s=$s\nf=$f\n";
}
sjekk(); #1
my $f=tmp()."/acme-tools.read_conf.tmp";
eval{writefile($f,$c)};$@&&ok(1)&&exit;
%c=(); rc($f,\%c);
sjekk(); #2
$Acme::Tools::Read_conf_empty_section=1; #default 0
$fasit{''}=\%s0;
%c=rc($f);
sjekk(); #3
t/25_pwgen.t view on Meta::CPAN
# make test
# perl Makefile.PL; make; perl -Iblib/lib t/25_pwgen.t
use lib '.'; BEGIN{require 't/common.pl'}
use Test::More tests => 11; if($^O ne 'linux'){ ok(1) for 1..11; exit }
sub tstr{sprintf(" (%d trials, %.5f sec)",$Acme::Tools::Pwgen_trials, $Acme::Tools::Pwgen_sec)}
SKIP: {
skip "- strangely pwgen-croak-test fails on windows sometime", 2 if $^O ne 'linux';
local $Acme::Tools::Pwgen_max_sec=0.001;
eval{pwgen(3)}; ok($@=~/pwgen.*25_pwgen.t/,"pwgen croak works: ".trim($@));
local $Acme::Tools::Pwgen_max_trials=3;
eval{pwgen(3)}; ok($@=~/pwgen.*after 3 .*25_pwgen.t/,"pwgen croak works: ".trim($@));
};
ok(length(pwgen())==8, 'default len 8');
my $n=300;
$Acme::Tools::Pwgen_max_sec=1;
sub test{/^[a-z0-9]/i and /[A-Z]/ and /[a-z]/ and /\d/ and /[\,\-\.\/\&\%\_\!]/};
my @pw=grep test(), pwgen(0,$n);
ok(@pw==$n, "pwgen ok ".@pw.tstr());
$n=50;
@pw=grep/^[A-Z]{20}$/,pwgen(20,$n,'A-Z');
ok(@pw==$n, "pwgen ok ".@pw);
$n=50;
@pw=grep/^[A-Z\d]{8}$/&&!/\D\D/,pwgen(8,$n,'A-Z0-9',qr/[ABC]/,qr/\d/,sub{!/\D\D/});
t/31_readable.t view on Meta::CPAN
1153433 => '1.10 MB',
1181116006 => '1.10 GB',
1209462790553 => '1.10 TB',
1088516511498 => '0.99 TB'
);
my($br,@brk)=('',sort {$a<=>$b} keys %br);
ok(($br=bytes_readable($_)) eq $br{$_}, "bytes_readable($_) == $br (should be $br{$_})") for @brk;
s/( [^B])/0$1/ for values %br;
ok(($br=bytes_readable($_,3)) eq $br{$_}, "bytes_readable($_,3) == $br (should be $br{$_})") for @brk;
#----------sec_readable
my %sr=(
0 => '0s',
0.0123 => '0.0123s',
-0.0123 =>'-0.0123s',
1.23 => '1.23s',
1 => '1s',
9.87 => '9.87s',
10 => '10s',
10.1 => '10.1s',
59 => '59s',
t/31_readable.t view on Meta::CPAN
-1331 =>'-22m 11s',
13331 => '3h 42m',
133331 => '1d 13h',
1333331 => '15d 10h',
13333331 => '154d 7h',
133333331 => '4yr 82d',
1333333331 => '42yr 91d',
133333333331 => '4225yr 28d',
);
my($sr,@srk)=('',sort {$a<=>$b} keys %sr);
ok(($sr=sec_readable($_)) eq $sr{$_}, "sec_readable($_) == $sr (should be $sr{$_})") for @srk;
t/33_refsubs.t view on Meta::CPAN
my $ref_to_array = [1,2,3];
my $ref_to_hash = {1,100,2,200,3,300};
my $ref_to_scalar = \"String";
ok( refa $ref_to_array );
ok( refh $ref_to_hash );
ok( refs $ref_to_scalar );
my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
ok( refaa $ref_to_array_of_arrays );
ok( refah $ref_to_array_of_hashes );
ok( refha $ref_to_hash_of_arrays );
ok( refhh $ref_to_hash_of_hashes );
my $a=[2,3,4];
t/test_fork_bloom.pl view on Meta::CPAN
my $error_rate=0.01;
my($pid,@pid);
for my $job (0..$jobs-1){
unlink"/tmp/bf$job.bf";
next if fork();
my $t=time_fp();
my @keys=grep$_%$jobs==$job,1..$cap;
#my @keys=map rand(), 1..$cap/$jobs;
my $bf=bfinit(error_rate=>$error_rate,capacity=>$cap,keys=>\@keys);
bfstore($bf,"/tmp/bf$job.bf");
print "job $job finished, ".(time_fp()-$t)." sec\n";
exit;
}
1 while wait() != -1;
print "building finished\n";
my $bf=bfinit(error_rate=>$error_rate,capacity=>$cap);
for my $job (0..$jobs-1){
print "Adding bloom filter $job...";
my $t=time_fp();
bfaddbf($bf,bfretrieve("/tmp/bf$job.bf"));
print "took ".(time_fp()-$t)." sec\n";
}
print int($$bf{filterlength}/8)," bytes\n";
printf "%.1f%%\n",100*bfsum($bf)/$$bf{filterlength};
print "keys: $$bf{key_count}\n";
print "found: ".bfgrep($bf,[1..$cap/10])."\n";
my $tests=10000;
my $errs=bfgrep($bf,[$cap+1..$cap+1+$tests]);
print "Error rate: $errs/$tests = ".($errs/$tests)."\n";
bfstore($bf,"/tmp/bfall.bf");
t/test_pi.pl view on Meta::CPAN
wget https://gmplib.org/download/misc/gmp-chudnovsky.c
sudo apt-get install libgmpv4-dev
gcc -s -Wall -o gmp-chudnovsky gmp-chudnovsky.c -lgmp -lm
wget http://beej.us/blog/data/pi-chudnovsky-gmp/chudnovsky_c.txt; mv chudnovsky_c.txt chudnovsky.c
gcc -O2 -Wall -o chudnovsky chudnovsky.c -lgmp
time ./chudnovsky 1000 #3.141592.......... 1000 decimals in 0.004s, 10000 in 0.22s, 100000 in 42s
wget http://www.angio.net/pi/digits/pi1000000.txt
time perl -nle'print $-[0]." ".($+[0]-$-[0])." ".substr($_,$-[0],$+[0]-$-[0]) while /(\d)\1\1\1\1\1+/g' pi1000000.txt #pos of 6+ consec same decs
( run in 1.673 second using v1.01-cache-2.11-cpan-39bf76dae61 )