Acme-Tools
view release on metacpan or search on metacpan
return num2code($_[0],0,$_[1]) if @_==2;
my($num,$digits,$validchars,$start)=@_;
my $l=length($validchars);
my $key;
$digits||=9e9;
no warnings;
croak if $num<$start;
$num-=$start;
for(1..$digits){
$key=substr($validchars,$num%$l,1).$key;
$num=int($num/$l);
last if $digits==9e9 and !$num;
}
croak if $num>0;
return $key;
}
sub code2num {
my($code,$validchars,$start)=@_; $start=0 if!defined$start;
my $l=length($validchars);
my $num=0;
$num=$num*$l+index($validchars,$_) for split//,$code;
return $num+$start;
}
=head2 base
Numbers in any number system of base between 2 and 36. Using capital letters A-Z for base higher than 10.
base(2,15) # 1111 2 --> binary
base(8,4096) # 10000 8 --> octal
base(10,4096) # 4096 of course
base(16,254) # FE 16 --> hex
base(16,254.3) # FE 16 --> hex, can not handle decimal numbers (yet...todo)
base(36,123456) # FE 16 --> hex, can not handle decimal numbers (yet...todo)
base(36,1234567891011) # FR5HUHC3 base36 using all 0-9 and A-Z as digits, 10+26=36
base(37,1) # die with message 'base not 2-36'
base($x,0) # 0
base(16, 14,15,16,17) # list of four elements: E F 10 11
=head2 dec2bin dec2hex dec2oct bin2dec bin2hex bin2oct hex2dec hex2bin hex2oct oct2dec oct2bin oct2hex
print dec2bin(101); # 1100101
print dec2hex(101); # 65
print dec2oct(101); # 145
print bin2dec(1010011110); # 670
print bin2hex(1010011110); # 29e
print bin2oct(1010011110); # 1236
print hex2dec(101); # 257
print hex2bin(101); # 100000001
print hex2oct(101); # 401
print oct2dec(101); # 65
print oct2bin(101); # 1000001
print oct2hex(101); # 41
=cut
sub _base{my($b,$n)=@_;$n?_base($b,int$n/$b).chr(48+$n%$b+7*($n%$b>9)):''} #codegolf
sub base {
my($b,$n)=@_;
@_>2 ? (map base($b,$_),@_[1..$#_])
:$b<2||$b>36 ? croak"base not 2-36"
:$n>0 ? _base($b,$n)
:$n<0 ? "-"._base($b,-$n)
:!defined $n ? undef
:$n==0 ? 0
: croak
}
sub dec2bin { sprintf"%b",shift }
sub dec2hex { sprintf"%x",shift }
sub dec2oct { sprintf"%o",shift }
sub bin2dec { oct("0b".shift) }
sub bin2hex { sprintf"%x",oct("0b".shift) }
sub bin2oct { sprintf"%o",oct("0b".shift) }
sub hex2dec { hex(shift) }
sub hex2bin { sprintf"%b",hex(shift) }
sub hex2oct { sprintf"%o",hex(shift) }
sub oct2dec { oct(shift) }
sub oct2bin { sprintf"%b",oct(shift) }
sub oct2hex { sprintf"%x",oct(shift) }
=head2 gcd
I< C<">The Euclidean algorithm (also called Euclid's algorithm) is an
algorithm to determine the greatest common divisor (gcd) of two
integers. It is one of the oldest algorithms known, since it appeared
in the classic Euclid's Elements around 300 BC. The algorithm does not
require factoring.C<"> >
B<Input:> two or more positive numbers (integers, without decimals that is)
B<Output:> an integer
B<Example:>
print gcd(12, 8); # prints 4
Because the (prime number) factors of 12 is 2 * 2 * 3 and the factors of 8 is 2 * 2 * 2
and the common ('overlapping') factors for both 12 and 8 is then 2 * 2 and the result becomes 4.
B<Example two>:
print gcd(90, 135, 315); # prints 45
print gcd(2*3*3*5, 3*3*3*5, 3*3*5*7); # prints 45 ( = 3*3*5 which is common to all three args)
Implementation:
sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
One way of putting it: Keep replacing the larger of the two numbers with the difference between them until you got two equal numbers. Then thats the answer.
L<http://en.wikipedia.org/wiki/Greatest_common_divisor>
L<http://en.wikipedia.org/wiki/Euclidean_algorithm>
=cut
sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
#hm sub gcd { my($a,$b)=@_; ($a,$b)=($b,$a%$b) while $b; $a }
=head2 lcm
C<lcm()> finds the Least Common Multiple of two or more numbers (integers).
B<Input:> two or more positive numbers (integers)
B<Output:> an integer number
Example: C< 2/21 + 1/6 = 4/42 + 7/42 = 11/42>
Where 42 = lcm(21,6).
B<Example:>
print lcm(45,120,75); # prints 1800
Because the factors are:
45 = 2^0 * 3^2 * 5^1
120 = 2^3 * 3^1 * 5^1
75 = 2^0 * 3^1 * 5^2
Take the bigest power of each primary number (2, 3 and 5 here).
Which is 2^3, 3^2 and 5^2. Multiplied this is 8 * 9 * 25 = 1800.
sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
Seems to works with L<Math::BigInt> as well: (C<lcm> of all integers from 1 to 200)
perl -MAcme::Tools -MMath::BigInt -le'print lcm(map Math::BigInt->new($_),1..200)'
337293588832926264639465766794841407432394382785157234228847021917234018060677390066992000
=cut
sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
=head2 resolve
Resolves an equation by Newtons method.
B<Input:> 1-6 arguments. At least one argument.
First argument: must be a coderef to a subroutine (a function)
Second argument: if present, the target, f(x)=target. Default 0.
Third argument: a start position for x. Default 0.
Fourth argument: a small delta value. Default 1e-4 (0.0001).
Fifth argument: a maximum number of iterations before resolve gives up
and carps. Default 100 (if fifth argument is not given or is
undef). The number 0 means infinite here. If the derivative of the
start position is zero or close to zero more iterations are typically
needed.
Sixth argument: A number of seconds to run before giving up. If both
fifth and sixth argument is given and > 0, C<resolve> stops at
whichever comes first.
B<Output:> returns the number C<x> for C<f(x)> = 0
...or equal to the second input argument if present.
B<Example:>
The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.
The result of C<resolve> will depend on the start position:
print resolve(sub{ $_**2 - 4*$_ - 21 }); # -3 with $_ as your x
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 }); # -3 more elaborate call
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3); # 7 with start position 3
print "Iterations: $Acme::Tools::Resolve_iterations\n"; # 3 or larger, about 10-15 is normal
The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
to the last number of iterations C<resolve> used. Also if C<resolve> dies (carps).
The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
set to the last estimate. This number will often be close to the solution and can
be used even if C<resolve> dies (carps).
B<BigFloat-example:>
If either second, third or fourth argument is an instance of L<Math::BigFloat>, so will the result be:
use Acme::Tools;
my $equation = sub{ $_ - 1 - 1/$_ };
my $gr1 = resolve( $equation, 0, 1 ); #
kcal, kilocalorie, kilocalories,
newtonmeter, newtonmeters, th, thermie
force: N, _N, dyn, dyne, dynes, lb, newton
length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
in, inch, inches, km, league, lightyear, ls, ly,
m, meter, meters, mi, mil, mile, miles,
nautical mile, nautical miles, nmi,
parsec, pc, planck, yard, yard_imperical, yd, Ã
, ångstrøm, angstrom
mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
mileage: mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km
money: AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR, BTC, LTC, mBTC, XBT
Currency rates are automatically updated from the net
at least every 24h since last update (on linux/cygwin).
numbers: dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
gr, great_gross, small_gross (not supported: decimal numbers)
power: BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy
pressure: N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr
radioactivity: Bq, becquerel, curie
speed: _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
kn, knot, knots, kt, kts, mach, machs, c, fps, ft/s, ftps
temperature: C, F, K, celsius, fahrenheit, kelvin
time: _s, biennium, century, d, day, days, decade, dy, fortnight,
h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
millennium, min, minute, minutes, mo, moment, mon, month,
olympiad, quarter, s, season, sec, second, seconds, shake,
tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
year, years, ygregorian, yjulian, ysideral, ytropical
volume: l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
container (or container20), container40, container40HC, container45HC
See: L<http://en.wikipedia.org/wiki/Units_of_measurement>
=cut
#TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
#TODO: @arr2=map conv($_,"from","to"),@arr1
#TODO: conv(123456789,'b','h'); # h converts to something human-readable
our %conv=(
length=>{
m => 1,
_m => 1,
meter => 1,
meters => 1,
metre => 1,
metres => 1,
km => 1000,
mil => 10000, #scandinavian #also: inch/1000!
in => 0.0254,
inch => 0.0254,
inches => 0.0254,
ft => 0.0254*12, #0.3048 m
feet => 0.0254*12, #0.3048 m
yd => 0.0254*12*3, #0.9144 m
yard => 0.0254*12*3, #0.9144 m
yards => 0.0254*12*3, #0.9144 m
fathom => 0.0254*12*3*2, #1.8288 m
fathoms => 0.0254*12*3*2, #1.8288 m
chain => 0.0254*12*3*22, #20.1168 m
chains => 0.0254*12*3*22, #20.1168 m
furlong => 0.0254*12*3*22*10, #201.168 m
furlongs=> 0.0254*12*3*22*10, #201.168 m
mi => 0.0254*12*3*22*10*8, #1609.344 m
mile => 0.0254*12*3*22*10*8, #1609.344 m
miles => 0.0254*12*3*22*10*8,
league => 0.0254*12*3*22*10*8*3, #4828.032 m
leagues => 0.0254*12*3*22*10*8*3, #4828.032 m
yard_imp => 0.914398416,
yard_imperical => 0.914398416,
NM => 1852, #nautical mile
nmi => 1852, #nautical mile
'nautical mile' => 1852,
'nautical miles' => 1852,
micron => 1e-6,
microns => 1e-6,
micrometre => 1e-6,
micrometres => 1e-6,
'Ã
' => 1e-10,
'ångstrøm' => 1e-10,
'angstrom' => 1e-10,
fm => 1e-15,
fermi => 1e-15, #in honour of Enrico Fermi
fermis => 1e-15, #in honour of Enrico Fermi
femtometer => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
femtometre => 1e-15,
femtometers => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
femtometres => 1e-15,
attometer => 1e-18, #derived from "atten/atton" (=eighteen)
attometre => 1e-18,
attometers => 1e-18, #derived from "atten/atton" (=eighteen)
attometres => 1e-18,
ly => 299792458*3600*24*365.25,
lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
ls => 299792458, #light-second
au => 149597870700, # by def: meters earth to sun
astronomical_unit => 149597870700,
ZAR => 0.667117, #south africa rand
},
numbers =>{
dec=>1,hex=>1,bin=>1,oct=>1,roman=>1, des=>1,#des: spelling error in v0.15-0.16
dusin=>1,dozen=>1,doz=>1,dz=>1,gross=>144,gr=>144,gro=>144,great_gross=>12*144,small_gross=>10*12,
}
);
our $conv_prepare_time=0;
our $conv_prepare_money_time=0;
sub conv_prepare {
my %b =(da =>1e+1, h =>1e+2, k =>1e+3, M =>1e+6, G =>1e+9, T =>1e+12, P =>1e+15, E =>1e+18, Z =>1e+21, Y =>1e+24, H =>1e+27);
my %big =(deca=>1e+1, hecto=>1e+2, kilo =>1e+3, mega =>1e+6, giga=>1e+9, tera=>1e+12, peta =>1e+15, exa =>1e+18, zetta=>1e+21, yotta=>1e+24, hella=>1e+27);
my %s =(d =>1e-1, c =>1e-2, m =>1e-3,'µ' =>1e-6, u=>1e-6, n =>1e-9, p =>1e-12, f =>1e-15, a =>1e-18, z =>1e-21, y =>1e-24);
my %small=(deci=>1e-1, centi=>1e-2, milli=>1e-3, micro =>1e-6, nano=>1e-9, pico=>1e-12, femto=>1e-15, atto=>1e-18, zepto=>1e-21, yocto=>1e-24);
# myria=> 10000 #obsolete
# demi => 1/2, double => 2 #obsolete
# lakh => 1e5, crore => 1e7 #south asian
my %x = (%s,%b);
for my $type (keys%conv) {
for(grep/^_/,keys%{$conv{$type}}) {
my $c=$conv{$type}{$_};
delete$conv{$type}{$_};
my $unit=substr($_,1);
$conv{$type}{$_.$unit}=$x{$_}*$c for keys%x;
}
}
$conv_prepare_time=time();
}
our $Currency_rates_url = 'http://calthis.com/currency-rates';
our $Currency_rates_expire = 6*3600;
sub conv_prepare_money {
eval {
require LWP::Simple;
my $td=$^O=~/^(?:linux|cygwin)$/?"/tmp":"/tmp"; #hm wrong!
my $fn="$td/acme-tools-currency-rates.data";
if( !-e$fn or time() - (stat($fn))[9] >= $Currency_rates_expire){
LWP::Simple::getstore($Currency_rates_url,"$fn.$$.tmp"); # get ... see getrates.cmd
die "nothing downloaded" if !-s"$fn.$$.tmp";
rename "$fn.$$.tmp",$fn;
chmod 0666,$fn;
}
my $d=readfile($fn);
my %r=$d=~/^\s*([A-Z]{3}) +(\d+\.\d+)\b/gm;
$r{lc($_)}=$r{$_} for keys%r;
#warn serialize([minus([sort keys(%r)],[sort keys(%{$conv{money}})])],'minus'); #ARS,AED,COP,BWP,LVL,BHD,NPR,LKR,QAR,KWD,LYD,SAR,KZT,CLP,IRR,VEF,TTD,OMR,MUR,BND
#warn serialize([minus([sort keys(%{$conv{money}})],[sort keys(%r)])],'minus'); #LTC,I44,BTC,BYR,TWI,NOK,XDR
$conv{money}={%{$conv{money}},%r} if keys(%r)>20;
};
carp "conv: conv_prepare_money (currency conversion automatic daily updated rates) - $@\n" if $@;
$conv{money}{"m$_"}=$conv{money}{$_}/1000 for qw/BTC XBT/;
$conv_prepare_money_time=time();
1; #not yet
}
sub conv {
my($num,$from,$to)=@_;
croak "conf requires 3 args" if @_!=3;
conv_prepare() if !$conv_prepare_time;
my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
my @type=intersect(@types);
push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
."to $to (" .(join(",",@{$types[1]})||'?').") has no known common dimension (unit type).\n" if @type<1;
croak join"\n",map"conv: $_",@err if @err;
my $type=$type[0];
conv_prepare_money() if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
return conv_temperature(@_) if $type eq 'temperature';
return conv_numbers(@_) if $type eq 'numbers';
my $c=$conv{$type};
my($cf,$ct)=@{$conv{$type}}{$from,$to};
my $r= $cf>0 && $ct<0 ? -$ct/$num/$cf
: $cf<0 && $ct>0 ? -$cf/$num/$ct
: $cf*$num/$ct;
# print STDERR "$num $from => $to from=$ff to=$ft r=$r\n";
return $r;
}
sub conv_temperature { #http://en.wikipedia.org/wiki/Temperature#Conversion
my($t,$from,$to)=(shift(),map uc(substr($_,0,1)),@_);
$from=~s/K/C/ and $t-=273.15;
#$from=~s/R/F/ and $t-=459.67; #rankine
return $t if $from eq $to;
{CK=>sub{$t+273.15},
FC=>sub{($t-32)*5/9},
CF=>sub{$t*9/5+32},
FK=>sub{($t-32)*5/9+273.15},
}->{$from.$to}->();
}
sub conv_numbers {
my($n,$fr,$to)=@_;
my $dec=$fr eq 'dec' ? $n
:$fr eq 'hex' ? hex($n)
:$fr eq 'oct' ? oct($n)
:$fr eq 'bin' ? oct("0b$n")
:$fr =~ /^(dusin|dozen|doz|dz)$/ ? $n*12
:$fr =~ /^(gross|gr|gro)$/ ? $n*144
:$fr eq 'great_gross' ? $n*12*144
:$fr eq 'small_gross' ? $n*12*10
:$fr eq 'skokk' ? $n*60 #norwegian unit
:$fr eq 'roman' ? roman2int($n)
:$fr eq 'des' ? $n
:croak "Conv from $fr not supported yet";
my $ret=$to eq 'dec' ? $dec
:$to eq 'hex' ? sprintf("%x",$dec)
:$to eq 'oct' ? sprintf("%o",$dec)
:$to eq 'bin' ? sprintf("%b",$dec)
:$to =~ /^(dusin|dozen|doz|dz)$/ ? $dec/12
:$to =~ /^(gross|gr|gro)$/ ? $dec/144
:$to eq 'great_gross' ? $dec/(12*144)
:$to eq 'small_gross' ? $dec/(12*10)
:$to eq 'skokk' ? $dec/60
:$to eq 'roman' ? int2roman($dec)
:$to eq 'des' ? $dec
:croak "Conv to $to not suppoerted yet";
$ret;
}
#http://en.wikipedia.org/wiki/Norwegian_units_of_measurement
=head2 bytes_readable
Converts a number of bytes to something human readable.
Input 1: a number
Input 2: optionally the number of decimals if >1000 B. Default is 2.
Output: a string containing:
the number with a B behind if the number is less than 1000
the number divided by 1024 with two decimals and "kB" behind if the number is less than 1024*1000
the number divided by 1048576 with two decimals and "MB" behind if the number is less than 1024*1024*1000
the number divided by 1073741824 with two decimals and "GB" behind if the number is less than 1024*1024*1024*1000
the number divided by 1099511627776 with two decimals and "TB" behind otherwise
# : $r=~s,^CD,,i ? 400+roman2int($r)
# : $r=~s,^C,,i ? 100+roman2int($r)
# : $r=~s,^XC,,i ? 90+roman2int($r)
# : $r=~s,^L,,i ? 50+roman2int($r)
# : $r=~s,^XL,,i ? 40+roman2int($r)
# : $r=~s,^X,,i ? 10+roman2int($r)
# : $r=~s,^IX,,i ? 9+roman2int($r)
# : $r=~s,^V,,i ? 5+roman2int($r)
# : $r=~s,^IV,,i ? 4+roman2int($r)
# : $r=~s,^I,,i ? 1+roman2int($r)
# : !length($r) ? 0
# : croak "Invalid roman number $r";
#}
=head2 distance
B<Input:> the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
B<Output:> the air distance in meters between the two points
Calculation is done using the Haversine Formula for spherical distance:
a = sin((lat2-lat1)/2)^2
+ sin((lon2-lon1)/2)^2 * cos(lat1) * cos(lat2);
c = 2 * atan2(min(1,sqrt(a)),
min(1,sqrt(1-a)))
distance = c * R
With earth radius set to:
R = Re - (Re-Rp) * sin(abs(lat1+lat2)/2)
Where C<Re = 6378137.0> (equatorial radius) and C<Rp = 6356752.3> (polar radius).
B<Example:>
my @oslo = ( 59.93937, 10.75135); # oslo in norway
my @rio = (-22.97673, -43.19508); # rio in brazil
printf "%.1f km\n", distance(@oslo,@rio)/1000; # 10431.7 km
printf "%.1f km\n", distance(@rio,@oslo)/1000; # 10431.7 km
printf "%.1f nmi\n", distance(@oslo,@rio)/1852.000; # 5632.7 nmi (nautical miles)
printf "%.1f miles\n",distance(@oslo,@rio)/1609.344; # 6481.9 miles
printf "%.1f miles\n",conv(distance(@oslo,@rio),"meters","miles");# 6481.9 miles
See L<http://www.faqs.org/faqs/geography/infosystems-faq/>
and L<http://mathforum.org/library/drmath/view/51879.html>
and L<http://en.wikipedia.org/wiki/Earth_radius>
and L<Geo::Direction::Distance>, but Acme::Tools::distance() is about 8 times faster.
=cut
our $Distance_factor = $PI / 180;
sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) }
sub distance_great_circle {
my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
return $R*acos(sin($lat1)*sin($lat2)+cos($lat1)*cos($lat2)*cos($lon2-$lon1))
}
sub distance {
my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
my $a= sin(($lat2-$lat1)/2)**2
+ sin(($lon2-$lon1)/2)**2 * cos($lat1) * cos($lat2);
my $sqrt_a =sqrt($a); $sqrt_a =1 if $sqrt_a >1;
my $sqrt_1ma=sqrt(1-$a); $sqrt_1ma=1 if $sqrt_1ma>1;
my $c=2*atan2($sqrt_a,$sqrt_1ma);
my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
return $c*$R;
}
=head2 big
=head2 bigi
=head2 bigf
=head2 bigr
=head2 bigscale
big, bigi, bigf, bigr and bigscale are sometimes convenient shorthands for using
C<< Math::BigInt->new() >>, C<< Math::BigFloat->new() >> and C<< Math::BigRat->new() >>
(preferably with the GMP for faster calculations). Examples:
my $num1 = big(3); #returns a new Math::BigInt-object
my $num2 = big('3.0'); #returns a new Math::BigFloat-object
my $num3 = big(3.0); #returns a new Math::BigInt-object
my $num4 = big(3.1); #returns a new Math::BigFloat-object
my $num5 = big('2/7'); #returns a new Math::BigRat-object
my($i1,$f1,$i2,$f2) = big(3,'3.0',3.0,3.1); #returns the four new numbers, as the above four lines
#uses wantarray
print 2**200; # 1.60693804425899e+60
print big(2)**200; # 1606938044258990275541962092341162602522202993782792835301376
print 2**big(200); # 1606938044258990275541962092341162602522202993782792835301376
print big(2**200); # 1606938044258990000000000000000000000000000000000000000000000
print 1/7; # 0.142857142857143
print 1/big(7); # 0 because of integer arithmetics
print 1/big(7.0); # 0 because 7.0 is viewed as an integer, see bigf below
print 1/big('7.0'); # 0.1428571428571428571428571428571428571429
print 1/bigf(7); # 0.1428571428571428571428571428571428571429
print bigf(1/7); # 0.142857142857143 probably not what you wanted
print 1/bigf(7); # 0.1428571428571428571428571428571428571429
bigscale(80); # for increased precesion (default is 40)
print 1/bigf(7); # 0.14285714285714285714285714285714285714285714285714285714285714285714285714285714
In C<big()> the characters C<< . >> and C<< / >> will make it return a
Math::BigFloat- and Math::BigRat-object accordingly. Or else a Math::BigInt-object is returned.
Instead of guessing, use C<bigi>, C<bigf> and C<bigr> to return what you want.
B<Note:> Acme::Tools does not depend on Math::BigInt and
Math::BigFloat and GMP, but these four big*-subs do (by C<require>).
To use big, bigi, bigf and bigr effectively you should
install Math::BigInt::GMP and Math::BigFloat::GMP like this:
sudo cpanm Math::BigFloat Math::GMP Math::BingInt::GMP # or
sudo cpan Math::BigFloat Math::GMP Math::BingInt::GMP # or
sudo yum install perl-Math-BigInt-GMP perl-Math-GMP # on RedHat, RHEL or
sudo apt-get install libmath-bigint-gmp-perl libmath-gmp-perl # on Ubuntu or some other way
Unless GMP is installed for perl like this, the Math::Big*-modules
will fall back to using similar but slower built in modules. See: L<https://gmplib.org/>
=cut
sub bigi {
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
if (wantarray) { return (map Math::BigInt->new($_),@_) }
else { return Math::BigInt->new($_[0]) }
}
sub bigf {
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
if (wantarray) { return (map Math::BigFloat->new($_),@_) }
else { return Math::BigFloat->new($_[0]) }
}
sub bigr {
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
if (wantarray) { return (map Math::BigRat->new($_),@_) }
else { return Math::BigRat->new($_[0]) }
}
sub big {
wantarray
? (map /\./ ? bigf($_) : /\// ? bigr($_) : bigi($_), @_)
: $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
}
sub bigscale {
@_==1 or croak "bigscale requires one and only one argument";
my $scale=shift();
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
Math::BigInt->div_scale($scale);
Math::BigFloat->div_scale($scale);
Math::BigRat->div_scale($scale);
return;
}
#my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
#*)
# ( 6378157.5, 6356772.2 ) #hmm
#my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
#my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
#warn "e=$e\n";
#warn "t=".(1 - $e**2)."\n";
#warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
#my $t=1 - $e**2;
#my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
#warn "t=$t\n";
#warn "n=$n\n";
#$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
#$R=$a * $t/$n;
#=head2 fractional
#=cut
sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
carp "fractional: NOT FINISHED";
my $n=shift;
print "----fractional n=$n\n";
my $nn=$n; my $dec;
$nn=~s,\.(\d+)$,$dec=length($1);$1.,;
my $l;
my $max=0;
my($te,$ne);
for(1..length($nn)/2){
if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
print "_ = $_ ".length($2)."\n";
if(length($2)>$max){
$l=$_;
$te="$1$3"-$1;
$max=length($2);
}
}
}
return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
print "l=$l max=$max\n";
$ne="9" x $l;
print log($n),"\n";
my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
while($te/$n/$ne<0.99){ &$st(); $te*=10 }
&$st();
while(1){
print curb( $enthusiasm, 1, 10 ); # prints 10
print curb( $enthusiasm, 20, 100 ); # prints 20
print curb(\$enthusiasm, 1, 10 ); # prints 10 and sets $enthusiasm = 10
print $enthusiasm; # prints 10
=cut
sub curb {
my($val,$min,$max)=@_;
# todo: undef min|max => dont curb min|max
croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
$val < $min ? $min :
$val > $max ? $max :
$val;
}
sub bound { curb(@_) }
=head2 log10
=head2 log2
=head2 logn
print log10(1000); # prints 3
print log10(10000*sqtr(10)); # prints 4.5
print log2(16); # prints 4
print logn(4096, 8); # prints 4 (12/3=4)
print logn($PI, 2.71828182845905); # same as print log($PI) using perls builtin log()
=cut
sub log10 { log($_[0]) / log(10) }
sub log2 { log($_[0]) / log(2) }
sub logn { log($_[0]) / log($_[1]) }
=head1 STRINGS
=head2 upper
=head2 lower
Returns input string as uppercase or lowercase.
Can be used if Perls build in C<uc()> and C<lc()> for some reason does not convert æøå or other latin1 letters outsize a-z.
Converts C<< æøåäëïöüÿâêîôûãõà èìòùáéÃóúýñð >> to and from C<< ÃÃÃ
ÃÃÃÃÃ?ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ >>
See also C<< perldoc -f uc >> and C<< perldoc -f lc >>
=head2 trim
Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:
trim(" asdf \t\n 123 ") eq "asdf 123"
trim(" asdf\t\n 123\n") eq "asdf\t123"
Works on C<< $_ >> if no argument i given:
print join",", map trim, " please ", " remove ", " my ", " spaces "; # please,remove,my,spaces
print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
my $s=' please '; trim(\$s); # now $s eq 'please'
trim(\@untrimmedstrings); # trims array strings inplace
@untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
trim(\$_) for @untrimmedstrings; # same, works on \$_
=head2 lpad
=head2 rpad
Left or right pads a string to the given length by adding one or more spaces at the end for I<rpad> or at the start for I<lpad>.
B<Input:> First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
Default is space.
rpad('gomle',9); # 'gomle '
lpad('gomle',9); # ' gomle'
rpad('gomle',9,'-'); # 'gomle----'
lpad('gomle',9,'+'); # '++++gomle'
rpad('gomle',4); # 'goml'
lpad('gomle',4); # 'goml'
rpad('gomle',7,'xyz'); # 'gomlxy'
lpad('gomle',10,'xyz'); # 'xyzxygoml'
=head2 cpad
Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
cpad('mat',5) eq ' mat '
cpad('mat',4) eq 'mat '
cpad('mat',6) eq ' mat '
cpad('mat',9) eq ' mat '
cpad('mat',5,'+') eq '+mat+'
cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
=cut
sub upper {no warnings;my $s=@_?shift:$_;$s=~tr/a-zæøåäëïöü.âêîôûãõà èìòùáéÃóúýñð/A-ZÃÃÃ
ÃÃÃÃÃ.ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ/;$s}
sub lower {no warnings;my $s=@_?shift:$_;$s=~tr/A-ZÃÃÃ
ÃÃÃÃÃ.ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ/a-zæøåäëïöü.âêîôûãõà èìòùáéÃóúýñð/;$s}
sub trim {
return trim($_) if !@_;
return map trim($_), @_ if @_>1;
my $s=shift;
if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
$s=~s,^\s+|(?<=\s)\s+|\s+$,,g if defined $s;
$s;
}
sub rpad {
my($s,$l,$p)=@_;
$p=' ' if @_<3 or !length($p);
$s.=$p while length($s)<$l;
substr($s,0,$l);
}
sub lpad {
my($s,$l,$p)=@_;
$p=' ' if @_<3 or !length($p);
$l<length($s)
? substr($s,0,$l)
: substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
}
sub cpad {
my($s,$l,$p)=@_;
$p=' ' if @_<3 or !length($p);
my $ls=length($s);
return substr($s,0,$l) if $l<$ls;
$p=$p x (($l-$ls+2)/length($p));
substr($p, 0, ($l-$ls )/2) . $s .
substr($p, 0, ($l-$ls+1)/2);
}
sub cpad_old {
my($s,$l,$p)=@_;
$p=' ' if !length($p);
return substr($s,0,$l) if $l<length($s);
my $i=0;
while($l>length($s)){
my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
$i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
$i+=1/2;
}
$s;
}
=head2 trigram
B<Input:> A string (i.e. a name). And an optional x (see example 2)
B<Output:> A list of this strings trigrams (See examlpe)
B<Example 1:>
print join ", ", trigram("Kjetil Skotheim");
Prints:
Kje, jet, eti, til, il , l S, Sk, Sko, kot, oth, the, hei, eim
B<Example 2:>
Default is 3, but here 4 is used instead in the second optional input argument:
print join ", ", trigram("Kjetil Skotheim", 4);
And this prints:
Kjet, jeti, etil, til , il S, l Sk, Sko, Skot, koth, othe, thei, heim
C<trigram()> was created for "fuzzy" name searching. If you have a database of many names,
addresses, phone numbers, customer numbers etc. You can use trigram() to search
among all of those at the same time. If the search form only has one input field.
One general search box.
Store all of the trigrams of the trigram-indexed input fields coupled
with each person, and when you search, you take each trigram of you
query string and adds the list of people that has that trigram. The
search result should then be sorted so that the persons with most hits
are listed first. Both the query strings and the indexed database
fields should have a space added first and last before C<trigram()>-ing
them.
This search algorithm is not includes here yet...
C<trigram()> should perhaps have been named ngram for obvious reasons.
=head2 sliding
Same as trigram (except there is no default width). Works also with arrayref instead of string.
Example:
sliding( ["Reven","rasker","over","isen"], 2 )
Result:
( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )
=head2 chunks
Splits strings and arrays into chunks of given size:
my @a = chunks("Reven rasker over isen",7);
my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);
Resulting arrays:
( 'Reven r', 'asker o', 'ver ise', 'n' )
( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )
=head2 chars
chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
=cut
sub trigram { sliding($_[0],$_[1]||3) }
sub sliding {
my($s,$w)=@_;
return map substr($s,$_,$w), 0..length($s)-$w if !ref($s);
return map [@$s[$_..$_+$w-1]], 0..@$s-$w if ref($s) eq 'ARRAY';
}
sub chunks {
my($s,$w)=@_;
return $s=~/(.{1,$w})/g if !ref($s);
return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w if ref($s) eq 'ARRAY';
}
sub chars { split//, shift }
=head2 repl
Synonym for replace().
=head2 replace
Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.
Works as C<replace()> in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C<replace()> accepts more than three arguments.
Examples:
print replace("water","ater","ine"); # Turns water into wine
print replace("water","ater"); # w
print replace("water","at","eath"); # weather
print replace("water","wa","ju",
"te","ic",
"x","y", # No x is found, no y is returned
'r$',"e"); # Turns water into juice. 'r$' says that the r it wants
# to change should be the last letters. This reveals that
# second, fourth, sixth and so on argument is really regexs,
# not normal strings. So use \ (or \\ inside "") to protect
# the special characters of regexes. You probably also
# should write qr/regexp/ instead of 'regexp' if you make
# use of regexps here, just to make it more clear that
# these are really regexps, not strings.
print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
print replace('JACK and JUE','J'); # prints ACK and UE
print replace("abc","a","b","b","c"); # prints ccc (not bcc)
If the first argument is a reference to a scalar variable, that variable is changed "in place".
Example:
my $str="test";
replace(\$str,'e','ee','s','S');
print $str; # prints teeSt
=cut
sub replace { repl(@_) }
sub repl {
my $str=shift;
return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
#return ? if ref($str) eq 'ARRAY';
#return ? if ref($str) eq 'HASH';
while(@_){
my($fra,$til)=(shift,shift);
defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
}
return $str;
}
=head1 ARRAYS
print max(2,7,10); # 10
print maxs("2","7","10"); # 7
print maxs(2,7,10); # 7
=cut
sub min {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
sub max {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }
=head2 zip
B<Input:> Two or more arrayrefs. A number of equal sized arrays
containing numbers, strings or anything really.
B<Output:> An array of those input arrays zipped (interlocked, merged) into each other.
print join " ", zip( [1,3,5], [2,4,6] ); # 1 2 3 4 5 6
print join " ", zip( [1,4,7], [2,5,8], [3,6,9] ); # 1 2 3 4 5 6 7 8 9
Example:
zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:
my @media = qw/CD DVD VHS LP Blueray/;
my @count = qw/20 12 2 4 3/;
my %count = zip(\@media,\@count); # or zip( [@media], [@count] )
print "I got $count{DVD} DVDs\n"; # I got 12 DVDs
Dies (croaks) if the two lists are of different sizes
...or any input argument is not an array ref.
=cut
sub zip {
my @t=@_;
ref($_) ne 'ARRAY' and croak "ERROR: zip should have arrayrefs as arguments" for @t;
@{$t[$_]} != @{$t[0]} and croak "ERROR: zip should have equal sized arrays" for 1..$#t;
my @res;
for my $i (0..@{$t[0]}-1){
push @res, $$_[$i] for @t;
}
return @res;
}
=head2 sim
B<Input:> Two or more strings
B<Output:> A number 0 - 1 indicating the similarity between two strings.
Requires L<String::Similarity> where the real magic happens.
sim("Donald Duck", "Donald E. Knuth"); # returns 0.615
sim("Kalle Anka", "Kalle And")' # returns 0.842
sim("Kalle Anka", "Kalle Anka"); # returns 1
sim("Kalle Anka", "kalle anka"); # returns 0.8
sim(map lc, "Kalle Anka", "kalle anka"); # returns 1
Todo: more doc
=cut
#Todo:
#peat -le'print join", ",sim("GskOk",[zip([qw(Gsk_ok Vgdoknr Personnavn Adferdkode Ordenkode G_kok)],[0..5])],0.7,0.127)'
#Use of uninitialized value in subroutine entry at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2365.
#Use of uninitialized value $simlikest in numeric ge (>=) at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2366.
#Use of uninitialized value in subroutine entry at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2365.
#Use of uninitialized value $simnestlikest in numeric ge (>=) at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2372.
sub sim {
require String::Similarity;
my($str,@r)=@_;
return String::Similarity::similarity(@_) if @r==1; #to param
my($min,$mindiff);
if(ref($r[0]) eq 'ARRAY'){
($min,$mindiff)=@r[1,2];
@r=@{$r[0]};
}
$min=0 if!defined$min;
my($simlikest,$simnestlikest,$likest,$idlikest)=(-1,-1);
for(@r){
my($s,$id)=ref($_) eq 'ARRAY' ? @$_ : ($_);
my $sim=String::Similarity::similarity($str,$s,$simnestlikest//0);
if($sim>=$simlikest){
($simnestlikest,$likest,$simlikest)=($simlikest,$s,$sim);
$idlikest=$id if defined$id;
}
elsif($sim>=$simnestlikest){
$simnestlikest=$sim;
}
}
my@ret=($simlikest,$likest);
@ret=(undef,undef) if $simnestlikest>0 and $simlikest-$simnestlikest<$mindiff;
@ret=(undef,undef) if $simlikest<$min;
@ret=(@ret,$simnestlikest,$simlikest,$likest);
push(@ret, $ret[0] ? $idlikest : undef) if defined $idlikest;
return wantarray?@ret:$ret[0];
}
=head2 sim_perm
B<Input:> Two strings
B<Output:> A number 0 - 1 indicating the maximum similarity between two strings tested
against all permutations of both strings split on C<< [\s,]+ >> and where the string
with most words (i.e. names) are cut to as many words as the one with least words.
Requires L<String::Similarity> where the real magic happens.
While sim() is case sensitive, sim_perm() is not.
Name1 Name2 sim() sim_perm()
---------------------------------- ------------------------------------- ----- ----------
Humphrey DeForest Bogart Bogart Humphrey DeForest 0.71 1.00
Humphrey Bogart Humphrey Gump Bogart 0.86 1.00
Humphrey deforest Bogart Bogart DeForest 0.41 1.00
Humfrey DeForest Boghart BOGART HUMPHREY 0.05 0.87
Humphrey Bogart Humphrey 0.70 1.00
Humfrey Deforest Boghart BOGART D. HUMFREY 0.15 0.78 *)
Presley, Elvis Aaron Elvis Presley 0.424242 1.00
sim_perm() was written to identify double-profiles in databases: two people with
either the same (or similar) email or phone number or zip code and similar enough
names are going on the list of probable doubles.
*) Todo: deal with initials better, should be higher than 0.78
=cut
sub sim_perm {
require String::Similarity;
my($s1,$s2)=map {s/^\s*(.+?)\s*$/$1/;$_} map upper($_), @_; #/r v5.14
croak if !length($s1) or !length($s2);
my $max;
for(cart([permutations(split(/[\s,]+/,$s1))],
[permutations(split(/[\s,]+/,$s2))])) {
my($n1,$n2)=@$_;
if(@$n1>@$n2){ pop@$n1 while @$n1>@$n2 }
else { pop@$n2 while @$n1<@$n2 }
my($str1,$str2)=map join(" ",@$_),($n1,$n2);
if(defined $max){
my $sim=String::Similarity::similarity($str1,$str2,$max);
$max=$sim if $sim>$max;
}
else {
$max=String::Similarity::similarity($str1,$str2);
}
last if $max==1;
}
return $max;
}
=head2 pushsort
Adds one or more element to a numerically sorted array and keeps it sorted.
pushsort @a, 13; # this...
push @a, 13; @a = sort {$a<=>$b} @a; # is the same as this, but the former is faster if @a is large
=head2 pushsortstr
Same as pushsort except that the array is kept sorted alphanumerically (cmp) instead of numerically (<=>). See L</pushsort>.
pushsort @a, "abc"; # this...
push @a, "abc"; @a = sort @a; # is the same as this, but the former is faster if @a is large
=cut
#todo: use List::BinarySearch::XS 'binsearch_pos';
our $Pushsort_cmpsub=undef;
sub pushsort (\@@) {
my $ar=shift;
#not needed but often faster
if(!defined $Pushsort_cmpsub and @$ar+@_<100){ #hm speedup?
@$ar=(sort {$a<=>$b} (@$ar,@_));
return 0+@$ar;
}
for my $v (@_){
#not needed but often faster
if(!defined $Pushsort_cmpsub){ #faster rank() in most cases
push @$ar, $v and next if $v>=$$ar[-1];
unshift @$ar, $v and next if $v< $$ar[0];
}
splice @$ar, binsearch($v,$ar,1,$Pushsort_cmpsub)+1, 0, $v;
}
0+@$ar
}
sub pushsortstr(\@@){ local $Pushsort_cmpsub=sub{$_[0]cmp$_[1]}; pushsort(@_) } #speedup: copy sub pushsort
=head2 binsearch
Returns the position of an element in a numerically sorted array. Returns undef if the element is not found.
B<Input:> Two, three or four arguments
B<First argument:> the element to find. Usually a number.
B<Second argument:> a reference to the array to search in. The array
should be sorted in ascending numerical order (se exceptions below).
B<Third argument:> Optional. Default false.
If true, whether result I<not found> should return undef or a fractional position.
If the third argument is false binsearch returns undef if the element is not found.
If the third argument is true binsearch returns 0.5 plus closest position below the searched value.
Returns C< last position + 0.5 > if the searched element is greater than all elements in the sorted array.
Returns C< -0.5 > if the searched element is less than all elements in the sorted array.
Fourth argument: Optional. Default C<< sub { $_[0] <=> $_[1] } >>.
If present, the fourth argument is either:
=over 4
=item * a code-ref that alters the way binsearch compares two elements, default is C<< sub{$_[0]<=>$_[1]} >>
=item * a string that works as a hash key (column name), see example below
=back
B<Examples:>
binsearch(10,[5,10,15,20]); # 1
binsearch(10,[20,15,10,5],undef,sub{$_[1]<=>$_[0]}); # 2 search arrays sorted numerically in opposite order
binsearch("c",["a","b","c","d"],undef,sub{$_[0]cmp$_[1]}); # 2 search arrays sorted alphanumerically
binsearchstr("b",["a","b","c","d"]); # 1 search arrays sorted alphanumerically
my @data=( map { {num=>$_, sqrt=>sqrt($_), square=>$_**2} }
grep !$_%7, 1..1000000 );
my $i = binsearch( {num=>913374}, \@data, undef, sub {$_[0]{num} <=> $_[1]{num}} );
my $i = binsearch( {num=>913374}, \@data, undef, 'num' ); #same as previous line
my $found_hashref = defined $i ? $data[$i] : undef;
=head2 binsearchstr
Same as binsearch except that the arrays is sorted alphanumerically
(cmp) instead of numerically (<=>) and the searched element is a
string, not a number. See L</binsearch>.
=cut
our $Binsearch_steps;
our $Binsearch_maxsteps=100;
sub binsearch {
my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
if(defined $cmpsub and !ref($cmpsub)){
my $key=$cmpsub;
$cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
}
return $insertpos ? -0.5 : undef if !@$aref;
my($min,$max)=(0,$#$aref);
$Binsearch_steps=0;
while (++$Binsearch_steps <= $Binsearch_maxsteps) {
my $middle=int(($min+$max+0.5)/2);
my $middle_value=$$aref[$middle];
#croak "binsearch got non-sorted array" if !$cmpsub and $$aref[$min]>$$aref[$min]
# or $cmpsub and &$cmpsub($$aref[$min],$$aref[$min])>0;
if( !$cmpsub and $search < $middle_value
or $cmpsub and &$cmpsub($search,$middle_value) < 0 ) { #print "<\n";
$max=$min, next if $middle == $max and $min != $max;
return $insertpos ? $middle-0.5 : undef if $middle == $max;
$max=$middle;
}
elsif( !$cmpsub and $search > $middle_value
or $cmpsub and &$cmpsub($search,$middle_value) > 0 ) { #print ">\n";
$min=$max, next if $middle == $min and $max != $min;
return $insertpos ? $middle+0.5 : undef if $middle == $min;
$min=$middle;
}
else { #print "=\n";
return $middle;
}
}
croak "binsearch exceded $Binsearch_maxsteps steps";
}
sub binsearchfast { # binary search routine finds index just below value
my ($x,$v)=@_;
my ($klo,$khi)=(0,$#{$x});
my $k;
while (($khi-$klo)>1) {
$k=int(($khi+$klo)/2);
if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
}
my $max = rank(-1, [101,102,103,102,101]); #103
my @contest = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
my $second = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob
=head2 rankstr
Just as C<rank> but sorts alphanumerically (strings, cmp) instead of numerically.
=cut
sub rank {
my($rank,$aref,$cmpsub)=@_;
if($rank<0){
$cmpsub||=sub{$_[0]<=>$_[1]};
return rank(-$rank,$aref,sub{0-&$cmpsub});
}
my @sort;
local $Pushsort_cmpsub=$cmpsub;
for(@$aref){
pushsort @sort, $_;
pop @sort if @sort>$rank;
}
return wantarray ? @sort : $sort[$rank-1];
}
sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}
=head2 egrep
Extended grep.
Works like L<grep> but with more insight: local vars $i, $n, $prev, $next, $prevr and $nextr are available:
$i is the current index, starts with 0, ends with the length of the input array minus one
$n is the current element number, starts with 1, $n = $i + 1
$prev is the previous value (undef if current is first)
$next is the next value (undef if current is last)
$prevr is the previous value, rotated so that the previous of the first element is the last element
$nextr is the next value, rotated so that the next of the last element is the first element
$_ is the current value, just as with Perls built-in grep
my @a = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); # 1..20
my @r = egrep { $_ % 3 == 0 } @a; # @r is 3, 6, 9, 12, 15, 18. Plain grep could have been used here
my @r = egrep { $i==1 or $next==12 or $prev==14 } @a; # @r is now 2, 11, 15
my @a=2..44;
egrep { $prev =~/4$/ or $next =~/2$/ } @a; # 5, 11, 15, 21, 25, 31, 35, 41
egrep { $prevr=~/4$/ or $nextr=~/2$/ } @a; # 2, 5, 11, 15, 21, 25, 31, 35, 41, 44
egrep { $i%7==0 } @a; # 2, 9, 16, 23, 30, 37, 44
egrep { $n%7==0 } @a; # 8, 15, 22, 29, 36, 43
=cut
sub egrep (&@) {
my($code,$i,$package)=(shift,-1,(caller)[0]);
my %h=map{($_=>"${package}::$_")}qw(i n prev next prevr nextr);
no strict 'refs';
grep {
#no strict 'refs'; #not here! "no" not allowed in expression in perl5.16
local ${$h{i}} = ++$i;
local ${$h{n}} = $i+1;
local ${$h{prev}} = $i>0?$_[$i-1]:undef;
local ${$h{next}} = $i<$#_?$_[$i+1]:undef;
local ${$h{prevr}} = $_[$i>0?$i-1:$#_];
local ${$h{nextr}} = $_[$i<$#_?$i+1:0];
&$code;
}
@_;
}
=head2 eqarr
B<Input:> Two or more references to arrays.
B<Output:> True (1) or false (0) for whether or not the arrays are numerically I<and> alphanumerically equal.
Comparing each element in each array with both C< == > and C< eq >.
Examples:
eqarr([1,2,3],[1,2,3],[1,2,3]); # 1 (true)
eqarr([1,2,3],[1,2,3],[1,2,4]); # 0 (false)
eqarr([1,2,3],[1,2,3,4]); # undef (different size, false)
eqarr([1,2,3]); # croak (should be two or more arrays)
eqarr([1,2,3],1,2,3); # croak (not arraysrefs)
=cut
sub eqarr {
my @arefs=@_;
croak if @arefs<2;
ref($_) ne 'ARRAY' and croak for @arefs;
@{$arefs[0]} != @{$arefs[$_]} and return undef for 1..$#arefs;
my $ant;
for my $ar (@arefs[1..$#arefs]){
for(0..@$ar-1){
++$ant and $ant>100 and croak ">100"; #TODO: feiler ved sammenligning av to tabeller > 10000(?) tall
return 0 if $arefs[0][$_] ne $$ar[$_]
or $arefs[0][$_] != $$ar[$_];
}
}
return 1;
}
=head2 sorted
Return true if the input array is numerically sorted.
@a=(1..10); print "array is sorted" if sorted @a; #true
Optionally the last argument can be a comparison sub:
@person=({Rank=>1,Name=>'Amy'}, {Rank=>2,Name=>'Paula'}, {Rank=>3,Name=>'Ruth'});
print "Persons are sorted" if sorted @person, sub{$_[0]{Rank}<=>$_[1]{Rank}};
=head2 sortedstr
Return true if the input array is I<alpha>numerically sorted.
@a=(1..10); print "array is sorted" if sortedstr @a; #false
@a=("01".."10"); print "array is sorted" if sortedstr @a; #true
=cut
sub sorted (\@@) {
my($a,$cmpsub)=@_;
if($cmpsub){ &$cmpsub($$a[$_],$$a[$_+1])>0 and return 0 for 0..$#$a-1 }
else { $$a[$_] > $$a[$_+1] and return 0 for 0..$#$a-1 }
return 1;
}
#sub sortedstr { sorted(@_,sub{$_[0]cmp$_[1]}) }
sub sortedstr { $_[$_] gt $_[$_+1] and return 0 for 0..$#$_-1; return 1 }
sub sortby {
my($arr,@by)=@_;
die if grep/^-/,@by; #hm 4now todo! - dash meaning descending order
my $pattern=join(" ",map"%-40s",@by);#hm 4now bad, cant handle numeric sort
map$$_[0],
sort{$$a[1]cmp$$b[1]}
map[$_,sprintf($pattern,@$_{@by})],
@$arr;
}
=head2 subarrays
Returns all 2^n-1 combinatory subarrays of an array where each element
of input array participates or not. Note: The empty array is not among
the returned arrayrefs unless an empty input is given.
my @a = subarrays( 'a', 'b', 'c' ); # same as:
my @a = ( ['a' ],
[ 'b'],
['a','b'],
[ 'c'],
['a', 'c'],
[ 'b','c'],
['a','b','c'] );
sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 } #implemented as
=cut
sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 }
=head2 part
B<Input:> A code-ref and a list
B<Output:> Two array-refs
Like C<grep> but returns the false list as well. Partitions a list
into two lists where each element goes into the first or second list
whether the predicate (a code-ref) is true or false for that element.
my( $odd, $even ) = part {$_%2} (1..8);
print for @$odd; #prints 1 3 5 7
print for @$even; #prints 2 4 6 8
(Works like C< partition() > in the Scala programming language)
=head2 parth
Like C<part> but returns any number of lists. Not just two. Sort of like I<group by> in SQL.
B<Input:> A code-ref and a list
B<Output:> A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.
my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
print serialize(\%hash);
Result:
%hash = ( T=>['These','the','this'],
A=>['are','array'],
O=>['of'],
W=>['words'] )
=head2 parta
Like L<parth> but returns an array of lists where the predicate returns an index number.
my @a = parta { length } qw/These are the words of this array/;
Result:
@a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
=cut
sub part (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
sub parth (&@) { my($c,%r)=(shift); push @{ $r{ &$c } }, $_ for @_; %r }
sub parta (&@) { my($c,@r)=(shift); push @{ $r[ &$c ] }, $_ for @_; @r }
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
=head2 refa
=head2 refh
=head2 refs
=head2 refaa
=head2 refah
=head2 refha
=head2 refhh
Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes
Examples:
my $ref_to_array = [1,2,3];
my $ref_to_hash = {1,100,2,200,3,300};
my $ref_to_scalar = \"String";
print "arrayref" if ref($ref_to_array) eq 'ARRAY'; #true
print "hashref" if ref($ref_to_hash) eq 'HASH'; #true
print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
print "arrayref" if refa($ref_to_array); #also true, without: eq 'ARRAY'
print "hashref" if refh($ref_to_hash); #also true, without: eq 'HASH'
print "scalarref" if refs($ref_to_scalar); #also true, without: eq 'SCALAR'
my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
print "aa" if refaa($ref_to_array_of_arrays); #true
print "ah" if refah($ref_to_array_of_hashes); #true
print "ha" if refha($ref_to_hash_of_arrays); #true
print "hh" if refhh($ref_to_hash_of_hashes); #true
=cut
sub refa { ref($_[0]) eq 'ARRAY' ? 1 : ref($_[0]) ? 0 : undef }
sub refh { ref($_[0]) eq 'HASH' ? 1 : ref($_[0]) ? 0 : undef }
sub refs { ref($_[0]) eq 'SCALAR' ? 1 : ref($_[0]) ? 0 : undef }
sub refaa { ref($_[0]) eq 'ARRAY' ? refa($_[0][0]) : ref($_[0]) ? 0 : undef }
sub refah { ref($_[0]) eq 'ARRAY' ? refh($_[0][0]) : ref($_[0]) ? 0 : undef }
sub refha { ref($_[0]) eq 'HASH' ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
sub refhh { ref($_[0]) eq 'HASH' ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
=head2 pushr
=head2 popr
=head2 shiftr
=head2 unshiftr
=head2 splicer
=head2 keysr
=head2 valuesr
=head2 eachr
=head2 joinr
In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:
my $person={name=>'Gaga', array=>[1,2,3]};
push $person{array} , 4; #works in perl 5.12-5.22 but not before and after
push @{ $person{array} }, 4; #works in all perl5 versions
pushr $person{array} , 4; #use Acme::Tools and this should work in perl >= 5.8
popr $person{array}; #returns 4
=cut
sub pushr { push @{shift()}, @_ } # ? ($@)
sub popr { pop @{shift()} }
sub shiftr { shift @{shift()} }
sub unshiftr { unshift @{shift()}, @_ }
sub splicer { @_==1 ? splice( @{shift()} )
:@_==2 ? splice( @{shift()}, shift() )
:@_==3 ? splice( @{shift()}, shift(), shift() )
:@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : croak }
sub keysr { ref($_[0]) eq 'HASH' ? keys(%{shift()}) : keysr({@{shift()}}) } #hm sort(keys%{shift()}) ?
sub valuesr { values( %{shift()} ) }
sub eachr { ref($_[0]) eq 'HASH' ? each(%{shift()})
#:ref($_[0]) eq 'ARRAY' ? each(@{shift()}) # perl 5.8.8 cannot compile each on array! eval?
: croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
sub joinr {join(shift(),@{shift()})}
#sub mapr # som scala: hvis map faar subref se kalles den sub paa hvert elem og resultatet returneres
#sub eachr { each(%{shift()}) }
=head2 pile
B<Input:> a pile size s and a list
B<Output:> A list of lists of length s or the length of the remainer in
the last list. Piles together the input list in lists of the given size.
my @list=(1,2,3,4,5,6,7,8,9,10);
my @piles = pile(3, @list ); # ([1,2,3], [4,5,6], [7,8,9], [10])
my $i=0;
my @piles = parta {$i++/3} @list; # same as above pile(3, @list)
=cut
sub pile { my $size=shift; my @r; for (@_){ push@r,[] if !@r or 0+@{$r[-1]}>=$size; push @{$r[-1]}, $_ } @r }
=head2 aoh2sql
my @oceania=(
{Area=>undef, Capital=>'Pago Pago', Code=>'AS', Name=>'American Samoa', Population=>54343},
{Area=>7686850, Capital=>'Canberra', Code=>'AU', Name=>'Australia', Population=>22751014},
{Area=>undef, Capital=>'West Island', Code=>'CC', Name=>'Cocos (Keeling) Islands', Population=>596},
{Area=>240, Capital=>'Avarua', Code=>'CK', Name=>'Cook Islands', Population=>9838},
{Area=>undef, Capital=>'Flying Fish Cove', Code=>'CX', Name=>'Christmas Island', Population=>1530},
{Area=>18270, Capital=>'Suva', Code=>'FJ', Name=>'Fiji', Population=>909389},
{Area=>702, Capital=>'Palikir', Code=>'FM', Name=>'Micronesia, Federated States of', Population=>105216},
{Area=>549, Capital=>'Hagatna (Agana)', Code=>'GU', Name=>'Guam', Population=>161785},
{Area=>811, Capital=>'Tarawa', Code=>'KI', Name=>'Kiribati', Population=>105711},
{Area=>181.3, Capital=>'Majuro', Code=>'MH', Name=>'Marshall Islands', Population=>72191},
{Area=>19060, Capital=>'Noumea', Code=>'NC', Name=>'New Caledonia', Population=>271615},
{Area=>undef, Capital=>'Kingston', Code=>'NF', Name=>'Norfolk Island', Population=>2210},
{Area=>21, Capital=>'Yaren District', Code=>'NR', Name=>'Nauru', Population=>9540},
{Area=>260, Capital=>'Alofi', Code=>'NU', Name=>'Niue', Population=>1190},
{Area=>268680, Capital=>'Wellington', Code=>'NZ', Name=>'New Zealand', Population=>4438393},
{Area=>undef, Capital=>'Papeete', Code=>'PF', Name=>'French Polynesia', Population=>282703},
{Area=>462840, Capital=>'Port Moresby', Code=>'PG', Name=>'Papua New Guinea', Population=>6672429},
{Area=>undef, Capital=>'Adamstown', Code=>'PN', Name=>'Pitcairn', Population=>48},
{Area=>458, Capital=>'Melekeok', Code=>'PW', Name=>'Palau', Population=>21265},
{Area=>28450, Capital=>'Honiara', Code=>'SB', Name=>'Solomon Islands', Population=>622469},
{Area=>undef, Capital=>undef, Code=>'TK', Name=>'Tokelau', Population=>1337},
{Area=>26, Capital=>'Funafuti', Code=>'TV', Name=>'Tuvalu', Population=>10869},
{Area=>12200, Capital=>'Port-Vila', Code=>'VU', Name=>'Vanuatu', Population=>272264},
{Area=>undef, Capital=>'Mata-Utu', Code=>'WF', Name=>'Wallis and Futuna', Population=>15500},
{Area=>2944, Capital=>'Apia', Code=>'WS', Name=>'Samoa (Western)', Population=>197773}
);
print aoh2sql(\@oceania,{
name=>'country',
drop=>2,
#number=>'numeric', #default
#varchar=>'varchar', #default, change to varchar2 if Oracle
#date=>'date', #default, perhaps change to 'timestamp with time zone' if postgres
#varchar_maxlen=>4000, #default, 4000 (used to be?) is max in Oracle
#create=>1, #default, use 0 to dont include create table
#drop=>0, #default 0: dont include drop table x; 1: drop table x; 2: drop table if exists x;
#end=>"commit;\n",
#begin=>"begin;\n",
begin;
drop table if exists country;
create table country (
Area numeric(9,1),
Capital varchar(16),
Code varchar(2) not null,
Name varchar(36) not null,
Population numeric(9)
);
insert into country values (null,'Pago Pago','AS','American Samoa',54343);
insert into country values (7686850,'Canberra','AU','Australia',22751014);
insert into country values (null,'West Island','CC','Cocos (Keeling) Islands',596);
insert into country values (240,'Avarua','CK','Cook Islands',9838);
insert into country values (null,'Flying Fish Cove','CX','Christmas Island',1530);
insert into country values (18270,'Suva','FJ','Fiji',909389);
insert into country values (702,'Palikir','FM','Micronesia, Federated States of',105216);
insert into country values (549,'Hagatna (Agana)','GU','Guam',161785);
insert into country values (811,'Tarawa','KI','Kiribati',105711);
insert into country values (181.3,'Majuro','MH','Marshall Islands',72191);
insert into country values (19060,'Noumea','NC','New Caledonia',271615);
insert into country values (null,'Kingston','NF','Norfolk Island',2210);
insert into country values (21,'Yaren District','NR','Nauru',9540);
insert into country values (260,'Alofi','NU','Niue',1190);
insert into country values (268680,'Wellington','NZ','New Zealand',4438393);
insert into country values (null,'Papeete','PF','French Polynesia',282703);
insert into country values (462840,'Port Moresby','PG','Papua New Guinea',6672429);
insert into country values (null,'Adamstown','PN','Pitcairn',48);
insert into country values (458,'Melekeok','PW','Palau',21265);
insert into country values (28450,'Honiara','SB','Solomon Islands',622469);
insert into country values (null,null,'TK','Tokelau',1337);
insert into country values (26,'Funafuti','TV','Tuvalu',10869);
insert into country values (12200,'Port-Vila','VU','Vanuatu',272264);
insert into country values (null,'Mata-Utu','WF','Wallis and Futuna',15500);
insert into country values (2944,'Apia','WS','Samoa (Western)',197773);
commit;
=cut
sub aoh2sql {
my($aoh,$conf)=@_;
my %def=( #defaults
name=>'my_table',
number=>'numeric',
varchar=>'varchar',
date=>'date',
varchar_maxlen=>4000,
create=>1,
drop=>0, # 1 drop table if exists, 2 plain drop
end=>"commit;\n",
begin=>"begin;\n",
fix_colnames=>0,
);
my %conf=(%def,(@_<2?():%$conf));
# $conf{$_}||=$def{$_} for keys%def;
my %col;
map $col{$_}++, keys %$_ for @$aoh;
my @col=sort keys %col;
my @colerr=grep!/^[a-z]\w+$/i,@col;
croak "Invalid column name(s): @colerr" if @colerr and !$conf{fix_colnames};
my(%t,%tdb);
for my $c (@col){
my($l,$s,$p,$nn,%ant,$t)=(0,0,0,0);
for my $r (@$aoh){
my $v=$$r{$c};
next if !defined$v or $v!~/\S/;
$nn++;
$l=length($v) if length($v)>$l;
no warnings 'uninitialized';
if($v=~/^(18|19|20)\d\d(0[1-9]|1[0-2])(0[1-9]|1\d|2\d|3[01])-?\d\d:?\d\d:?\d\d$/ and $conf{date}){
$ant{date}++;
next;
}
elsif($v=~/^\s*[-+]?(\d*)(\.\d+)?([Ee]\-?\d+)?\s*$/ and length("$1$2") and $conf{number}){
$ant{number}++;
$s=length("$1.$2") if length("$1.$2")>$s;#hm
$p=length($2)-1 if $2 and length($2)-1>$p;
next;
}
else {
$ant{varchar}++;
}
}
$t||='varchar' if $ant{varchar} or $ant{number} and $ant{date};
$t||='number' if $ant{number};
$t||='date' if $ant{date};
$t||='varchar'; #hm
$l=$conf{varchar_maxlen} if $conf{varchar_maxlen} and $l>$conf{varchar_maxlen};
$l||=1;
my $tdb;
$tdb="$conf{$t}($l)" if $t eq 'varchar';
$tdb="$conf{$t}($s)" if $t eq 'number' and $p==0;
$tdb="$conf{$t}($s,$p)" if $t eq 'number' and $p>0 and ++$s;
$tdb.=" not null" if $nn == 0+@$aoh;
$t{$c}=$t;
$tdb{$c}=$tdb;
}
my $sql;
$sql="create table $conf{name} (".
join(",",map sprintf("\n %-30s %s",do{s/\W+//g;$_},$tdb{$_}), @col). "\n);\n\n" if $conf{create};
my $val=sub{my($v,$t)=@_;defined$v or $v="";!length($v)?'null':$t eq 'number' ? $v : "'".repl($v,"\'","''")."'"};
for my $r (@$aoh){
my $v=join",",map &$val($$r{$_},$t{$_}), @col;
$sql.="insert into $conf{name} values ($v);\n";
}
$sql="drop table $conf{name};\n\n$sql" if $conf{drop}==1;
$sql="drop table if exists $conf{name};\n\n$sql" if $conf{drop}>=2;
$sql="$conf{begin}\n$sql" if $conf{begin};
$sql.=$conf{end};
$sql;
}
sub aoh2xls { croak "Not implemented yet: aoh2xls" }
=head1 STATISTICS
=head2 sum
Returns the sum of a list of numbers. Undef is ignored.
print sum(1,3,undef,8); # 12
print sum(1..1000); # 500500
print sum(undef); # undef
=cut
sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }
=head2 avg
Returns the I<average> number of a list of numbers. That is C<sum / count>
print avg( 2, 4, 9); # 5 (2+4+9) / 3 = 5
print avg( [2, 4, 9] ); # 5 pass by reference, same result but faster for large arrays
Also known as I<arithmetic mean>.
Pass by reference: If one argument is given and it is a reference to an array,
this array is taken as the list of numbers. This mode is about twice as fast
for 10000 numbers or more. It most likely also saves memory.
=cut
sub avg {
my($sum,$n,@a)=(0,0);
no warnings;
if( @_==0 ) { return undef }
if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
else { @a=grep defined,@_ }
if( @a==0 ) { return undef }
$sum+=$_ for @a;
return $sum/@a
}
=head2 geomavg
Returns the I<geometric average> (a.k.a I<geometric mean>) of a list of numbers.
print geomavg(10,100,1000,10000,100000); # 1000
print 0+ (10*100*1000*10000*100000) ** (1/5); # 1000 same thing
print exp(avg(map log($_),10,100,1000,10000,100000)); # 1000 same thing, this is how geomavg() works internally
=cut
sub geomavg { exp(avg(map log($_), @_)) }
=head2 harmonicavg
Returns the I<harmonic average> (a.k.a I<geometric mean>) of a list of numbers. L<http://en.wikipedia.org/wiki/Harmonic_mean>
print harmonicavg(10,11,12); # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337
=cut
sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }
=head2 variance
C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>
=cut
sub variance {
my $sumx2; $sumx2+=$_*$_ for @_;
my $sumx; $sumx+=$_ for @_;
(@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
}
=head2 stddev
C<< Standard_Deviation = sqrt(variance) >>
Standard deviation (stddev) is a measurement of the width of a normal
distribution where one stddev on each side of the mean covers 68% and
two stddevs 95%. Normal distributions are sometimes called Gauss curves
or Bell shapes. L<https://en.wikipedia.org/wiki/Standard_deviation>
stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4) # = 1.0914103126635
avg(@testscores) + stddev(@testscores) # = the score for one stddev above avg, 115
avg(@testscores) - stddev(@testscores) # = the score for one stddev below avg, 85
=cut
sub stddev {
return undef if @_==0;
return stddev(\@_) if @_>0 and !ref($_[0]);
my $ar=shift;
return undef if @$ar==0;
return 0 if @$ar==1;
my $sumx2; $sumx2 += $_*$_ for @$ar;
my $sumx; $sumx += $_ for @$ar;
sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
}
=head2 rstddev
Relative stddev = stddev / avg
=cut
sub rstddev { stddev(@_) / avg(@_) }
=head2 median
Returns the median value of a list of numbers. The list do not have to
be sorted.
Example 1, list having an odd number of numbers:
print median(1, 100, 101); # 100
100 is the middlemost number after sorting.
Example 2, an even number of numbers:
print median(1005, 100, 101, 99); # 100.5
100.5 is the average of the two middlemost numbers.
=cut
sub median {
no warnings;
my @list = sort {$a<=>$b} @_;
my $n=@list;
$n%2 ? $list[($n-1)/2]
: ($list[$n/2-1] + $list[$n/2])/2;
}
=head2 percentile
Returns one or more percentiles of a list of numbers.
Percentile 50 is the same as the I<median>, percentile 25 is the first
quartile, 75 is the third quartile.
B<Input:>
First argument is your wanted percentile, or a refrence to a list of percentiles you want from the dataset.
If the first argument to percentile() is a scalar, this percentile is returned.
If the first argument is a reference to an array, then all those percentiles are returned as an array.
Second, third, fourth and so on argument are the numbers from which you want to find the percentile(s).
B<Examples:>
This finds the 50-percentile (the median) to the four numbers 1, 2, 3 and 4:
print "Median = " . percentile(50, 1,2,3,4); # 2.5
This:
@data=(11, 5, 3, 5, 7, 3, 1, 17, 4, 2, 6, 4, 12, 9, 0, 5);
@p = map percentile($_,@data), (25, 50, 75);
Is the same as this:
@p = percentile([25, 50, 75], @data);
But the latter is faster, especially if @data is large since it sorts
the numbers only once internally.
B<Example:>
Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
Average (or mean) is 143
Median is 15.5 (which is the average of 9 and 22 who both equally lays in the middle)
The 25-percentile is 6.25 which are between 6 and 7, but closer to 6.
The 75-percentile is 46.5, which are between 39 and 49 but close to 49.
Linear interpolation is used to find the 25- and 75-percentile and any
other x-percentile which doesn't fall exactly on one of the numbers in
the set.
B<Interpolation:>
As you saw, 6.25 are closer to 6 than to 7 because 25% along the set of
the twelve numbers is closer to the third number (6) than to he fourth
(7). The median (50-percentile) is also really interpolated, but it is
always in the middle of the two center numbers if there are an even count
of numbers.
However, there is two methods of interpolation:
Example, we have only three numbers: 5, 6 and 7.
Method 1: The most common is to say that 5 and 7 lays on the 25- and
75-percentile. This method is used in Acme::Tools.
Method 2: In Oracle databases the least and greatest numbers
always lay on the 0- and 100-percentile.
As an argument on why Oracles (and others?) definition is not the best way is to
look at your data as for instance temperature measurements. If you
place the highest temperature on the 100-percentile you are sort of
saying that there can never be a higher temperatures in future measurements.
A quick non-exhaustive Google survey suggests that method 1 here is most used.
The larger the data sets, the less difference there is between the two methods.
B<Extrapolation:>
In method one, when you want a percentile outside of any possible
interpolation, you use the smallest and second smallest to extrapolate
from. For instance in the data set C<5, 6, 7>, if you want an
x-percentile of x < 25, this is below 5.
If you feel tempted to go below 0 or above 100, C<percentile()> will
I<die> (or I<croak> to be more precise)
);
(Oracle also provides a similar function: C<percentile_disc> where I<disc>
is short for I<discrete>, meaning no interpolation is taking
place. Instead the closest number from the data set is picked.)
=cut
sub percentile {
my(@p,@t,@ret);
if(ref($_[0]) eq 'ARRAY'){ @p=@{shift()} }
elsif(not ref($_[0])) { @p=(shift()) }
else{croak()}
@t=@_;
return if !@p;
croak if !@t;
@t=sort{$a<=>$b}@t;
push@t,$t[0] if @t==1;
for(@p){
croak if $_<0 or $_>100;
my $i=(@t+1)*$_/100-1;
push@ret,
$i<0 ? $t[0]+($t[1]-$t[0])*$i:
$i>$#t ? $t[-1]+($t[-1]-$t[-2])*($i-$#t):
$i==int($i)? $t[$i]:
$t[$i]*(int($i+1)-$i) + $t[$i+1]*($i-int($i));
}
return @p==1 ? $ret[0] : @ret;
}
=head1 RANDOM
=head2 random
B<Input:> One or two arguments.
B<Output:>
If two integer arguments: returns a random integer between the integers in argument one and two.
If the first argument is an arrayref: returns a random member of that array without changing the array.
If the first argument is an arrayref and there is a second arg: return that many random members of that array
If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash
If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash
If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.
B<Examples:>
$dice=random(1,6); # 1, 2, 3, 4, 5 or 6
$dice=random([1..6]); # same as previous
@dice=random([1..6],10); # 10 dice tosses
$dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2}); # weighted dice with 6 being twice as likely as the others
@dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10); # 10 weighted dice tosses
print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
print random(2); # prints 0, 1 or 2
print 2**random(7); # prints 1, 2, 4, 8, 16, 32, 64 or 128
@dice=map random([1..6]), 1..10; # as third example above, but much slower
perl -MAcme::Tools -le 'print for random({head=>0.499,tail=>0.499,edge=>0.002},10000);' | sort | uniq -c
=cut
sub random {
my($from,$to)=@_;
my $ref=ref($from);
if($ref eq 'ARRAY'){
my @r=map $$from[rand@$from], 1..$to||1;
return @_>1?@r:$r[0];
}
elsif($ref eq 'HASH') {
my @k=keys%$from;
my $max;do{no warnings 'uninitialized';$_>$max and $max=$_ or $_<0 and croak"negative weight" for values%$from};
my @r=map {my$r;1 while $$from{$r=$k[rand@k]}<rand($max);$r} 1..$to||1;
return @_>1?@r:$r[0];
}
($from,$to)=(0,$from) if @_==1;
($from,$to)=($to,$from) if $from>$to;
return int($from+rand(1+$to-$from));
}
#todo?: https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
=head2 random_gauss
Returns an pseudo-random number with a Gaussian distribution instead
of the uniform distribution of perls C<rand()> or C<random()> in this
module. The algorithm is a variation of the one at
L<http://www.taygeta.com/random/gaussian.html> which is both faster
and better than adding a long series of C<rand()>.
Uses perls C<rand> function internally.
B<Input:> 0 - 3 arguments.
First argument: the average of the distribution. Default 0.
Second argument: the standard deviation of the distribution. Default 1.
Third argument: If a third argument is present, C<random_gauss>
returns an array of that many pseudo-random numbers. If there is no
third argument, a number (a scalar) is returned.
B<Output:> One or more pseudo-random numbers with a Gaussian distribution. Also known as a Bell curve or Normal distribution.
Example:
my @I=random_gauss(100, 15, 100000); # produces 100000 pseudo-random numbers, average=100, stddev=15
#my @I=map random_gauss(100, 15), 1..100000; # same but more than three times slower
print "Average is: ".avg(@I)."\n"; # prints a number close to 100
print "Stddev is: ".stddev(@I)."\n"; # prints a number close to 15
my @M=grep $_>100+15*2, @I; # those above 130
print "Percent above two stddevs: ".(100*@M/@I)."%\n"; #prints a number close to 2.2%
Example 2:
my $num=1e6;
my @h; $h[$_/2]++ for random_gauss(100,15, $num);
$h[$_] and printf "%3d - %3d %6d %s\n",
$_*2,$_*2+1,$h[$_],'=' x ($h[$_]*1000/$num)
for 1..200/2;
...prints an example of the famous Bell curve:
44 - 45 70
46 - 47 114
48 - 49 168
50 - 51 250
52 - 53 395
54 - 55 588
56 - 57 871
58 - 59 1238 =
60 - 61 1807 =
62 - 63 2553 ==
64 - 65 3528 ===
66 - 67 4797 ====
68 - 69 6490 ======
70 - 71 8202 ========
72 - 73 10577 ==========
74 - 75 13319 =============
76 - 77 16283 ================
78 - 79 20076 ====================
80 - 81 23742 =======================
82 - 83 27726 ===========================
84 - 85 32205 ================================
86 - 87 36577 ====================================
88 - 89 40684 ========================================
90 - 91 44515 ============================================
92 - 93 47575 ===============================================
94 - 95 50098 ==================================================
96 - 97 52062 ====================================================
98 - 99 53338 =====================================================
100 - 101 52834 ====================================================
102 - 103 52185 ====================================================
104 - 105 50472 ==================================================
106 - 107 47551 ===============================================
108 - 109 44471 ============================================
110 - 111 40704 ========================================
112 - 113 36642 ====================================
114 - 115 32171 ================================
116 - 117 28166 ============================
118 - 119 23618 =======================
120 - 121 19873 ===================
122 - 123 16360 ================
124 - 125 13452 =============
126 - 127 10575 ==========
128 - 129 8283 ========
130 - 131 6224 ======
while (@r<$num) {
my($x1,$x2,$w);
do {
$x1=2.0*rand()-1.0;
$x2=2.0*rand()-1.0;
$w=$x1*$x1+$x2*$x2;
} while $w>=1.0;
$w=sqrt(-2.0*log($w)/$w) * $stddev;
push @r, $x1*$w + $avg,
$x2*$w + $avg;
}
pop @r if @r > $num;
return $r[0] if @_<3;
return @r;
}
=head2 mix
Mixes an array in random order. In-place if given an array reference or not if given an array.
C<mix()> could also have been named C<shuffle()>, as in shuffling a deck of cards.
Example:
This:
print mix("a".."z"),"\n" for 1..3;
...could write something like:
trgoykzfqsduphlbcmxejivnwa
qycatilmpgxbhrdezfwsovujkn
ytogrjialbewcpvndhkxfzqsmu
B<Input:>
=over 4
=item 1.
Either a reference to an array as the only input. This array will then be mixed I<in-place>. The array will be changed:
This: C<< @a=mix(@a) >> is the same as: C<< mix(\@a) >>.
=item 2.
Or an array of zero, one or more elements.
=back
Note that an input-array which COINCIDENTLY SOME TIMES has one element
(but more other times), and that element is an array-ref, you will
probably not get the expected result.
To check distribution:
perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n
The letters a-z should occur around 1000 times each.
Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)
perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'
(Uses L</cart>, which is not a typo, see further down here)
Note: C<List::Util::shuffle()> is approximately four times faster. Both respects the Perl built-in C<srand()>.
=cut
sub mix {
if(@_==1 and ref($_[0]) eq 'ARRAY'){ #just one arg, and its ref array
my $r=$_[0];
push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
return $r;
}
else{
my@e=@_;
push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
return @e;
}
}
=head2 pwgen
Generates random passwords.
B<Input:> 0-n args
* First arg: length of password(s), default 8
* Second arg: number of passwords, default 1
* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
...meaning the password should:
* start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
* should contain at least one char from each of the groups lower, upper, digit and special char
To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
own C<rand()> internally.
C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
Whenever one of the two limits is reached, a first generates a croak.
Examples:
my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
pwgen(3); # dies, defaults require chars in each of 4 group (see above)
pwgen(5,1,'A-C0-9', qr/^\D{3}\d{2}$/); # a 5 char string starting with three A, B or Cs and endring with two digits
pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
Examples of adding additional requirements to the default ones:
my @pwreq = ( qr/^[A-C]/ );
pwgen(8,1,'','',@pwreq); # use defaults for allowed chars and the standard requirements
# but also demand that the password must start with A, B or C
push @pwreq, sub{ not /[a-z]{3}/i };
pwgen(8,1,'','',@pwreq); # as above and in addition the password should not contain three
# or more consecutive letters (to avoid "offensive" words perhaps)
=cut
our $Pwgen_max_sec=0.01; #max seconds/password before croak (for hard to find requirements)
our $Pwgen_max_trials=10000; #max trials/password before croak (for hard to find requirements)
our $Pwgen_sec=0; #seconds used in last call to pwgen()
our $Pwgen_trials=0; #trials in last call to pwgen()
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
sub pwgen {
my($len,$num,$chars,@req)=@_;
$len||=8;
$num||=1;
$chars||='A-Za-z0-9,-./&%_!';
$req[0]||=\&pwgendefreq if !$_[2];
$chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
my($c,$t,@pw,$d)=(length($chars),time_fp());
($Pwgen_trials,$Pwgen_sec)=(0,0);
TRIAL:
while(@pw<$num){
croak "pwgen timeout after $Pwgen_trials trials"
if ++$Pwgen_trials >= $Pwgen_max_trials
or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
and $d!~/^\d+$/; #jic int from time_fp
my $pw=join"",map substr($chars,rand($c),1),1..$len;
for my $r (@req){
if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
else { croak "pwgen: invalid req type $r ".ref($r) }
}
push@pw,$pw;
}
$Pwgen_sec=time_fp()-$t;
return $pw[0] if $num==1;
return @pw;
}
# =head1 veci
#
# Perls C<vec> takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
#
# This limitation is removed with C<veci> (vec improved, but much slower)
#
# The third argument still needs to be 32 or lower (or possibly 64 or lower).
#
# =cut
#
# sub vecibs ($) {
# my($s,$o,$b,$new)=@_;
# if($b=~/^(1|2|4|8|16|32|64)$/){
# return vec($s,$o,$b)=$new if @_==4;
# return vec($s,$o,$b);
# }
# my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
# my $ob=int($o*$b/$bb);
# my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
# $v & (2**$b-1)
# }
=head1 SETS
=head2 distinct
Returns the values of the input list, sorted alfanumerically, but only
one of each value. This is the same as L</uniq> except uniq does not
sort the returned list.
Example:
print join(", ", distinct(4,9,3,4,"abc",3,"abc")); # 3, 4, 9, abc
print join(", ", distinct(4,9,30,4,"abc",30,"abc")); # 30, 4, 9, abc note: alphanumeric sort
=cut
sub distinct { sort keys %{{map {($_,1)} @_}} }
=head2 in
Returns I<1> (true) if first argument is in the list of the remaining arguments. Uses the perl-operator C<< eq >>.
Otherwise it returns I<0> (false).
print in( 5, 1,2,3,4,6); # 0
print in( 4, 1,2,3,4,6); # 1
print in( 'a', 'A','B','C','aa'); # 0
print in( 'a', 'A','B','C','a'); # 1
I guess in perl 5.10 or perl 6 you could use the C<< ~~ >> operator instead.
=head2 in_num
Just as sub L</in>, but for numbers. Internally uses the perl operator C<< == >> instead of C< eq >.
print in(5000, '5e3'); # 0
print in(5000, 5e3); # 1 since 5e3 is converted to 5000 before the call
print in_num(5000, 5e3); # 1
print in_num(5000, '+5.0e03'); # 1
=cut
sub in { no warnings 'uninitialized'; my $val=shift; $_ eq $val and return 1 for @_; return 0 }
sub in_num { no warnings 'uninitialized'; my $val=shift; $_ == $val and return 1 for @_; return 0 }
=head2 union
Input: Two arrayrefs. (Two lists, that is)
Output: An array containing all elements from both input lists, but no element more than once even if it occurs twice or more in the input.
Example, prints 1,2,3,4:
perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])' # 1,2,3,4
=cut
sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
=head2 minus
Input: Two arrayrefs.
Output: An array containing all elements in the first input array but not in the second.
Example:
perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'
Output is C<< five 1 2 >>.
=cut
sub minus {
my %seen;
my %notme=map{($_=>1)}@{$_[1]};
grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
}
=head2 intersect
Input: Two arrayrefs
Output: An array containing all elements which exists in both input arrays.
Example:
perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )' # 4 3 five
Output: C<< 4 3 five >>
=cut
sub intersect {
my %first=map{($_=>1)}@{$_[0]};
my %seen;
return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
}
=head2 not_intersect
Input: Two arrayrefs
Output: An array containing all elements member of just one of the input arrays (not both).
Example:
perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
The output is C<< 1 2 >>.
=cut
sub not_intersect {
my %code;
my %seen;
for(@{$_[0]}){$code{$_}|=1}
for(@{$_[1]}){$code{$_}|=2}
return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
}
=head2 uniq
Input: An array of strings (or numbers)
Output: The same array in the same order, except elements which exists earlier in the list.
Same as L</distinct> but distinct sorts the returned list, I<uniq> does not.
Example:
my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
print join " ", uniq @t; # prints 7 2 3 4 1 5 x xx 07
Beware of using C<sort> like the following because sort will see C<uniq>
as the subroutine for comparing elements! Which you most likely didnt mean.
This has nothing to do with the way uniq is implemented. It's Perl's C<sort>.
print sort uniq('a','dup','z','dup'); # will return this four element array: a dup z dup
print sort(uniq('a','dup','z','dup')); # better, probably what you meant
print distinct('a','dup','z','dup')); # same, distinct includes alphanumeric sort
=cut
sub uniq(@) { my %seen; grep !$seen{$_}++, @_ }
=head1 HASHES
=head2 subhash
Copies a subset of keys/values from one hash to another.
B<Input:> First argument is a reference to a hash. The rest of the arguments are a list of the keys of which key/value-pair you want to be copied.
B<Output:> The hash consisting of the keys and values you specified.
2 => {a=>11,b=>22},
3 => {a=>88,b=>99} );
print serialize({hashtrans(\%h)},'v');
Gives:
%v=( 'a'=>{'1'=>'33','2'=>'11','3'=>'88'},
'b'=>{'1'=>'55','2'=>'22','3'=>'99'} );
=cut
#Hashtrans brukes automatisk når første argument er -1 i sub hashtabell()
sub hashtrans {
my $h=shift;
my %new;
for my $k (keys%$h){
my $r=$$h{$k};
for(keys%$r){
$new{$_}{$k}=$$r{$_};
}
}
return %new;
}
=head2 a2h
B<Input:> array of arrays
B<Output:> array of hashes
Transforms an array of arrays (arrayrefs) to an array of hashes (hashrefs).
Example:
my @h = a2h( ['Name', 'Age', 'Gender'], #1st row become keys
['Alice', 20, 'F'],
['Bob', 30, 'M'],
['Eve', undef, 'F'] );
Result array @h:
(
{Name=>'Alice', Age=>20, Gender=>'F'},
{Name=>'Bob', Age=>30, Gender=>'M'},
{Name=>'Eve', Age=>undef, Gender=>'F'},
);
=head2 h2a
B<Input:> array of hashes
B<Output:> array of arrays
Opposite of L</a2h>
=cut
sub a2h {
my @col=@{shift@_};
map { my%h;@h{@col}=@$_;\%h} @_;
}
sub h2a {
my %c;
map $c{$_}++, keys%$_ for @_;
my @c=sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c;
(\@c,map[@$_{@c}],@_);
}
=head1 COMPRESSION
L</zipb64>, L</unzipb64>, L</zipbin>, L</unzipbin>, L</gzip>, and L</gunzip>
compresses and uncompresses strings to save space in disk, memory,
database or network transfer. Trades time for space. (Beware of wormholes)
=head2 zipb64
Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
No known limit on input length, several MB has been tested, as long as you've got the RAM...
B<Input:> One or two strings.
First argument: The string to be compressed.
Second argument is optional: A I<dictionary> string.
B<Output:> a base64-kodet string of the compressed input.
The use of an optional I<dictionary> string will result in an even
further compressed output in the dictionary string is somewhat similar
to the string that is compressed (the data in the first argument).
If x relatively similar string are to be compressed, i.e. x number
automatic of email responses to some action by a user, it will pay of
to choose one of those x as a dictionary string and store it as
such. (You will also use the same dictionary string when decompressing
using L</unzipb64>.
The returned string is base64 encoded. That is, the output is 33%
larger than it has to be. The advantage is that this string more
easily can be stored in a database (without the hassles of CLOB/BLOB)
or perhaps easier transfer in http POST requests (it still needs some
url-encoding, normally). See L</zipbin> and L</unzipbin> for the
same without base 64 encoding.
Example 1, normal compression without dictionary:
$txt = "Test av komprimering, hva skjer? " x 10; # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
print length($txt)," bytes input!\n"; # prints 330
$zip = zipb64($txt); # compresses
print length($zip)," bytes output!\n"; # prints 65
print $zip; # prints the base64 string ("noise")
$output=unzipb64($zip); # decompresses
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
print length($output),"\n"; # prints 330
Example 2, same compression, now with dictionary:
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
$dict = "Testing av kompresjon, hva vil skje?"; # dictionary with certain similarities
# of the text to be compressed
$zip2 = zipb64($txt,$dict); # compressing with $dict as dictionary
print length($zip2)," bytes output!\n"; # prints 49, which is less than 65 in ex. 1 above
$output=unzipb64($zip2,$dict); # uses $dict in the decompressions too
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
$Ipnum>=$Iprange_start &&
$Ipnum<=$Iprange_start + 2**(32-$1)-1;
}
=head2 webparams
B<Input:> (optional)
Zero or one input argument: A string of the same type often found behind the first question mark (C<< ? >>) in URLs.
This string can have one or more parts separated by C<&> chars.
Each part consists of C<key=value> pairs (with the first C<=> char being the separation char).
Both C<key> and C<value> can be url-encoded.
If there is no input argument, C<webparams> uses C<< $ENV{QUERY_STRING} >> instead.
If also C<< $ENV{QUERY_STRING} >> is lacking, C<webparams()> checks if C<< $ENV{REQUEST_METHOD} eq 'POST' >>.
In that case C<< $ENV{CONTENT_LENGTH} >> is taken as the number of bytes to be read from C<STDIN>
and those bytes are used as the missing input argument.
The environment variables QUERY_STRING, REQUEST_METHOD and CONTENT_LENGTH is
typically set by a web server following the CGI standard (which Apache and
most of them can do I guess) or in mod_perl by Apache. Although you are
probably better off using L<CGI>. Or C<< $R->args() >> or C<< $R->content() >> in mod_perl.
B<Output:>
C<webparams()> returns a hash of the key/value pairs in the input argument. Url-decoded.
If an input string has more than one occurrence of the same key, that keys value in the returned hash will become concatenated each value separated by a C<,> char. (A comma char)
Examples:
use Acme::Tools;
my %R=webparams();
print "Content-Type: text/plain\n\n"; # or rather \cM\cJ\cM\cJ instead of \n\n to be http-compliant
print "My name is $R{name}";
Storing those four lines in a file in the directory designated for CGI-scripts
on your web server (or perhaps naming the file .cgi is enough), and C<chmod +x
/.../cgi-bin/script> and the URL
L<http://some.server.somewhere/cgi-bin/script?name=HAL> will print
C<My name is HAL> to the web page.
L<http://some.server.somewhere/cgi-bin/script?name=Bond&name=+James+Bond> will print C<My name is Bond, James Bond>.
=cut
sub webparams {
my $query=shift();
$query=$ENV{QUERY_STRING} if !defined $query;
if(!defined $query and $ENV{REQUEST_METHOD} eq "POST"){
read(STDIN,$query , $ENV{CONTENT_LENGTH});
$ENV{QUERY_STRING}=$query;
}
my %R;
for(split("&",$query)){
next if !length($_);
my($nkl,$verdi)=map urldec($_),split("=",$_,2);
$R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
}
return %R;
}
=head2 urlenc
Input: a string
Output: the same string URL encoded so it can be sent in URLs or POST requests.
In URLs (web addresses) certain characters are illegal. For instance I<space> and I<newline>.
And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C<?>, C<&>.
These illegal and special chars needs to be encoded to be sent in
URLs. This is done by sending them as C<%> and two hex-digits. All
chars can be URL encodes this way, but it's necessary just on some.
Example:
$search="Ãstdal, Ã
ge";
my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
print $url;
Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>
=cut
sub urlenc {
my $str=shift;
$str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
return $str;
}
=head2 urldec
Opposite of L</urlenc>.
Example, this returns 'C< ø>'. That is space and C<< ø >>.
urldec('+%C3')
=cut
sub urldec {
my $str=shift;
$str=~s/\+/ /gs;
$str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
return $str;
}
=head2 ht2t
C<ht2t> is short for I<html-table to table>.
This sub extracts an html-C<< <table> >>s and returns its C<< <tr>s >>
and C<< <td>s >> as an array of arrayrefs. And strips away any html
inside the C<< <td>s >> as well.
my @table = ht2t($html,'some string occuring before the <table> you want');
Input: One or two arguments.
First argument: the html where a C<< <table> >> is to be found and converted.
Second argument: (optional) If the html contains more than one C<<
<table> >>, and you do not want the first one, applying a second
argument is a way of telling C<ht2t> which to capture: the one with this word
or string occurring before it.
Output: An array of arrayrefs.
C<ht2t()> is a quick and dirty way of scraping (or harvesting as it is
also called) data from a web page. Look too L<HTML::Parse> to do this
more accurate.
Example:
use Acme::Tools;
use LWP::Simple;
my $url = "http://en.wikipedia.org/wiki/List_of_countries_by_population";
for( ht2t( get($url), "Countries" ) ) {
my($rank, $country, $pop) = @$_;
$pop =~ s/,//g;
printf "%3d | %-32s | %9d\n", @$_ if $pop>0;
}
Output:
1 | China | 1367740000
2 | India | 1262090000
3 | United States | 319043000
4 | Indonesia | 252164800
5 | Brazil | 203404000
...and so on.
=cut
sub ht2t {
my($f,$s,$r)=@_; 1>@_||@_>3 and croak; $s='' if @_==1;
$f=~s,.*?($s).*?(<table.*?)</table.*,$2,si;
my $e=0;$e++ while index($f,$s=chr($e))>=$[;
$f=~s/<t(d|r|h).*?>/\l$1$s/gsi;
$f=~s/\s*<.*?>\s*/ /gsi;
my @t=split("r$s",$f);shift @t;
$r||=sub{s/&(#160|nbsp);/ /g;s/&/&/g;s/^\s*(.*?)\s*$/$1/s;
s/(\d) (\d)/$1$2/g if /^[\d \.\,]+$/};
for(@t){my @r=split/[dh]$s/;shift@r;$_=[map{&$r;$_}@r]}
@t;
}
=head1 FILES, DIRECTORIES
=head2 writefile
Justification:
Perl needs three or four operations to make a file out of a string:
open my $FILE, '>', $filename or die $!;
print $FILE $text;
close($FILE);
This is way simpler:
writefile($filename,$text);
Sub writefile opens the file i binary mode (C<binmode()>) and has two usage modes:
B<Input:> Two arguments
B<First argument> is the filename. If the file exists, its overwritten.
If the file can not be opened for writing, a die (a croak really) happens.
B<Second input argument> is one of:
=over 4
=item * Either a scaler. That is a normal string to be written to the file.
=item * Or a reference to a scalar. That referred text is written to the file.
=item * Or a reference to an array of scalars. This array is the written to the
file element by element and C<< \n >> is automatically appended to each element.
=back
Alternativelly, you can write several files at once.
Example, this:
writefile('file1.txt','The text....tjo');
writefile('file2.txt','The text....hip');
writefile('file3.txt','The text....and hop');
...is the same as this:
writefile([
['file1.txt','The text....tjo'],
['file2.txt','The text....hip'],
['file3.txt','The text....and hop'],
]);
Automatic compression:
writefile('file.txt.gz','my text is compressed by /bin/gzip before written to the file');
Extentions C<.gz>, C<.bz2> and C<.xz> are recognized for compression. See also C<readfile()> and C<openstr()>.
readfile('filename.txt',\$data);
Reading the lines of a file into an array:
my @lines;
readfile('filnavn.txt',\@lines);
for(@lines){
...
}
Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
If C<@lines> is non-empty, this will be lost.
Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
The last example can be rewritten:
for(readfile('filnavn.txt')){
...
}
With two input arguments, nothing (undef) is returned from C<readfile()>.
Automatic decompression:
my $txt = readfile('file.txt.gz'); #uses /bin/gunzip to decompress content
Extentions C<.gz>, C<.bz2> and C<.xz> are recognized for decompression. See also C<writefile()> and C<openstr()>.
=cut
#http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
#todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)
sub readfile {
my($filename,$ref)=@_;
if(@_==1){
if(wantarray){ my @data; readfile($filename,\@data); return @data }
else { my $data; readfile($filename,\$data); return $data }
}
else {
open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
if ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
close($fh);
return;#?
}
}
=head2 readdirectory
B<Input:>
Name of a directory.
B<Output:>
A list of all files in it, except of C<.> and C<..> (on linux/unix systems, all directories have a C<.> and C<..> directory).
The names of all types of files are returned: normal files, directories, symbolic links,
pipes, semaphores. That is every thing shown by C<ls -la> except C<.> and C<..>
C<readdirectory> do not recurce down into subdirectories (but see example below).
B<Example:>
my @files = readdirectory("/tmp");
B<Why readdirectory?>
Sometimes calling the built ins C<opendir>, C<readdir> and C<closedir> seems a tad tedious, since this:
my $dir="/usr/bin";
opendir(D,$dir);
my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
closedir(D);
Is the same as this:
my @files=readdirectory("/usr/bin");
See also: L<File::Find>
B<Why not readdirectory?>
On huge directories with perhaps tens or houndreds of thousands of
files, readdirectory() will consume more memory than perls
opendir/readdir. This isn't usually a concern anymore for modern
computers with gigabytes of RAM, but might be the rationale behind
Perls more tedious way created in the 80s. The same argument goes for
file slurping. On the other side it's also a good practice to never
assume to much on available memory and the number of files if you
don't know for certain that enough memory is available whereever your
code is run or that the size of the directory is limited.
B<Example:>
How to get all files in the C</tmp> directory including all subdirectories below of any depth:
my @files=("/tmp");
map {-d $_ and unshift @files,$_ or push @files,$_} readdirectory(shift(@files)) while -d $files[0];
...or to avoid symlinks and only get real files:
map {-d and !-l and unshift @files,$_ or -f and !-l and push @files,$_} readdirectory(shift(@files)) while -d $files[0];
=cut
sub readdirectory {
my $dir=shift;
opendir(my $D,$dir);
my @filer=map "$dir/$_", grep {!/^\.\.?$/} readdir($D);
closedir($D);
return @filer;
}
=head2 basename
The basename and dirname functions behaves like the *nix shell commands with the same names.
B<Input:> One or two arguments: Filename and an optional suffix
B<Output:> Returns the filename with any directory and (if given) the suffix removed.
basename('/usr/bin/perl') # returns 'perl'
basename('/usr/local/bin/report.pl','.pl') # returns 'report' since .pl at the end is removed
basename('report2.pl','.pl') # returns 'report2'
basename('report2.pl','.\w+') # returns 'report2.pl', probably not what you meant
basename('report2.pl',qr/.\w+/) # returns 'report2', use qr for regex
=head2 dirname
B<Input:> A filename including path
B<Output:> Removes the filename path and returns just the directory path up until but not including
the last /. Return just a one char C<< . >> (period string) if there is no directory in the input.
dirname('/usr/bin/perl') # returns '/usr/bin'
dirname('perl') # returns '.'
=head2 username
Returns the current linux/unix username, for example the string root
print username(); #just (getpwuid($<))[0] but more readable perhaps
=cut
sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
sub dirname { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
sub username { (getpwuid($<))[0] }
=head2 wipe
Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
B<Input:>
* Arg 1: A filename
* Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
* Optional arg 3: keep (true/false), wipe() but no delete of file
B<Output:> Same as the C<unlink()> (remove file): 1 for success, 0 or false for failure.
See also: L<https://www.google.com/search?q=wipe+file>, L<http://www.dban.org/>
=cut
sub wipe {
my($file,$times,$keep)=@_;
$times||=3;
croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
my $size=-s$file;
C<read_conf> can be a simpler alternative to the core module L<Config::Std> which has
its own hassles.
$Acme::Tools::Read_conf_empty_section=1; #default 0 (was 1 in version 0.16)
my %conf = read_conf('/etc/your/thing.conf');
print $conf{''}{switch}; #prints OK with the file above
print $conf{switch}; #prints OK here as well
=cut
our $Read_conf_empty_section=0;
sub read_conf {
my($fn,$hr)=(@_,{});
my $conf=ref($fn)?$$fn:readfile($fn);
$conf=~s,\s*(?<!\\)#.*,,g;
my($section,@l)=('',split"\n",$conf);
while(@l) {
my $l=shift@l;
if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
$section=$1;
$$hr{$1}||={};
}
elsif( $l=~/^\s*([^\:\=]+?)\s*[:=]\s*(.*?)\s*$/ ) {
my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
my $v=&$ml($2);
$$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
$$hr{$1}=$v if !length($section);
}
}
%$hr;
}
# my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
# s,<INCLUDE ([^>]+)>,"".readfile(&$incfn($1)),eg; #todo
=head2 openstr
# returned from openstr:
open my $FH, openstr("fil.txt") or die; # fil.txt
open my $FH, openstr("fil.gz") or die; # zcat fil.gz |
open my $FH, openstr("fil.bz2") or die; # bzcat fil.bz2 |
open my $FH, openstr("fil.xz") or die; # xzcat fil.xz |
open my $FH, openstr(">fil.txt") or die; # > fil.txt
open my $FH, openstr(">fil.gz") or die; # | gzip > fil.gz
open my $FH, openstr(">fil.bz2") or die; # | bzip2 > fil.bz2
open my $FH, openstr(">fil.xz") or die; # | xz > fil.bz2
Environment variable PATH is used. So in the examples above, /bin/gzip
is returned instead of gzip if /bin is the first directory in
$ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
/usr/local/bin is added to PATH in openstr(). They are checked even if
PATH is empty.
See also C<writefile()> and C<readfile()> for automatic compression and decompression using C<openstr>.
=cut
our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
our $Magic_openstr=1;
sub openstr_prog { @Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found" }
sub openstr {
my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
return $fn if !$ext or !$Magic_openstr;
$fn =~ /^\s*>/
? "| ".(openstr_prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/ }->{lc($ext)})).$fn
: openstr_prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
}
=head2 printed
Redirects C<print> and C<printf> from STDOUT to a string which is returned.
my $p = printed { print "hello!" }; # now $p eq 'hello!'
my $p = printed { some_sub() }; # now $p contains whatever was printed by some_sub() and the subs call from it
=cut
sub printed (&) { my $s; open(local *STDOUT, '>', \$s) or croak "ERR: $! $?"; shift->(); $s } #todo catch stderr also?
#todo: sub stdin{}
#todo: sub stdout{}
#todo: sub stderr{}
#todo: sub stdouterr{}
=head1 TIME FUNCTIONS
=head2 tms
Timestring, works somewhat like the Gnu/Linux C<date> command and Oracle's C<to_char()>
Converts timestamps to more readable forms of time strings.
Converts seconds since I<epoch> and time strings on the form C<YYYYMMDD-HH24:MI:SS> to other forms.
B<Input:> One, two or three arguments.
B<First argument:> A format string.
B<Second argument: (optional)> An epock C<time()> number or a time
string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
version of hours) or YYYYMMDD. Uses the current C<time()> if the
second argument is missing.
TODO: Formats with % as in C<man date> (C<%Y%m%d> and so on)
B<Third argument: (optional> True or false. If true and first argument
is eight digits: Its interpreted as a date like YYYYMMDD time string,
not an epoch time. If true and first argument is six digits its
interpreted as a date like DDMMYY (not YYMMDD!).
B<Output:> a date or clock string on the wanted form.
B<Examples:>
Prints C<< 3. july 1997 >> if thats the dato today:
perl -MAcme::Tools -le 'print timestr("D. month YYYY")'
print tms("HH24:MI"); # prints 23:55 if thats the time now
JD Same as JDN but a float accounting for the time of day
B<Third argument:> (optional) Is_date. False|true, default false. If true, the second argument is
interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).
=cut
#Se også L</tidstrk> og L</tidstr>
our $Tms_pattern;
our %Tms_str=
('MÃ
NED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
'MÃ¥ned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
'August','September','Oktober','November','Desember'],
'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
'august','september','oktober','november','desember'],
'MÃ
NE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
'MÃ¥ne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
'MÃ
NE' => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
'MÃ¥ne' => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
'måne' => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
'MÃ
N' => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
'MÃ¥n' => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
'mån' => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
'Month' => [4, 'January','February','March','April','May','June','July',
'August','September','October','November','December'],
'month' => [4, 'january','february','march','april','may','june','july',
'august','september','october','november','december'],
'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
'MONT' => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
'Mont' => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
'mont' => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
'MON' => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
'Mon' => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
'mon' => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
'DAY' => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
'Day' => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
'day' => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
'DY' => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
'Dy' => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
'dy' => [6, 'sun','mon','tue','wed','thu','fri','sat'],
'DAG' => [6, 'SÃNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LÃRDAG'],
'Dag' => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
'dag' => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
'DG' => [6, 'Søn','MAN','TIR','ONS','TOR','FRE','LÃR'],
'Dg' => [6, 'SÃn','Man','Tir','Ons','Tor','Fre','Lør'],
'dg' => [6, 'søn','man','tir','ons','tor','fre','lør'],
);
my $_tms_inited=0;
sub tms_init {
return if $_tms_inited++;
for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
$Tms_str{$_}=$Tms_str{replace($_,"aa","Ã¥","AA","Ã
")};
}
$Tms_pattern=join("|",map{quotemeta($_)}
sort{length($b)<=>length($a)}
keys %Tms_str);
#without sort "måned" could be "mared" because "mån"=>"mar"
}
sub totime {
}
=head2 s2t
Convert strings to "time pieces". Example:
my($dd,$mm,$yyyy,$str) = s2t("18/february/2019:13:53","DD","MM","YYYY","YYYYMMDD-HH24:MI:SS")
print "dd: $dd mm: $mm yyyy: $yyyy str: $str\n"; # dd: 18 mm: 02 yyyy: 2019 str: 20190218-13:53:00
=cut
sub s2t {
require Date::Parse;
my $s=shift;
if($s=~/\b(?:mai|okt|des|juni|juli|februar)/i){ #fix norwegian/danish (for now)
$s=~s/\bMai\b/May/i; $s=~s/\bmai\b/may/i; $s=~s/\bMAI\b/MAY/i;
$s=~s/\bOkt\b/Oct/i; $s=~s/\bokt\b/oct/i; $s=~s/\bOKT\b/OCT/i;
$s=~s/\bDes/Dec/; $s=~s/\bdes/dec/; $s=~s/\bDES/DEC/;
$s=~s/\bFebruar\b/February/; $s=~s/\bfebruar\b/february/; $s=~s/\bFEBRUAR\b/FEBRUARY/;
$s=~s/\bjuli\b/July/i;
$s=~s/\bjuni\b/June/i;
}
elsif($s =~ /^[19]\d{9}$/){ $s=localtime($s) } #hm, make faster
elsif($s =~ /^[19]\d{12}$/
and int($s/1000) =~ /^[19]\d{9}$/){ $s=localtime($s/1000) } #hm
elsif($s=~/^((?:17|18|19|20|21)\d\d)(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])$/){#hm
$s="$1-$2-$3T00:00:00";
}
return Date::Parse::str2time($s) if !@_;
return tms(Date::Parse::str2time($s),shift(@_)) if 0+@_ == 1;
return map tms(Date::Parse::str2time($s),$_), @_;
}
sub date_ok {
my($y,$m,$d)=@_;
return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
return 0 if $y!~/^\d\d\d\d$/;
return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
return 1;
}
sub weeknum {
return weeknum(tms('YYYYMMDD')) if @_<1;
return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
my($year,$month,$day)= @_;
eval{
if(@_<2){
if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
else {die}
}
elsif(@_!=3){croak}
croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
};
croak "ERROR: Wrong args Acme::Tools::weeknum(".join(",",@_).")" if $@;
use integer;#heltallsdivisjon
my $y=$year+4800-(14-$month)/12;
my $j=$day+(153*($month+(14-$month)/12*12-3)+2)/5+365*$y+$y/4-$y/100+$y/400-32045;
my $d=($j+31741-$j%7)%146097%36524%1461;
return (($d-$d/1460)%365+$d/1460)/7+1;
}
#perl -MAcme::Tools -le 'print "$_ ".tms($_."0501","day",1) for 2015..2026'
sub tms {
return undef if @_>1 and not defined $_[1]; #time=undef => undef
if(@_==1){
my @lt=localtime();
$_[0] eq 'YYYY' and return 1900+$lt[5];
$_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
$_[0] =~ $Re_isnum and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
}
my($format,$time,$is_date)=@_;
$time=time_fp() if !defined$time;
($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
my @lt=localtime($time);
#todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
#todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
# Fri Nov 13 18:00:00 CET 2015
#date --date="next Friday" #--date or -d
#date --date="last friday"
#date --date="2 days ago"
#date --date="yesterday" #or tomorrow
#date --date="-1 day" #date --date='10 week'
if( $is_date ){
my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
$time=totime(&$yy2c($1)."$1$2$3")."000000" if $time=~/^(\d\d)(\d\d)(\d\d)$/;
$time=totime("$1$2${3}000000") if $time=~/^((?:18|19|20)\d\d)(\d\d)(\d\d)$/; #hm 18-20?
}
else {
y=>'YY',
Y=>'YYYY',
#z=>'TZHHMI', #time zone hour minute e.g. -0430
#':z'=>'TZHH:MI',
#'::z'=>'TZHH:MI:SS',
#':::z'=>'TZ', #number of :'s necessary precision, e.g. -02 or +03:30
#Z=>'TZN', #e.g. CET, EDT, ...
);
my $pkeys=join"|",keys%p;
$format=~s,\%($pkeys),$p{$1},g;
$format=~s/($Tms_pattern)/$Tms_str{$1}[1+$lt[$Tms_str{$1}[0]]]/g;
$format=~s/YYYY / 1900+$lt[5] /gxe;
$format=~s/(\s?)yyyy / $lt[5]==(localtime)[5]?"":$1.(1900+$lt[5])/gxe;
$format=~s/YY / sprintf("%02d",$lt[5]%100) /gxei;
$format=~s|CC | sprintf("%02d",(1900+$lt[5])/100) |gxei;
$format=~s/MM / sprintf("%02d",$lt[4]+1) /gxe;
$format=~s/mm / sprintf("%d",$lt[4]+1) /gxe;
$format=~s,M/ , ($lt[4]+1).'/' ,gxe;
$format=~s,/M , '/'.($lt[4]+1) ,gxe;
$format=~s/DD / sprintf("%02d",$lt[3]) /gxe;
$format=~s/d0w|dow0 / $lt[6] /gxei;
$format=~s/dow / $lt[6]?$lt[6]:7 /gxei;
$format=~s/d0y|doy0 / $lt[7] /gxei; #0-364 (365 leap)
$format=~s/doy / $lt[7]+1 /gxei; #1-365 (366 leap)
$format=~s/D(?![AaGgYyEeNn]) / $lt[3] /gxe; #EN pga desember og wednesday
$format=~s/dd / sprintf("%d",$lt[3]) /gxe;
$format=~s/hh12|HH12 / sprintf("%02d",$lt[2]<13?$lt[2]||12:$lt[2]-12)/gxe;
$format=~s/HH24|HH24|HH|hh / sprintf("%02d",$lt[2]) /gxe;
$format=~s/MI / sprintf("%02d",$lt[1]) /gxei;
$format=~s{SS\.([1-9]) }{ sprintf("%0*.$1f",3+$1,$lt[0]+(repl($time,qr/^[^\.]+/)||0)) }gxei;
$format=~s/SS(?:\.0)? / sprintf("%02d",$lt[0]) /gxei;
$format=~s/(?:am|pm|apm|xm) / $lt[2]<13 ? 'am' : 'pm' /gxe;
$format=~s/(?:AM|PM|APM|XM) / $lt[2]<13 ? 'AM' : 'PM' /gxe;
$format=~s/WWI|WW / sprintf("%02d",weeknum($time)) /gxei;
$format=~s/W / weeknum($time) /gxei;
$format;
}
=head2 easter
Input: A year (a four digit number)
Output: array of two numbers: day and month of Easter Sunday that year. Month 3 means March and 4 means April.
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
...is a "golfed" version of Oudins algorithm (1940) L<http://astro.nmsu.edu/~lhuber/leaphist.html>
(see also http://www.smart.net/~mmontes/ec-cal.html )
Valid for any Gregorian year. Dates repeat themselves after 70499183
lunations = 2081882250 days = ca 5699845 years. However, our planet will
by then have a different rotation and spin time...
Example:
( $day, $month ) = easter( 2012 ); # $day == 8 and $month == 4
Example 2:
my @e=map sprintf("%02d%02d", reverse(easter($_))), 1800..300000;
print "First: ".min(@e)." Last: ".max(@e)."\n"; # First: 0322 Last: 0425
Note: The Spencer Jones formula differs Oudins used in C<easter()> in some years
before 1498. However, in that period the Julian calendar with a different formula was
used anyway. Countries introduced the current Gregorian calendar between 1583 and 1926.
=cut
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
=head2 time_fp
No input arguments.
Return the same number as perls C<time()> except with decimals (fractions of a second, _fp as in floating point number).
print time_fp(),"\n";
print time(),"\n";
Could write:
1116776232.38632
...if that is the time now.
Or just:
1116776232
...from perl's internal C<time()> if C<Time::HiRes> isn't installed and available.
=cut
sub time_fp { # {return 0+gettimeofday} is just as well?
eval{ require Time::HiRes } or return time();
my($sec,$mic)=Time::HiRes::gettimeofday();
return $sec+$mic/1e6; #1e6 not portable?
}
sub timems {
eval{ require Time::HiRes } or return time();
my($sec,$mic)=Time::HiRes::gettimeofday();
return $sec*1000+$mic/1e3;
}
=head2 sleep_fp
sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:
sleep_fp(0.020); # sleeps for 20 milliseconds
Sub sleep_fp do a C<require Time::HiRes>, thus it might take some
extra time the first call. To avoid that, add C<< use Time::HiRes >>
to your code. Sleep_fp should not be trusted for accuracy to more than
a tenth of a second. Virtual machines tend to be less accurate (sleep
longer) than physical ones. This was tested on VMware and RHEL
(Linux). See also L<Time::HiRes>.
=head2 sleeps
=head2 sleepms
=head2 sleepus
=head2 sleepns
sleep_fp(0.020); #sleeps for 20 milliseconds
sleeps(0.020); #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
sleepms(20); #sleeps for 20 milliseconds
sleepus(20000); #sleeps for 20000 microseconds = 20 milliseconds
sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds
=cut
sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
sub sleeps { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
sub sleepms { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
sub sleepus { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
sub sleepns { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }
=head2 eta
Estimated time of arrival (ETA).
for(@files){
...do work on file...
my $eta = eta( ++$i, 0+@files ); # file now, number of files
print "" . localtime($eta);
}
TODO: eta is borken and out of wack, good idea?: http://en.wikipedia.org/wiki/Kalman_filter
=head2 etahhmm
...NOT YET
=cut
our %Eta;
our $Eta_forgetfulness=2;
sub eta {
my($id,$pos,$end,$time_fp)=( @_==2 ? (join(";",caller()),@_) : @_ );
$time_fp||=time_fp();
my $a=$Eta{$id}||=[];
push @$a, [$pos,$time_fp];
@$a=@$a[map$_*2,0..@$a/2] if @$a>40; #hm 40
splice(@$a,-2,1) if @$a>1 and $$a[-2][0]==$$a[-1][0]; #same pos as last
return undef if @$a<2;
my @eta;
for(2..@$a){
push @eta, $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-$_][1])/($$a[-1][0]-$$a[-$_][0]);
}
my($sum,$sumw,$w)=(0,0,1);
for(@eta){
$sum+=$w*$_;
$sumw+=$w;
$w/=$Eta_forgetfulness;
}
my $avg=$sum/$sumw;
return $avg;
# return avg(@eta);
#return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
1;
}
=head2 sleep_until
sleep_until(0.5) sleeps until half a second has passed since the last
call to sleep_until. This example starts the next job excactly ten
seconds after the last job started even if the last job lasted for a
while (but not more than ten seconds):
for(@jobs){
sleep_until(10);
print localtime()."\n";
...heavy job....
}
Might print:
Thu Jan 12 16:00:00 2012
Thu Jan 12 16:00:10 2012
Thu Jan 12 16:00:20 2012
...and so on even if the C<< ...heavy job... >>-part takes more than a
second to complete. Whereas if sleep(10) was used, each job would
spend more than ten seconds in average since the work time would be
added to sleep(10).
Note: sleep_until() will remember the time of ANY last call of this sub,
not just the one on the same line in the source code (this might change
in the future). The first call to sleep_until() will be the same as
sleep_fp() or Perl's own sleep() if the argument is an integer.
=cut
our $Time_last_sleep_until;
sub sleep_until {
my $s=@_==1?shift():0;
my $time=time_fp();
my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
$Time_last_sleep_until=time;
sleep_fp($sleep) if $sleep>0;
}
my %thr;
our %ldist_cache;
sub ldist {
my($s,$t,$l) = @_;
return length($t) if !$s;
return length($s) if !$t;
%ldist_cache=() if !$l and 1000<0+%ldist_cache;
$ldist_cache{$s,$t} ||=
do {
my($s1,$t1) = ( substr($s,1), substr($t,1) );
substr($s,0,1) eq substr($t,0,1)
? ldist($s1,$t1)
: 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
};
}
=head1 OTHER
=head2 nvl
The I<no value> function (or I<null value> function)
C<nvl()> takes two or more arguments. (Oracles nvl-function take just two)
Returns the value of the first input argument with length() > 0.
Return I<undef> if there is no such input argument.
In perl 5.10 and perl 6 this will most often be easier with the C< //
> operator, although C<nvl()> and C<< // >> treats empty strings C<"">
differently. Sub nvl here considers empty strings and undef the same.
=cut
sub nvl {
return $_[0] if defined $_[0] and length($_[0]) or @_==1;
return $_[1] if @_==2;
return nvl(@_[1..$#_]) if @_>2;
return undef;
}
=head2 decode_num
See L</decode>.
=head2 decode
C<decode()> and C<decode_num()> works just as Oracles C<decode()>.
C<decode()> and C<decode_num()> accordingly uses perl operators C<eq> and C<==> for comparison.
Examples:
my $a=123;
print decode($a, 123,3, 214,4, $a); # prints 3
print decode($a, 123=>3, 214=>4, $a); # prints 3, same thing since => is synonymous to comma in Perl
The first argument is tested against the second, fourth, sixth and so on,
and then the third, fifth, seventh and so on is
returned if decode() finds an equal string or number.
In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.
More examples:
my $a=123;
print decode($a, 123=>3, 214=>7, $a); # also 3, note that => is synonym for , (comma) in perl
print decode($a, 122=>3, 214=>7, $a); # prints 123
print decode($a, 123.0 =>3, 214=>7); # prints 3
print decode($a, '123.0'=>3, 214=>7); # prints nothing (undef), no last argument default value here
print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b
Sort of:
decode($string, %conversion, $default);
The last argument is returned as a default if none of the keys in the keys/value-pairs matched.
A more perl-ish and often faster way of doing the same:
{123=>3, 214=>7}->{$a} || $a # (beware of 0)
=cut
sub decode {
croak "Must have a mimimum of two arguments" if @_<2;
my $uttrykk=shift;
if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
else { !defined shift and return shift or shift for 1..@_/2 }
return shift;
}
sub decode_num {
croak "Must have a mimimum of two arguments" if @_<2;
my $uttrykk=shift;
if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
else { !defined shift and return shift or shift for 1..@_/2 }
return shift;
}
=head2 qrlist
Input: An array of values to be used to test againts for existence.
Output: A reference to a regular expression. That is a C<qr//>
The regex sets $1 if it match.
Example:
my @list=qw/ABc XY DEF DEFG XYZ/;
my $filter=qrlist("ABC","DEF","XY."); # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
my @filtered= grep { $_ =~ $filter } @list; # returns DEF and XYZ, but not XYZ because the . char is taken literally
Note: Filtering with hash lookups are WAY faster.
Source:
sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }
=cut
sub qrlist (@) {
my $str=join"|",map quotemeta,@_;
return qr/^($str)$/;
}
=head2 ansicolor
Perhaps easier to use than L<Term::ANSIColor> ?
B<Input:> One argument. A string where the char C<¤> have special
meaning and is replaced by color codings depending on the letter
following the C<¤>.
B<Output:> The same string, but with C<¤letter> replaced by ANSI color
codes respected by many types terminal windows. (xterm, telnet, ssh,
telnet, rlog, vt100, cygwin, rxvt and such...).
B<Codes for ansicolor():>
¤r red
¤g green
¤b blue
¤y yellow
¤m magenta
¤B bold
¤u underline
¤c clear
¤¤ reset, quits and returns to default text color.
B<Example:>
print ansicolor("This is maybe ¤ggreen¤¤?");
Prints I<This is maybe green?> where the word I<green> is shown in green.
If L<Term::ANSIColor> is not installed or not found, returns the input
string with every C<¤> including the following code letters
removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
not installed, you just don't get the colors).
See also L<Term::ANSIColor>.
=cut
sub ansicolor {
my $txt=shift;
eval{require Term::ANSIColor} or return replace($txt,qr/¤./);
my %h=qw/r red g green b blue y yellow m magenta B bold u underline c clear ¤ reset/;
my $re=join"|",keys%h;
$txt=~s/¤($re)/Term::ANSIColor::color($h{$1})/ge;
return $txt;
}
=head2 ccn_ok
Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
This method of control digits is used by MasterCard, Visa, American Express,
Discover, Diners Club / Carte Blanche, JCB and others.
B<Input:>
A credit card number. Can contain non-digits, but they are removed internally before checking.
One person: one way
Two persons: two ways (they can swap places)
Three persons: 6
Four persons: 24
Five persons: 120
Six persons: 720
The formula is C<x!> where the postfix unary operator C<!>, also known as I<faculty> is defined as:
C<x! = x * (x-1) * (x-2) ... * 1>. Example: C<5! = 5 * 4 * 3 * 2 * 1 = 120>.Run this to see the 100 first C<< n! >>
perl -MAcme::Tools -le'$i=big(1);print "$_!=",$i*=$_ for 1..100'
1! = 1
2! = 2
3! = 6
4! = 24
5! = 120
6! = 720
7! = 5040
8! = 40320
9! = 362880
10! = 3628800
.
.
.
100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000
C<permutations()> takes a list and return a list of arrayrefs for each
of the permutations of the input list:
permutations('a','b'); #returns (['a','b'],['b','a'])
permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
# ['b','a','c'],['b','c','a'],
# ['c','a','b'],['c','b','a'])
Up to five input arguments C<permutations()> is probably as fast as it
can be in this pure perl implementation (see source). For more than
five, it could be faster. How fast is it now: Running with different
n, this many time took that many seconds:
n times seconds
-- ------- ---------
2 100000 0.32
3 10000 0.09
4 10000 0.33
5 1000 0.18
6 100 0.27
7 10 0.21
8 1 0.17
9 1 1.63
10 1 17.00
If the first argument is a coderef, that sub will be called for each permutation and the return from those calls with be the real return from C<permutations()>. For example this:
print for permutations(sub{join"",@_},1..3);
...will print the same as:
print for map join("",@$_), permutations(1..3);
...but the first of those two uses less RAM if 3 has been say 9.
Changing 3 with 10, and many computers hasn't enough memory
for the latter.
The examples prints:
123
132
213
231
312
321
If you just want to say calculate something on each permutation,
but is not interested in the list of them, you just don't
take the return. That is:
my $ant;
permutations(sub{$ant++ if $_[-1]>=$_[0]*2},1..9);
...is the same as:
$$_[-1]>=$$_[0]*2 and $ant++ for permutations(1..9);
...but the first uses next to nothing of memory compared to the latter. They have about the same speed.
(The examples just counts the permutations where the last number is at least twice as large as the first)
C<permutations()> was created to find all combinations of a persons
name. This is useful in "fuzzy" name searches with
L<String::Similarity> if you can not be certain what is first, middle
and last names. In foreign or unfamiliar names it can be difficult to
know that.
=cut
#TODO: see t/test_perm.pl and t/test_perm2.pl
sub permutations {
my $code=ref($_[0]) eq 'CODE' ? shift() : undef;
$code and @_<6 and return map &$code(@$_),permutations(@_);
return [@_] if @_<2;
return ([@_[0,1]],[@_[1,0]]) if @_==2;
return ([@_[0,1,2]],[@_[0,2,1]],[@_[1,0,2]],
[@_[1,2,0]],[@_[2,0,1]],[@_[2,1,0]]) if @_==3;
return ([@_[0,1,2,3]],[@_[0,1,3,2]],[@_[0,2,1,3]],[@_[0,2,3,1]],
[@_[0,3,1,2]],[@_[0,3,2,1]],[@_[1,0,2,3]],[@_[1,0,3,2]],
[@_[1,2,0,3]],[@_[1,2,3,0]],[@_[1,3,0,2]],[@_[1,3,2,0]],
[@_[2,0,1,3]],[@_[2,0,3,1]],[@_[2,1,0,3]],[@_[2,1,3,0]],
[@_[2,3,0,1]],[@_[2,3,1,0]],[@_[3,0,1,2]],[@_[3,0,2,1]],
[@_[3,1,0,2]],[@_[3,1,2,0]],[@_[3,2,0,1]],[@_[3,2,1,0]]) if @_==4;
return ([@_[0,1,2,3,4]],[@_[0,1,2,4,3]],[@_[0,1,3,2,4]],[@_[0,1,3,4,2]],[@_[0,1,4,2,3]],
[@_[0,1,4,3,2]],[@_[0,2,1,3,4]],[@_[0,2,1,4,3]],[@_[0,2,3,1,4]],[@_[0,2,3,4,1]],
[@_[0,2,4,1,3]],[@_[0,2,4,3,1]],[@_[0,3,1,2,4]],[@_[0,3,1,4,2]],[@_[0,3,2,1,4]],
[@_[0,3,2,4,1]],[@_[0,3,4,1,2]],[@_[0,3,4,2,1]],[@_[0,4,1,2,3]],[@_[0,4,1,3,2]],
[@_[0,4,2,1,3]],[@_[0,4,2,3,1]],[@_[0,4,3,1,2]],[@_[0,4,3,2,1]],[@_[1,0,2,3,4]],
[@_[1,0,2,4,3]],[@_[1,0,3,2,4]],[@_[1,0,3,4,2]],[@_[1,0,4,2,3]],[@_[1,0,4,3,2]],
[@_[1,2,0,3,4]],[@_[1,2,0,4,3]],[@_[1,2,3,0,4]],[@_[1,2,3,4,0]],[@_[1,2,4,0,3]],
[@_[1,2,4,3,0]],[@_[1,3,0,2,4]],[@_[1,3,0,4,2]],[@_[1,3,2,0,4]],[@_[1,3,2,4,0]],
[@_[1,3,4,0,2]],[@_[1,3,4,2,0]],[@_[1,4,0,2,3]],[@_[1,4,0,3,2]],[@_[1,4,2,0,3]],
[@_[1,4,2,3,0]],[@_[1,4,3,0,2]],[@_[1,4,3,2,0]],[@_[2,0,1,3,4]],[@_[2,0,1,4,3]],
[@_[2,0,3,1,4]],[@_[2,0,3,4,1]],[@_[2,0,4,1,3]],[@_[2,0,4,3,1]],[@_[2,1,0,3,4]],
[@_[2,1,0,4,3]],[@_[2,1,3,0,4]],[@_[2,1,3,4,0]],[@_[2,1,4,0,3]],[@_[2,1,4,3,0]],
[@_[2,3,0,1,4]],[@_[2,3,0,4,1]],[@_[2,3,1,0,4]],[@_[2,3,1,4,0]],[@_[2,3,4,0,1]],
[@_[2,3,4,1,0]],[@_[2,4,0,1,3]],[@_[2,4,0,3,1]],[@_[2,4,1,0,3]],[@_[2,4,1,3,0]],
[@_[2,4,3,0,1]],[@_[2,4,3,1,0]],[@_[3,0,1,2,4]],[@_[3,0,1,4,2]],[@_[3,0,2,1,4]],
[@_[3,0,2,4,1]],[@_[3,0,4,1,2]],[@_[3,0,4,2,1]],[@_[3,1,0,2,4]],[@_[3,1,0,4,2]],
[@_[3,1,2,0,4]],[@_[3,1,2,4,0]],[@_[3,1,4,0,2]],[@_[3,1,4,2,0]],[@_[3,2,0,1,4]],
[@_[3,2,0,4,1]],[@_[3,2,1,0,4]],[@_[3,2,1,4,0]],[@_[3,2,4,0,1]],[@_[3,2,4,1,0]],
[@_[3,4,0,1,2]],[@_[3,4,0,2,1]],[@_[3,4,1,0,2]],[@_[3,4,1,2,0]],[@_[3,4,2,0,1]],
[@_[3,4,2,1,0]],[@_[4,0,1,2,3]],[@_[4,0,1,3,2]],[@_[4,0,2,1,3]],[@_[4,0,2,3,1]],
[@_[4,0,3,1,2]],[@_[4,0,3,2,1]],[@_[4,1,0,2,3]],[@_[4,1,0,3,2]],[@_[4,1,2,0,3]],
[@_[4,1,2,3,0]],[@_[4,1,3,0,2]],[@_[4,1,3,2,0]],[@_[4,2,0,1,3]],[@_[4,2,0,3,1]],
[@_[4,2,1,0,3]],[@_[4,2,1,3,0]],[@_[4,2,3,0,1]],[@_[4,2,3,1,0]],[@_[4,3,0,1,2]],
[@_[4,3,0,2,1]],[@_[4,3,1,0,2]],[@_[4,3,1,2,0]],[@_[4,3,2,0,1]],[@_[4,3,2,1,0]]) if @_==5;
my(@r,@p,@c,@i,@n); @i=(0,@_); @p=@c=1..@_; @n=1..@_-1;
PERM:
while(1){
if($code){if(defined wantarray){push(@r,&$code(@i[@p]))}else{&$code(@i[@p])}}else{push@r,[@i[@p]]}
for my$i(@n){splice@p,$i,0,shift@p;next PERM if --$c[$i];$c[$i]=$i+1}
return@r
}
}
=head2 perm
print @$_,"\n" for perm("a".."c"); # prints six lines: abc acb bac bca cab cba
=head2 permute
my $c = permute { print @_,"\n" } "a".."c"; # prints six lines: abc acb bac bca cab cba
print "count: $c\n"; # prints 6 = 3*2*1 = 3!
The permute BLOCK needs to return true (which print does) for permute to continue:
my $c = permute { print @_,"\n"; rand()<.5 } "a".."d"; # probably prints less than 24 strings
print "count: $c\n"; # prints random number up to 24 = 4*3*2*1 = 4!
=head2 permute_continue
my @abc = ("a", "b", "c");
my @start = ("b", "a", "c"); # starting sequence to continue from
my $c = permute_continue { print @_,"\n" } \@abc, \@start; # prints four lines: bac bca cab cba
my $c = permute { print @_,"\n" } \@abc, \@start; # same, =permute_continue when coreref+arrayref+arrayref
print "count: $c\n"; # prints 6-2 = 3*2*1-2 = 3!-2
The permute BLOCK needs to return true (which print does) for permute to continue:
my $c = permute { print @_,"\n"; rand()<.5 } "a".."d"; # probably prints less than 24 strings
print "count: $c\n"; # prints random number up to 24 = 4*3*2*1 = 4!
=cut
sub perm {
my(@i,@r) = 0..$#_;
@_ || return;
while ( push @r, [@_[@i]] ) {
my $p = $#i || last;
--$p || last while $i[$p-1] > $i[$p];
push @i, reverse splice @i, my$q=$p;
++$q while $i[$p-1] > $i[$q];
@i[$p-1,$q] = @i[$q,$p-1];
}
@r
}
sub permute (&@) {
return permute_continue(@_) if 'CODE,ARRAY,ARRAY' eq join',',map ref,@_;
my $f = shift;
my @i = 0..$#_;
my $n = 0;
@_ || do{ &$f(@_); return 0 };
while ( ++$n and &$f(@_[@i]) ) {
my $p = $#i || last;
--$p || last while $i[$p-1] > $i[$p];
push @i, reverse splice @i, my$q=$p;
++$q while $i[$p-1] > $i[$q];
@i[$p-1,$q] = @i[$q,$p-1];
}
$n;
}
#Fischer-Krause permutation starting from a specific sequence, for example to farm out permute to more than one process
sub permute_continue (&\@\@) {
my ($f,$begin,$from) = @_;
my %h; @h{@$begin} = 0 .. $#$begin;
my @idx = @h{@$from};
my $n = 0;
while ( ++$n and &$f(@$begin[@idx]) ) {
my $p = $#idx || last;
--$p || last while $idx[$p-1] > $idx[$p];
push @idx, reverse splice @idx, my$q=$p;
++$q while $idx[$p-1] > $idx[$q];
@idx[$p-1,$q]=@idx[$q,$p-1];
}
$n
}
=head2 cart
Cartesian product
B<Easy usage:>
Input: two or more arrayrefs with accordingly x, y, z and so on number of elements.
Output: An array of x * y * z number of arrayrefs. The arrays being the cartesian product of the input arrays.
It can be useful to think of this as joins in SQL. In C<select> statements with
more than one table behind C<from>, but without any C<where> condition to join the tables.
B<Advanced usage, with condition(s):>
B<Input:>
- Either two or more arrayrefs with x, y, z and so on number of elements.
- Or coderefs to subs containing condition checks. Somewhat like C<where> conditions in SQL.
B<Output:> An array of x * y * z number of arrayrefs (the cartesian product)
minus the ones that did not fulfill the condition(s).
This of is as joins with one or more where conditions as coderefs.
The coderef input arguments can be placed last or among the array refs
to save both runtime and memory if the conditions depend on
arrays further back.
B<Examples, this:>
for(cart(\@a1,\@a2,\@a3)){
my($a1,$a2,$a3) = @$_;
print "$a1,$a2,$a3\n";
}
Prints the same as this:
for my $a1 (@a1){
for my $a2 (@a2){
for my $a3 (@a3){
print "$a1,$a2,$a3\n";
}
}
}
B<This:> with a condition: the sum of the first two should be divisible by 3:
for( cart( \@a1, \@a2, sub{sum(@$_)%3==0}, \@a3 ) ) {
my($a1,$a2,$a3)=@$_;
print "$a1,$a2,$a3\n";
}
Prints the same as this:
for my $a1 (@a1){
for my $a2 (@a2){
next if 0==($a1+$a2)%3;
for my $a3 (@a3){
print "$a1,$a2,$a3\n";
}
}
}
B<Examples, from the tests:>
my @a1 = (1,2);
my @a2 = (10,20,30);
my @a3 = (100,200,300,400);
my $s = join"", map "*".join(",",@$_), cart(\@a1,\@a2,\@a3);
ok( $s eq "*1,10,100*1,10,200*1,10,300*1,10,400*1,20,100*1,20,200"
."*1,20,300*1,20,400*1,30,100*1,30,200*1,30,300*1,30,400"
."*2,10,100*2,10,200*2,10,300*2,10,400*2,20,100*2,20,200"
."*2,20,300*2,20,400*2,30,100*2,30,200*2,30,300*2,30,400");
$s=join"",map "*".join(",",@$_), cart(\@a1,\@a2,\@a3,sub{sum(@$_)%3==0});
ok( $s eq "*1,10,100*1,10,400*1,20,300*1,30,200*2,10,300*2,20,200*2,30,100*2,30,400");
B<Example, hash-mode:>
Returns hashrefs instead of arrayrefs:
my @cards=cart( # @card gets 5200 hashrefs, 100 decks of 52 cards
deck => [1..100],
rank => [qw(2 3 4 5 6 7 8 9 10 J Q K A)],
suit => [qw(heart diamond club star)],
);
for my $card ( mix(@cards) ) {
print "From deck number $$card{deck} we got $$card{rank} $$card{suit}\n";
}
Note: using sub-ref filters do not work (yet) in hash-mode. Use grep on result instead.
=cut
sub cart {
my @ars=@_;
if(!ref($_[0])){ #if hash-mode detected
my(@k,@v); push@k,shift@ars and push@v,shift@ars while @ars;
return map{my%h;@h{@k}=@$_;\%h}cart(@v);
}
my @res=map[$_],@{shift@ars};
for my $ar (@ars){
@res=grep{&$ar(@$_)}@res and next if ref($ar) eq 'CODE';
@res=map{my$r=$_;map{[@$r,$_]}@$ar}@res;
}
return @res;
}
sub cart_easy { #not tested, not exported http://stackoverflow.com/questions/2457096/in-perl-how-can-i-get-the-cartesian-product-of-multiple-sets
my $last = pop @_;
@_ ? (map {my$left=$_; map [@$left, $_], @$last } cart_easy(@_) )
: (map [$_], @$last);
}
=head2 reduce
From: Why Functional Programming Matters: L<http://www.md.chalmers.se/~rjmh/Papers/whyfp.pdf> L<http://www.cse.chalmers.se/~rjmh/Papers/whyfp.html>
L<http://www.md.chalmers.se/~rjmh/Papers/whyfp.html>
DON'T TRY THIS AT HOME, C PROGRAMMERS.
sub reduce (&@) {
my ($proc, $first, @rest) = @_;
return $first if @rest == 0;
local ($a, $b) = ($first, reduce($proc, @rest));
return $proc->();
}
Many functions can then be implemented with very little code. Such as:
sub mean { (reduce {$a + $b} @_) / @_ }
=cut
sub reduce (&@) {
my ($proc, $first, @rest) = @_;
return $first if @rest == 0;
no warnings;
local ($a, $b) = ($first, reduce($proc, @rest));
return $proc->();
}
=head2 pivot
Resembles the pivot table function in Excel.
C<pivot()> is used to spread out a slim and long table to a visually improved layout.
For instance spreading out the results of C<group by>-selects from SQL:
pivot( arrayref, columnname1, columnname2, ...)
pivot( ref_to_array_of_arrayrefs, @list_of_names_to_down_fields )
The first argument is a ref to a two dimensional table.
The rest of the arguments is a list which also signals the number of
columns from left in each row that is ending up to the left of the
data table, the rest ends up at the top and the last element of
each row ends up as data.
top1 top1 top1 top1
left1 left2 left3 top2 top2 top2 top2
----- ----- ----- ---- ---- ---- ----
data data data data
data data data data
data data data data
Example:
my @table=(
["1997","Gerd", "Weight", "Summer",66],
["1997","Gerd", "Height", "Summer",170],
["1997","Per", "Weight", "Summer",75],
["1997","Per", "Height", "Summer",182],
["1997","Hilde","Weight", "Summer",62],
["1997","Hilde","Height", "Summer",168],
["1997","Tone", "Weight", "Summer",70],
["1997","Gerd", "Weight", "Winter",64],
["1997","Gerd", "Height", "Winter",158],
["1997","Per", "Weight", "Winter",73],
["1997","Per", "Height", "Winter",180],
["1997","Hilde","Weight", "Winter",61],
["1997","Hilde","Height", "Winter",164],
["1997","Tone", "Weight", "Winter",69],
["1998","Gerd", "Weight", "Summer",64],
["1998","Gerd", "Height", "Summer",171],
["1998","Per", "Weight", "Summer",76],
["1998","Per", "Height", "Summer",182],
["1998","Hilde","Weight", "Summer",62],
["1998","Hilde","Height", "Summer",168],
["1998","Tone", "Weight", "Summer",70],
["1998","Gerd", "Weight", "Winter",64],
["1998","Gerd", "Height", "Winter",171],
["1998","Per", "Weight", "Winter",74],
["1998","Per", "Height", "Winter",183],
["1998","Hilde","Weight", "Winter",62],
["1998","Hilde","Height", "Winter",168],
["1998","Tone", "Weight", "Winter",71],
);
.
my @reportA=pivot(\@table,"Year","Name");
print "\n\nReport A\n\n".tablestring(\@reportA);
Will print:
Report A
Year Name Height Height Weight Weight
Summer Winter Summer Winter
---- ----- ------ ------ ------ ------
1997 Gerd 170 158 66 64
1997 Hilde 168 164 62 61
1997 Per 182 180 75 73
1997 Tone 70 69
1998 Gerd 171 171 64 64
1998 Hilde 168 168 62 62
1998 Per 182 183 76 74
1998 Tone 70 71
.
my @reportB=pivot([map{$_=[@$_[0,3,2,1,4]]}(@t=@table)],"Year","Season");
print "\n\nReport B\n\n".tablestring(\@reportB);
Will print:
Report B
Year Season Height Height Height Weight Weight Weight Weight
Gerd Hilde Per Gerd Hilde Per Tone
---- ------ ------ ------ ----- ----- ------ ------ ------
1997 Summer 170 168 182 66 62 75 70
1997 Winter 158 164 180 64 61 73 69
1998 Summer 171 168 182 64 62 76 70
1998 Winter 171 168 183 64 62 74 71
.
my @reportC=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name","Attributt");
print "\n\nReport C\n\n".tablestring(\@reportC);
Will print:
Report C
Name Attributt 1997 1997 1998 1998
Summer Winter Summer Winter
----- --------- ------ ------ ------ ------
Gerd Height 170 158 171 171
Gerd Weight 66 64 64 64
Hilde Height 168 164 168 168
Hilde Weight 62 61 62 62
Per Height 182 180 182 183
Per Weight 75 73 76 74
Tone Weight 70 69 70 71
.
my @reportD=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name");
print "\n\nReport D\n\n".tablestring(\@reportD);
Will print:
Report D
Name Height Height Height Height Weight Weight Weight Weight
1997 1997 1998 1998 1997 1997 1998 1998
Summer Winter Summer Winter Summer Winter Summer Winter
----- ------ ------ ------ ------ ------ ------ ------ ------
Gerd 170 158 171 171 66 64 64 64
Hilde 168 164 168 168 62 61 62 62
Per 182 180 182 183 75 73 76 74
Tone 70 69 70 71
Options:
Options to sort differently and show sums and percents are available. (...MORE DOC ON THAT LATER...)
See also L<Data::Pivot>
=cut
sub pivot {
my($tabref,@vertikalefelt)=@_;
my %opt=ref($vertikalefelt[-1]) eq 'HASH' ? %{pop(@vertikalefelt)} : ();
my $opt_sum=1 if $opt{sum};
my $opt_pro=exists $opt{prosent}?$opt{prosent}||0:undef;
my $sortsub = $opt{'sortsub'} || \&_sortsub;
my $sortsub_bortover = $opt{'sortsub_bortover'} || $sortsub;
my $sortsub_nedover = $opt{'sortsub_nedover'} || $sortsub;
#print serialize(\%opt,'opt');
#print serialize(\$opt_pro,'opt_pro');
my $antned=0+@vertikalefelt;
my $bakerst=-1+@{$$tabref[0]};
my(%h,%feltfinnes,%sum);
#print "Bakerst<$bakerst>\n";
for(@$tabref){
my $rad=join($;,@$_[0..($antned-1)]);
my $felt=join($;,@$_[$antned..($bakerst-1)]);
my $verdi=$$_[$bakerst];
length($rad) or $rad=' ';
length($felt) or $felt=' ';
$h{$rad}{$felt}=$verdi;
$h{$rad}{"%$felt"}=$verdi;
if($opt_sum or defined $opt_pro){
$h{$rad}{Sum}+=$verdi;
$sum{$felt}+=$verdi;
$sum{Sum}+=$verdi;
}
$feltfinnes{$felt}++;
$feltfinnes{"%$felt"}++ if $opt_pro;
}
my @feltfinnes = sort $sortsub_bortover keys%feltfinnes;
push @feltfinnes, "Sum" if $opt_sum;
my @t=([@vertikalefelt,map{replace($_,$;,"\n")}@feltfinnes]);
#print serialize(\@feltfinnes,'feltfinnes');
#print serialize(\%h,'h');
#print "H = ".join(", ",sort _sortsub keys%h)."\n";
for my $rad (sort $sortsub_nedover keys(%h)){
my @rad=(split($;,$rad),
map { defined($_)?$_:exists$opt{undefined}?$opt{undefined}:undef }
map {
if(/^\%/ and defined $opt_pro){
my $sum=$h{$rad}{Sum};
my $verdi=$h{$rad}{$_};
if($sum!=0){
defined $verdi
?sprintf("%*.*f",3+1+$opt_pro,$opt_pro,100*$verdi/$sum)
:$verdi;
}
else{
$verdi!=0?"div0":$verdi;
}
}
else{
$h{$rad}{$_};
}
}
@feltfinnes);
push(@t,[@rad]);
}
push(@t,"-",["Sum",(map{""}(2..$antned)),map{print "<$_>\n";$sum{$_}}@feltfinnes]) if $opt_sum;
return @t;
}
# default sortsub for pivot()
sub _sortsub {
no warnings;
#my $c=($a<=>$b)||($a cmp $b);
#return $c if $c;
#printf "%-30s %-30s ",replace($a,$;,','),replace($b,$;,',');
my @a=split $;,$a;
my @b=split $;,$b;
for(0..$#a){
my $c=$a[$_]<=>$b[$_];
return $c if $c and "$a[$_]$b[$_]"!~/[iI][nN][fF]|þ|â/i; # hm inf(inity)
$c=$a[$_]cmp$b[$_];
return $c if $c;
}
return 0;
}
=head2 tablestring
B<Input:> a reference to an array of arrayrefs -- a two dimensional table of strings and numbers
B<Output:> a string containing the textual table -- a string of two or more lines
The first arrayref in the list refers to a list of either column headings (scalar)
or ... (...more later...)
In this output table:
- the columns will not be wider than necessary by its widest value (any <html>-tags are removed in every internal width-calculation)
- multi-lined cell values are handled also
- and so are html-tags, if the output is to be used inside <pre>-tags on a web page.
- columns with just numeric values are right justified (header row excepted)
Example:
print tablestring([
[qw/AA BB CCCC/],
[123,23,"d"],
[12,23,34],
[77,88,99],
["lin\nes",12,"asdff\nfdsa\naa"],[0,22,"adf"]
]);
Prints this string of 11 lines:
AA BB CCCC
--- -- -----
123 23 d
12 23 34
77 8 99
lin 12 asdff
es fdsa
=cut
sub tablestring {
my $tab=shift;
my %o=$_[0] ? %{shift()} : ();
my $remove_empty = $o{remove_empty_columns};
my $no_multiline_space = $o{no_multiline_space};
my $nodup = $o{nodup}||0;
my $no_header_line = $o{no_header_line};
my $pagesize = exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
my $left_force = $o{left};
my(@width,@left,@height,@not_empty,@nodup);
my $head=1;
my $i=0;
my $j;
for(@$tab){
$j=0;
$height[$i]=0;
my $nodup_rad=$nodup;
if(ref($_) eq 'ARRAY'){
for(@$_){
my $cell=$_;
$width[$j]||=0;
if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
$cell=$nodup==1?"":$nodup;
$nodup[$i][$j]=1;
}
else{
my $height=0;
my $wider;
no warnings;
$not_empty[$j]=1 if !$head && length($cell)>0;
for(split("\n",$cell)){
$wider=/<input.+type=text.+size=(\d+)/i?$1:0; #hm
s/<[^>]+>//g;
$height++;
s/>/>/g;
s/</</g;
$width[$j]=length($_)+1+$wider if length($_)+1+$wider>$width[$j];
$left[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$head;
}
if( $height>1 && !$no_multiline_space){
$height++ if !$head;
$height[$i-1]++ if $i>1 && $height[$i-1]==1;
}
$height[$i]=$height if $height>$height[$i];
}
$j++;
}
}
else{
$height[$i]=1;
$no_header_line=1;
}
$head=0;
$i++;
}
$i=$#height;
$j=$#width;
if($i==0 or $left_force) { @left=map{1}(0..$j) }
else { for(0..$j){ $left[$_]=1 if !$not_empty[$_] } }
my @tabout;
my $row_start_line=0;
my @header;
my $header_last;
for my $x (0..$i){
if($$tab[$x] eq '-'){
my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
$tabout[$row_start_line]=join(" ",map {$tegn[$_] x ($width[$_]-1)} (0..$j));
}
else{
for my $y (0..$j){
next if $remove_empty && !$not_empty[$y];
no warnings;
my @cell = !$header_last&&$nodup&&$nodup[$x][$y]
? ($nodup>0?():((" " x (($width[$y]-length($nodup))/2)).$nodup))
: split("\n",$$tab[$x][$y]);
for(0..($height[$x]-1)){
my $line=$row_start_line+$_;
my $txt=shift(@cell);
$txt='' if !defined$txt;
$txt=sprintf("%*s",$width[$y]-1,$txt) if length($txt)>0 && !$left[$y] && ($x>0 || $no_header_line);
$tabout[$line].=$txt;
if($y==$j){
$tabout[$line]=~s/\s+$//;
}
else{
my $wider;
$wider = $txt=~/<input.+type=text.+size=(\d+)/i?1+$1:0;
$txt=~s/<[^>]+>//g;
$txt=~s/>/>/g;
$txt=~s/</</g;
$tabout[$line].= ' ' x ($width[$y]-length($txt)-$wider);
}
}
}
}
$row_start_line+=$height[$x];
#--lage streker?
if(not $no_header_line){
if($x==0){
for my $y (0..$j){
next if $remove_empty && !$not_empty[$y];
$tabout[$row_start_line].=('-' x ($width[$y]-1))." ";
}
$row_start_line++;
@header=("",@tabout);
}
elsif(
$x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
and $x+1<@$tab
and !$no_header_line
)
{
push(@tabout,@header);
$row_start_line+=@header;
$header_last=1;
}
else{
$header_last=0;
}
}
}#for x
return join("\n",@tabout)."\n";
}
=head2 serialize
=head2 srlz
Synonym to L</serialize>, but remove unnecessary single quote chars around
C<< \w+ >>-keys and number values (except numbers with leading zeros). Example:
serialize:
%s=('action'=>{'del'=>'0','ins'=>'0','upd'=>'18'},'post'=>'1348','pre'=>'1348',
'updcol'=>{'Laerestednr'=>'18','Studietypenr'=>'18','Undervisningssted'=>'7','Url'=>'11'},
'where'=>'where 1=1');
srlz:
%s=(action=>{del=>0,ins=>0,upd=>18},post=>1348,pre=>1348,
updcol=>{Laerestednr=>18,Studietypenr=>18,Undervisningssted=>7,Url=>11},
where=>'where 1=1');
Todo: update L</serialize> to do the same, but in the right way. (For now
srlz runs the string from serialize() through two C<< s/// >>, this will break
in certain cases). L</srlz> will be kept as a synonym (or the other way around).
=cut
sub srlz {
my $s=serialize(@_);
$s=~s,'(\w+)'=>,$1=>,g;
$s=~s,=>'([+-]?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?)',=>$1,g; #ikke ledende null! hm
$s;
}
=head2 cnttbl
my %nordic_country_population=(Norway=>5214890,Sweden=>9845155,Denmark=>5699220,Finland=>5496907,Iceland=>331310);
print cnttbl(\%nordic_country_population);
Iceland 331310 1.25%
Norway 5214890 19.61%
Finland 5496907 20.67%
Denmark 5699220 21.44%
Sweden 9845155 37.03%
SUM 26587482 100.00%
Todo: Levels...:
my %sales=(
Toyota=>{Prius=>19,RAV=>12,Auris=>18,Avensis=>7},
Volvo=>{V40=>14, XC90=>4},
Nissan=>{Leaf=>19,Qashqai=>17},
Tesla=>{ModelS=>8}
);
print cnttbl(\%sales);
Toyota SUM 56
Volvo SUM 18
Nissan SUM 36
Tesla SUM 8
SUM SUM 56 100%
=cut
sub cnttbl {
my $hr=shift;
my $maxlen=max(3,map length($_),keys%$hr);
join"",ref((values%$hr)[0])
?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
map [$_,cnttbl($$hr{$_})],
sort keys%$hr }
:do{ my $sum=sum(values%$hr);
my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
map sprintf($fmt,@$_,100*$$_[1]/$sum),
(map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
(['SUM',$sum]) }
}
=head2 ref_deep
NOT IMPLEMENTED
Same as ref, but goes deeper.
print ref_deep( { 10=>[1,'ten'], 100=>[2,'houndred'], 1000=>[3,'thousand'] } ); # prints HASH_of_ARRAYS
print ref_deep( { 10=>'ten', 100=>[2,'houndred'], 1000=>[3,'thousand'] } ); # prints same (mixed, deepest)
print ref_deep( { 1=>[{a=>3,b=>6},{a=>1,b=>8}], 5=>[{a=>2,b=>5},{a=>7,b=>1}] } ); # HASH_of_ARRAYS_of_HASHES
(Todo, not supported: circular, alternatives for mixed)
=cut
sub ref_deep {
my $s=shift; #
}
=head2 nicenum
print 14.3 - 14.0; # 0.300000000000001
print 34.3 - 34.0; # 0.299999999999997
print nicenum( 14.3 - 14.0 ); # 0.3
print nicenum( 34.3 - 34.0 ); # 0.3
=cut
our $Nicenum;
sub nicenum { #hm
$Nicenum=$_[0];
$Nicenum=~s/([\.,]\d*)((\d)\3\3\3\3\3)\d$/$1$2$3$3$3$3$3$3$3$3$3/;
my $r=0+$Nicenum;
#warn "nn $_[0] --> $Nicenum --> $r\n";
$r;
}
=head2 sys
Call instead of C<system> if you want C<die> (Carp::croak) when something fails.
sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
=cut
sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
=head2 recursed
Returns true or false (actually 1 or 0) depending on whether the
current sub has been called by itself or not.
sub xyz
{
=head1 JUST FOR FUN
=head2 brainfu
B<Input:> one or two arguments
First argument: a string, source code of the brainfu
language. String containing the eight charachters + - < > [ ] . ,
Every other char is ignored silently.
Second argument: if the source code contains commas (,) the second
argument is the input characters in a string.
B<Output:> The resulting output from the program.
Example:
print brainfu(<<""); #prints "Hallo Verden!\n"
++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
.>----------.+++++++++++++.--------------.+.+++++++++.>+.>.
See L<http://en.wikipedia.org/wiki/Brainfuck>
=head2 brainfu2perl
Just as L</brainfu> but instead it return the perl code to which the
brainfu code is translated. Just C<< eval() >> this perl code to run.
Example:
print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');
Prints this string:
my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
--$c;--$b[$c];--$b[$c];--$b[$c];out;$o;
=head2 brainfu2perl_optimized
Just as L</brainfu2perl> but optimizes the perl code. The same
example as above with brainfu2perl_optimized returns this equivalent
but shorter perl code:
$b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;
=cut
sub brainfu { eval(brainfu2perl(@_)) }
sub brainfu2perl {
my($bf,$inp)=@_;
my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
$perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
$perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/</?'--$c;':/\./?'out;':/\,/?'inp;':'',split//,$bf).'$o;';
$perl;
}
sub brainfu2perl_optimized {
my $perl=brainfu2perl(@_);
$perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
$perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
$perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
$perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
$perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;
$perl=~s/;}/}/g;$perl=~s/;+/;/g;
$perl;
}
=head1 BLOOM FILTER SUBROUTINES
Bloom filters can be used to check whether an element (a string) is a
member of a large set using much less memory or disk space than other
data structures. Trading speed and accuracy for memory usage. While
risking false positives, Bloom filters have a very strong space
advantage over other data structures for representing sets.
In the example below, a set of 100000 phone numbers (or any string of
any length) can be "stored" in just 91230 bytes if you accept that you
can only check the data structure for existence of a string and accept
false positives with an error rate of 0.03 (that is three percent, error
rates are given in numbers larger than 0 and smaller than 1).
You can not retrieve the strings in the set without using "brute
force" methods and even then you would get slightly more strings than
you put in because of the error rate inaccuracy.
Bloom Filters have many uses.
See also: L<http://en.wikipedia.org/wiki/Bloom_filter>
See also: L<Bloom::Filter>
=head2 bfinit
Initialize a new Bloom Filter:
my $bf = bfinit( error_rate=>0.01, capacity=>100000 );
The same:
my $bf = bfinit( 0.01, 100000 );
since two arguments is interpreted as error_rate and capacity accordingly.
=head2 bfadd
bfadd($bf, $_) for @phone_numbers; # Adding strings one at a time
bfadd($bf, @phone_numbers); # ...or all at once (faster)
Returns 1 on success. Dies (croaks) if more strings than capacity is added.
=head2 Internals and speed
The internal hash-functions are C<< md5( "$key$salt" ) >> from L<Digest::MD5>.
Since C<md5> returns 128 bits and most medium to large sized bloom
filters need only a 32 bit hash function, the result from md5() are
split (C<unpack>-ed) into 4 parts 32 bits each and are treated as if 4
hash functions was called at once (speedup). Using different salts to
the key on each md5 results in different hash functions.
Digest::SHA512 would have been even better since it returns more bits,
if it werent for the fact that it's much slower than Digest::MD5.
String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
=head2 Theory and math behind bloom filters
L<http://www.internetmathematics.org/volumes/1/4/Broder.pdf>
L<http://blogs.sun.com/jrose/entry/bloom_filters_in_a_nutshell>
L<http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html>
See also Scaleable Bloom Filters: L<http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf> (not implemented in Acme::Tools)
...and perhaps L<http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf>
=cut
sub bfinit {
return bfretrieve(@_) if @_==1;
return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
require Digest::MD5;
@_%2&&croak "Arguments should be a hash of equal number of keys and values";
my %arg=@_;
my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
my $bf={error_rate => 0.001, #default p
capacity => 100000, #default n
min_hashfuncs => 1,
max_hashfuncs => 100,
counting_bits => 1, #default: not counting filter
adaptive => 0,
%arg, #arguments
key_count => 0,
overflow => {},
version => $Acme::Tools::VERSION,
};
croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
@$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
@$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
$$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
$$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
:$$bf{filterlength}<=2**32/4 ? "N*"
: "Q*";
bfadd($bf,@{$arg{keys}}) if $arg{keys};
return $bf;
}
sub bfaddbf {
my($bf,$bf2)=@_;
my $differror=join"\n",
map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
grep $$bf{$_} ne $$bf2{$_},
qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
croak $differror if $differror;
croak "Can not add adaptive bloom filters" if $$bf{adaptive};
my $count=$$bf{key_count}+$$bf2{key_count};
croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
if $count > $$bf{capacity};
$$bf{key_count}+=$$bf2{key_count};
if($$bf{counting_bits}==1){
$$bf{filter} |= $$bf2{filter};
#$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
}
else {
my $cb=$$bf{counting_bits};
for(0..$$bf{filterlength}-1){
my $sum=
vec($$bf{filter}, $_,$cb)+
vec($$bf2{filter},$_,$cb);
if( $sum>2**$cb-1 ){
$sum=2**$cb-1;
$$bf{overflow}{$_}++;
}
vec($$bf{filter}, $_,$cb)=$sum;
no warnings;
$$bf{overflow}{$_}+=$$bf2{overflow}{$_}
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
if exists $$bf2{overflow}{$_};
}
}
return $bf; #for convenience
}
sub bfsum {
my($bf)=@_;
return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
my($sum,$cb)=(0,$$bf{counting_bits});
$sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
return $sum;
}
sub bfadd {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
for(@$keysref){
#croak "Key should be scalar" if ref($_);
$$bf{key_count} >= $n and croak "Exceeded filter capacity $n" or $$bf{key_count}++;
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
if ($cb==1 and !$adaptive) { # normal bloom filter
vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
}
elsif ($cb>1) { # counting bloom filter
for(0..$k-1){
my $pos=$h[$_] % $m;
my $c=
vec($$bf{filter}, $pos, $cb) =
vec($$bf{filter}, $pos, $cb) + 1;
if($c==0){
vec($$bf{filter}, $pos, $cb) = -1;
$$bf{overflow}{$pos}++
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
}
}
}
elsif ($adaptive) { # adaptive bloom filter
my($i,$key,$bit)=(0+@h,$_);
for(0..$$bf{filterlength}-1){
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
my $pos=shift(@h) % $m;
$bit=vec($$bf{filter}, $pos, 1);
vec($$bf{filter}, $pos, 1)=1;
last if $_>=$k-1 and $bit==0;
}
}
else {croak}
}
return 1;
}
sub bfcheck {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
my $wa=wantarray();
if(!$adaptive){ # normal bloom filter or counting bloom filter
return map {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
return $match if !$wa;
$match;
} @$keysref;
}
else { # adaptive bloom filter
return map {
my($match,$i,$key,$bit,@h)=(1,0,$_);
for(0..$$bf{filterlength}-1){
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
my $pos=shift(@h) % $m;
$bit=vec($$bf{filter}, $pos, 1);
$match++ if $_ > $k-1 and $bit==1;
$match=0 if $_ <= $k-1 and $bit==0;
last if $bit==0;
}
return $match if !$wa;
$match;
} @$keysref;
}
}
sub bfgrep { # just a copy of bfcheck with map replaced by grep
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
return grep {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
$match;
} @$keysref;
}
sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
return grep {
my $match = 1; # match if every bit is on
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
!$match;
} @$keysref;
}
sub bfdelete {
require Digest::MD5;
my($bf,@keys)=@_;
return if !@keys;
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
for my $key (@$keysref){
my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
$$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
my($ones,$croak,@pos)=(0);
for(0..$k-1){
my $pos=$h[$_] % $m;
my $c=
vec($$bf{filter}, $pos, $cb);
vec($$bf{filter}, $pos, $cb)=$c-1;
$croak="Cannot delete a non-existing key $key" if $c==0;
$croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
if $c==1 and ++$ones and $$bf{overflow}{$pos};
}
if($croak){ #rollback
vec($$bf{filter}, $h[$_] % $m, $cb)=
vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
croak $croak;
}
}
return $bf;
}
sub bfstore {
require Storable;
Storable::store(@_);
}
sub bfretrieve {
require Storable;
my $bf=Storable::retrieve(@_);
carp "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
return $bf;
}
sub bfclone {
require Storable;
return Storable::dclone(@_); #could be faster
}
sub bfdimensions_old {
my($n,$p,$mink,$maxk, $k,$flen,$m)=
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
:@_==2 ? (@_,1,100,1)
: croak "Wrong number of arguments (".@_."), should be 2";
croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
$m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
$flen = int(1+$flen);
return ($flen,$k);
}
sub bfdimensions {
my($n,$p,$mink,$maxk)=
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
:@_==2 ? (@_,1,100)
: croak "Wrong number of arguments (".@_."), should be 2";
my $k=log(1/$p)/log(2); # k hash funcs
my $m=-$n*log($p)/log(2)**2; # m bits in filter
return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
}
#crontab -e
#01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1
sub _update_currency_file { #call from cron
my $fn=shift()||'/var/www/html/currency-rates';
my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or croak for values %exe;
open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
print $F "#-- Currency rates ".localtime()." (".time().")\n";
print $F "# File generated by Acme::Tools version $VERSION\n";
print $F "# Updated every 6th hour on http://calthis.com/currency-rates\n";
print $F "NOK 1.000000000\n";
my $amount=1000;
my $data=qx($exe{curl} -s "https://www.x-rates.com/table/?from=NOK&amount=$amount");
$data=~s,to=([A-Z]{3})(.)>,$2>$1</td><td>,g;
my @data=ht2t($data,"Alphabetical order"); shift @data;
@data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
my %data=map split,@data;
my $json=qx( $exe{curl} -s https://api.coinmarketcap.com/v1/ticker/ );
eval "require JSON;"; croak if $@;
my $arr=JSON::decode_json($json);
for my $c (qw(BTC LTC XBT ETH XRP BCH ETC)) {
my @a=grep$$_{symbol} eq $c,@$arr;
next if @a != 1 or !$a[0]{price_usd};
push @data, "$c ".($a[0]{price_usd}*$data{USD})."\n";
}
#die srlz(\@data,'data');
print $F sort(@data);
close($F);
qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
}
sub ftype {
my $f=shift;
-e $f and
-f$f ? 'file' # -f File is a plain file.
:-d$f ? 'dir' # -d File is a directory.
:-l$f ? 'symlink' # -l File is a symbolic link.
:-p$f ? 'pipe' # -p File is a named pipe (FIFO), or Filehandle is a pipe.
:-S$f ? 'socket' # -S File is a socket.
:-b$f ? 'blockfile' # -b File is a block special file.
:-c$f ? 'charfile' # -c File is a character special file.
:-t$f ? 'ttyfile' # -t Filehandle is opened to a tty.
: ''
or undef;
}
sub ext2mime {
my $ext=shift(); #or filename
#http://www.sitepoint.com/web-foundations/mime-types-complete-list/
croak "todo: ext2mime not yet implemented";
#return "application/json";#feks
}
sub base64 ($;$) { #
if ($] >= 5.006) {
require bytes;
croak "base64 failed: only defined for bytes"
if bytes::length($_[0]) > length($_[0])
or $] >= 5.008 && $_[0] =~ /[^\0-\xFF]/
}
my $eol=defined$_[1]?$_[1]:"\n";
my $res=pack("u",$_[0]);
$res=~s/^.//mg;
$res=~s/\n//g;
$res=~tr|` -_|AA-Za-z0-9+/|;
my $pad=(3-length($_[0])%3)%3;
$res=~s/.{$pad}$/'=' x $pad/e if $pad;
$res=~s/(.{1,76})/$1$eol/g if length($eol); #todo !=76
$res;
}
our $Fix_unbase64=0;
sub unbase64 ($) {
my $s=shift;
$s=~tr,0-9a-zA-Z+=/,,cd;
if($Fix_unbase64){ $s.='=' while length($s)%4 }
croak "unbase64 failed: length ".length($s)." not multiple of 4" if length($s)%4;
$s=~s/=+$//;
$s=~tr|A-Za-z0-9+/| -_|;
length($s) ? unpack("u",join'',map(chr(32+length($_)*3/4).$_,$s=~/(.{1,60})/gs)) : "";
}
=head1 COMMANDS
=head2 install_acme_command_tools
sudo perl -MAcme::Tools -e install_acme_command_tools
Wrote executable /usr/local/bin/conv
Wrote executable /usr/local/bin/due
Wrote executable /usr/local/bin/xcat
Wrote executable /usr/local/bin/freq
Wrote executable /usr/local/bin/deldup
Wrote executable /usr/local/bin/ccmd
Wrote executable /usr/local/bin/z2z
Wrote executable /usr/local/bin/2gz
Wrote executable /usr/local/bin/2gzip
Wrote executable /usr/local/bin/2bz2
Wrote executable /usr/local/bin/2bzip2
Wrote executable /usr/local/bin/2xz
Wrote executable /usr/local/bin/resubst
Examples of commands then made available:
conv 1 USD EUR #might show 0.88029 if thats the current currency rate. Uses conv()
conv .5 in cm #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
due [-h] /path/1/ /path/2/ #like du, but show statistics on file extentions instead of subdirs
xcat file #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
freq file #reads file(s) or stdin and view counts of each byte 0-255
ccmd grep string /huge/file #caches stdout+stderr for 15 minutes (default) for much faster results later
ccmd "sleep 2;echo hello" #slow first time. Note the quotes!
ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
z2z [-pvk1-9oe -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
#-k keep org file, -o overwrite, 1-9 compression degree, -e for xz does "extreme"
#compressions, very slow. For some data types this reduces size significantly
#2xz and 2bz2 depends on xz and bzip2 being installed on system
2xz #same as z2z with -t xz
2bz2 #same as z2z with -t bz2
2gz #same as z2z with -t gz
rttop
trunc file(s)
wipe file(s)
=head3 z2z
=head3 2xz
=head3 2bz2
=head3 2gz
The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C<z2z> with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.
z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files
Converts (recompresses) files from one compression type to another. For instance from .gz to .bz2
Keeps uid, gid, mode (chmod) and mtime.
the same first 8 kilobytes, 3) of those: find duplicate files by
finding the MD5sums of the whole files.
finddup [-d -s -h] paths/ files/* ... #reports (+deletes with -d) duplicate files
#-s for symlinkings dups, -h for hardlink
finddup <files> # print duplicate files, <files> might be filenames and directories
finddup -a <files> # print duplicate files, also print the first file
finddup -d <files> # delete duplicate files, use -v to also print them before deletion
finddup -s <files> # make symbolic links of duplicate files
finddup -h <files> # make hard links of duplicate files
finddup -v ... # verbose, print before -d, -s or -h
finddup -n -d <files> # dry run: show rm commands without actually running them
finddup -n -s <files> # dry run: show ln commands to make symlinks of duplicate files todo:NEEDS FIX!
finddup -n -h <files> # dry run: show ln commands to make hard links of duplicate files
finddup -q ... # quiet
finddup -k o # keep oldest with -d, -s, -h, consider newer files duplicates
finddup -k n # keep newest with -d, -s, -h, consider older files duplicates
finddup -k O # same as -k o, just use access time instead of modify time
finddup -k N # same as -k n, just use access time instead of modify time
finddup -0 ... # use ascii 0 instead of the normal \n, for xargs -0
finddup -P n # use n bytes from start of file in 1st md5 check (default 8192)
finddup -p # view progress in last and slowest of the three steps
Default ordering of files without C<-k n> or C<-k o> is the order they
are mentioned on the command line. For directory args the order might be
random: use C<< dir/* >> to avoid that (but then dot files are not included).
=cut
sub install_acme_command_tools {
my $dir=(grep -d$_, @_, '/usr/local/bin', '/usr/bin')[0];
for( qw( conv due xcat freq finddup ccmd trunc wipe rttop z2z 2gz 2gzip 2bz2 2bzip2 2xz resubst zsize) ){
unlink("$dir/$_");
writefile("$dir/$_", "#!$^X\nuse Acme::Tools;\nAcme::Tools::cmd_$_(\@ARGV);\n");
sys("/bin/chmod +x $dir/$_"); #hm umask
print "Wrote executable $dir/$_\n";
}
}
sub cmd_conv { print conv(@ARGV)."\n" }
our @Due_fake_stdin;
#TODO: output from tar tvf and ls and find -ls
sub cmd_due {
my %o;
my @argv=opts("zkKmhciMCAPate:lE:t",\%o,@_);
require File::Find;
no warnings 'uninitialized';
die"$0: -l not implemented yet\n" if $o{l}; #man du: default is not to count hardlinks more than once, with -l it does
die"$0: -h, -k or -m can not be used together\n" if $o{h}+$o{k}+$o{m}>1;
die"$0: -c and -a can not be used together\n" if $o{a}+$o{c}>1;
die"$0: -k and -m can not be used together\n" if $o{k}+$o{m}>1;
die"$0: -M, -C, -A can not be used together\n" if $o{M}+$o{C}+$o{A}>1;
my(%c,%b,$cnt,$bts,%xtime);
my $zext=$o{z}?'(\.(z|Z|gz|bz2|xz|rz|kr|lrz|rz))?':'';
$o{E}||=11;
my $r=qr/(\.[^\.\/]{1,$o{E}}$zext)$/i;
my $qrexcl=exists$o{e}?qr/$o{e}/:0;
#TODO: ought to work: tar cf - .|tar tvf -|due
my $x=$o{M}?9:$o{C}?10:$o{A}?8:9;
if(-p STDIN or @Due_fake_stdin){
die "due: can not combine STDIN and args\n" if @argv;
my $stdin=join"",map"$_\n",@Due_fake_stdin; #test
open(local *STDIN, '<', \$stdin) or die "ERR: $! $?\n" if $stdin;
my $rl=qr/(^| )\-[rwx\-sS]{9}\s+(?:\d )?(?:[\w\-]+(?:\/|\s+)[\w\-]+)\s+(\d+)\s+.*?([^\/]*\.[\w,\-]+)?$/;
my $MorP=$o{M}||$o{C}||$o{A}||$o{P}?"due: -M, -C, -A and -P not yet implemented for STDIN unless list of filenames only\n":0;
while(<STDIN>){
chomp;
next if /\/$/;
my($f,$sz,$xtime)=(/$rl/?($3,$2):-f$_?($_,(stat)[7,$x]):next);
# 1576142 240 -rw-r--r-- 1 root root 242153 april 4 2016 /opt/wine-staging/share/wine/wine.inf
my $ext=$f=~$r?$1:'';
$ext=lc($ext) if $o{i};
$cnt++; $c{$ext}++;
$bts+=$sz; $b{$ext}+=$sz;
defined $xtime and $xtime{$ext}.=",$xtime" or die $MorP if $MorP;
}
}
else { #hm DRY
@argv=('.') if !@argv;
File::Find::find({follow=>0, wanted =>
sub {
return if !-f$_;
return if $qrexcl and defined $File::Find::name and $File::Find::name=~$qrexcl;
my($sz,$xtime)=(stat($_))[7,$x];
my $ext=m/$r/?$1:'';
$ext=lc($ext) if $o{i};
$cnt++; $c{$ext}++;
$bts+=$sz; $b{$ext}+=$sz;
$xtime{$ext}.=",$xtime" if $o{M} || $o{C} || $o{A} || $o{P};
1;
} },@argv);
}
my($f,$s)=$o{k}?("%14.2f kb",sub{$_[0]/1024})
:$o{K}?("%14.2f Kb",sub{$_[0]/1000})
:$o{m}?("%14.2f mb",sub{$_[0]/1024**2})
:$o{h}?("%14s", sub{bytes_readable($_[0])})
: ("%14d b", sub{$_[0]});
my @e=$o{a}?(sort(keys%c))
:$o{c}?(sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c)
: (sort{$b{$a}<=>$b{$b} or $a cmp $b}keys%c);
my $perc=!$o{M}&&!$o{C}&&!$o{A}&&!$o{P}?sub{""}:
sub{
my @p=$o{P}?(10,50,90):(50);
my @m=@_>0 ? do {grep$_, split",", $xtime{$_[0]}}
: do {grep$_, map {split","} values %xtime};
my @r=percentile(\@p,@m);
@r=(min(@m),@r,max(@m)) if $o{M}||$o{C}||$o{A};
@r=map int($_), @r;
my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
@r=map tms($_,$fmt), @r;
" ".join(" ",@r);
};
my $width=max( 10, grep $_, map length($_), @e );
@e=@e[-10..-1] if $o{t} and @e>10; #-t tail
printf("%-*s %8d $f %7.2f%%%s\n",$width,$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
printf("%-*s %8d $f %7.2f%%%s\n",$width,"Sum",$cnt,&$s($bts),100,&$perc());
}
sub cmd_resubst {
my %o;
my $zo="123456789e";
my @argv=opts("f:t:vno:gi$zo",\%o,@_);
if(exists$o{t}){ $o{t}=~s,\\,\$, } else { $o{t}='' }
my($i,$tc,$tbfr,$tbto)=(0,0,0,0);
for my $file (@argv){
my $zopt=join" ",map"-$_",grep$o{$_},split//,$zo;
my $oext=$o{o}?$o{o}:$file=~/\.(gz|bz2|xz)$/i?$1:'';
my $open_out_pre=$oext?"|".openstr_prog({qw/gz gzip bz2 bzip2 xz xz/}->{lc($oext)})." $zopt":'';
my $open_out="$open_out_pre > $file.tmp$$";
my $open_in=openstr($file);
# die srlz(\%o,'o','',1);
open my $I, $open_in or croak"ERR: open $open_in failed. $! $?\n";
open my $O, $open_out or croak"ERR: open $open_out failed. $! $?\n";
my $c=0;
my $mod=join"",grep$o{$_},qw(g i);
eval"while(<\$I>){ \$c+=s/\$o{f}/$o{t}/$mod;print \$O \$_ }";
$tc+=$c;
close($I);close($O);
chall($file,"$file.tmp$$") or croak"ERR: chall $file\n" if !$o{n};
my($bfr,$bto)=(-s$file,-s"$file.tmp$$");
unlink $file or croak"ERR: cant rm $file\n";
my $newfile=$o{o}?repl($file,qr/\.(gz|bz2|xz)$/i,".$oext"):$file;
rename("$file.tmp$$",$newfile) or croak"ERR: rename $file.tmp$$ -> $newfile failed\n";
if($o{v}){
my $pr=$bfr?100*$bto/$bfr:0;
printf "%*d/%d %*s %7d =>%8d b (%2d%%) %s\n",
length(0+@argv), ++$i, 0+@argv, -15, "$tc/$c", $bfr, $bto, $pr, $file;
$tbfr+=$bfr;
$tbto+=$bto;
}
}
if($o{v} and @argv>1){
printf "Replaces: %d Bytes before: %d After: %d Change: %.1f%%\n",
$tc, $tbfr, $tbto, $tbfr?100*($tbto-$tbfr)/$tbfr:0
}
$tc;
}
sub cmd_xcat {
for my $fn (@_){
my $os=openstr($fn);
open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
#binmode($FH);#hm?
print while <$FH>;
close($FH);
}
}
sub cmd_freq {
my(@f,$i);
map $f[$_]++, unpack("C*",$_) while <>;
my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE CHAR COUNT","---- ----- -------");
my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-Ã¥",146,"DOS-Ã",157,"DOS-Ã",143,"DOS-Ã
",map{($_," ")}0..31);
printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
}
sub cmd_deldup {
cmd_finddup('-d',@_);
}
sub cmd_finddup {
# http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
# die "todo: finddup not ready yet"
my %o;
my @argv=opts("ak:dhsnqv0P:FMRp",\%o,@_); $o{P}=1024*8 if!defined$o{P}; $o{k}='' if!defined$o{k};
croak"ERR: cannot combine -a with -d, -s or -h" if $o{a} and $o{d}||$o{s}||$o{h};
require File::Find;
@argv=map{
my @f;
if(-d$_){ File::Find::find({follow=>0,wanted=>sub{return if !-f$_;push@f,$File::Find::name;1}},$_) }
else { @f=($_) }
@f;
}@argv;
my %md5sum;
my $md5sum=sub{$md5sum{$_[0]}=md5sum($_[0]) if!defined$md5sum{$_[0]}}; #memoize
my $md5sum_1st_part=sub{
open my $fh, "<", $_[0] or die "ERR: Could not read $_[0]";
binmode($fh);
my $buf; read($fh,$buf,$o{P});
close($fh);
md5sum(\$buf);
};
my @checks=( #todo: stat()[0,1] (or[0,1,7]?) and diff filename => no need for md5, is hardlink! just linux?
sub{-s$_[0]},
sub{-s$_[0]<=$o{P}?md5sum($_[0]):&$md5sum_1st_part($_[0])},
sub{md5sum($_[0])}
);
pop @checks if $o{M}; #4tst
my $i=0;
my %s=map{($_=>++$i)}@argv; #sort
my %f=map{($_=>[$_])}@argv; #also weeds out dupl params
for my $c (@checks){
my @f=map @{$f{$_}}, sort keys %f;
if($o{p} and $c eq $checks[-1]){ #view progress for last check, todo: eta() is wacky here! everywhere?
my $sum=@f?sum(map -s$_,@f):0;
my($corg,$cnt,$cntmb,$mb)=($c,0,0,$sum/1e6);
$c=sub{
$cntmb+=(-s$_[0])/1e6;
my $eol=++$cnt==@f?"\n":"\r";
print STDERR sprintf("%d/%d files checked (%d%%), %d/%d MB (%d%%), ETA in %d sec $eol",
$cnt, 0+@f, 100*$cnt/@f, $cntmb, $mb, 100*$cntmb/$mb,
curb(nvl(eta($cnt,0+@f),time)-time(),0,1e7));
&$corg(@_)
};
}
my %n; push @{$n{&$c($_)}}, $_ for @f;
delete @n{grep@{$n{$_}}<2,keys%n};
%f=%n;
}
return %f if $o{F};
my@r=sort{$s{$$a[0]}<=>$s{$$b[0]}}values%f;
my $si={qw(o 9 n 9 O 8 N 8)}->{$o{k}}; #stat index: 9=mtime, 8=atime
my $sort=lc$o{k} eq 'o' ? sub{sprintf"%011d%9d", (stat($_[0]))[$si],$s{$_[0]}}
:lc$o{k} eq 'n' ? sub{sprintf"%011d%9d",1e11-(stat($_[0]))[$si],$s{$_[0]}}
: sub{sprintf "%9d", $s{$_[0]}};
@$_=map$$_[1],sort{$$a[0]cmp$$b[0]}map[&$sort($_),$_],@$_ for @r;
my %of; #dup of
for my $r (@r){
$of{$_}=$$r[0] for @$r[1..$#$r];
}
my $nl=$o{0}?"\x00":"\n";
my $print=sub{$o{q} or print $_[0]};
my $do=sub{ $o{v} && &$print("$_[0]$nl"); qx($_[0]) };
my $go=sub{ $o{n} ? &$print("$_[0]$nl") : &$do($_[0]) };
&$print(join$nl,map join("",map"$_$nl",@$_),@r) and return if $o{a};
@r=map@$_[1..$#$_],@r;
return @r if $o{R}; #hm
unlink@r if $o{d}||$o{s}||$o{h} and !$o{n}; #delete duplicates
map &$go(qq(rm "$_") ),@r if $o{d}&& $o{n}; #delete duplicates, dryrun
map &$go(qq(ln "$of{$_}" "$_")),@r if $o{h}; #replace duplicates with hardlink
map &$go(qq(ln -s "$of{$_}" "$_")),@r if $o{s}; #replace duplicates with symlink,
#todo: BUG! abc/def/file -> ghi/file should be abc/def/file -> ../../ghi/file
return if $o{q} or $o{n}; #quiet or dryrun
&$print("$_$nl") for @r;
}
#http://stackoverflow.com/questions/11900239/can-i-cache-the-output-of-a-command-on-linux-from-cli
our $Ccmd_cache_dir='/tmp/acme-tools-ccmd-cache';
our $Ccmd_cache_expire=15*60; #default 15 minutes
sub cmd_ccmd {
require Digest::MD5;
my $cmd=join" ",@_;
my $d="$Ccmd_cache_dir/".username();
makedir($d);
my $md5=Digest::MD5::md5_hex($cmd);
my($fno,$fne)=map"$d/cmd.$md5.std$_","out","err";
my $too_old=sub{time()-(stat(shift))[9] >= $Ccmd_cache_expire};
unlink grep &$too_old($_), <$d/*.std???>;
sys("($cmd) > $fno 2> $fne") if !-e$fno or &$too_old($fno);
print STDOUT "".readfile($fno);
print STDERR "".readfile($fne);
}
sub cmd_trunc { die "todo: trunc not ready yet"} #truncate a file, size 0, keep all other attr
#todo: wipe -n 4 filer* #virker ikke! tror det er args() eller opts() som ikke virker
sub cmd_wipe {
my %o;
my @argv=opts("n:k0123456789",\%o,@_);
die if 1<grep exists$o{$_},'n',0..9;
$o{$_} and $o{n}=$_ for 0..9;
wipe($_,$o{n},$o{k}) for @argv;
}
sub which { my $prog=shift; -x "$_/$prog" and return "$_/$prog" for split /:/, $ENV{PATH} }
sub cmd_2gz {cmd_z2z("-t","gz", @_)}
sub cmd_2gzip {cmd_z2z("-t","gz", @_)}
sub cmd_2bz2 {cmd_z2z("-t","bz2",@_)}
sub cmd_2bzip2 {cmd_z2z("-t","bz2",@_)}
sub cmd_2xz {cmd_z2z("-t","xz", @_)}
#todo: sub cmd_7z
#todo: .tgz same as .tar.gz (but not .tbz2/.txz)
sub cmd_z2z {
my %o;
my $pvopts="L:D:i:lIq";
my @argv=opts("pt:kvhon123456789es:$pvopts",\%o,@_);
my $t=repl(lc$o{t},qw/gzip gz bzip2 bz2/);
die "due: unknown compression type $o{t}, known are gz, bz2 and xz" if $t!~/^(gz|bz2|xz)$/;
$o{p}=1 if!defined$o{p} and grep$pvopts=~/$_/,keys%o;
delete $o{e} if $o{e} and $o{t} ne 'xz' and warn "-e available only for type xz\n";
my $sum=sum(map -s$_,@argv);
print "Converting ".@argv." files, total ".bytes_readable($sum)."\n" if $o{v} and @argv>1;
my $cat='cat';
if($o{p}){ if(which('pv')){ $cat='pv' } else { warn repl(<<"",qr/^\s+/) } }
due: pv for -p not found, install with sudo yum install pv, sudo apt-get install pv or similar
$o{$_} and $o{$_}=" " for qw(l q); #still true, but no cmd arg for:
$o{I} and $o{I}="-pterb";
exists$o{$_} and $cat=~s,pv,pv -$_ $o{$_}, for $pvopts=~/(\w)/g; #warn "cat: $cat\n";
my $sumnew=0;
my $start=time_fp();
my($i,$bsf)=(0,0);#bytes so far
$Eta{'z2z'}=[];eta('z2z',0,$sum);
#@argv=map$$_[1],sort{$$a[0]cmp$$b[0]}map{[$opt{
for(@argv){
my $new=$_; $new=~s/(\.(gz|bz2|xz))?$/.$t/i or die;
my $ext=defined($2)?lc($2):'';
my $same=/^$new$/; $new.=".tmp" if $same; die if $o{k} and $same;
next if !-e$_ and warn"$_ do not exists\n";
next if !-r$_ and warn"$_ is not readable\n";
next if -e$new and !$o{o} and warn"$new already exists, skipping (use -o to overwrite)\n";
my $unz={qw/gz gunzip bz2 bunzip2 xz unxz/}->{$ext}||'';
#todo: my $cntfile="/tmp/acme-tools-z2z-wc-c.$$";
#todo: my $cnt="tee >(wc -c>$cntfile)" if $ENV{SHELL}=~/bash/ and $o{v}; #hm dash vs bash
my $z= {qw/gz gzip bz2 bzip2 xz xz/}->{$t};
$z.=" -$_" for grep$o{$_},1..9,'e';
$z.=" -$_ $o{$_}" for grep exists$o{$_},'L';
my $cmd=qq($cat "$_"|$unz|$z>"$new");
#todo: "$cat $_|$unz|$cnt|$z>$new";
#cat /tmp/kontroll-linux.xz|unxz|tee >(wc -c>/tmp/p)|gzip|wc -c;cat /tmp/p
$cmd=~s,\|+,|,g; #print "cmd: $cmd\n";
sys($cmd);
chall($_,$new) or croak("$0 cannot chmod|chown|touch $new") if !$o{n};
my($szold,$sznew)=map{-s$_}($_,$new);
$bsf+=-s$_;
unlink $_ if !$o{k};
rename($new, replace($new,qr/.tmp$/)) or die if $same;
if($o{v}){
$sumnew+=$sznew;
my $pr=sprintf"%0.1f%%",$szold?100*$sznew/$szold:0;
#todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
#todo: $o{h} ? printf("%6.1f%% %9s => %9s => %9s %s\n", $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
#todo: : printf("%6.1f%% %11d b => %11d b => %11 b %s\n",$pr,$szold,$szuncmp,$sznew,$_)
my $str= $o{h}
? sprintf("%-7s %9s => %9s", $pr,(map bytes_readable($_),$szold,$sznew))
: sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
if(@argv>1){
$i++;
$str=$i<@argv
? " ETA:".sprintf("%-8s",sec_readable(eta('z2z',$bsf,$sum)-time_fp()))." $str"
: " TA: 0s $str"
if $sum>1e6;
$str="$i/".@argv." $str";
}
print "$str $new\n";
}
}
if($o{v} and @argv>1){
my $bytes=$o{h}?'':'bytes ';
my $str=
sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
0+@argv,
time_fp()-$start,
(map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
100*$sumnew/$sum;
$str=~s,\((\d),(+$1,;
print $str;
}
}
=head2 args
Parses command line options and arguments:
my %opt;
my @argv=Acme::Tools::args('i:nJ123',\%opt,@ARGV); #returns remaining command line elements after C<-o ptions> are parsed into C<%opt>.
Uses C<Getopt::Std::getopts()>. First arg names the different one char
options and an optional C<:> behind the letter or digit marks that the
switch takes an argument.
=cut
sub args {
my $switches=shift;
my $hashref=shift;
my $re_sw='^([a-z0-9]:?)+$';
croak "ERR: args: first arg $switches dont match $re_sw\n" if $switches !~ /$re_sw/i;
croak "ERR: second arg to args() not hashref\n" if ref($hashref) ne 'HASH';
local @ARGV=@_;
require Getopt::Std;
Getopt::Std::getopts($switches => $hashref);
(@ARGV);
}
sub opts {
my($def, $hashref, @a)=@_;
@a=@ARGV if @_<=2;
my %def=map{/(\w)(:?)/;($1=>$2?2:1)}$def=~/(\w:?)/g;
my $o1=join"",grep$def{$_}==1,sort keys%def;
my $o= join"", sort keys%def;
my @r;
while(@a){
my $a=shift(@a);
if($a=~/^-([$o1])([$o].*)$/){
unshift@a,"-$1","-$2";
}
elsif($a=~/^-(\w)(.*)$/){
my $d=$def{$1}//0;
push@{$$hashref{$1}},$d==1 && length($2) ? croak"opt -$1 has no arg (is $2 here)"
:$d==1 ? 1
:$d==2 && length($2) ? $2
:$d==2 ? shift(@a)
:croak"unknown opt -$1";
}
elsif($a eq '--'){
last;
}
else {
push @r, $a;
}
}
$_=join",",@$_ for values %$hashref;
(@r,@a)
}
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp -
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp Tools.pm
sub cmd_zsize {
my %o;
my @argv=opts("heEpts",\%o,@_);
my $stdin=!@argv || join(",",@argv) eq '-';
@argv=("/tmp/acme-tools.$$.stdin") if $stdin;
writefile($argv[0],join("",<STDIN>)) if $stdin;
my @prog=grep qx(which $_), qw(gzip bzip2 xz zstd brotli);
for my $f (@argv){
my $sf=-s$f;
print "--- $f does not exists\n" and next if !-e$f;
print "--- $f is not a file\n" and next if !-f$f;
print "--- $f ($sf b) is not readable\n" and next if !-r$f;
print "--- $sf b ".bytes_readable($sf)." ".($stdin?"-":$f)."\n";
next if !$sf;
my(@t,@s);
for my $prog (@prog){
next if !qx(which $prog);
my @l=1..9;
push @l,map"e$_",1..9 if $prog eq 'xz' and $o{e};
@l=map"e$_",1..9 if $prog eq 'xz' and $o{E};
@l=map 10+$_,@l if $prog eq 'zstd';
@l=map"q $_",3..11 if $prog eq 'brotli';
printf "%-6s",$prog;
push @t, $prog, [] if $o{t};
push @s, $prog, [] if $o{p} and $o{s};
for my $l (@l){ #level
my $t=time_fp();
my $b=qx(cat $f|$prog -$l|wc -c);
push@{$t[-1]},time_fp()-$t if $o{t};
push@{$s[-1]},$b if $o{p} and $o{s};
$o{p} ? printf("%9.1f%% ",100*$b/$sf)
:$o{h} ? printf("%10s ",bytes_readable($b))
: printf("%10d ",$b);
}
print "\n";
}
while(@s){
printf "%-6s",shift@s;
$o{h}?printf("%10s ",bytes_readable($_)):printf("%10d ",$_) for @{shift@s}; print "\n";
}
while(@t){
printf "%-6s",shift@t;
printf "%9.3fs ",$_ for @{shift@t}; print "\n";
}
}
unlink $argv[0] if $stdin;
}
sub cmd_rttop { die "rttop: not implemented here yet.\n" }
sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
sub cmd_catal { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
#todo: cmd_tabdiff (fra sonyk)
#todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)
=head1 DATABASE STUFF - NOT IMPLEMENTED YET
Uses L<DBI>. Comming soon...
$Dbh
dlogin
dlogout
drow
drows
drowc
drowsc
dcols
dpk
dsel
ddo
dins
dupd
ddel
dcommit
drollback
=cut
#my$dummy=<<'SOON';
sub dtype {
my $connstr=shift;
return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
return 'Oracle' if $connstr=~/\@/;
#my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
my $sth=$Dbh->prepare_cached($q);
$sth->execute(@b);
my @r=$sth->fetchrow_array;
$sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
#$dbh->selectrow_array($statement);
return @r==1?$r[0]:@r;
}
sub drows {
}
sub drowc {
}
sub drowsc {
}
sub dcols {
}
sub dpk {
}
sub dsel {
}
sub ddo {
my @arg=_dattrarg(@_);
#warn serialize(\@arg,'arg','',1);
$Dbh->do(@arg); #hm cache?
}
sub dins {
}
sub dupd {
}
sub ddel {
}
sub dcommit { $Dbh->commit }
sub drollback { $Dbh->rollback }
sub _dattrarg {
my @arg=@_;
splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
@arg;
}
=head2 self_update
Update Acme::Tools to newest version quick and dirty:
function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}
pmview Acme::Tools #view date and version before
sudo perl -MAcme::Tools -e Acme::Tools::self_update #update to newest version
pmview Acme::Tools #view date and version after
Does C<cd> to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm
TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl
=cut
our $Wget;
our $Self_update_url='https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm'; #todo: change site
sub self_update {
#in($^O,'linux','cygwin') or die"ERROR: self_update works on linux and cygwin only";
$Wget||=(grep -x$_,map"$_/wget",'/usr/bin','/bin','/usr/local/bin','.')[0]; #hm --no-check-certificate
-x$Wget or die"ERROR: wget ($Wget) executable not found\n";
my $d=dirname(__FILE__);
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
sys("cd $d; $Wget -N ".($ARGV[0]||$Self_update_url));
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
}
1;
package Acme::Tools::BloomFilter;
use 5.008; use strict; use warnings; use Carp;
sub new { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
sub add { &Acme::Tools::bfadd }
sub addbf { &Acme::Tools::bfaddbf }
sub check { &Acme::Tools::bfcheck }
sub grep { &Acme::Tools::bfgrep }
sub grepnot { &Acme::Tools::bfgrepnot }
sub delete { &Acme::Tools::bfdelete }
sub store { &Acme::Tools::bfstore }
sub retrieve { &Acme::Tools::bfretrieve }
sub clone { &Acme::Tools::bfclone }
sub sum { &Acme::Tools::bfsum }
1;
# Ny versjon:
# - git clone https://github.com/kjetillll/Acme-Tools.git
# - c-s todo
# - endre $VERSION
# - endre Release history under HISTORY
# - endre årstall under =head1 COPYRIGHT
# - oppd default valutakurser inkl datoen
# - emacs Changes
# - emacs README versjon + aarstall
# - diff -byW200 <(grep -a ^sub Acme-Tools-0.22/Tools.pm|sort) <(grep -a ^sub Tools.pm|sort)|less
# - emacs MANIFEST legg til ev nye t/*.t
# - perl Makefile.PL && make test
# - /usr/bin/perl Makefile.PL && make test
# - perlbrew exec "perl Makefile.PL && time make test"
# - perlbrew exec "perl Makefile.PL && make test" | grep -P '^(perl-|All tests successful)'
# - perlbrew use perl-5.10.1; perl Makefile.PL && make test; perlbrew off
# - test evt i cygwin og mingw-perl
# - pod2html Tools.pm > Tools.html ; firefox Tools.html
# - https://metacpan.org/pod/Acme::Tools
# - http://cpants.cpanauthors.org/dist/Acme-Tools #kvalitee
# - perl Makefile.PL && make test && make dist
# - cp -p *tar.gz /htdocs/
# - #ci -l -mversjon -d `cat MANIFEST` #no
# - git add `cat MANIFEST`
# - git status
# - git commit -am versjon
# - git push #eller:
# - git push origin master
# - http://pause.perl.org/
# - tegnsett/utf8-kroell
# - https://rt.cpan.org/Dist/Display.html?Queue=Acme-Tools
# http://en.wikipedia.org/wiki/Birthday_problem#Approximations
# memoize_expire() http://perldoc.perl.org/Memoize/Expire.html
# memoize_file_expire()
# memoize_limit_size() #lru
( run in 0.843 second using v1.01-cache-2.11-cpan-140bd7fdf52 )