Acme-Tools

 view release on metacpan or  search on metacpan

Tools.pm  view on Meta::CPAN

  base64
  unbase64
  opts
  ed
  changed
  $Edcursor
  brainfu
  brainfu2perl
  brainfu2perl_optimized
  bfinit
  bfsum
  bfaddbf
  bfadd
  bfcheck
  bfgrep
  bfgrepnot
  bfdelete
  bfstore
  bfretrieve
  bfclone
  bfdimensions
  $PI
  install_acme_command_tools

  $Dbh
  dlogin
  dlogout
  drow
  drows
  drowc
  drowsc
  dcols
  dpk
  dsel
  ddo
  dins
  dupd
  ddel
  dcommit
  drollback
);

our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';

=head1 NAME

Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace

=head1 SYNOPSIS

 use Acme::Tools;

 print sum(1,2,3);                   # 6
 print avg(2,3,4,6);                 # 3.75
 print median(2,3,4,6);              # 3.5
 print percentile(25, 101..199);     # 125

 my @list = minus(\@listA, \@listB); # set operation
 my @list = union(\@listA, \@listB); # set operation

 print length(gzip("abc" x 1000));   # far less than 3000

 writefile("/dir/filename",$string); # convenient
 my $s=readfile("/dir/filename");    # also convenient

 print "yes!" if between($PI,3,4);

 print percentile(0.05, @numbers);

 my @even = range(1000,2000,2);      # even numbers between 1000 and 2000
 my @odd  = range(1001,2001,2);

 my $dice = random(1,6);
 my $color = random(['red','green','blue','yellow','orange']);

 pushr $arrayref[$num], @stuff;      # push @{ $arrayref[$num] }, @stuff ... popr, shiftr, unshiftr

 print 2**200;       # 1.60693804425899e+60
 print big(2)**200;  # 1606938044258990275541962092341162602522202993782792835301376

 ...and much more.

=encoding utf8

=head1 ABSTRACT

About 120 more or less useful perl subroutines lumped together and exported into your namespace.

=head1 DESCRIPTION

Subs created and collected since the mid-90s.

=head1 INSTALLATION

 sudo cpan Acme::Tools
 sudo cpanm Acme::Tools   # after: sudo apt-get install cpanminus make   # for Ubuntu 12.04

Or to get the very newest:

 git clone https://github.com/kjetillll/Acme-Tools.git
 cd Acme-Tools
 perl Makefile.PL
 make test
 sudo make install

=head1 EXPORT

Almost every sub, about 90 of them.

Beware of namespace pollution. But what did you expect from an Acme module?

=head1 NUMBERS

=head2 num2code

See L</code2num>

=head2 code2num

C<num2code()> convert numbers (integers) from the normal decimal system to some arbitrary other number system.
That can be binary (2), oct (8), hex (16) or others.

Example:

 print num2code(255,2,"0123456789ABCDEF");  # prints FF
 print num2code( 14,2,"0123456789ABCDEF");  # prints 0E

...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
...and 14 are converted to 0E, with leading 0 because of the second argument 2.

Example:

 print num2code(1234,16,"01")

Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.

To convert back:

 print code2num("0000010011010010","01");  #prints 1234

C<num2code()> can be used to compress numeric IDs to something shorter:

 $chars="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";
 print num2code("241274432",5,$chars);     # prints EOOv0
 print code2num("EOOv0",$chars);           # prints 241274432

=cut

#Math::BaseCnv

sub num2code {
  return num2code($_[0],0,$_[1]) if @_==2;
  my($num,$digits,$validchars,$start)=@_;
  my $l=length($validchars);
  my $key;
  $digits||=9e9;
  no warnings;
  croak if $num<$start;
  $num-=$start;
  for(1..$digits){
    $key=substr($validchars,$num%$l,1).$key;
    $num=int($num/$l);
    last if $digits==9e9 and !$num;
  }
  croak if $num>0;
  return $key;
}

sub code2num {
  my($code,$validchars,$start)=@_; $start=0 if!defined$start;
  my $l=length($validchars);
  my $num=0;
  $num=$num*$l+index($validchars,$_) for split//,$code;
  return $num+$start;
}

=head2 base

Numbers in any number system of base between 2 and 36. Using capital letters A-Z for base higher than 10.

 base(2,15)                 # 1111  2 --> binary
 base(8,4096)               # 10000 8 --> octal
 base(10,4096)              # 4096 of course
 base(16,254)               # FE   16 --> hex
 base(16,254.3)             # FE   16 --> hex, can not handle decimal numbers (yet...todo)
 base(36,123456)            # FE   16 --> hex, can not handle decimal numbers (yet...todo)
 base(36,1234567891011)     # FR5HUHC3  base36 using all 0-9 and A-Z as digits, 10+26=36
 base(37,1)                 # die with message 'base not 2-36'
 base($x,0)                 # 0
 base(16, 14,15,16,17)      # list of four elements: E F 10 11

=head2 dec2bin dec2hex dec2oct bin2dec bin2hex bin2oct hex2dec hex2bin hex2oct oct2dec oct2bin oct2hex

 print dec2bin(101);          # 1100101
 print dec2hex(101);          # 65
 print dec2oct(101);          # 145
 print bin2dec(1010011110);   # 670
 print bin2hex(1010011110);   # 29e
 print bin2oct(1010011110);   # 1236
 print hex2dec(101);          # 257
 print hex2bin(101);          # 100000001
 print hex2oct(101);          # 401
 print oct2dec(101);          # 65
 print oct2bin(101);          # 1000001
 print oct2hex(101);          # 41

=cut

sub _base{my($b,$n)=@_;$n?_base($b,int$n/$b).chr(48+$n%$b+7*($n%$b>9)):''} #codegolf
sub base {
  my($b,$n)=@_;
  @_>2        ? (map base($b,$_),@_[1..$#_])
 :$b<2||$b>36 ? croak"base not 2-36"
 :$n>0        ?     _base($b,$n)
 :$n<0        ? "-"._base($b,-$n)
 :!defined $n ? undef
 :$n==0       ? 0
 :              croak
}

sub dec2bin { sprintf"%b",shift           }
sub dec2hex { sprintf"%x",shift           }
sub dec2oct { sprintf"%o",shift           }
sub bin2dec {             oct("0b".shift) }
sub bin2hex { sprintf"%x",oct("0b".shift) }
sub bin2oct { sprintf"%o",oct("0b".shift) }
sub hex2dec {             hex(shift)      }
sub hex2bin { sprintf"%b",hex(shift)      }
sub hex2oct { sprintf"%o",hex(shift)      }
sub oct2dec {             oct(shift)      }
sub oct2bin { sprintf"%b",oct(shift)      }

Tools.pm  view on Meta::CPAN


=item * number systems

=item * currencies

=back

B<Examples:>

 print conv( 2000, "meters", "miles" );  #prints 1.24274238447467
 print conv( 2.1, 'km', 'm');            #prints 2100
 print conv( 70,"cm","in");              #prints 27.5590551181102
 print conv( 4,"USD","EUR");             #prints 3.20481552905431 (depending on todays rates)
 print conv( 4000,"b","kb");             #prints 3.90625 (1 kb = 1024 bytes)
 print conv( 4000,"b","Kb");             #prints 4       (1 Kb = 1000 bytes)
 print conv( 1000,"mb","kb");            #prints 1024000
 print conv( 101010,"bin","roman");      #prints XLII
 print conv( "DCCXLII","roman","oct");   #prints 1346

B<Units, types of measurement and currencies supported by C<conv> are:>

Note: units starting with the symbol _ means that all metric
prefixes from yocto 10^-24 to yotta 10^+24 is supported, so _m means
km, cm, mm, µm and so on. And _N means kN, MN GN and so on.

Note2: Many units have synonyms: m, meter, meters ...

 acceleration: g, g0, m/s2, mps2

 angle:        binary_degree, binary_radian, brad, deg, degree, degrees,
               gon, grad, grade, gradian, gradians, hexacontade, hour,
               new_degree, nygrad, point, quadrant, rad, radian, radians,
               sextant, turn

 area:         a, ar, are, ares, bunder, ca, centiare, cho, cm2,
               daa, decare, decares, deciare, dekar,
               djerib, m2, dunam, dönöm, earths, feddan, ft2, gongqing, ha
               ha, hectare, hectares, hektar, jerib, km2, m2, manzana,
               mi2, mm2, mu, qing, rai, sotka,
               sqcm, sqft, sqkm, sqm, sqmi, sqmm
               stremmata, um2, µm2

 bytes:        Eb, Gb, Kb, KiB, Mb, Pb, Tb, Yb, Zb, b, byte,
               kb, kilobyte,  mb, megabyte,
               gb, gigabyte,  tb, terabyte,
               pb, petabyte,  eb, exabyte,
               zb, zettabyte, yb, yottabyte

 charge:       As, C, _e, coulomb, e

 current:      A, _A, N/m2

 energy:       BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
               cal, calorie, calories, eV, electronvolt, BeV,
               erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh, MWh, GWh, TWh
               kcal, kilocalorie, kilocalories,
               newtonmeter, newtonmeters, th, thermie

 force:        N, _N, dyn, dyne, dynes, lb, newton

 length:       NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
               in, inch, inches, km, league, lightyear, ls, ly,
               m, meter, meters, mi, mil, mile, miles,
               nautical mile, nautical miles, nmi,
               parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm, angstrom

 mass:         Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
               grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
               lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
               pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
               pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey

 mileage:      mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km

 money:        AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
               COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
               ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
               NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
               SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR,      BTC, LTC, mBTC, XBT
               Currency rates are automatically updated from the net
               at least every 24h since last update (on linux/cygwin).

 numbers:      dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
               gr, great_gross, small_gross  (not supported: decimal numbers)

 power:        BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
               W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy

 pressure:     N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr

 radioactivity: Bq, becquerel, curie

 speed:        _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
               kn, knot, knots, kt, kts, mach, machs, c, fps, ft/s, ftps

 temperature:  C, F, K, celsius, fahrenheit, kelvin

 time:         _s, biennium, century, d, day, days, decade, dy, fortnight,
               h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
               millennium, min, minute, minutes, mo, moment, mon, month,
               olympiad, quarter, s, season, sec, second, seconds, shake,
               tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
               year, years, ygregorian, yjulian, ysideral, ytropical

 volume:        l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
                gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
                pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
                floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
                container (or container20), container40, container40HC, container45HC

See: L<http://en.wikipedia.org/wiki/Units_of_measurement>

=cut

#TODO:  @arr2=conv(\@arr1,"from","to")         # should be way faster than:
#TODO:  @arr2=map conv($_,"from","to"),@arr1
#TODO:  conv(123456789,'b','h'); # h converts to something human-readable

our %conv=(
	 length=>{
		  m       => 1,
		  _m      => 1,
		  meter   => 1,
		  meters  => 1,
		  metre   => 1,
		  metres  => 1,
		  km      => 1000,
		  mil     => 10000,                   #scandinavian #also: inch/1000!
		  in      => 0.0254,
		  inch    => 0.0254,
		  inches  => 0.0254,
		  ft      => 0.0254*12,               #0.3048 m
		  feet    => 0.0254*12,               #0.3048 m
		  yd      => 0.0254*12*3,             #0.9144 m
		  yard    => 0.0254*12*3,             #0.9144 m
		  yards   => 0.0254*12*3,             #0.9144 m
		  fathom  => 0.0254*12*3*2,           #1.8288 m
		  fathoms => 0.0254*12*3*2,           #1.8288 m
		  chain   => 0.0254*12*3*22,          #20.1168 m
		  chains  => 0.0254*12*3*22,          #20.1168 m
		  furlong => 0.0254*12*3*22*10,       #201.168 m
		  furlongs=> 0.0254*12*3*22*10,       #201.168 m
		  mi      => 0.0254*12*3*22*10*8,     #1609.344 m
		  mile    => 0.0254*12*3*22*10*8,     #1609.344 m
		  miles   => 0.0254*12*3*22*10*8,
		  league  => 0.0254*12*3*22*10*8*3,   #4828.032 m
		  leagues => 0.0254*12*3*22*10*8*3,   #4828.032 m
		  yard_imp           => 0.914398416,
		  yard_imperical     => 0.914398416,
                  NM                 => 1852,           #nautical mile
                  nmi                => 1852,           #nautical mile
                  'nautical mile'    => 1852,
                  'nautical miles'   => 1852,
		  micron             => 1e-6,
		  microns            => 1e-6,
		  micrometre         => 1e-6,
		  micrometres        => 1e-6,
                  'Ã…'                => 1e-10,
                  'ångstrøm'         => 1e-10,
                  'angstrom'         => 1e-10,
		  fm                 => 1e-15,
		  fermi              => 1e-15, #in honour of Enrico Fermi
		  fermis             => 1e-15, #in honour of Enrico Fermi
		  femtometer         => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
		  femtometre         => 1e-15,
		  femtometers        => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
		  femtometres        => 1e-15,
		  attometer          => 1e-18, #derived from "atten/atton" (=eighteen)
		  attometre          => 1e-18,
		  attometers         => 1e-18, #derived from "atten/atton" (=eighteen)
		  attometres         => 1e-18,
		  ly                 => 299792458*3600*24*365.25,
		  lightyear          => 299792458*3600*24*365.25, # = 9460730472580800 by def
		  ls                 => 299792458,      #light-second
                  au                 => 149597870700,   # by def: meters earth to sun
                  astronomical_unit  => 149597870700,
                 'astronomical unit' => 149597870700,
                  pc                 => 149597870700*648000/$PI, #3.0857e16 = 3.26156 ly
                 _pc                 => 149597870700*648000/$PI,
                  parsec             => 149597870700*648000/$PI,
		  attoparsec         => 149597870700*648000/$PI/1e18,
		  apc                => 149597870700*648000/$PI/1e18,
		  planck             => 1.61619997e-35, #planck length
		  #Norwegian (old) lengths:
		  tomme         => 0.0254,
		  tommer        => 0.0254,
		  fot           => 0.0254*12,               #0.3048m
		  alen          => 0.0254*12*2,             #0.6096m
		  favn          => 0.0254*12*2*3,           #1.8288m
		  kvart         => 0.0254*12*2/4,           #0.1524m a quarter alen
                  #--https://upload.wikimedia.org/wikipedia/commons/e/eb/English_length_units_graph.svg
                  twip          => 0.0254 / 6 / 12 / 20,
                  point         => 0.0254 / 6 / 12,
                  pica          => 0.0254 / 6,
                  line          => 0.0254 / 12,
		  thou          => 0.0254 / 1000,
                  barleycorn    => 0.0254 / 3,
                  poppyseed     => 0.0254 / 3 / 4,
                  finger        => 0.0254 / 6 / 12 * 63,
                  palm          => 0.0254 * 3,
                  digit         => 0.0254 * 3 / 4,
                  nail          => 0.0254 * 3 / 4 * 3,
                  rack          => 0.0254 * 1.75,
                  stick         => 0.0254 * 2,
                  hand          => 0.0254 * 2 * 2,
                  foot          => 0.0254 * 2 * 2 * 3,
                  shaftment     => 0.0254 * 3 * 2,
                  span          => 0.0254 * 3 * 3,
                  ell           => 0.0254 * 3 * 3 * 5,
                  pace          => 0.0254 * 3 * 2 * 5,
                  step          => 0.0254 * 3 * 2 * 5,
                  cubit         => 0.0254 * 3 * 2 * 3,
                  rod           => 0.0254 * 3 * 2 * 3 * 11,
                  link          => 0.0254 * 3 * 2 * 3 * 11 / 25,
		  yard          => 0.0254 * 2 * 2 * 3 * 3,
                  grade         => 0.0254 * 3 * 2 * 5 * 2,
                  rope          => 0.0254 * 3 * 2 * 5 * 2 * 4,
                  skein         => 0.0254 * 3 * 3 * 5 * 96,                   # 96 ell
                  fathom        => 0.0254 * 2 * 2 * 3 * 3 * 2,                # 2 yard
                  spindle       => 0.0254 * 3 * 3 * 5 * 96 * 120,             # 120 skein
                  gunter_chain  => 0.0254 * 2 * 2 * 3 * 3 * 2 * 11,           # 11 fathom
                  ramsden_chain => 0.0254 * 3 * 2 * 5 * 2 * 4 * 5,            # 5 rope
                  shackle       => 0.0254 * 2 * 2 * 3 * 3 * 2 * 15,           # 15 fathom
                  cable         => 0.0254 * 2 * 2 * 3 * 3 * 2 * 100,          # 100 fathom
                  furlong       => 0.0254 * 2 * 2 * 3 * 3 * 2 * 11 * 10,      # 10 gunter_chain, 220 yard
                  roman_mile    => 0.0254 * 3 * 2 * 5 * 2 * 4 * 5 * 50,       # 50 ramsden_chain
                  statute_mile  => 0.0254 * 2 * 2 * 3 * 3 * 2 * 11 * 10 * 8,  # 8 furlong
                  nautic_mile   => 0.0254 * 2 * 2 * 3 * 3 * 2 * 100 * 10,     # 10 cable
                  league        => 0.0254 * 2 * 2 * 3 * 3 * 2 * 100 * 10 * 5, # 5 nautic_mile
		  siriometer    => 149597870700*1e6,                          # 1 million astronomical units
		 },
	 mass  =>{ #https://en.wikipedia.org/wiki/Unit_conversion#Mass
		  g            => 1,
		  _g           => 1,
		  gram         => 1,
		  grams        => 1,
		  kilo         => 1000,
		  kilos        => 1000,
		  t            => 1000000,
		  tonn         => 1000000,
		  tonne        => 1000000,
		  tonnes       => 1000000,
		  seer         => 933.1,          # ~14400 grains (if 933.104304), India, Aden, Saudi-Arabia
		  maund        => 37320,          # avg of Indias different mauds, ~ 40 x seer
		  lb           => 453.59237,      # ~453g
		  lbs          => 453.59237,
		  lbm          => 453.59237,
		  lb_av        => 453.59237,
		  lb_t         => 373.2417216,
		  lb_troy      => 373.2417216,    # 5760 grains = 453.59237*144/175
		  pound        => 453.59237,      # 7000 grains

Tools.pm  view on Meta::CPAN

		  container     => 33.1e3,  #container20
		  container20   => 33.1e3,
		  container40   => 67.5e3,
		  container40HC => 75.3e3,
		  container45HC => 86.1e3,
		  firkin        => 282*0.0254**3 * 8, #8 gallon_ale
		  #Norwegian:
                  meterfavn => 2 * 2 * 0.6,           #fire wood/ved 2.4 m3
                  storfavn  => 2 * 2 * 3,             #fire wood/ved 12 m3
		 },
	 time  =>{
		  s           => 1,
		  _s          => 1,
		  sec         => 1,
		  second      => 1,
		  seconds     => 1,
		  m           => 60,
		  min         => 60,
		  minute      => 60,
		  minutes     => 60,
		  h           => 60*60,
		  hr          => 60*60,
		  hour        => 60*60,
		  hours       => 60*60,
		  d           => 60*60*24,
		  dy          => 60*60*24,
		  day         => 60*60*24,
		  days        => 60*60*24,
		  w           => 60*60*24*7,
		  week        => 60*60*24*7,
		  weeks       => 60*60*24*7,
		  mo	      => 60*60*24 * 365.2425/12,
		  mon	      => 60*60*24 * 365.2425/12,
		  month	      => 60*60*24 * 365.2425/12,
		  quarter     => 60*60*24 * 365.2425/12 * 3, #3 months
		  season      => 60*60*24 * 365.2425/12 * 3, #3 months
		  y           => 60*60*24 * 365.2425, # 365+97/400    #97 leap yers in 400 years
		  year        => 60*60*24 * 365.2425,
		  years       => 60*60*24 * 365.2425,
		  yjulian     => 60*60*24 * 365.25,   # 365+1/4
		  y365        => 60*60*24 * 365,      # finance/science
		  ycommon     => 60*60*24 * 365,      # finance/science
		  ygregorian  => 60*60*24 * 365.2425, # 365+97/400
		 #ygaussian   => 365+(6*3600+9*60+56)/(24*3600),  # 365+97/400
                  ytropical   => 60*60*24 * 365.24219,
                  ysideral    => 365.256363004,
		  ySI         => 60*60*24*365.25, #31556925.9747
                  decade      =>   10 * 60*60*24*365.2425,
                  biennium    =>    2 * 60*60*24*365.2425,
                  triennium   =>    3 * 60*60*24*365.2425,
                  olympiad    =>    4 * 60*60*24*365.2425,
                  lustrum     =>    5 * 60*60*24*365.2425,
                  indiction   =>   15 * 60*60*24*365.2425,
		  jubilee     =>   50 * 60*60*24*365.2425,
		  century     =>  100 * 60*60*24*365.2425,
		  millennium  => 1000 * 60*60*24*365.2425,
                  shake       => 1e-8,
                  moment      => 3600/40,  #1/40th of an hour, used by Medieval Western European computists
		  ke          => 864,      #1/100th of a day, trad Chinese, 14m24s
		  fortnight   => 14*24*3600,
                  tp          => 5.3910632e-44,  #planck time, time for ligth to travel 1 planck length
		  nanocentury =>  100 * 60*60*24*365.2425 / 1e9,   #3.156 ~ pi seconds, response time limit (usability)
		  warhol      => 15*60,                            #"fifteen minutes of fame"
		 },
          speed=>{
                 'm/s'      => 1,
                '_m/s'      => 1,
                  mps       => 1,
                  mph       => 1609.344/3600,
                 'mi/h'     => 1609.344/3600,
                  kmh       => 1/3.6,
                  kmph      => 1/3.6,
                 'km/h'     => 1/3.6,
                  kmt       => 1/3.6, # t=time(=hour) or temps (scandinavian and french and dutch)
                 'km/t'     => 1/3.6,
 		  kt        => 1852/3600,
 		  kts       => 1852/3600,
 		  kn        => 1852/3600,
 		  knot      => 1852/3600,
 		  knots     => 1852/3600,
 		  knop      => 1852/3600,    #scandinavian
		  c         => 299792458,    #speed of light, exact due to definition of meter
		  mach      => 340.3,        #speed of sound
		  machs     => 340.3,
                  fps       => 0.3048, #0.0254*12
                  ftps      => 0.3048,
                 'ft/s'     => 0.3048,
                 },
	  acceleration=>{
                 'm/s2'     => 1,
                 'mps2'     => 1,
                  g         => 9.80665,
                  g0        => 9.80665,
                  #0-100kmh or ca 0-60 mph x seconds...
                 },
         temperature=>{  #http://en.wikipedia.org/wiki/Temperature#Conversion
                  C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
                 },
         radioactivity=>{
                  Bq          => 1,
		  becquerel   => 1,
		  curie       => 3.7e10,
                 },
         current=> {
                  A     => 1,
                  _A    => 1,
                 'N/m2' => 2e-7,
	         },
         charge=>{
                  e       => 1,
                  _e      => 1,
                  C       => 6.24150964712042e+18,
                  coulomb => 6.24150964712042e+18,
                  As      => 6.24150964712042e+18,
                 #Faraday unit of charge ???
                 },
         power=> {
                  W        => 1,
 		  _W       => 1,
                 'J/s'     => 1,
                  Jps      => 1,

Tools.pm  view on Meta::CPAN

Examples:

 print bytes_readable(999);                              # 999 B
 print bytes_readable(1000);                             # 1000 B
 print bytes_readable(1001);                             # 0.98 kB
 print bytes_readable(1024);                             # 1.00 kB
 print bytes_readable(1153433.6);                        # 1.10 MB
 print bytes_readable(1181116006.4);                     # 1.10 GB
 print bytes_readable(1209462790553.6);                  # 1.10 TB
 print bytes_readable(1088516511498.24*1000);            # 990.00 TB
 print bytes_readable(1088516511498.24*1000, 3);         # 990.000 TB
 print bytes_readable(1088516511498.24*1000, 1);         # 990.0 TB

=cut

sub bytes_readable {
  my $bytes=shift();
  my $d=shift()||2; #decimals
  return undef if !defined $bytes;
  return "$bytes B"                         if abs($bytes) <= 2** 0*1000; #bytes
  return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) <  2**10*1000; #kilobyte
  return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) <  2**20*1000; #megabyte
  return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) <  2**30*1000; #gigabyte
  return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) <  2**40*1000; #terrabyte
  return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
}

=head2 sec_readable

Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.

 print sec_readable( 0 );           # 0s
 print sec_readable( 0.0123 );      # 0.0123s
 print sec_readable(-0.0123 );      # -0.0123s
 print sec_readable( 1.23 );        # 1.23s
 print sec_readable( 1 );           # 1s
 print sec_readable( 9.87 );        # 9.87s
 print sec_readable( 10 );          # 10s
 print sec_readable( 10.1 );        # 10.1s
 print sec_readable( 59 );          # 59s
 print sec_readable( 59.123 );      # 59.1s
 print sec_readable( 60 );          # 1m 0s
 print sec_readable( 60.1 );        # 1m 0s
 print sec_readable( 121 );         # 2m 1s
 print sec_readable( 131 );         # 2m 11s
 print sec_readable( 1331 );        # 22m 11s
 print sec_readable(-1331 );        # -22m 11s
 print sec_readable( 13331 );       # 3h 42m
 print sec_readable( 133331 );      # 1d 13h
 print sec_readable( 1333331 );     # 15d 10h
 print sec_readable( 13333331 );    # 154d 7h
 print sec_readable( 133333331 );   # 4yr 82d
 print sec_readable( 1333333331 );  # 42yr 91d

=cut

sub sec_readable {
  my $s=shift();
  my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
   !defined$s     ? undef
  :!length($s)    ? ''
  :$s<0           ? '-'.sec_readable(-$s)
  :$s<60 && int($s)==$s
                  ? $s."s"
  :$s<60          ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
  :$s<3600        ? int($s/60)."m " .($s%60)        ."s"
  :$s<24*3600     ? int($s/$h)."h " .int(($s%$h)/60)."m"
  :$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
  :                 int($s/$y)."yr ".int(($s%$y)/$d)."d";
}

=head2 int2roman

Converts integers to roman numbers.

B<Examples:>

 print int2roman(1234);   # prints MCCXXXIV
 print int2roman(1971);   # prints MCMLXXI

(Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)

 I = 1
 V = 5
 X = 10
 L = 50
 C = 100     (centum)
 D = 500
 M = 1000    (mille)

See also L<Roman>.

See L<http://en.wikipedia.org/wiki/Roman_numbers> for more.

=head2 roman2int

 roman2int("MCMLXXI") == 1971

=cut

#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
#see also t/17_roman.t sub int2roman_old
sub int2roman {
    my $n=shift;
    !defined$n  ? undef
  : !length($n) ? ""
  : $n<0        ? "-".int2roman(-$n)
  : int($n)!=$n ? croak"int2roman: $n is not an integer"
#  : $] >= 5.014 ?        #s///r modifier introduced in perl v5.14
#        ("I" x $n)
#        =~s,I{1000},M,gr #unnecessary, but speedup for n>1000
#        =~s,I{100},C,gr  #unnecessary, but speedup for n>100
#        =~s,I{10},X,gr   #unnecessary, but speedup for n>10
#        =~s,IIIII,V,gr
#        =~s,IIII,IV,gr
#        =~s,VV,X,gr
#        =~s,VIV,IX,gr
#        =~s,XXXXX,L,gr
#        =~s,XXXX,XL,gr
#        =~s,LL,C,gr
#        =~s,LXL,XC,gr
#        =~s,CCCCC,D,gr
#        =~s,CCCC,CD,gr
#        =~s,DD,M,gr
#        =~s,DCD,CM,gr
  : do {
      $n="I" x $n;
      $n=~s,I{1000},M,g; #unnecessary, but speedup for n>1000
      $n=~s,I{100},C,g;  #unnecessary, but speedup for n>100
      $n=~s,I{10},X,g;   #unnecessary, but speedup for n>10
      $n=~s,IIIII,V,g;
      $n=~s,IIII,IV,g;
      $n=~s,VV,X,g;
      $n=~s,VIV,IX,g;
      $n=~s,XXXXX,L,g;
      $n=~s,XXXX,XL,g;
      $n=~s,LL,C,g;
      $n=~s,LXL,XC,g;
      $n=~s,CCCCC,D,g;
      $n=~s,CCCC,CD,g;
      $n=~s,DD,M,g;
      $n=~s,DCD,CM,g;
      $n
     }
}
sub int2roman_golfed{my$r='I'x pop;for(qw(IVX XLC CDM)){my($I,$V,$X)=split//;$r=~s,$I$I$I$I($I?),$1?$V:"$I$V",ge;$r=~s,$V($I?)$V,$1$X,g}$r}

sub roman2int {
  my($r,$n,%c)=(shift,0,'',0,qw/I 1 V 5 X 10 L 50 C 100 D 500 M 1000/);
  $r=~s/^-//?-roman2int($r):
  $r=~s/(C?)([DM])|(X?)([LCDM])|(I?)([VXLCDM])|(I)|(.)/
        croak "roman2int: Invalid number $r" if $8;
        $n += $c{$2||$4||$6||$7} - $c{$1||$3||$5||''}; ''/eg && $n
}

#sub roman2int_slow {
#  my $r=shift;
#     $r=~s,^\-,,  ?    0-roman2int($r)
#   : $r=~s,^M,,i  ? 1000+roman2int($r)
#   : $r=~s,^CM,,i ?  900+roman2int($r)
#   : $r=~s,^D,,i  ?  500+roman2int($r)
#   : $r=~s,^CD,,i ?  400+roman2int($r)
#   : $r=~s,^C,,i  ?  100+roman2int($r)
#   : $r=~s,^XC,,i ?   90+roman2int($r)
#   : $r=~s,^L,,i  ?   50+roman2int($r)
#   : $r=~s,^XL,,i ?   40+roman2int($r)
#   : $r=~s,^X,,i  ?   10+roman2int($r)
#   : $r=~s,^IX,,i ?    9+roman2int($r)
#   : $r=~s,^V,,i  ?    5+roman2int($r)
#   : $r=~s,^IV,,i ?    4+roman2int($r)
#   : $r=~s,^I,,i  ?    1+roman2int($r)
#   : !length($r)  ?    0
#   : croak "Invalid roman number $r";
#}

=head2 distance

B<Input:> the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2

B<Output:> the air distance in meters between the two points

Calculation is done using the Haversine Formula for spherical distance:

  a = sin((lat2-lat1)/2)^2
    + sin((lon2-lon1)/2)^2 * cos(lat1) * cos(lat2);

  c = 2 * atan2(min(1,sqrt(a)),
	        min(1,sqrt(1-a)))

  distance = c * R

With earth radius set to:

  R = Re - (Re-Rp) * sin(abs(lat1+lat2)/2)

Where C<Re = 6378137.0> (equatorial radius) and C<Rp = 6356752.3> (polar radius).

B<Example:>

 my @oslo = ( 59.93937,  10.75135);    # oslo in norway
 my @rio  = (-22.97673, -43.19508);    # rio in brazil

 printf "%.1f km\n",   distance(@oslo,@rio)/1000;                  # 10431.7 km
 printf "%.1f km\n",   distance(@rio,@oslo)/1000;                  # 10431.7 km
 printf "%.1f nmi\n",  distance(@oslo,@rio)/1852.000;              # 5632.7 nmi   (nautical miles)
 printf "%.1f miles\n",distance(@oslo,@rio)/1609.344;              # 6481.9 miles
 printf "%.1f miles\n",conv(distance(@oslo,@rio),"meters","miles");# 6481.9 miles

See L<http://www.faqs.org/faqs/geography/infosystems-faq/>

and L<http://mathforum.org/library/drmath/view/51879.html>

and L<http://en.wikipedia.org/wiki/Earth_radius>

and L<Geo::Direction::Distance>, but Acme::Tools::distance() is about 8 times faster.

=cut

our $Distance_factor = $PI / 180;
sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) }
sub distance_great_circle {
  my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
  my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
  my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
  return $R*acos(sin($lat1)*sin($lat2)+cos($lat1)*cos($lat2)*cos($lon2-$lon1))
}

sub distance {
  my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
  my $a= sin(($lat2-$lat1)/2)**2
       + sin(($lon2-$lon1)/2)**2 * cos($lat1) * cos($lat2);
  my $sqrt_a  =sqrt($a);    $sqrt_a  =1 if $sqrt_a  >1;

Tools.pm  view on Meta::CPAN

Unless GMP is installed for perl like this, the Math::Big*-modules
will fall back to using similar but slower built in modules. See: L<https://gmplib.org/>

=cut

sub bigi {
  eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
  if (wantarray) { return (map Math::BigInt->new($_),@_)  }
  else           { return      Math::BigInt->new($_[0])   }
}
sub bigf {
  eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
  if (wantarray) { return (map Math::BigFloat->new($_),@_)  }
  else           { return      Math::BigFloat->new($_[0])   }
}
sub bigr {
  eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
  if (wantarray) { return (map Math::BigRat->new($_),@_)  }
  else           { return      Math::BigRat->new($_[0])   }
}
sub big {
  wantarray
  ? (map     /\./ ? bigf($_)    :        /\// ? bigr($_)    : bigi($_), @_)
  :   $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
}
sub bigscale {
  @_==1 or croak "bigscale requires one and only one argument";
  my $scale=shift();
  eval q(use Math::BigInt    try=>"GMP") if !$INC{'Math/BigInt.pm'};
  eval q(use Math::BigFloat  try=>"GMP") if !$INC{'Math/BigFloat.pm'};
  eval q(use Math::BigRat    try=>"GMP") if !$INC{'Math/BigRat.pm'};
  Math::BigInt->div_scale($scale);
  Math::BigFloat->div_scale($scale);
  Math::BigRat->div_scale($scale);
  return;
}

  #my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
#*)
         #    ( 6378157.5, 6356772.2 )  #hmm
    #my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
    #my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
    #warn "e=$e\n";
    #warn "t=".(1 - $e**2)."\n";
    #warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
    #my $t=1 - $e**2;
    #my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
    #warn "t=$t\n";
    #warn "n=$n\n";
    #$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
    #$R=$a * $t/$n;

#=head2 fractional
#=cut

sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
  carp "fractional: NOT FINISHED";
  my $n=shift;
  print "----fractional n=$n\n";
  my $nn=$n; my $dec;
  $nn=~s,\.(\d+)$,$dec=length($1);$1.,;
  my $l;
  my $max=0;
  my($te,$ne);
  for(1..length($nn)/2){
    if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
      print "_ = $_ ".length($2)."\n";
      if(length($2)>$max){
        $l=$_;
	$te="$1$3"-$1;
        $max=length($2);
      }
    }
  }
  return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
  print "l=$l max=$max\n";
  $ne="9" x $l;
  print log($n),"\n";
  my $st=sub{print "status: ".($te/$ne)."   n=$n   ".($n/$te*$ne)."\n"};
  while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
  while($te/$n/$ne<0.99){ &$st(); $te*=10 }
  &$st();
  while(1){
    my $d=gcd($te,$ne); print "gcd=$d\n";
    last if $d==1;
    $te/=$d; $ne/=$d;
  }
  &$st();
  wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
}

=head2 isnum

B<Input:> String to be tested on this regexp:

C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>

If no argument is given isnum checks C<< $_ >>.

B<Output:> True or false (1 or 0)

 use Acme::Tools;
 my @e=('   +32.12354E-21  ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
 print isnum()       ? 'num' : 'str' for @e;  #prints num for every element except the last two
 print $_=~$Re_isnum ? 'num' : 'str' for @e;  #same but slighhly faster

=cut

our $Re_isnum     =qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
our $Re_isnum_wolz=qr/^ \s* [\-\+]? (?: ([1-9]\d*|0)?\.\d+ | [1-9]\d* | 0 ) (?:[eE][\-\+]?\d+)?\s*$/x; #without leading zero
sub isnum {(@_?$_[0]:$_)=~$Re_isnum}

=head2 between

Input: Three arguments.

Returns: Something I<true> if the first argument is numerically between the two next. Uses Perls C<< < >>, C<< >= >> and C<< <= >> operators.

=head2 btw

Like L<between> but instead of assuming numbers it checks all three input args
and does alphanumerical comparisons (with Perl operators C<lt>, C<ge> and C<le>) if any of the
three input args don't look like a number or look like a number but with
one or more leading zeros.

 btw(1,1,10)             #true numeric order since all three looks like number according to =~$Re_isnum
 btw(1,'02',13)          #true leading zero in '02' leads to alphabetical order
 btw(10, 012,10)         #true leading zero here means oct number, 012 = 10 (8*1+2), so 10 is btw 10 and 10
 btw('003', '02', '09')  #false because '003' lt '02'
 btw('a', 'b', 'c')      #false because 'a' lt 'b'
 btw('a', 'B', 'c')      #true because upper case letters comes before lower case ones in the "ascii alphabet"

Tools.pm  view on Meta::CPAN

  return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
  $val < $min ? $min :
  $val > $max ? $max :
                $val;
}
sub bound { curb(@_) }

=head2 log10

=head2 log2

=head2 logn

 print log10(1000);                  # prints 3
 print log10(10000*sqtr(10));        # prints 4.5
 print log2(16);                     # prints 4
 print logn(4096, 8);                # prints 4 (12/3=4)
 print logn($PI, 2.71828182845905);  # same as  print log($PI)  using perls builtin log()

=cut

sub log10 { log($_[0]) / log(10)    }
sub log2  { log($_[0]) / log(2)     }
sub logn  { log($_[0]) / log($_[1]) }

=head1 STRINGS

=head2 upper

=head2 lower

Returns input string as uppercase or lowercase.

Can be used if Perls build in C<uc()> and C<lc()> for some reason does not convert æøå or other latin1 letters outsize a-z.

Converts C<< æøåäëïöüÿâêîôûãõàèìòùáéíóúýñð >> to and from C<< ÆØÅÄËÏÖÜ?ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ >>

See also C<< perldoc -f uc >> and C<< perldoc -f lc >>

=head2 trim

Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:

 trim(" asdf \t\n    123 ")  eq "asdf 123"
 trim(" asdf\t\n    123\n")  eq "asdf\t123"

Works on C<< $_ >> if no argument i given:

 print join",", map trim, " please ", " remove ", " my ", " spaces ";   # please,remove,my,spaces
 print join",", trim(" please ", " remove ", " my ", " spaces ");       # works on arrays as well
 my $s=' please '; trim(\$s);                                           # now  $s eq 'please'
 trim(\@untrimmedstrings);                                              # trims array strings inplace
 @untrimmedstrings = map trim, @untrimmedstrings;                       # same, works on $_
 trim(\$_) for @untrimmedstrings;                                       # same, works on \$_

=head2 lpad

=head2 rpad

Left or right pads a string to the given length by adding one or more spaces at the end for  I<rpad> or at the start for I<lpad>.

B<Input:> First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
Default is space.

 rpad('gomle',9);         # 'gomle    '
 lpad('gomle',9);         # '    gomle'
 rpad('gomle',9,'-');     # 'gomle----'
 lpad('gomle',9,'+');     # '++++gomle'
 rpad('gomle',4);         # 'goml'
 lpad('gomle',4);         # 'goml'
 rpad('gomle',7,'xyz');   # 'gomlxy'
 lpad('gomle',10,'xyz');  # 'xyzxygoml'

=head2 cpad

Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.

 cpad('mat',5)            eq ' mat '
 cpad('mat',4)            eq 'mat '
 cpad('mat',6)            eq ' mat  '
 cpad('mat',9)            eq '   mat   '
 cpad('mat',5,'+')        eq '+mat+'
 cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'

=cut

sub upper {no warnings;my $s=@_?shift:$_;$s=~tr/a-zæøåäëïöü.âêîôûãõàèìòùáéíóúýñð/A-ZÆØÅÄËÏÖÜ.ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/;$s}
sub lower {no warnings;my $s=@_?shift:$_;$s=~tr/A-ZÆØÅÄËÏÖÜ.ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/a-zæøåäëïöü.âêîôûãõàèìòùáéíóúýñð/;$s}

sub trim {
  return trim($_) if !@_;
  return map trim($_), @_ if @_>1;
  my $s=shift;
  if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
  if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
  $s=~s,^\s+|(?<=\s)\s+|\s+$,,g if defined $s;
  $s;
}

sub rpad {
  my($s,$l,$p)=@_;
  $p=' ' if @_<3 or !length($p);
  $s.=$p while length($s)<$l;
  substr($s,0,$l);
}

sub lpad {
  my($s,$l,$p)=@_;
  $p=' ' if @_<3 or !length($p);
  $l<length($s)
  ? substr($s,0,$l)
  : substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
}

sub cpad {
  my($s,$l,$p)=@_;
  $p=' ' if @_<3 or !length($p);
  my $ls=length($s);
  return substr($s,0,$l) if $l<$ls;
  $p=$p x (($l-$ls+2)/length($p));
  substr($p, 0, ($l-$ls  )/2) . $s .
  substr($p, 0, ($l-$ls+1)/2);
}

sub cpad_old {
  my($s,$l,$p)=@_;
  $p=' ' if !length($p);
  return substr($s,0,$l) if $l<length($s);
  my $i=0;
  while($l>length($s)){
    my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
    $i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
    $i+=1/2;
  }
  $s;
}

=head2 trigram

B<Input:> A string (i.e. a name). And an optional x (see example 2)

B<Output:> A list of this strings trigrams (See examlpe)

B<Example 1:>

 print join ", ", trigram("Kjetil Skotheim");

Prints:

 Kje, jet, eti, til, il , l S,  Sk, Sko, kot, oth, the, hei, eim

B<Example 2:>

Default is 3, but here 4 is used instead in the second optional input argument:

 print join ", ", trigram("Kjetil Skotheim", 4);

And this prints:

 Kjet, jeti, etil, til , il S, l Sk,  Sko, Skot, koth, othe, thei, heim

C<trigram()> was created for "fuzzy" name searching. If you have a database of many names,
addresses, phone numbers, customer numbers etc. You can use trigram() to search
among all of those at the same time. If the search form only has one input field.
One general search box.

Store all of the trigrams of the trigram-indexed input fields coupled
with each person, and when you search, you take each trigram of you
query string and adds the list of people that has that trigram. The
search result should then be sorted so that the persons with most hits
are listed first. Both the query strings and the indexed database
fields should have a space added first and last before C<trigram()>-ing
them.

This search algorithm is not includes here yet...

C<trigram()> should perhaps have been named ngram for obvious reasons.

=head2 sliding

Same as trigram (except there is no default width). Works also with arrayref instead of string.

Example:

 sliding( ["Reven","rasker","over","isen"], 2 )

Result:

  ( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )

=head2 chunks

Splits strings and arrays into chunks of given size:

 my @a = chunks("Reven rasker over isen",7);
 my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);

Resulting arrays:

 ( 'Reven r', 'asker o', 'ver ise', 'n' )
 ( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )

=head2 chars

 chars("Tittentei");     # ('T','i','t','t','e','n','t','e','i')

=cut

sub trigram { sliding($_[0],$_[1]||3) }

sub sliding {
  my($s,$w)=@_;
  return map substr($s,$_,$w),   0..length($s)-$w  if !ref($s);
  return map [@$s[$_..$_+$w-1]], 0..@$s-$w         if ref($s) eq 'ARRAY';
}

sub chunks {
  my($s,$w)=@_;
  return $s=~/(.{1,$w})/g                                      if !ref($s);
  return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w  if ref($s) eq 'ARRAY';
}

sub chars { split//, shift }

=head2 repl

Synonym for replace().

=head2 replace

Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.

Works as C<replace()> in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C<replace()> accepts more than three arguments.

Examples:

 print replace("water","ater","ine");  # Turns water into wine
 print replace("water","ater");        # w
 print replace("water","at","eath");   # weather
 print replace("water","wa","ju",
                       "te","ic",
                       "x","y",        # No x is found, no y is returned
                       'r$',"e");      # Turns water into juice. 'r$' says that the r it wants
                                       # to change should be the last letters. This reveals that
                                       # second, fourth, sixth and so on argument is really regexs,
                                       # not normal strings. So use \ (or \\ inside "") to protect
                                       # the special characters of regexes. You probably also
                                       # should write qr/regexp/ instead of 'regexp' if you make
                                       # use of regexps here, just to make it more clear that
                                       # these are really regexps, not strings.

 print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
 print replace('JACK and JUE','J');      # prints ACK and UE
 print replace("abc","a","b","b","c");   # prints ccc           (not bcc)

If the first argument is a reference to a scalar variable, that variable is changed "in place".

Example:

 my $str="test";
 replace(\$str,'e','ee','s','S');
 print $str;                         # prints teeSt

=cut

sub replace { repl(@_) }
sub repl {
  my $str=shift;
  return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
 #return ? if ref($str) eq 'ARRAY';
 #return ? if ref($str) eq 'HASH';
  while(@_){
    my($fra,$til)=(shift,shift);
    defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
  }
  return $str;
}

=head1 ARRAYS

=head2 subarr

The equivalent of C<substr> on arrays or C<splice> without changing the array.
Input: 1) array or arrayref, 2) offset and optionally 3) length. Without a
third argument, subarr returns the rest of the array.

 @top10    = subarr( @array, 0, 10);   # first 10
 @last_two = subarr( @array, -2, 2);   # last 2
 @last_two = subarr( $array_ref, -2);  # also last 2
 @last_six = subarr $array_ref, -6;    # parens are optional

The same can be obtained from C<< @array[$from..$to] >> but that dont work the
same way with negative offsets and boundary control of length.

=cut

#Todo: sjekk paastand over


#sub subarr(+$;$) { #perl>=5.14        # t/35_subarr.t
sub subarr { #perl<5.14
  my($a,$o,$l)=@_;
  $o=@$a+$o if $o<0;
  $o=0      if $o<0;
  $o=@$a-1  if $o>@$a-1;
  $l=@$a-$o if @_<3;
  croak     if $l<0;
  $l=@$a-$o if $l>@$a-$o;
  @$a[$o..$o+$l-1];
}

=head2 min

Returns the smallest number in a list. Undef is ignored.

 @lengths=(2,3,5,2,10,undef,5,4);
 $shortest = min(@lengths);   # returns 2

Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C<max()>, except of course C<< > >> is used instead.

 min(3,4,5)       # 3
 min(3,4,5,undef) # 3
 min(3,4,5,'')    # returns the empty string

=head2 max

Returns the largest number in a list. Undef is ignored.

 @heights=(123,90,134,undef,132);
 $highest = max(@heights);   # 134

=head2 mins

Just as L</min>, except for strings.

 print min(2,7,10);          # 2
 print mins("2","7","10");   # 10
 print mins(2,7,10);         # 10

=head2 maxs

Just as L</mix>, except for strings.

 print max(2,7,10);          # 10
 print maxs("2","7","10");   # 7
 print maxs(2,7,10);         # 7

=cut

sub min  {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
sub max  {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }

=head2 zip

B<Input:> Two or more arrayrefs. A number of equal sized arrays
containing numbers, strings or anything really.

B<Output:> An array of those input arrays zipped (interlocked, merged) into each other.

 print join " ", zip( [1,3,5], [2,4,6] );               # 1 2 3 4 5 6
 print join " ", zip( [1,4,7], [2,5,8], [3,6,9] );      # 1 2 3 4 5 6 7 8 9

Example:

zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:

 my @media = qw/CD DVD VHS LP Blueray/;
 my @count = qw/20 12  2   4  3/;
 my %count = zip(\@media,\@count);                 # or zip( [@media], [@count] )
 print "I got $count{DVD} DVDs\n";                 # I got 12 DVDs

Dies (croaks) if the two lists are of different sizes

...or any input argument is not an array ref.

Tools.pm  view on Meta::CPAN

  return String::Similarity::similarity(@_) if @r==1; #to param
  my($min,$mindiff);
  if(ref($r[0]) eq 'ARRAY'){
    ($min,$mindiff)=@r[1,2];
    @r=@{$r[0]};
  }
  $min=0 if!defined$min;
  my($simlikest,$simnestlikest,$likest,$idlikest)=(-1,-1);
  for(@r){
    my($s,$id)=ref($_) eq 'ARRAY' ? @$_ : ($_);
    my $sim=String::Similarity::similarity($str,$s,$simnestlikest//0);
    if($sim>=$simlikest){
      ($simnestlikest,$likest,$simlikest)=($simlikest,$s,$sim);
      $idlikest=$id if defined$id;
    }
    elsif($sim>=$simnestlikest){
      $simnestlikest=$sim;
    }
  }
  my@ret=($simlikest,$likest);
  @ret=(undef,undef) if $simnestlikest>0 and $simlikest-$simnestlikest<$mindiff;
  @ret=(undef,undef) if $simlikest<$min;
  @ret=(@ret,$simnestlikest,$simlikest,$likest);
  push(@ret, $ret[0] ? $idlikest : undef) if defined $idlikest;
  return wantarray?@ret:$ret[0];
}

=head2 sim_perm

B<Input:> Two strings

B<Output:> A number 0 - 1 indicating the maximum similarity between two strings tested
against all permutations of both strings split on C<< [\s,]+ >> and where the string
with most words (i.e. names) are cut to as many words as the one with least words.

Requires L<String::Similarity> where the real magic happens.

While sim() is case sensitive, sim_perm() is not.

 Name1                              Name2                                 sim() sim_perm()
 ---------------------------------- ------------------------------------- ----- ----------
 Humphrey DeForest Bogart           Bogart Humphrey DeForest               0.71       1.00
 Humphrey Bogart                    Humphrey Gump Bogart                   0.86       1.00
 Humphrey deforest Bogart           Bogart DeForest                        0.41       1.00
 Humfrey DeForest Boghart           BOGART HUMPHREY                        0.05       0.87
 Humphrey                           Bogart Humphrey                        0.70       1.00
 Humfrey Deforest Boghart           BOGART D. HUMFREY                      0.15       0.78 *)
 Presley, Elvis Aaron               Elvis Presley                          0.424242   1.00

sim_perm() was written to identify double-profiles in databases: two people with
either the same (or similar) email or phone number or zip code and similar enough
names are going on the list of probable doubles.

*) Todo: deal with initials better, should be higher than 0.78

=cut

sub sim_perm {
  require String::Similarity;
  my($s1,$s2)=map {s/^\s*(.+?)\s*$/$1/;$_} map upper($_), @_; #/r v5.14
  croak if !length($s1) or !length($s2);
  my $max;
  for(cart([permutations(split(/[\s,]+/,$s1))],
           [permutations(split(/[\s,]+/,$s2))])) {
    my($n1,$n2)=@$_;
    if(@$n1>@$n2){    pop@$n1 while @$n1>@$n2 }
    else         {    pop@$n2 while @$n1<@$n2 }
    my($str1,$str2)=map join(" ",@$_),($n1,$n2);
    if(defined $max){
      my $sim=String::Similarity::similarity($str1,$str2,$max);
      $max=$sim if $sim>$max;
    }
    else {
      $max=String::Similarity::similarity($str1,$str2);
    }
    last if $max==1;
  }
  return $max;
}


=head2 pushsort

Adds one or more element to a numerically sorted array and keeps it sorted.

  pushsort @a, 13;                         # this...
  push     @a, 13; @a = sort {$a<=>$b} @a; # is the same as this, but the former is faster if @a is large

=head2 pushsortstr

Same as pushsort except that the array is kept sorted alphanumerically (cmp) instead of numerically (<=>). See L</pushsort>.

  pushsort @a, "abc";                      # this...
  push     @a, "abc"; @a = sort @a;        # is the same as this, but the former is faster if @a is large

=cut

#todo: use List::BinarySearch::XS 'binsearch_pos';
our $Pushsort_cmpsub=undef;
sub pushsort (\@@) {
  my $ar=shift;

  #not needed but often faster
  if(!defined $Pushsort_cmpsub and @$ar+@_<100){ #hm speedup?
    @$ar=(sort {$a<=>$b} (@$ar,@_));
    return 0+@$ar;
  }

  for my $v (@_){

    #not needed but often faster
    if(!defined $Pushsort_cmpsub){ #faster rank() in most cases
      push    @$ar, $v and next if $v>=$$ar[-1];
      unshift @$ar, $v and next if $v< $$ar[0];
    }

    splice @$ar, binsearch($v,$ar,1,$Pushsort_cmpsub)+1, 0, $v;
  }
  0+@$ar
}
sub pushsortstr(\@@){ local $Pushsort_cmpsub=sub{$_[0]cmp$_[1]}; pushsort(@_) } #speedup: copy sub pushsort

Tools.pm  view on Meta::CPAN

  my ($klo,$khi)=(0,$#{$x});
  my $k;
  while (($khi-$klo)>1) {
    $k=int(($khi+$klo)/2);
    if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
  }
  return $klo;
}


sub binsearchstr {binsearch(@_[0..2],sub{$_[0]cmp$_[1]})}

=head2 rank

B<Input:> Two or three arguments. N and an arrayref for the list to look at.

In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.

In array context: Returns the n smallest numbers in an array.

To return the n(th) largest number(s) instead of smallest, just negate n.

An optional third argument can be a sub that is used to compare the elements of the input array.

Examples:

 my $second_smallest = rank(2, [11,12,13,14]);  # 12
 my @top10           = rank(-10, [1..100]);     # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
 my $max             = rank(-1, [101,102,103,102,101]); #103
 my @contest         = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
 my $second          = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob

=head2 rankstr

Just as C<rank> but sorts alphanumerically (strings, cmp) instead of numerically.

=cut

sub rank {
  my($rank,$aref,$cmpsub)=@_;
  if($rank<0){
    $cmpsub||=sub{$_[0]<=>$_[1]};
    return rank(-$rank,$aref,sub{0-&$cmpsub});
  }
  my @sort;
  local $Pushsort_cmpsub=$cmpsub;
  for(@$aref){
    pushsort @sort, $_;
    pop @sort if @sort>$rank;
  }
  return wantarray ? @sort : $sort[$rank-1];
}
sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}

=head2 egrep

Extended grep.

Works like L<grep> but with more insight: local vars $i, $n, $prev, $next, $prevr and $nextr are available:

$i is the current index, starts with 0, ends with the length of the input array minus one

$n is the current element number, starts with 1, $n = $i + 1

$prev is the previous value (undef if current is first)

$next is the next value (undef if current is last)

$prevr is the previous value, rotated so that the previous of the first element is the last element

$nextr is the next value, rotated so that the next of the last element is the first element

$_ is the current value, just as with Perls built-in grep

 my @a = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);  # 1..20
 my @r = egrep { $_ % 3 == 0 } @a;  # @r is 3, 6, 9, 12, 15, 18. Plain grep could have been used here
 my @r = egrep { $i==1 or $next==12 or $prev==14 } @a;  # @r is now 2, 11, 15

 my @a=2..44;
 egrep { $prev =~/4$/ or $next =~/2$/ } @a;  # 5, 11, 15, 21, 25, 31, 35, 41
 egrep { $prevr=~/4$/ or $nextr=~/2$/ } @a;  # 2, 5, 11, 15, 21, 25, 31, 35, 41, 44
 egrep { $i%7==0 } @a;                       # 2, 9, 16, 23, 30, 37, 44
 egrep { $n%7==0 } @a;                       # 8, 15, 22, 29, 36, 43

=cut

sub egrep (&@) {
    my($code,$i,$package)=(shift,-1,(caller)[0]);
    my %h=map{($_=>"${package}::$_")}qw(i n prev next prevr nextr);
    no strict 'refs';
    grep {
	#no strict 'refs'; #not here! "no" not allowed in expression in perl5.16
	local ${$h{i}}     = ++$i;
	local ${$h{n}}     = $i+1;
	local ${$h{prev}}  = $i>0?$_[$i-1]:undef;
	local ${$h{next}}  = $i<$#_?$_[$i+1]:undef;
	local ${$h{prevr}} = $_[$i>0?$i-1:$#_];
	local ${$h{nextr}} = $_[$i<$#_?$i+1:0];
	&$code;
    }
    @_;
}

=head2 eqarr

B<Input:> Two or more references to arrays.

B<Output:> True (1) or false (0) for whether or not the arrays are numerically I<and> alphanumerically equal.
Comparing each element in each array with both C< == > and C< eq >.

Examples:

 eqarr([1,2,3],[1,2,3],[1,2,3]); # 1 (true)
 eqarr([1,2,3],[1,2,3],[1,2,4]); # 0 (false)
 eqarr([1,2,3],[1,2,3,4]);       # undef (different size, false)
 eqarr([1,2,3]);                 # croak (should be two or more arrays)
 eqarr([1,2,3],1,2,3);           # croak (not arraysrefs)

=cut

sub eqarr {

Tools.pm  view on Meta::CPAN


=head2 subarrays

Returns all 2^n-1 combinatory subarrays of an array where each element
of input array participates or not. Note: The empty array is not among
the returned arrayrefs unless an empty input is given.

 my @a = subarrays( 'a', 'b', 'c' );         # same as:
 my @a = ( ['a'    ],
           [    'b'],
           ['a','b'],
           [        'c'],
           ['a',    'c'],
           [    'b','c'],
           ['a','b','c'] );

 sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 } #implemented as

=cut

sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 }

=head2 part

B<Input:> A code-ref and a list

B<Output:> Two array-refs

Like C<grep> but returns the false list as well. Partitions a list
into two lists where each element goes into the first or second list
whether the predicate (a code-ref) is true or false for that element.

 my( $odd, $even ) = part {$_%2} (1..8);
 print for @$odd;   #prints 1 3 5 7
 print for @$even;  #prints 2 4 6 8

(Works like C< partition() > in the Scala programming language)

=head2 parth

Like C<part> but returns any number of lists. Not just two. Sort of like I<group by> in SQL.

B<Input:> A code-ref and a list

B<Output:> A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.

 my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
 print serialize(\%hash);

Result:

 %hash = (  T=>['These','the','this'],
            A=>['are','array'],
            O=>['of'],
            W=>['words']  )

=head2 parta

Like L<parth> but returns an array of lists where the predicate returns an index number.

 my @a = parta { length } qw/These are the words of this array/;

Result:

 @a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )

Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.

=cut

sub part  (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
sub parth (&@) { my($c,%r)=(shift);       push @{ $r{ &$c     } }, $_ for @_; %r }
sub parta (&@) { my($c,@r)=(shift);       push @{ $r[ &$c     ] }, $_ for @_; @r }
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map

=head2 refa

=head2 refh

=head2 refs

=head2 refaa

=head2 refah

=head2 refha

=head2 refhh

Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes

Examples:

  my $ref_to_array  = [1,2,3];
  my $ref_to_hash   = {1,100,2,200,3,300};
  my $ref_to_scalar = \"String";
  print "arrayref"  if ref($ref_to_array)  eq 'ARRAY';  #true
  print "hashref"   if ref($ref_to_hash)   eq 'HASH';   #true
  print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
  print "arrayref"  if refa($ref_to_array);             #also true, without: eq 'ARRAY'
  print "hashref"   if refh($ref_to_hash);              #also true, without: eq 'HASH'
  print "scalarref" if refs($ref_to_scalar);            #also true, without: eq 'SCALAR'

  my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
  my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
  my $ref_to_hash_of_arrays  = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
  my $ref_to_hash_of_hashes  = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };

  print "aa"  if refaa($ref_to_array_of_arrays);         #true
  print "ah"  if refah($ref_to_array_of_hashes);         #true
  print "ha"  if refha($ref_to_hash_of_arrays);          #true
  print "hh"  if refhh($ref_to_hash_of_hashes);          #true

=cut

sub refa  { ref($_[0]) eq 'ARRAY'  ? 1                         : ref($_[0]) ? 0 : undef }
sub refh  { ref($_[0]) eq 'HASH'   ? 1                         : ref($_[0]) ? 0 : undef }
sub refs  { ref($_[0]) eq 'SCALAR' ? 1                         : ref($_[0]) ? 0 : undef }
sub refaa { ref($_[0]) eq 'ARRAY'  ? refa($_[0][0])            : ref($_[0]) ? 0 : undef }
sub refah { ref($_[0]) eq 'ARRAY'  ? refh($_[0][0])            : ref($_[0]) ? 0 : undef }
sub refha { ref($_[0]) eq 'HASH'   ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
sub refhh { ref($_[0]) eq 'HASH'   ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }


=head2 pushr

=head2 popr

=head2 shiftr

=head2 unshiftr

=head2 splicer

=head2 keysr

=head2 valuesr

=head2 eachr

=head2 joinr

In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:

 my $person={name=>'Gaga', array=>[1,2,3]};
 push    $person{array}  , 4;  #works in perl 5.12-5.22 but not before and after
 push @{ $person{array} }, 4;  #works in all perl5 versions
 pushr   $person{array}  , 4;  #use Acme::Tools and this should work in perl >= 5.8
 popr    $person{array};       #returns 4

=cut

sub pushr    { push    @{shift()}, @_ } # ?    ($@)
sub popr     { pop     @{shift()}     }
sub shiftr   { shift   @{shift()}     }
sub unshiftr { unshift @{shift()}, @_ }
sub splicer  { @_==1 ? splice( @{shift()} )
              :@_==2 ? splice( @{shift()}, shift() )
              :@_==3 ? splice( @{shift()}, shift(), shift() )
              :@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : croak }
sub keysr    { ref($_[0]) eq 'HASH' ? keys(%{shift()}) : keysr({@{shift()}})  } #hm sort(keys%{shift()}) ?
sub valuesr  { values( %{shift()} )    }
sub eachr    { ref($_[0]) eq 'HASH'  ? each(%{shift()})
             #:ref($_[0]) eq 'ARRAY' ? each(@{shift()})  # perl 5.8.8 cannot compile each on array! eval?
		   :                        croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
sub joinr    {join(shift(),@{shift()})}
#sub mapr    # som scala: hvis map faar subref se kalles den sub paa hvert elem og resultatet returneres

#sub eachr    { each(%{shift()}) }

=head2 pile

B<Input:> a pile size s and a list

B<Output:> A list of lists of length s or the length of the remainer in
the last list. Piles together the input list in lists of the given size.

 my @list=(1,2,3,4,5,6,7,8,9,10);
 my @piles = pile(3, @list );        # ([1,2,3], [4,5,6], [7,8,9], [10])
 my $i=0;
 my @piles = parta {$i++/3} @list;   # same as above pile(3, @list)

=cut

sub pile { my $size=shift; my @r; for (@_){ push@r,[] if !@r or 0+@{$r[-1]}>=$size; push @{$r[-1]}, $_ } @r }

=head2 aoh2sql

  my @oceania=(
    {Area=>undef,   Capital=>'Pago Pago',        Code=>'AS', Name=>'American Samoa',                  Population=>54343},
    {Area=>7686850, Capital=>'Canberra',         Code=>'AU', Name=>'Australia',                       Population=>22751014},
    {Area=>undef,   Capital=>'West Island',      Code=>'CC', Name=>'Cocos (Keeling) Islands',         Population=>596},
    {Area=>240,     Capital=>'Avarua',           Code=>'CK', Name=>'Cook Islands',                    Population=>9838},
    {Area=>undef,   Capital=>'Flying Fish Cove', Code=>'CX', Name=>'Christmas Island',                Population=>1530},
    {Area=>18270,   Capital=>'Suva',             Code=>'FJ', Name=>'Fiji',                            Population=>909389},
    {Area=>702,     Capital=>'Palikir',          Code=>'FM', Name=>'Micronesia, Federated States of', Population=>105216},
    {Area=>549,     Capital=>'Hagatna (Agana)',  Code=>'GU', Name=>'Guam',                            Population=>161785},
    {Area=>811,     Capital=>'Tarawa',           Code=>'KI', Name=>'Kiribati',                        Population=>105711},
    {Area=>181.3,   Capital=>'Majuro',           Code=>'MH', Name=>'Marshall Islands',                Population=>72191},
    {Area=>19060,   Capital=>'Noumea',           Code=>'NC', Name=>'New Caledonia',                   Population=>271615},
    {Area=>undef,   Capital=>'Kingston',         Code=>'NF', Name=>'Norfolk Island',                  Population=>2210},
    {Area=>21,      Capital=>'Yaren District',   Code=>'NR', Name=>'Nauru',                           Population=>9540},
    {Area=>260,     Capital=>'Alofi',            Code=>'NU', Name=>'Niue',                            Population=>1190},
    {Area=>268680,  Capital=>'Wellington',       Code=>'NZ', Name=>'New Zealand',                     Population=>4438393},
    {Area=>undef,   Capital=>'Papeete',          Code=>'PF', Name=>'French Polynesia',                Population=>282703},
    {Area=>462840,  Capital=>'Port Moresby',     Code=>'PG', Name=>'Papua New Guinea',                Population=>6672429},
    {Area=>undef,   Capital=>'Adamstown',        Code=>'PN', Name=>'Pitcairn',                        Population=>48},
    {Area=>458,     Capital=>'Melekeok',         Code=>'PW', Name=>'Palau',                           Population=>21265},
    {Area=>28450,   Capital=>'Honiara',          Code=>'SB', Name=>'Solomon Islands',                 Population=>622469},
    {Area=>undef,   Capital=>undef,              Code=>'TK', Name=>'Tokelau',                         Population=>1337},
    {Area=>26,      Capital=>'Funafuti',         Code=>'TV', Name=>'Tuvalu',                          Population=>10869},
    {Area=>12200,   Capital=>'Port-Vila',        Code=>'VU', Name=>'Vanuatu',                         Population=>272264},
    {Area=>undef,   Capital=>'Mata-Utu',         Code=>'WF', Name=>'Wallis and Futuna',               Population=>15500},
    {Area=>2944,    Capital=>'Apia',             Code=>'WS', Name=>'Samoa (Western)',                 Population=>197773}
  );

 print aoh2sql(\@oceania,{
    name=>'country',
    drop=>2,
   #number=>'numeric',    #default
   #varchar=>'varchar',   #default, change to varchar2 if Oracle
   #date=>'date',         #default, perhaps change to 'timestamp with time zone' if postgres
   #varchar_maxlen=>4000, #default, 4000 (used to be?) is max in Oracle
   #create=>1,            #default, use 0 to dont include create table
   #drop=>0,              #default 0: dont include drop table x; 1: drop table x; 2: drop table if exists x;
   #end=>"commit;\n",
   #begin=>"begin;\n",
   #fix_colnames=>0,
 });

Returns:

 begin;

 drop table if exists country;

Tools.pm  view on Meta::CPAN

 );

 insert into country values (null,'Pago Pago','AS','American Samoa',54343);
 insert into country values (7686850,'Canberra','AU','Australia',22751014);
 insert into country values (null,'West Island','CC','Cocos (Keeling) Islands',596);
 insert into country values (240,'Avarua','CK','Cook Islands',9838);
 insert into country values (null,'Flying Fish Cove','CX','Christmas Island',1530);
 insert into country values (18270,'Suva','FJ','Fiji',909389);
 insert into country values (702,'Palikir','FM','Micronesia, Federated States of',105216);
 insert into country values (549,'Hagatna (Agana)','GU','Guam',161785);
 insert into country values (811,'Tarawa','KI','Kiribati',105711);
 insert into country values (181.3,'Majuro','MH','Marshall Islands',72191);
 insert into country values (19060,'Noumea','NC','New Caledonia',271615);
 insert into country values (null,'Kingston','NF','Norfolk Island',2210);
 insert into country values (21,'Yaren District','NR','Nauru',9540);
 insert into country values (260,'Alofi','NU','Niue',1190);
 insert into country values (268680,'Wellington','NZ','New Zealand',4438393);
 insert into country values (null,'Papeete','PF','French Polynesia',282703);
 insert into country values (462840,'Port Moresby','PG','Papua New Guinea',6672429);
 insert into country values (null,'Adamstown','PN','Pitcairn',48);
 insert into country values (458,'Melekeok','PW','Palau',21265);
 insert into country values (28450,'Honiara','SB','Solomon Islands',622469);
 insert into country values (null,null,'TK','Tokelau',1337);
 insert into country values (26,'Funafuti','TV','Tuvalu',10869);
 insert into country values (12200,'Port-Vila','VU','Vanuatu',272264);
 insert into country values (null,'Mata-Utu','WF','Wallis and Futuna',15500);
 insert into country values (2944,'Apia','WS','Samoa (Western)',197773);
 commit;


=cut

sub aoh2sql {
    my($aoh,$conf)=@_;
    my %def=( #defaults
	name=>'my_table',
	number=>'numeric',
	varchar=>'varchar',
	date=>'date',
	varchar_maxlen=>4000,
	create=>1,
	drop=>0,  # 1 drop table if exists, 2 plain drop
	end=>"commit;\n",
	begin=>"begin;\n",
	fix_colnames=>0,
	);
    my %conf=(%def,(@_<2?():%$conf));
#    $conf{$_}||=$def{$_} for keys%def;
    my %col;
    map $col{$_}++, keys %$_ for @$aoh;
    my @col=sort keys %col;
    my @colerr=grep!/^[a-z]\w+$/i,@col;
    croak "Invalid column name(s): @colerr" if @colerr and !$conf{fix_colnames};
    my(%t,%tdb);
    for my $c (@col){
	my($l,$s,$p,$nn,%ant,$t)=(0,0,0,0);
	for my $r (@$aoh){
	    my $v=$$r{$c};
	    next if !defined$v or $v!~/\S/;
	    $nn++;
	    $l=length($v) if length($v)>$l;
	    no warnings 'uninitialized';
	    if($v=~/^(18|19|20)\d\d(0[1-9]|1[0-2])(0[1-9]|1\d|2\d|3[01])-?\d\d:?\d\d:?\d\d$/ and $conf{date}){
		$ant{date}++;
		next;
	    }
	    elsif($v=~/^\s*[-+]?(\d*)(\.\d+)?([Ee]\-?\d+)?\s*$/ and length("$1$2") and $conf{number}){
		$ant{number}++;
		$s=length("$1.$2") if length("$1.$2")>$s;#hm
		$p=length($2)-1 if $2 and length($2)-1>$p;
		next;
	    }
	    else {
		$ant{varchar}++;
	    }
	}
	$t||='varchar' if $ant{varchar}  or  $ant{number} and $ant{date};
	$t||='number'  if $ant{number};
	$t||='date'    if $ant{date};
	$t||='varchar'; #hm
	$l=$conf{varchar_maxlen} if $conf{varchar_maxlen} and $l>$conf{varchar_maxlen};
	$l||=1;
	my $tdb;
	$tdb="$conf{$t}($l)"    if $t eq 'varchar';
	$tdb="$conf{$t}($s)"    if $t eq 'number' and $p==0;
	$tdb="$conf{$t}($s,$p)" if $t eq 'number' and $p>0 and ++$s;
	$tdb.=" not null" if $nn == 0+@$aoh;
	$t{$c}=$t;
	$tdb{$c}=$tdb;
    }
    my $sql;
    $sql="create table $conf{name} (".
	 join(",",map sprintf("\n  %-30s %s",do{s/\W+//g;$_},$tdb{$_}), @col). "\n);\n\n" if $conf{create};
    my $val=sub{my($v,$t)=@_;defined$v or $v="";!length($v)?'null':$t eq 'number' ? $v : "'".repl($v,"\'","''")."'"};
    for my $r (@$aoh){
	my $v=join",",map &$val($$r{$_},$t{$_}), @col;
	$sql.="insert into $conf{name} values ($v);\n";
    }
    $sql="drop table $conf{name};\n\n$sql" if $conf{drop}==1;
    $sql="drop table if exists $conf{name};\n\n$sql" if $conf{drop}>=2;
    $sql="$conf{begin}\n$sql" if $conf{begin};
    $sql.=$conf{end};
    $sql;
}

sub aoh2xls { croak "Not implemented yet: aoh2xls" }


=head1 STATISTICS

=head2 sum

Returns the sum of a list of numbers. Undef is ignored.

 print sum(1,3,undef,8);   # 12
 print sum(1..1000);       # 500500
 print sum(undef);         # undef

=cut

sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }

=head2 avg

Returns the I<average> number of a list of numbers. That is C<sum / count>

 print avg(  2, 4, 9);   # 5      (2+4+9) / 3 = 5
 print avg( [2, 4, 9] ); # 5      pass by reference, same result but faster for large arrays

Also known as I<arithmetic mean>.

Pass by reference: If one argument is given and it is a reference to an array,
this array is taken as the list of numbers. This mode is about twice as fast
for 10000 numbers or more. It most likely also saves memory.

=cut

sub avg {
  my($sum,$n,@a)=(0,0);
  no warnings;
  if( @_==0 )                          { return undef             }
  if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
  else                                 { @a=grep defined,@_       }
  if( @a==0 )                          { return undef             }
  $sum+=$_ for @a;
  return $sum/@a
}

=head2 geomavg

Returns the I<geometric average> (a.k.a I<geometric mean>) of a list of numbers.

 print geomavg(10,100,1000,10000,100000);               # 1000
 print 0+ (10*100*1000*10000*100000) ** (1/5);          # 1000 same thing

Tools.pm  view on Meta::CPAN


...could write something like:

 trgoykzfqsduphlbcmxejivnwa
 qycatilmpgxbhrdezfwsovujkn
 ytogrjialbewcpvndhkxfzqsmu

B<Input:>

=over 4

=item 1.
Either a reference to an array as the only input. This array will then be mixed I<in-place>. The array will be changed:

This: C<< @a=mix(@a) >> is the same as:  C<< mix(\@a) >>.

=item 2.
Or an array of zero, one or more elements.

=back

Note that an input-array which COINCIDENTLY SOME TIMES has one element
(but more other times), and that element is an array-ref, you will
probably not get the expected result.

To check distribution:

 perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n

The letters a-z should occur around 1000 times each.

Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)

 perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'

(Uses L</cart>, which is not a typo, see further down here)

Note: C<List::Util::shuffle()> is approximately four times faster. Both respects the Perl built-in C<srand()>.

=cut

sub mix {
  if(@_==1 and ref($_[0]) eq 'ARRAY'){ #just one arg, and its ref array
    my $r=$_[0];
    push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
    return $r;
  }
  else{
    my@e=@_;
    push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
    return @e;
  }
}

=head2 pwgen

Generates random passwords.

B<Input:> 0-n args

* First arg: length of password(s), default 8

* Second arg: number of passwords, default 1

* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!

* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:

 sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}

...meaning the password should:
* start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
* should contain at least one char from each of the groups lower, upper, digit and special char

To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
own C<rand()> internally.

C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
Whenever one of the two limits is reached, a first generates a croak.

Examples:

 my $pw=pwgen();             # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
 my $pw=pwgen(12);           # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
 my @pw=pwgen(0,10);         # 10 random 8 chars passwords, containing the same possible chars
 my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z

 pwgen(3);                                # dies, defaults require chars in each of 4 group (see above)
 pwgen(5,1,'A-C0-9',  qr/^\D{3}\d{2}$/);  # a 5 char string starting with three A, B or Cs and endring with two digits
 pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above

Examples of adding additional requirements to the default ones:

 my @pwreq = ( qr/^[A-C]/ );
 pwgen(8,1,'','',@pwreq);    # use defaults for allowed chars and the standard requirements
                             # but also demand that the password must start with A, B or C

 push @pwreq, sub{ not /[a-z]{3}/i };
 pwgen(8,1,'','',@pwreq);    # as above and in addition the password should not contain three
                             # or more consecutive letters (to avoid "offensive" words perhaps)

=cut

our $Pwgen_max_sec=0.01;     #max seconds/password before croak (for hard to find requirements)
our $Pwgen_max_trials=10000; #max trials/password  before croak (for hard to find requirements)
our $Pwgen_sec=0;            #seconds used in last call to pwgen()
our $Pwgen_trials=0;         #trials in last call to pwgen()
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
sub pwgen {
  my($len,$num,$chars,@req)=@_;
  $len||=8;
  $num||=1;
  $chars||='A-Za-z0-9,-./&%_!';
  $req[0]||=\&pwgendefreq if !$_[2];
  $chars=~s/([$_])-([$_])/join("","$1".."$2")/eg  for ('a-z','A-Z','0-9');
  my($c,$t,@pw,$d)=(length($chars),time_fp());
  ($Pwgen_trials,$Pwgen_sec)=(0,0);
  TRIAL:
  while(@pw<$num){
    croak "pwgen timeout after $Pwgen_trials trials"
      if ++$Pwgen_trials   >= $Pwgen_max_trials
      or ($d=time_fp()-$t) >  $Pwgen_max_sec*$num
            and $d!~/^\d+$/; #jic int from time_fp
    my $pw=join"",map substr($chars,rand($c),1),1..$len;
    for my $r (@req){
      if   (ref($r) eq 'CODE'  ){ local$_=$pw; &$r()    or next TRIAL }
      elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
      else                      { croak "pwgen: invalid req type $r ".ref($r) }
    }
    push@pw,$pw;
  }
  $Pwgen_sec=time_fp()-$t;
  return $pw[0] if $num==1;
  return @pw;
}

# =head1 veci
#
# Perls C<vec> takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
#
# This limitation is removed with C<veci> (vec improved, but much slower)
#
# The third argument still needs to be 32 or lower (or possibly 64 or lower).
#
# =cut
#
# sub vecibs ($) {
#   my($s,$o,$b,$new)=@_;
#   if($b=~/^(1|2|4|8|16|32|64)$/){
#     return vec($s,$o,$b)=$new if @_==4;
#     return vec($s,$o,$b);
#   }
#   my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
#   my $ob=int($o*$b/$bb);
#   my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
#   $v & (2**$b-1)
# }


=head1 SETS

=head2 distinct

Returns the values of the input list, sorted alfanumerically, but only
one of each value. This is the same as L</uniq> except uniq does not
sort the returned list.

Example:

 print join(", ", distinct(4,9,3,4,"abc",3,"abc"));    # 3, 4, 9, abc
 print join(", ", distinct(4,9,30,4,"abc",30,"abc"));  # 30, 4, 9, abc       note: alphanumeric sort

=cut

sub distinct { sort keys %{{map {($_,1)} @_}} }

Tools.pm  view on Meta::CPAN

      $new{$_}{$k}=$$r{$_};
    }
  }
  return %new;
}

=head2 a2h

B<Input:> array of arrays

B<Output:> array of hashes

Transforms an array of arrays (arrayrefs) to an array of hashes (hashrefs).

Example:

 my @h = a2h( ['Name', 'Age',  'Gender'],  #1st row become keys
              ['Alice', 20,    'F'],
              ['Bob',   30,    'M'],
              ['Eve',   undef, 'F'] );

Result array @h:

 (
   {Name=>'Alice', Age=>20,    Gender=>'F'},
   {Name=>'Bob',   Age=>30,    Gender=>'M'},
   {Name=>'Eve',   Age=>undef, Gender=>'F'},
 );

=head2 h2a

B<Input:> array of hashes

B<Output:> array of arrays

Opposite of L</a2h>

=cut

sub a2h {
    my @col=@{shift@_};
    map { my%h;@h{@col}=@$_;\%h} @_;
}

sub h2a {
    my %c;
    map $c{$_}++, keys%$_ for @_;
    my @c=sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c;
    (\@c,map[@$_{@c}],@_);
}

=head1 COMPRESSION

L</zipb64>, L</unzipb64>, L</zipbin>, L</unzipbin>, L</gzip>, and L</gunzip>
compresses and uncompresses strings to save space in disk, memory,
database or network transfer. Trades time for space. (Beware of wormholes)

=head2 zipb64

Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
No known limit on input length, several MB has been tested, as long as you've got the RAM...

B<Input:> One or two strings.

First argument: The string to be compressed.

Second argument is optional: A I<dictionary> string.

B<Output:> a base64-kodet string of the compressed input.

The use of an optional I<dictionary> string will result in an even
further compressed output in the dictionary string is somewhat similar
to the string that is compressed (the data in the first argument).

If x relatively similar string are to be compressed, i.e. x number
automatic of email responses to some action by a user, it will pay of
to choose one of those x as a dictionary string and store it as
such. (You will also use the same dictionary string when decompressing
using L</unzipb64>.

The returned string is base64 encoded. That is, the output is 33%
larger than it has to be.  The advantage is that this string more
easily can be stored in a database (without the hassles of CLOB/BLOB)
or perhaps easier transfer in http POST requests (it still needs some
url-encoding, normally). See L</zipbin> and L</unzipbin> for the
same without base 64 encoding.

Example 1, normal compression without dictionary:

  $txt = "Test av komprimering, hva skjer? " x 10;  # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
  print length($txt)," bytes input!\n";             # prints 330
  $zip = zipb64($txt);                              # compresses
  print length($zip)," bytes output!\n";            # prints 65
  print $zip;                                       # prints the base64 string ("noise")

  $output=unzipb64($zip);                              # decompresses
  print "Hurra\n" if $output eq $txt;               # prints Hurra if everything went well
  print length($output),"\n";                       # prints 330

Example 2, same compression, now with dictionary:

  $txt = "Test av komprimering, hva skjer? " x 10;  # Same original string as above
  $dict = "Testing av kompresjon, hva vil skje?";   # dictionary with certain similarities
                                                    # of the text to be compressed
  $zip2 = zipb64($txt,$dict);                          # compressing with $dict as dictionary
  print length($zip2)," bytes output!\n";           # prints 49, which is less than 65 in ex. 1 above
  $output=unzipb64($zip2,$dict);                       # uses $dict in the decompressions too
  print "Hurra\n" if $output eq $txt;               # prints Hurra if everything went well


Example 3, dictionary = string to be compressed: (out of curiosity)

  $txt = "Test av komprimering, hva skjer? " x 10;  # Same original string as above
  $zip3 = zipb64($txt,$txt);                           # hmm
  print length($zip3)," bytes output!\n";           # prints 25
  print "Hurra\n" if unzipb64($zip3,$txt) eq $txt;     # hipp hipp ...

zipb64() and zipbin() is really just wrappers around L<Compress::Zlib> and C<inflate()> & co there.

=cut

sub zipb64 {
  require MIME::Base64;
  return MIME::Base64::encode_base64(zipbin(@_));
}


=head2 zipbin

C<zipbin()> does the same as C<zipb64()> except that zipbin()
does not base64 encode the result. Returns binary data.

See L</zip> for documentation.

=cut

sub zipbin {
  require Compress::Zlib;
  my($data,$dict)=@_;
  my $x=Compress::Zlib::deflateInit(-Dictionary=>$dict||'',-Level=>Compress::Zlib::Z_BEST_COMPRESSION()) or croak();
  my($output,$status)=$x->deflate($data); croak() if $status!=Compress::Zlib::Z_OK();
  my($out,$status2)=$x->flush(); croak() if $status2!=Compress::Zlib::Z_OK();
  return $output.$out;
}

=head2 unzipb64

Opposite of L</zipb64>.

Input:

First argument: A string made by L</zipb64>

Second argument: (optional) a dictionary string which where used in L</zipb64>.

Output: The original string (be it text or binary).

See L</zipb64>.

=cut

sub unzipb64 {
  my($data,$dict)=@_;
  require MIME::Base64;
  unzipbin(MIME::Base64::decode_base64($data),$dict);
}

=head2 unzipbin

C<unzipbin()> does the same as L</unzip> except that C<unzipbin()>
wants a pure binary compressed string as input, not base64.

See L</unzipb64> for documentation.

=cut

Tools.pm  view on Meta::CPAN


=cut

sub gzip    { my $s=shift; eval"require Compress::Zlib"  if !$INC{'Compress/Zlib.pm'};  croak "Compress::Zlib not found"  if $@; Compress::Zlib::memGzip(    ref($s)?$s:\$s ) }
sub gunzip  { my $s=shift; eval"require Compress::Zlib"  if !$INC{'Compress/Zlib.pm'};  croak "Compress::Zlib not found"  if $@; Compress::Zlib::memGunzip(  ref($s)?$s:\$s ) }
sub bzip2   { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip(   ref($s)?$s:\$s ) }
sub bunzip2 { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }

=head1 NET, WEB, CGI-STUFF

=head2 ipaddr

B<Input:> an IP-number

B<Output:> either an IP-address I<machine.sld.tld> or an empty string
if the DNS lookup didn't find anything.

Example:

 perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")'  # prints www.uio.no

Uses perls C<gethostbyaddr> internally.

C<ipaddr()> memoizes the results internally (using the
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
particular IP number might take some time.

Some few DNS loopups can take several seconds.
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
probably turn off hostname lookups in their logs and just log IP numbers by using
C<HostnameLookups Off> in Apache C<httpd.conf> and then use I<ipaddr> afterwards if necessary.

=cut

our %IPADDR_memo;
sub ipaddr {
  my $ipnr=shift;
  #hm, NOTE: The 2 parameter on the next code line is not 2 for all OSes,
  #but seems to work in Linux and HPUX. Den correct way is to use the
  #AF_INET constant in the Socket or the IO::Socket package.
  return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
}

=head2 ipnum

C<ipnum()> does the opposite of C<ipaddr()>

Does an attempt of converting an IP address (hostname) to an IP number.
Uses DNS name servers via perls internal C<gethostbyname()>.
Return empty string (undef) if unsuccessful.

 print ipnum("www.uio.no");   # prints 129.240.13.152

Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.

=cut

our %IPNUM_memo;
sub ipnum {
  my $ipaddr=shift;
  #croak "No $ipaddr" if !length($ipaddr);
  return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
  my $h=gethostbyname($ipaddr);
  #croak "No ipnum for $ipaddr" if !$h;
  return if !defined $h;
  my $ipnum = join(".",unpack("C4",$h));
  $IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
  return $IPNUM_memo{$ipaddr};
}

our $Ipnum_errmsg;
our $Ipnum;
sub ipnum_ok {
    my $ipnum=shift;
    $Ipnum=undef;
    eval{
      die "malformed ipnum $ipnum\n" if not $ipnum=~/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
      die "invalid ipnum $ipnum\n"   if grep$_>255,$1,$2,$3,$4;
      $Ipnum=$1*256**3 + $2*256**2 + $3*256 + $4;
    };
    my$r=($Ipnum_errmsg=$@) ? 0 : 1;
    $r
}
our $Iprange_errmsg;
our $Iprange_start;
sub iprange_ok {
    my $iprange=shift;
    $Iprange_start=undef;
    my($r,$m);
    eval{
      die "malformed iprange $iprange\n"   if not $iprange=~m|^(\d+)\.(\d+)\.(\d+)\.(\d+)(?:/(\d+))$|;
      die "iprange part should be 0-255\n" if grep$_<0||$_>255,$1,$2,$3,$4;
      die "iprange mask should be 0-32\n"  if defined$5 and $5>32;
      ($r,$m)=($1*256**3+$2*256**2+$3*256+$4,32-$5);
    };
    return if $Iprange_errmsg=$@;
    my $x=$r>>$m<<$m;
    return if $r!=$x and $Iprange_errmsg=sprintf("need zero in last %d bits, should be %d.%d.%d.%d/%d",
						 $m, $x>>24, ($x>>16)&255, ($x>>8)&255, $x&255, 32-$m);
    $Iprange_start=$r;
    return 1;
}
sub in_iprange {
  my($ipnum,$iprange)=@_;
  croak $Ipnum_errmsg   if !ipnum_ok($ipnum);
  croak $Iprange_errmsg if !iprange_ok($iprange=~m|/\d+$| ? $iprange : "$iprange/32");
  "$iprange/32"=~m|/(\d+)| or die;
  $Ipnum>=$Iprange_start &&
  $Ipnum<=$Iprange_start + 2**(32-$1)-1;
}

=head2 webparams

B<Input:> (optional)

Zero or one input argument: A string of the same type often found behind the first question mark (C<< ? >>) in URLs.

This string can have one or more parts separated by C<&> chars.

Each part consists of C<key=value> pairs (with the first C<=> char being the separation char).

Both C<key> and C<value> can be url-encoded.

If there is no input argument, C<webparams> uses C<< $ENV{QUERY_STRING} >> instead.

If also  C<< $ENV{QUERY_STRING} >> is lacking, C<webparams()> checks if C<< $ENV{REQUEST_METHOD} eq 'POST' >>.
In that case C<< $ENV{CONTENT_LENGTH} >> is taken as the number of bytes to be read from C<STDIN>
and those bytes are used as the missing input argument.

The environment variables QUERY_STRING, REQUEST_METHOD and CONTENT_LENGTH is
typically set by a web server following the CGI standard (which Apache and
most of them can do I guess) or in mod_perl by Apache. Although you are
probably better off using L<CGI>. Or C<< $R->args() >> or C<< $R->content() >> in mod_perl.

B<Output:>

C<webparams()> returns a hash of the key/value pairs in the input argument. Url-decoded.

If an input string has more than one occurrence of the same key, that keys value in the returned hash will become concatenated each value separated by a C<,> char. (A comma char)

Examples:

 use Acme::Tools;
 my %R=webparams();
 print "Content-Type: text/plain\n\n";                          # or rather \cM\cJ\cM\cJ instead of \n\n to be http-compliant
 print "My name is $R{name}";

Storing those four lines in a file in the directory designated for CGI-scripts
on your web server (or perhaps naming the file .cgi is enough), and C<chmod +x
/.../cgi-bin/script> and the URL
L<http://some.server.somewhere/cgi-bin/script?name=HAL> will print
C<My name is HAL> to the web page.

L<http://some.server.somewhere/cgi-bin/script?name=Bond&name=+James+Bond> will print C<My name is Bond, James Bond>.

=cut

sub webparams {
  my $query=shift();
  $query=$ENV{QUERY_STRING} if !defined $query;
  if(!defined $query  and  $ENV{REQUEST_METHOD} eq "POST"){
    read(STDIN,$query , $ENV{CONTENT_LENGTH});
    $ENV{QUERY_STRING}=$query;
  }
  my %R;
  for(split("&",$query)){
    next if !length($_);
    my($nkl,$verdi)=map urldec($_),split("=",$_,2);
    $R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
  }
  return %R;
}

=head2 urlenc

Input: a string

Output: the same string URL encoded so it can be sent in URLs or POST requests.

In URLs (web addresses) certain characters are illegal. For instance I<space> and I<newline>.
And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C<?>, C<&>.

These illegal and special chars needs to be encoded to be sent in
URLs.  This is done by sending them as C<%> and two hex-digits. All
chars can be URL encodes this way, but it's necessary just on some.

Example:

 $search="Østdal, Åge";
 my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
 print $url;

Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>

=cut

sub urlenc {
  my $str=shift;
  $str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
  return $str;
}

=head2 urldec

Opposite of L</urlenc>.

Example, this returns 'C< ø>'. That is space and C<< ø >>.

 urldec('+%C3')

=cut

sub urldec {
  my $str=shift;
  $str=~s/\+/ /gs;
  $str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
  return $str;
}

=head2 ht2t

C<ht2t> is short for I<html-table to table>.

This sub extracts an html-C<< <table> >>s and returns its C<< <tr>s >>
and C<< <td>s >> as an array of arrayrefs. And strips away any html
inside the C<< <td>s >> as well.

Tools.pm  view on Meta::CPAN

Perls more tedious way created in the 80s.  The same argument goes for
file slurping. On the other side it's also a good practice to never
assume to much on available memory and the number of files if you
don't know for certain that enough memory is available whereever your
code is run or that the size of the directory is limited.

B<Example:>

How to get all files in the C</tmp> directory including all subdirectories below of any depth:

 my @files=("/tmp");
 map {-d $_ and unshift @files,$_ or push @files,$_} readdirectory(shift(@files)) while -d $files[0];

...or to avoid symlinks and only get real files:

 map {-d and !-l and unshift @files,$_ or -f and !-l and push @files,$_} readdirectory(shift(@files)) while -d $files[0];

=cut

sub readdirectory {
  my $dir=shift;
  opendir(my $D,$dir);
  my @filer=map "$dir/$_", grep {!/^\.\.?$/} readdir($D);
  closedir($D);
  return @filer;
}

=head2 basename

The basename and dirname functions behaves like the *nix shell commands with the same names.

B<Input:> One or two arguments: Filename and an optional suffix

B<Output:> Returns the filename with any directory and (if given) the suffix removed.

 basename('/usr/bin/perl')                   # returns 'perl'
 basename('/usr/local/bin/report.pl','.pl')  # returns 'report' since .pl at the end is removed
 basename('report2.pl','.pl')                # returns 'report2'
 basename('report2.pl','.\w+')               # returns 'report2.pl', probably not what you meant
 basename('report2.pl',qr/.\w+/)             # returns 'report2', use qr for regex

=head2 dirname

B<Input:> A filename including path

B<Output:> Removes the filename path and returns just the directory path up until but not including
the last /. Return just a one char C<< . >> (period string) if there is no directory in the input.

 dirname('/usr/bin/perl')                    # returns '/usr/bin'
 dirname('perl')                             # returns '.'

=head2 username

Returns the current linux/unix username, for example the string root

 print username();                        #just (getpwuid($<))[0] but more readable perhaps

=cut

sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
sub dirname  { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
sub username { (getpwuid($<))[0] }

=head2 wipe

Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)

B<Input:>
* Arg 1: A filename
* Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
* Optional arg 3: keep (true/false), wipe() but no delete of file

B<Output:> Same as the C<unlink()> (remove file): 1 for success, 0 or false for failure.

See also: L<https://www.google.com/search?q=wipe+file>, L<http://www.dban.org/>

=cut

sub wipe {
  my($file,$times,$keep)=@_;
  $times||=3;
  croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
  my $size=-s$file;
  open my $WIFH, '+<', $file or croak "ERROR: Unable to open $file: $!\n";
  binmode($WIFH);
  for(1..$times){
    my $block=chr(int(rand(256))) x 1024;#hm
    for(0..($size/1024)){
      seek($WIFH,$_*1024,0);
      print $WIFH $block;
    }
  }
  close($WIFH);
  $keep || unlink($file);
}

=head2 chall

Does chmod + utime + chown on one or more files.

Returns the number of files of which those operations was successful.

Mode, uid, gid, atime and mtime are set from the array ref in the first argument.

The first argument references an array which is exactly like an array returned from perls internal C<stat($filename)> -function.

Example:

 my @stat=stat($filenameA);
 chall( \@stat,       $filenameB, $filenameC, ... );  # by stat-array
 chall( $filenameA,   $filenameB, $filenameC, ... );  # by file name

Copies the chmod, owner, group, access time and modify time from file A to file B and C.

See C<perldoc -f stat>, C<perldoc -f chmod>, C<perldoc -f chown>, C<perldoc -f utime>

=cut


sub chall {
  my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks )

Tools.pm  view on Meta::CPAN


 my %conf = read_conf('/etc/your/thing.conf');
 print $conf{sectionA}{knobble};  #prints ABC if the file is as shown below
 print $conf{sectionA}{gobble};   #prints ZZZ, the last gobble
 print $conf{switch};             #prints OK here as well, unsectioned value
 print $conf{part2}{password};    #prints oh:no= x

File use for the above example:

 switch:    OK       #before first section, the '' (empty) section
 [sectionA]
 knobble:   ABC
 gobble:    XYZ      #this gobble is overwritten by the gobble on the next line
 gobble:    ZZZ
 [part2]
 password:  oh:no= x  #should be better
 text:      { values starting with { continues
              until reaching a line with }

Everything from # and behind is regarded comments and ignored. Comments can be on any line.
To keep a # char, put a \ in front of it.

A C< : > or C< = > separates keys and values.  Spaces at the beginning or end of lines are
ignored (after removal of #comments), as are any spaces before and after : and = separators.

Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
internal spaces and tabs, but not at the beginning or end.

Multi-line values must start and end with { and }. Using { and } keep spaces at the start
or end in both one-line and multi-line values.

Sections are marked with C<< [sectionname] >>.  Section names, keys and values is case
sensitive. C<Key:values> above the first section or below and empty C<< [] >> is placed
both in the empty section in the returned hash and as top level key/values.

C<read_conf> can be a simpler alternative to the core module L<Config::Std> which has
its own hassles.

 $Acme::Tools::Read_conf_empty_section=1;        #default 0 (was 1 in version 0.16)
 my %conf = read_conf('/etc/your/thing.conf');
 print $conf{''}{switch};                        #prints OK with the file above
 print $conf{switch};                            #prints OK here as well

=cut

our $Read_conf_empty_section=0;
sub read_conf {
  my($fn,$hr)=(@_,{});
  my $conf=ref($fn)?$$fn:readfile($fn);
  $conf=~s,\s*(?<!\\)#.*,,g;
  my($section,@l)=('',split"\n",$conf);
  while(@l) {
    my $l=shift@l;
    if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
      $section=$1;
      $$hr{$1}||={};
    }
    elsif( $l=~/^\s*([^\:\=]+?)\s*[:=]\s*(.*?)\s*$/ ) {
      my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
      my $v=&$ml($2);
      $$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
      $$hr{$1}=$v if !length($section);
    }
  }
  %$hr;
}
#  my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
#    s,<INCLUDE ([^>]+)>,"".readfile(&$incfn($1)),eg; #todo


=head2 openstr

                                            # returned from openstr:
  open my $FH, openstr("fil.txt")  or die;  # fil.txt
  open my $FH, openstr("fil.gz")   or die;  # zcat fil.gz |
  open my $FH, openstr("fil.bz2")  or die;  # bzcat fil.bz2 |
  open my $FH, openstr("fil.xz")   or die;  # xzcat fil.xz |
  open my $FH, openstr(">fil.txt") or die;  # > fil.txt
  open my $FH, openstr(">fil.gz")  or die;  # | gzip > fil.gz
  open my $FH, openstr(">fil.bz2") or die;  # | bzip2 > fil.bz2
  open my $FH, openstr(">fil.xz")  or die;  # | xz    > fil.bz2

Environment variable PATH is used. So in the examples above, /bin/gzip
is returned instead of gzip if /bin is the first directory in
$ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
/usr/local/bin is added to PATH in openstr(). They are checked even if
PATH is empty.

See also C<writefile()> and C<readfile()> for automatic compression and decompression using C<openstr>.

=cut

our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
our $Magic_openstr=1;
sub openstr_prog { @Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found" }
sub openstr {
  my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
  return $fn if !$ext or !$Magic_openstr;
  $fn =~ /^\s*>/
      ?  "| ".(openstr_prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/   }->{lc($ext)})).$fn
      :        openstr_prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
}

=head2 printed

Redirects C<print> and C<printf> from STDOUT to a string which is returned.

 my $p = printed { print "hello!" };     # now $p eq 'hello!'
 my $p = printed { some_sub() };         # now $p contains whatever was printed by some_sub() and the subs call from it

=cut

sub printed (&) { my $s; open(local *STDOUT, '>', \$s) or croak "ERR: $! $?"; shift->(); $s } #todo catch stderr also?

#todo: sub stdin{}
#todo: sub stdout{}
#todo: sub stderr{}
#todo: sub stdouterr{}

=head1 TIME FUNCTIONS

=head2 tms

Tools.pm  view on Meta::CPAN


B<Third argument:> (optional) Is_date. False|true, default false. If true, the second argument is
interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).

=cut

#Se også L</tidstrk> og L</tidstr>

our $Tms_pattern;
our %Tms_str=
	  ('MÃ…NED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
		          'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
	   'MÃ¥ned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
		          'August','September','Oktober','November','Desember'],
	   'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
		          'august','september','oktober','november','desember'],
	   'MÃ…NE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
	   'MÃ¥ne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
	   'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
	   'MÃ…NE'  => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
	   'MÃ¥ne'  => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
	   'måne'  => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
	   'MÃ…N'   => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
	   'MÃ¥n'   => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
	   'mån'   => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
	   'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
		          'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
	   'Month' => [4, 'January','February','March','April','May','June','July',
		          'August','September','October','November','December'],
	   'month' => [4, 'january','february','march','april','may','june','july',
		          'august','september','october','november','december'],
	   'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
	   'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
	   'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
	   'MONT'  => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
	   'Mont'  => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
	   'mont'  => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
	   'MON'   => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
	   'Mon'   => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
	   'mon'   => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
	   'DAY'   => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
	   'Day'   => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
	   'day'   => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
	   'DY'    => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
	   'Dy'    => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
	   'dy'    => [6, 'sun','mon','tue','wed','thu','fri','sat'],
	   'DAG'   => [6, 'SØNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LØRDAG'],
	   'Dag'   => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
	   'dag'   => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
	   'DG'    => [6, 'Søn','MAN','TIR','ONS','TOR','FRE','LØR'],
	   'Dg'    => [6, 'SØn','Man','Tir','Ons','Tor','Fre','Lør'],
	   'dg'    => [6, 'søn','man','tir','ons','tor','fre','lør'],
	   );
my $_tms_inited=0;
sub tms_init {
  return if $_tms_inited++;
  for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
    $Tms_str{$_}=$Tms_str{replace($_,"aa","Ã¥","AA","Ã…")};
  }
  $Tms_pattern=join("|",map{quotemeta($_)}
		        sort{length($b)<=>length($a)}
			keys %Tms_str);
  #without sort "måned" could be "mared" because "mån"=>"mar"
}

sub totime {

}

=head2 s2t

Convert strings to "time pieces". Example:

 my($dd,$mm,$yyyy,$str) = s2t("18/february/2019:13:53","DD","MM","YYYY","YYYYMMDD-HH24:MI:SS")
 print "dd: $dd   mm: $mm   yyyy: $yyyy   str: $str\n";  # dd: 18   mm: 02   yyyy: 2019   str: 20190218-13:53:00

=cut

sub s2t {
  require Date::Parse;
  my $s=shift;
  if($s=~/\b(?:mai|okt|des|juni|juli|februar)/i){ #fix norwegian/danish (for now)
     $s=~s/\bMai\b/May/i;         $s=~s/\bmai\b/may/i;         $s=~s/\bMAI\b/MAY/i;
     $s=~s/\bOkt\b/Oct/i;         $s=~s/\bokt\b/oct/i;         $s=~s/\bOKT\b/OCT/i;
     $s=~s/\bDes/Dec/;            $s=~s/\bdes/dec/;            $s=~s/\bDES/DEC/;
     $s=~s/\bFebruar\b/February/; $s=~s/\bfebruar\b/february/; $s=~s/\bFEBRUAR\b/FEBRUARY/;
     $s=~s/\bjuli\b/July/i;
     $s=~s/\bjuni\b/June/i;
  }
  elsif($s           =~ /^[19]\d{9}$/){ $s=localtime($s)      } #hm, make faster
  elsif($s           =~ /^[19]\d{12}$/
    and int($s/1000) =~ /^[19]\d{9}$/){ $s=localtime($s/1000) } #hm
  elsif($s=~/^((?:17|18|19|20|21)\d\d)(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])$/){#hm
      $s="$1-$2-$3T00:00:00";
  }
  return Date::Parse::str2time($s)                  if !@_;
  return tms(Date::Parse::str2time($s),shift(@_))   if 0+@_ == 1;
  return map tms(Date::Parse::str2time($s),$_), @_;
}

sub date_ok {
  my($y,$m,$d)=@_;
  return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
  return 0 if $y!~/^\d\d\d\d$/;
  return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
  return 1;
}

sub weeknum {
  return weeknum(tms('YYYYMMDD')) if @_<1;
  return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
  my($year,$month,$day)= @_;
  eval{
    if(@_<2){
      if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
      elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
      else {die}
    }
    elsif(@_!=3){croak}
    croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
  };

Tools.pm  view on Meta::CPAN

 Thu Jan 12 16:00:20 2012

...and so on even if the C<< ...heavy job... >>-part takes more than a
second to complete. Whereas if sleep(10) was used, each job would
spend more than ten seconds in average since the work time would be
added to sleep(10).

Note: sleep_until() will remember the time of ANY last call of this sub,
not just the one on the same line in the source code (this might change
in the future). The first call to sleep_until() will be the same as
sleep_fp() or Perl's own sleep() if the argument is an integer.

=cut

our $Time_last_sleep_until;
sub sleep_until {
  my $s=@_==1?shift():0;
  my $time=time_fp();
  my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
  $Time_last_sleep_until=time;
  sleep_fp($sleep) if $sleep>0;
}

my %thr;
sub throttle {
  my($times,$mintime,$what)=@_;
  $what||=join(":",@{[caller(1)]}[3,2]);
  $thr{$what}||=[];
  my $thr=$thr{$what};
  push @$thr,time_fp();
  return if @$thr<$times;
  my $since=$$thr[-1]-shift(@$thr);
  my $sleep=$since<$mintime?$mintime-$since:0;
  sleep_fp($sleep);
  return $sleep;
}

=head2 leapyear

B<Input:> A year. A four digit number.

B<Output:> True (1) or false (0) of whether the year is a leap year or
not. (Uses current calendar even for periods before leapyears was used).

 print join(", ",grep leapyear($_), 1900..2014)."\n";

 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956,
 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012

Note: 1900 is not a leap year, but 2000 is. Years divided by 100 is a leap year only
if it can be divided by 400.

=cut

sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool

#http://rosettacode.org/wiki/Levenshtein_distance#Perl
our %ldist_cache;
sub ldist {
  my($s,$t,$l) = @_;
  return length($t) if !$s;
  return length($s) if !$t;
  %ldist_cache=() if !$l and 1000<0+%ldist_cache;
  $ldist_cache{$s,$t} ||=
  do {
    my($s1,$t1) = ( substr($s,1), substr($t,1) );
    substr($s,0,1) eq substr($t,0,1)
      ? ldist($s1,$t1)
      : 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
  };
}

=head1 OTHER

=head2 nvl

The I<no value> function (or I<null value> function)

C<nvl()> takes two or more arguments. (Oracles nvl-function take just two)

Returns the value of the first input argument with length() > 0.

Return I<undef> if there is no such input argument.

In perl 5.10 and perl 6 this will most often be easier with the C< //
> operator, although C<nvl()> and C<< // >> treats empty strings C<"">
differently. Sub nvl here considers empty strings and undef the same.

=cut

sub nvl {
  return $_[0] if defined $_[0] and length($_[0]) or @_==1;
  return $_[1] if @_==2;
  return nvl(@_[1..$#_]) if @_>2;
  return undef;
}

=head2 decode_num

See L</decode>.

=head2 decode

C<decode()> and C<decode_num()> works just as Oracles C<decode()>.

C<decode()> and C<decode_num()> accordingly uses perl operators C<eq> and C<==> for comparison.

Examples:

 my $a=123;
 print decode($a, 123,3,  214,4, $a);     # prints 3
 print decode($a, 123=>3, 214=>4, $a);    # prints 3, same thing since => is synonymous to comma in Perl

The first argument is tested against the second, fourth, sixth and so on,
and then the third, fifth, seventh and so on is
returned if decode() finds an equal string or number.

In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.

More examples:

 my $a=123;
 print decode($a, 123=>3, 214=>7, $a);              # also 3,  note that => is synonym for , (comma) in perl
 print decode($a, 122=>3, 214=>7, $a);              # prints 123
 print decode($a,  123.0 =>3, 214=>7);              # prints 3
 print decode($a, '123.0'=>3, 214=>7);              # prints nothing (undef), no last argument default value here
 print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b

Sort of:

 decode($string, %conversion, $default);

The last argument is returned as a default if none of the keys in the keys/value-pairs matched.

A more perl-ish and often faster way of doing the same:

 {123=>3, 214=>7}->{$a} || $a                       # (beware of 0)

=cut

sub decode {
  croak "Must have a mimimum of two arguments" if @_<2;
  my $uttrykk=shift;
  if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
  else               { !defined shift    and return shift or shift for 1..@_/2 }
  return shift;
}

sub decode_num {
  croak "Must have a mimimum of two arguments" if @_<2;
  my $uttrykk=shift;
  if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }

Tools.pm  view on Meta::CPAN

Discover, Diners Club / Carte Blanche, JCB and others.

B<Input:>

A credit card number. Can contain non-digits, but they are removed internally before checking.

B<Output:>

Something true or false.

Or more accurately:

Returns C<undef> (false) if the input argument is missing digits.

Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.

Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.

The name of the credit card company is returned like this (without the C<'> character)

 Returns (wo '')                Starts on                Number of digits
 ------------------------------ ------------------------ ----------------
 'MasterCard'                   51-55                    16
 'Visa'                         4                        13 eller 16
 'American Express'             34 eller 37              15
 'Discover'                     6011                     16
 'Diners Club / Carte Blanche'  300-305, 36 eller 38     14
 'JCB'                          3                        16
 'JCB'                          2131 eller 1800          15

And should perhaps have had:

 'enRoute'                      2014 eller 2149          15

...but that card uses either another control algorithm or no control
digits at all. So C<enRoute> is never returned here.

If the control digits is valid, but the input does not match anything in the column C<starts on>, 1 is returned.

(This is also the same control digit mechanism used in Norwegian KID numbers on payment bills)

The first digit in a credit card number is supposed to tell what "industry" the card is meant for:

 MII Digit Value             Issuer Category
 --------------------------- ----------------------------------------------------
 0                           ISO/TC 68 and other industry assignments
 1                           Airlines
 2                           Airlines and other industry assignments
 3                           Travel and entertainment
 4                           Banking and financial
 5                           Banking and financial
 6                           Merchandizing and banking
 7                           Petroleum
 8                           Telecommunications and other industry assignments
 9                           National assignment

...although this has no meaning to C<Acme::Tools::ccn_ok()>.

The first six digits is I<Issuer Identifier>, that is the bank
(probably). The rest in the "account number", except the last digits,
which is the control digit. Max length on credit card numbers are 19
digits.

=cut

sub ccn_ok {
    my $ccn=shift(); #credit card number
    $ccn=~s/\D+//g;
    if(KID_ok($ccn)){
	return "MasterCard"                   if $ccn=~/^5[1-5]\d{14}$/;
	return "Visa"                         if $ccn=~/^4\d{12}(?:\d{3})?$/;
	return "American Express"             if $ccn=~/^3[47]\d{13}$/;
	return "Discover"                     if $ccn=~/^6011\d{12}$/;
	return "Diners Club / Carte Blanche"  if $ccn=~/^3(?:0[0-5]\d{11}|[68]\d{12})$/;
	return "JCB"                          if $ccn=~/^(?:3\d{15}|(?:2131|1800)\d{11})$/;
	return 1;
    }
    #return "enRoute"                        if $ccn=~/^(?:2014|2149)\d{11}$/; #ikke LUHN-krav?
    return 0;
}

=head2 KID_ok

Checks if a norwegian KID number has an ok control digit.

To check if a customer has typed the number correctly.

This uses the  LUHN algorithm (also known as mod-10) from 1960 which is also used
internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.

The algorithm, as described in Phrack (47-8) (a long time hacker online publication):

 "For a card with an even number of digits, double every odd numbered
 digit and subtract 9 if the product is greater than 9. Add up all the
 even digits as well as the doubled-odd digits, and the result must be
 a multiple of 10 or it's not a valid card. If the card has an odd
 number of digits, perform the same addition doubling the even numbered
 digits instead."

B<Input:> A KID-nummer. Must consist of digits 0-9 only, otherwise a die (croak) happens.

B<Output:>

- Returns undef if the input argument is missing.

- Returns 0 if the control digit (the last digit) does not satify the LUHN/mod-10 algorithm.

- Returns 1 if ok

B<See also:> L</ccn_ok>

=cut

sub KID_ok {
  croak "Non-numeric argument" if $_[0]=~/\D/;
  my @k=split//,shift or return undef;
  my $s;$s+=pop(@k)+[qw/0 2 4 6 8 1 3 5 7 9/]->[pop@k] while @k;
  $s%10==0?1:0;
}


Tools.pm  view on Meta::CPAN


Will print:

 Report C

 Name  Attributt 1997   1997   1998   1998
                 Summer Winter Summer Winter
 ----- --------- ------ ------ ------ ------
 Gerd  Height     170    158    171    171
 Gerd  Weight      66     64     64     64
 Hilde Height     168    164    168    168
 Hilde Weight      62     61     62     62
 Per   Height     182    180    182    183
 Per   Weight      75     73     76     74
 Tone  Weight      70     69     70     71

.

 my @reportD=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name");
 print "\n\nReport D\n\n".tablestring(\@reportD);

Will print:

 Report D

 Name  Height Height Height Height Weight Weight Weight Weight
       1997   1997   1998   1998   1997   1997   1998   1998
       Summer Winter Summer Winter Summer Winter Summer Winter
 ----- ------ ------ ------ ------ ------ ------ ------ ------
 Gerd  170    158    171    171    66     64     64     64
 Hilde 168    164    168    168    62     61     62     62
 Per   182    180    182    183    75     73     76     74
 Tone                              70     69     70     71

Options:

Options to sort differently and show sums and percents are available. (...MORE DOC ON THAT LATER...)

See also L<Data::Pivot>

=cut

sub pivot {
  my($tabref,@vertikalefelt)=@_;
  my %opt=ref($vertikalefelt[-1]) eq 'HASH' ? %{pop(@vertikalefelt)} : ();
  my $opt_sum=1 if $opt{sum};
  my $opt_pro=exists $opt{prosent}?$opt{prosent}||0:undef;
  my $sortsub          = $opt{'sortsub'}          || \&_sortsub;
  my $sortsub_bortover = $opt{'sortsub_bortover'} || $sortsub;
  my $sortsub_nedover  = $opt{'sortsub_nedover'}  || $sortsub;
  #print serialize(\%opt,'opt');
  #print serialize(\$opt_pro,'opt_pro');
  my $antned=0+@vertikalefelt;
  my $bakerst=-1+@{$$tabref[0]};
  my(%h,%feltfinnes,%sum);
  #print "Bakerst<$bakerst>\n";
  for(@$tabref){
    my $rad=join($;,@$_[0..($antned-1)]);
    my $felt=join($;,@$_[$antned..($bakerst-1)]);
    my $verdi=$$_[$bakerst];
    length($rad) or $rad=' ';
    length($felt) or $felt=' ';
    $h{$rad}{$felt}=$verdi;
    $h{$rad}{"%$felt"}=$verdi;
    if($opt_sum or defined $opt_pro){
      $h{$rad}{Sum}+=$verdi;
      $sum{$felt}+=$verdi;
      $sum{Sum}+=$verdi;
    }
    $feltfinnes{$felt}++;
    $feltfinnes{"%$felt"}++ if $opt_pro;
  }
  my @feltfinnes = sort $sortsub_bortover keys%feltfinnes;
  push @feltfinnes, "Sum" if $opt_sum;
  my @t=([@vertikalefelt,map{replace($_,$;,"\n")}@feltfinnes]);
  #print serialize(\@feltfinnes,'feltfinnes');
  #print serialize(\%h,'h');
  #print "H = ".join(", ",sort _sortsub keys%h)."\n";
  for my $rad (sort $sortsub_nedover keys(%h)){
    my @rad=(split($;,$rad),
             map { defined($_)?$_:exists$opt{undefined}?$opt{undefined}:undef }
	     map {
	       if(/^\%/ and defined $opt_pro){
		 my $sum=$h{$rad}{Sum};
		 my $verdi=$h{$rad}{$_};
		 if($sum!=0){
		   defined $verdi
                   ?sprintf("%*.*f",3+1+$opt_pro,$opt_pro,100*$verdi/$sum)
		   :$verdi;
		 }
		 else{
		   $verdi!=0?"div0":$verdi;
		 }
	       }
	       else{
                 $h{$rad}{$_};
	       }
	     }
	     @feltfinnes);
    push(@t,[@rad]);
  }
  push(@t,"-",["Sum",(map{""}(2..$antned)),map{print "<$_>\n";$sum{$_}}@feltfinnes]) if $opt_sum;
  return @t;
}

# default sortsub for pivot()

sub _sortsub {
  no warnings;
  #my $c=($a<=>$b)||($a cmp $b);
  #return $c if $c;
  #printf "%-30s %-30s  ",replace($a,$;,','),replace($b,$;,',');
  my @a=split $;,$a;
  my @b=split $;,$b;
  for(0..$#a){
    my $c=$a[$_]<=>$b[$_];
    return $c if $c and "$a[$_]$b[$_]"!~/[iI][nN][fF]|þ|∞/i; # hm inf(inity)
    $c=$a[$_]cmp$b[$_];
    return $c if $c;
  }
  return 0;
}

Tools.pm  view on Meta::CPAN


- columns with just numeric values are right justified (header row excepted)

Example:

 print tablestring([
   [qw/AA BB CCCC/],
   [123,23,"d"],
   [12,23,34],
   [77,88,99],
   ["lin\nes",12,"asdff\nfdsa\naa"],[0,22,"adf"]
 ]);

Prints this string of 11 lines:

 AA  BB CCCC
 --- -- -----
 123 23 d
 12  23 34
 77   8 99

 lin 12 asdff
 es     fdsa
        aa

 10  22 adf

As you can see, rows containing multi-lined cells gets an empty line before and after the row to separate it more clearly.

=cut

sub tablestring {
  my $tab=shift;
  my %o=$_[0] ? %{shift()} : ();
  my $remove_empty       = $o{remove_empty_columns};
  my $no_multiline_space = $o{no_multiline_space};
  my $nodup              = $o{nodup}||0;
  my $no_header_line     = $o{no_header_line};
  my $pagesize           = exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
  my $left_force         = $o{left};
  my(@width,@left,@height,@not_empty,@nodup);
  my $head=1;
  my $i=0;
  my $j;
  for(@$tab){
    $j=0;
    $height[$i]=0;
    my $nodup_rad=$nodup;
    if(ref($_) eq 'ARRAY'){
      for(@$_){
	my $cell=$_;
	$width[$j]||=0;
	if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
	  $cell=$nodup==1?"":$nodup;
	  $nodup[$i][$j]=1;
	}
	else{
	  my $height=0;
	  my $wider;
	  no warnings;
	  $not_empty[$j]=1 if !$head && length($cell)>0;
	  for(split("\n",$cell)){
	    $wider=/<input.+type=text.+size=(\d+)/i?$1:0; #hm
	    s/<[^>]+>//g;
	    $height++;
	    s/&gt;/>/g;
	    s/&lt;/</g;
	    $width[$j]=length($_)+1+$wider if length($_)+1+$wider>$width[$j];
	    $left[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$head;
	  }
	  if( $height>1 && !$no_multiline_space){
	    $height++ if !$head;
	    $height[$i-1]++ if $i>1 && $height[$i-1]==1;
	  }
	  $height[$i]=$height if $height>$height[$i];
	}
	$j++;
      }
    }
    else{
      $height[$i]=1;
      $no_header_line=1;
    }
    $head=0;
    $i++;
  }
  $i=$#height;
  $j=$#width;
  if($i==0 or $left_force) { @left=map{1}(0..$j)                         }
  else { for(0..$j){ $left[$_]=1 if !$not_empty[$_] }  }
  my @tabout;
  my $row_start_line=0;
  my @header;
  my $header_last;
  for my $x (0..$i){
    if($$tab[$x] eq '-'){
      my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
      $tabout[$row_start_line]=join(" ",map {$tegn[$_] x ($width[$_]-1)} (0..$j));
    }
    else{
      for my $y (0..$j){
	next if $remove_empty && !$not_empty[$y];
	no warnings;
	my @cell = !$header_last&&$nodup&&$nodup[$x][$y]
     	         ? ($nodup>0?():((" " x (($width[$y]-length($nodup))/2)).$nodup))
                 : split("\n",$$tab[$x][$y]);
	for(0..($height[$x]-1)){
	  my $line=$row_start_line+$_;
	  my $txt=shift(@cell);
	  $txt='' if !defined$txt;
	  $txt=sprintf("%*s",$width[$y]-1,$txt) if length($txt)>0 && !$left[$y] && ($x>0 || $no_header_line);
	  $tabout[$line].=$txt;
	  if($y==$j){
	    $tabout[$line]=~s/\s+$//;
	  }
	  else{
	    my $wider;
	       $wider = $txt=~/<input.+type=text.+size=(\d+)/i?1+$1:0;
	    $txt=~s/<[^>]+>//g;
	    $txt=~s/&gt;/>/g;
	    $txt=~s/&lt;/</g;
	    $tabout[$line].= ' ' x ($width[$y]-length($txt)-$wider);
	  }
	}
      }
    }
    $row_start_line+=$height[$x];

    #--lage streker?
    if(not $no_header_line){
      if($x==0){
	for my $y (0..$j){
	  next if $remove_empty && !$not_empty[$y];
	  $tabout[$row_start_line].=('-' x ($width[$y]-1))." ";
	}
	$row_start_line++;
	@header=("",@tabout);
      }
      elsif(
	    $x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
	    and $x+1<@$tab
	    and !$no_header_line
	    )
      {
	push(@tabout,@header);
	$row_start_line+=@header;
	$header_last=1;
      }
      else{
	$header_last=0;
      }
    }
  }#for x
  return join("\n",@tabout)."\n";
}

=head2 serialize

Returns a data structure as a string. See also C<Data::Dumper>
(serialize was created long time ago before Data::Dumper appeared on
CPAN, before CPAN even...)

B<Input:> One to four arguments.

First argument: A reference to the structure you want.

Second argument: (optional) The name the structure will get in the output string.
If second argument is missing or is undef or '', it will get no name in the output.

Third argument: (optional) The string that is returned is also put
into a created file with the name given in this argument.  Putting a
C<< > >> char in from of the filename will append that file
instead. Use C<''> or C<undef> to not write to a file if you want to
use a fourth argument.

Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
The default is infinite (some big number) so no extra newlines are output.

B<Output:> A string containing the perl-code definition that makes that data structure.
The input reference (first input argument) can be to an array, hash or a string.
Those can contain other refs and strings in a deep data structure.

Tools.pm  view on Meta::CPAN

=head2 srlz

Synonym to L</serialize>, but remove unnecessary single quote chars around
C<< \w+ >>-keys and number values (except numbers with leading zeros). Example:

serialize:

 %s=('action'=>{'del'=>'0','ins'=>'0','upd'=>'18'},'post'=>'1348','pre'=>'1348',
     'updcol'=>{'Laerestednr'=>'18','Studietypenr'=>'18','Undervisningssted'=>'7','Url'=>'11'},
     'where'=>'where 1=1');

srlz:

 %s=(action=>{del=>0,ins=>0,upd=>18},post=>1348,pre=>1348,
     updcol=>{Laerestednr=>18,Studietypenr=>18,Undervisningssted=>7,Url=>11},
     where=>'where 1=1');

Todo: update L</serialize> to do the same, but in the right way. (For now
srlz runs the string from serialize() through two C<< s/// >>, this will break
in certain cases). L</srlz> will be kept as a synonym (or the other way around).

=cut

sub srlz {
  my $s=serialize(@_);
  $s=~s,'(\w+)'=>,$1=>,g;
  $s=~s,=>'([+-]?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?)',=>$1,g;  #ikke ledende null!    hm
  $s;
}

=head2 cnttbl

 my %nordic_country_population=(Norway=>5214890,Sweden=>9845155,Denmark=>5699220,Finland=>5496907,Iceland=>331310);
 print cnttbl(\%nordic_country_population);
 Iceland   331310   1.25%
 Norway   5214890  19.61%
 Finland  5496907  20.67%
 Denmark  5699220  21.44%
 Sweden   9845155  37.03%
 SUM     26587482 100.00%

Todo: Levels...:

 my %sales=(
  Toyota=>{Prius=>19,RAV=>12,Auris=>18,Avensis=>7},
  Volvo=>{V40=>14, XC90=>4},
  Nissan=>{Leaf=>19,Qashqai=>17},
  Tesla=>{ModelS=>8}
 );
 print cnttbl(\%sales);
 Toyota SUM 56
 Volvo SUM 18
 Nissan SUM 36
 Tesla SUM 8
 SUM SUM 56 100%

=cut

sub cnttbl {
  my $hr=shift;
  my $maxlen=max(3,map length($_),keys%$hr);
  join"",ref((values%$hr)[0])
  ?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
       map [$_,cnttbl($$hr{$_})],
       sort keys%$hr }
  :do{ my $sum=sum(values%$hr);
       my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
       map sprintf($fmt,@$_,100*$$_[1]/$sum),
       (map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
       (['SUM',$sum]) }
}

=head2 ref_deep

NOT IMPLEMENTED

Same as ref, but goes deeper.

 print ref_deep( { 10=>[1,'ten'], 100=>[2,'houndred'], 1000=>[3,'thousand'] } );   # prints HASH_of_ARRAYS
 print ref_deep( { 10=>'ten',     100=>[2,'houndred'], 1000=>[3,'thousand'] } );   # prints same (mixed, deepest)
 print ref_deep( { 1=>[{a=>3,b=>6},{a=>1,b=>8}], 5=>[{a=>2,b=>5},{a=>7,b=>1}] } ); # HASH_of_ARRAYS_of_HASHES

(Todo, not supported: circular, alternatives for mixed)

=cut

sub ref_deep {
  my $s=shift; #
}



=head2 nicenum

 print 14.3 - 14.0;              # 0.300000000000001
 print 34.3 - 34.0;              # 0.299999999999997
 print nicenum( 14.3 - 14.0 );   # 0.3
 print nicenum( 34.3 - 34.0 );   # 0.3

=cut

our $Nicenum;
sub nicenum { #hm
  $Nicenum=$_[0];
  $Nicenum=~s/([\.,]\d*)((\d)\3\3\3\3\3)\d$/$1$2$3$3$3$3$3$3$3$3$3/;
  my $r=0+$Nicenum;
  #warn "nn $_[0] --> $Nicenum --> $r\n";
  $r;
}


=head2 sys

Call instead of C<system> if you want C<die> (Carp::croak) when something fails.

 sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }


=cut

sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }

=head2 recursed

Returns true or false (actually 1 or 0) depending on whether the
current sub has been called by itself or not.

 sub xyz
 {
    xyz() if not recursed;

 }

=cut

sub recursed {(caller(1))[3] eq (caller(2))[3]?1:0}



=head2 ed

String editor commands

 literals:               a-z 0-9 space
 move cursor:            FBAEPN MF MB ME
 delete:                 D Md
 up/low/camelcase word   U L C
 backspace:              -
 search:                 S
 return/enter:           R
 meta/esc/alt:           M
 shift:                  T
 cut to eol:             K
 caps lock:              C
 yank:                   Y
 start and end:          < >
 macro start/end/play:   { } !
 times for next cmd:     M<number>  (i.e. M24a inserts 24 a's)

(TODO: alfa...and more docs needed)

=cut

our $Edcursor;
sub ed {
  my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
  return $$s=ed($$s,$cs,$p,$buf) if ref($s);
  my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
  while(length($cs)){
    my $n = 0;
    my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
    $p = curb($p||0,0,length($s));
    if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
    my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
    if   ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
    elsif($c =~ /^"(.+)"$/)    { &$add($1) }
    elsif($c =~ /^\\(.)/)      { &$add($1) }
    elsif($c =~ /^S(.+)R/)     { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
    elsif($c =~ /^M(\d+)/)     { $t=$1; next }
    elsif($c eq 'F') { $p++ }
    elsif($c eq 'B') { $p-- }
    elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
    elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
    elsif($c eq 'D') { substr($s,$p,1)='' }
    elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
    elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
    elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
    elsif($c eq '-') { substr($s,--$p,1)='' if $p }
    elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
    elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
    elsif($c eq 'Y') { &$add($buf) }
    elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
    elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
    elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
    elsif($c eq '<') { $p=0 }
    elsif($c eq '>') { $p=length($s) }
    elsif($c eq 'T') { $sh=1 }
    elsif($c eq 'C') { $cl^=1 }
    elsif($c eq '{') { $m=1; @m=() }
    elsif($c eq '}') { $m=0 }
    elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
    elsif($c eq '""'){ &$add('"') }
    else             { croak "ed: Unknown cmd '$c'\n" }
    push @m, $c if $m and $c ne '{';
    #warn serialize([$c,$m,$cs],'d');
  }
  $Edcursor=$p;
  $s;
}

=head2 changed

 while(<>){
    my $line=$_;
    print "\n" if changed(/^\d\d\d\d-\d\d-(\d\d)/);
    print "\n" if changed(substr($_,8,2));
 }

Returns undef, 0 or 1. Undef if its the first time C<changed> is
called on that perl line. 0 if not the first time and the parameters
differ from the last call on that line. 1 if not the first time and
the parameters is the exact same as they where on the previous call on
that line of perl source code.

=cut

our %Changed_lastval;
sub changed {
    my $now=join($;,@_);
    my $key=join($;,caller());
    my $e=exists $Changed_lastval{$key};
    if($e){
	my $last=$Changed_lastval{$key};
	return 0 if  defined $last and  defined $now and $last eq $now
                 or !defined $last and !defined $now;
    }
    $Changed_lastval{$key}=$now;
    return $e?1:undef;
}

#todo: sub unbless eller sub damn
#todo: ..se også: use Data::Structure::Util qw/unbless/;
#todo: ...og: Acme::Damn sin damn()
#todo? sub swap($$) http://www.idg.no/computerworld/article242008.ece
#todo? catal
#todo?
#void quicksort(int t, int u) int i, m; if (t >= u) return; swap(t, randint(t, u)); m = t; for (i = t + 1; i <= u; i++) if (x[i] < x[t]) swap(++m, i); swap(t, m) quicksort(t, m-1); quicksort(m+1, u);


=head1 JUST FOR FUN

=head2 brainfu

B<Input:> one or two arguments

First argument: a string, source code of the brainfu

Tools.pm  view on Meta::CPAN

Just as L</brainfu> but instead it return the perl code to which the
brainfu code is translated. Just C<< eval() >> this perl code to run.

Example:

 print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');

Prints this string:

 my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
 ++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 ++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 ++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
 --$c;--$b[$c];--$b[$c];--$b[$c];out;$o;

=head2 brainfu2perl_optimized

Just as L</brainfu2perl> but optimizes the perl code. The same
example as above with brainfu2perl_optimized returns this equivalent
but shorter perl code:

 $b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
 while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;

=cut

sub brainfu { eval(brainfu2perl(@_)) }

sub brainfu2perl {
  my($bf,$inp)=@_;
  my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
  $perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
  $perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/</?'--$c;':/\./?'out;':/\,/?'inp;':'',split//,$bf).'$o;';
  $perl;
}

sub brainfu2perl_optimized {
  my $perl=brainfu2perl(@_);
  $perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
  1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
  $perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
  $perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
  $perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
  $perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;
  $perl=~s/;}/}/g;$perl=~s/;+/;/g;
  $perl;
}


=head1 BLOOM FILTER SUBROUTINES

Bloom filters can be used to check whether an element (a string) is a
member of a large set using much less memory or disk space than other
data structures. Trading speed and accuracy for memory usage. While
risking false positives, Bloom filters have a very strong space
advantage over other data structures for representing sets.

In the example below, a set of 100000 phone numbers (or any string of
any length) can be "stored" in just 91230 bytes if you accept that you
can only check the data structure for existence of a string and accept
false positives with an error rate of 0.03 (that is three percent, error
rates are given in numbers larger than 0 and smaller than 1).

You can not retrieve the strings in the set without using "brute
force" methods and even then you would get slightly more strings than
you put in because of the error rate inaccuracy.

Bloom Filters have many uses.

See also: L<http://en.wikipedia.org/wiki/Bloom_filter>

See also: L<Bloom::Filter>

=head2 bfinit

Initialize a new Bloom Filter:

  my $bf = bfinit( error_rate=>0.01, capacity=>100000 );

The same:

  my $bf = bfinit( 0.01, 100000 );

since two arguments is interpreted as error_rate and capacity accordingly.


=head2 bfadd

  bfadd($bf, $_) for @phone_numbers;   # Adding strings one at a time

  bfadd($bf, @phone_numbers);          # ...or all at once (faster)

Returns 1 on success. Dies (croaks) if more strings than capacity is added.

=head2 bfcheck

  my $phone_number="99999999";
  if ( bfcheck($bf, $phone_number) ) {
    print "Yes, $phone_number was PROBABLY added\n";
  }
  else {
    print "No, $phone_number was DEFINITELY NOT added\n";
  }

Returns true if C<$phone_number> exists in C<@phone_numbers>.

Returns false most of the times, but sometimes true*), if C<$phone_number> doesn't exists in C<@phone_numbers>.

*) This is called a false positive.

Checking more than one key:

 @bools = bfcheck($bf, @keys);          # or ...
 @bools = bfcheck($bf, \@keys);         # better, uses less memory if @keys is large

Returns an array the same size as @keys where each element is true or false accordingly.

=head2 bfgrep

Same as C<bfcheck> except it returns the keys that exists in the bloom filter

 @found = bfgrep($bf, @keys);           # or ...
 @found = bfgrep($bf, \@keys);          # better, uses less memory if @keys is large, or ...
 @found = grep bfcheck($bf,$_), @keys;  # same but slower

=head2 bfgrepnot

Same as C<bfgrep> except it returns the keys that do NOT exists in the bloom filter:

 @not_found = bfgrepnot($bf, @keys);          # or ...
 @not_found = bfgrepnot($bf, \@keys);         # better, uses less memory if @keys is large, or ...
 @not_found = grep !bfcheck($bf,$_), @keys);  # same but slower

=head2 bfdelete

Deletes from a counting bloom filter.

To enable deleting be sure to initialize the bloom filter with the
numeric C<counting_bits> argument. The number of bits could be 2 or 3*)
for small filters with a small capacity (a small number of keys), but
setting the number to 4 ensures that even very large filters with very
small error rates would not overflow.

*) Acme::Tools do not currently support C<< counting_bits => 3 >> so 4
and 8 are the only practical alternatives where 8 is almost always overkill.

 my $bf=bfinit(
   error_rate    => 0.001,
   capacity      => 10000000,
   counting_bits => 4              # power of 2, that is 2, 4, 8, 16 or 32
 );
 bfadd(   $bf, @unique_phone_numbers);
 bfdelete($bf, @unique_phone_numbers);

Example: examine the frequency of the counters with 4 bit counters and 4 million keys:

 my $bf=bfinit( error_rate=>0.001, capacity=>4e6, counting_bits=>4 );
 bfadd($bf,[1e3*$_+1 .. 1e3*($_+1)]) for 0..4000-1;  # adding 4 million keys one thousand at a time
 my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
 printf "%8d counters = %d\n",$c{$_},$_ for sort{$a<=>$b}keys%c;

The output:

 28689562 counters = 0
 19947673 counters = 1
  6941082 counters = 2
  1608250 counters = 3
   280107 counters = 4
    38859 counters = 5
     4533 counters = 6
      445 counters = 7
       46 counters = 8
        1 counters = 9

Even after the error_rate is changed from 0.001 to a percent of that, 0.00001, the limit of 16 (4 bits) is still far away:

 47162242 counters = 0
 33457237 counters = 1
 11865217 counters = 2
  2804447 counters = 3
   497308 counters = 4
    70608 counters = 5
     8359 counters = 6
      858 counters = 7
       65 counters = 8
        4 counters = 9

In algorithmic terms the number of bits needed is C<ln of ln of n>.  Thats why 4 bits (counters up
to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
(Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
check for that, perhaps in future versions).

Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
a counting bloom filter is used to be sent over slow networks because they are very compressable by
common compression tools like I<gzip> or L<Compress::Zlib> and such.

Deletion of non-existing keys makes C<bfdelete> die (croak).

=head2 bfdelete

Deletes from a counting bloom filter:

 bfdelete($bf, @keys);
 bfdelete($bf, \@keys);

Returns C<$bf> after deletion.

Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.

=head2 bfaddbf

Adds another bloom filter to a bloom filter.

Bloom filters has the proberty that bit-wise I<OR>-ing the bit-filters
of two filters with the same capacity and the same number and type of
hash functions, adds the filters:

  my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
  my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);

  bfaddbf($bf1,$bf2);

  print "Yes!" if bfgrep($bf1, 1..1000) == 1000;

Prints yes since C<bfgrep> now returns an array of all the 1000 elements.

Croaks if the filters are of different dimensions.

Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)

=head2 bfsum

Returns the number of 1's in the filter.

 my $percent=100*bfsum($bf)/$$bf{filterlength};
 printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity

Sums the counters for counting bloom filters (much slower than for non counting).

=head2 bfdimensions

Input, two numeric arguments: Capacity and error_rate.

Outputs an array of two numbers: m and k.

  m = - n * log(p) / log(2)**2   # n = capacity, m = bits in filter (divide by 8 to get bytes)
  k = log(1/p) / log(2)          # p = error_rate, uses perls internal log() with base e (2.718)

...that is: m = the best number of bits in the filter and k = the best
number of hash functions optimized for the given capacity (n) and
error_rate (p). Note that k is a dependent only of the error_rate.  At
about two percent error rate the bloom filter needs just the same
number of bytes as the number of keys.

 Storage (bytes):
 Capacity      Error-rate  Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
               0.000000001 0.00000001 0.0000001  0.000001   0.00001    0.0001     0.001      0.01       0.02141585 0.1        0.5        0.99
 ------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
            10 54.48       48.49      42.5       36.51      30.52      24.53      18.53      12.54      10.56      6.553      2.366      0.5886
           100 539.7       479.8      419.9      360        300.1      240.2      180.3      120.4      100.6      60.47      18.6       0.824
          1000 5392        4793       4194       3595       2996       2397       1798       1199       1001       599.6      180.9      3.177
         10000 5.392e+04   4.793e+04  4.194e+04  3.594e+04  2.995e+04  2.396e+04  1.797e+04  1.198e+04  1e+04      5991       1804       26.71
        100000 5.392e+05   4.793e+05  4.193e+05  3.594e+05  2.995e+05  2.396e+05  1.797e+05  1.198e+05  1e+05      5.991e+04  1.803e+04  262
       1000000 5.392e+06   4.793e+06  4.193e+06  3.594e+06  2.995e+06  2.396e+06  1.797e+06  1.198e+06  1e+06      5.991e+05  1.803e+05  2615
      10000000 5.392e+07   4.793e+07  4.193e+07  3.594e+07  2.995e+07  2.396e+07  1.797e+07  1.198e+07  1e+07      5.991e+06  1.803e+06  2.615e+04
     100000000 5.392e+08   4.793e+08  4.193e+08  3.594e+08  2.995e+08  2.396e+08  1.797e+08  1.198e+08  1e+08      5.991e+07  1.803e+07  2.615e+05
    1000000000 5.392e+09   4.793e+09  4.193e+09  3.594e+09  2.995e+09  2.396e+09  1.797e+09  1.198e+09  1e+09      5.991e+08  1.803e+08  2.615e+06
   10000000000 5.392e+10   4.793e+10  4.193e+10  3.594e+10  2.995e+10  2.396e+10  1.797e+10  1.198e+10  1e+10      5.991e+09  1.803e+09  2.615e+07
  100000000000 5.392e+11   4.793e+11  4.193e+11  3.594e+11  2.995e+11  2.396e+11  1.797e+11  1.198e+11  1e+11      5.991e+10  1.803e+10  2.615e+08
 1000000000000 5.392e+12   4.793e+12  4.193e+12  3.594e+12  2.995e+12  2.396e+12  1.797e+12  1.198e+12  1e+12      5.991e+11  1.803e+11  2.615e+09

 Error rate:               0.99   Hash functions:  1
 Error rate:                0.5   Hash functions:  1
 Error rate:                0.1   Hash functions:  3
 Error rate: 0.0214158522653385   Hash functions:  6
 Error rate:               0.01   Hash functions:  7
 Error rate:              0.001   Hash functions: 10
 Error rate:             0.0001   Hash functions: 13
 Error rate:            0.00001   Hash functions: 17
 Error rate:           0.000001   Hash functions: 20
 Error rate:          0.0000001   Hash functions: 23
 Error rate:         0.00000001   Hash functions: 27
 Error rate:        0.000000001   Hash functions: 30

=head2 bfstore

Storing and retrieving bloom filters to and from disk uses L<Storable>s C<store> and C<retrieve>. This:

 bfstore($bf,'filename.bf');

It the same as:

 use Storable qw(store retrieve);
 ...
 store($bf,'filename.bf');

Tools.pm  view on Meta::CPAN


The internal hash-functions are C<< md5( "$key$salt" ) >> from L<Digest::MD5>.

Since C<md5> returns 128 bits and most medium to large sized bloom
filters need only a 32 bit hash function, the result from md5() are
split (C<unpack>-ed) into 4 parts 32 bits each and are treated as if 4
hash functions was called at once (speedup). Using different salts to
the key on each md5 results in different hash functions.

Digest::SHA512 would have been even better since it returns more bits,
if it werent for the fact that it's much slower than Digest::MD5.

String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:

 time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6'       #5.56 sec
 time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6'           #2.79 sec, faster but not per bit
 time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)

Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.

=head2 Theory and math behind bloom filters

L<http://www.internetmathematics.org/volumes/1/4/Broder.pdf>

L<http://blogs.sun.com/jrose/entry/bloom_filters_in_a_nutshell>

L<http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html>

See also Scaleable Bloom Filters: L<http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf> (not implemented in Acme::Tools)

...and perhaps L<http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf>

=cut

sub bfinit {
  return bfretrieve(@_)                             if @_==1;
  return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
  return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
  require Digest::MD5;
  @_%2&&croak "Arguments should be a hash of equal number of keys and values";
  my %arg=@_;
  my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
  my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
  croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
  croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
  croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
  croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
  my $bf={error_rate    => 0.001,  #default p
	  capacity      => 100000, #default n
          min_hashfuncs => 1,
          max_hashfuncs => 100,
	  counting_bits => 1,      #default: not counting filter
	  adaptive      => 0,
	  %arg,                    #arguments
	  key_count     => 0,
	  overflow      => {},
	  version       => $Acme::Tools::VERSION,
	 };
  croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
  @$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
  @$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
  $$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x   new empty filter
  $$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
               :$$bf{filterlength}<=2**32/4 ? "N*"
               :                              "Q*";
  bfadd($bf,@{$arg{keys}}) if $arg{keys};
  return $bf;
}
sub bfaddbf {
  my($bf,$bf2)=@_;
  my $differror=join"\n",
    map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
    grep $$bf{$_} ne $$bf2{$_},
    qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
  croak $differror if $differror;
  croak "Can not add adaptive bloom filters" if $$bf{adaptive};
  my $count=$$bf{key_count}+$$bf2{key_count};
  croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
    if $count > $$bf{capacity};
  $$bf{key_count}+=$$bf2{key_count};
  if($$bf{counting_bits}==1){
    $$bf{filter} |= $$bf2{filter};
    #$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
  }
  else {
    my $cb=$$bf{counting_bits};
    for(0..$$bf{filterlength}-1){
      my $sum=
      vec($$bf{filter}, $_,$cb)+
      vec($$bf2{filter},$_,$cb);
      if( $sum>2**$cb-1 ){
	$sum=2**$cb-1;
	$$bf{overflow}{$_}++;
      }
      vec($$bf{filter}, $_,$cb)=$sum;
      no warnings;
      $$bf{overflow}{$_}+=$$bf2{overflow}{$_}
	and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
	and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
	if exists $$bf2{overflow}{$_};
    }
  }
  return $bf; #for convenience
}
sub bfsum {
  my($bf)=@_;
  return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
  my($sum,$cb)=(0,$$bf{counting_bits});
  $sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
  return $sum;
}
sub bfadd {
  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
  for(@$keysref){
    #croak "Key should be scalar" if ref($_);
    $$bf{key_count} >= $n and croak "Exceeded filter capacity $n"  or  $$bf{key_count}++;
    my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
    if ($cb==1 and !$adaptive) { # normal bloom filter
      vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
    }
    elsif ($cb>1) {                 # counting bloom filter
      for(0..$k-1){
	my $pos=$h[$_] % $m;
	my $c=
  	vec($$bf{filter}, $pos, $cb) =
	vec($$bf{filter}, $pos, $cb) + 1;
	if($c==0){
	  vec($$bf{filter}, $pos, $cb) = -1;
	  $$bf{overflow}{$pos}++
	    and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
	    and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
	}
      }
    }
    elsif ($adaptive) {             # adaptive bloom filter
      my($i,$key,$bit)=(0+@h,$_);
      for(0..$$bf{filterlength}-1){
	$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
	my $pos=shift(@h) % $m;
	$bit=vec($$bf{filter}, $pos, 1);
	vec($$bf{filter}, $pos, 1)=1;
	last if $_>=$k-1 and $bit==0;
      }
    }
    else {croak}
  }
  return 1;
}
sub bfcheck {
  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
  my $wa=wantarray();
  if(!$adaptive){    # normal bloom filter  or  counting bloom filter
    return map {
      my $match = 1; # match if every bit is on
      my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
      vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      return $match if !$wa;
      $match;
    } @$keysref;
  }
  else {             # adaptive bloom filter
    return map {
      my($match,$i,$key,$bit,@h)=(1,0,$_);
      for(0..$$bf{filterlength}-1){
	$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
	my $pos=shift(@h) % $m;
	$bit=vec($$bf{filter}, $pos, 1);
	$match++ if $_ >  $k-1 and $bit==1;
	$match=0 if $_ <= $k-1 and $bit==0;
	last     if $bit==0;
      }
      return $match if !$wa;
      $match;
    } @$keysref;
  }
}
sub bfgrep { # just a copy of bfcheck with map replaced by grep
  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
  return grep {
    my $match = 1; # match if every bit is on
    my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
    vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
    $match;
  } @$keysref;
}
sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
  return grep {
    my $match = 1; # match if every bit is on
    my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
    vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
    !$match;
  } @$keysref;
}
sub bfdelete {
  require Digest::MD5;
  my($bf,@keys)=@_;
  return if !@keys;
  my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
  my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
  croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
  for my $key (@$keysref){
    my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
    $$bf{key_count}==0 and croak "Deleted all and then some"  or  $$bf{key_count}--;
    my($ones,$croak,@pos)=(0);
    for(0..$k-1){
      my $pos=$h[$_] % $m;
      my $c=
      vec($$bf{filter}, $pos, $cb);
      vec($$bf{filter}, $pos, $cb)=$c-1;
      $croak="Cannot delete a non-existing key $key" if $c==0;
      $croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
	if $c==1 and ++$ones and $$bf{overflow}{$pos};
    }
    if($croak){ #rollback
      vec($$bf{filter}, $h[$_] % $m, $cb)=
      vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
      croak $croak;
    }
  }
  return $bf;
}
sub bfstore {
  require Storable;
  Storable::store(@_);
}
sub bfretrieve {
  require Storable;
  my $bf=Storable::retrieve(@_);
  carp  "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
  return $bf;
}
sub bfclone {
  require Storable;
  return Storable::dclone(@_); #could be faster
}
sub bfdimensions_old {
  my($n,$p,$mink,$maxk, $k,$flen,$m)=
    @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
   :@_==2 ? (@_,1,100,1)
          : croak "Wrong number of arguments (".@_."), should be 2";
  croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
  $m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
  $flen = int(1+$flen);
  return ($flen,$k);
}
sub bfdimensions {
  my($n,$p,$mink,$maxk)=
    @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
   :@_==2 ? (@_,1,100)
          : croak "Wrong number of arguments (".@_."), should be 2";
  my $k=log(1/$p)/log(2);           # k hash funcs
  my $m=-$n*log($p)/log(2)**2;      # m bits in filter
  return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
}

#crontab -e
#01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1

sub _update_currency_file { #call from cron
  my $fn=shift()||'/var/www/html/currency-rates';
  my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or croak for values %exe;
  open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
  print $F "#-- Currency rates ".localtime()." (".time().")\n";
  print $F "#   File generated by Acme::Tools version $VERSION\n";
  print $F "#   Updated every 6th hour on http://calthis.com/currency-rates\n";
  print $F "NOK 1.000000000\n";
  my $amount=1000;
  my $data=qx($exe{curl} -s "https://www.x-rates.com/table/?from=NOK&amount=$amount");
  $data=~s,to=([A-Z]{3})(.)>,$2>$1</td><td>,g;
  my @data=ht2t($data,"Alphabetical order"); shift @data;
  @data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
  my %data=map split,@data;
  my $json=qx( $exe{curl} -s https://api.coinmarketcap.com/v1/ticker/ );
  eval "require JSON;"; croak if $@;
  my $arr=JSON::decode_json($json);
  for my $c (qw(BTC LTC XBT ETH XRP BCH ETC)) {
      my @a=grep$$_{symbol} eq $c,@$arr;
      next if @a != 1 or !$a[0]{price_usd};
      push @data, "$c ".($a[0]{price_usd}*$data{USD})."\n";
  }
  #die srlz(\@data,'data');
  print $F sort(@data);
  close($F);
  qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
}

sub ftype {
  my $f=shift;
  -e $f and
      -f$f ? 'file'         # -f  File is a plain file.
     :-d$f ? 'dir'          # -d  File is a directory.
     :-l$f ? 'symlink'      # -l  File is a symbolic link.
     :-p$f ? 'pipe'         # -p  File is a named pipe (FIFO), or Filehandle is a pipe.
     :-S$f ? 'socket'       # -S  File is a socket.
     :-b$f ? 'blockfile'    # -b  File is a block special file.
     :-c$f ? 'charfile'     # -c  File is a character special file.
     :-t$f ? 'ttyfile'      # -t  Filehandle is opened to a tty.
     :       ''
  or undef;
}

sub ext2mime {
  my $ext=shift(); #or filename
  #http://www.sitepoint.com/web-foundations/mime-types-complete-list/
  croak "todo: ext2mime not yet implemented";
  #return "application/json";#feks
}

sub base64 ($;$) { #
  if ($] >= 5.006) {
    require bytes;
    croak "base64 failed: only defined for bytes"
      if bytes::length($_[0]) > length($_[0])
      or $] >= 5.008 && $_[0] =~ /[^\0-\xFF]/
  }
  my $eol=defined$_[1]?$_[1]:"\n";
  my $res=pack("u",$_[0]);
  $res=~s/^.//mg;
  $res=~s/\n//g;
  $res=~tr|` -_|AA-Za-z0-9+/|;
  my $pad=(3-length($_[0])%3)%3;
  $res=~s/.{$pad}$/'=' x $pad/e if $pad;
  $res=~s/(.{1,76})/$1$eol/g if length($eol); #todo !=76
  $res;
}

our $Fix_unbase64=0;
sub unbase64 ($) {
  my $s=shift;
  $s=~tr,0-9a-zA-Z+=/,,cd;
  if($Fix_unbase64){ $s.='=' while length($s)%4 }
  croak "unbase64 failed: length ".length($s)." not multiple of 4" if length($s)%4;
  $s=~s/=+$//;
  $s=~tr|A-Za-z0-9+/| -_|;
  length($s) ? unpack("u",join'',map(chr(32+length($_)*3/4).$_,$s=~/(.{1,60})/gs)) : "";
}


=head1 COMMANDS

=head2 install_acme_command_tools

 sudo perl -MAcme::Tools -e install_acme_command_tools

 Wrote executable /usr/local/bin/conv
 Wrote executable /usr/local/bin/due
 Wrote executable /usr/local/bin/xcat
 Wrote executable /usr/local/bin/freq
 Wrote executable /usr/local/bin/deldup
 Wrote executable /usr/local/bin/ccmd
 Wrote executable /usr/local/bin/z2z
 Wrote executable /usr/local/bin/2gz
 Wrote executable /usr/local/bin/2gzip
 Wrote executable /usr/local/bin/2bz2
 Wrote executable /usr/local/bin/2bzip2
 Wrote executable /usr/local/bin/2xz
 Wrote executable /usr/local/bin/resubst

Examples of commands then made available:

 conv 1 USD EUR                #might show 0.88029 if thats the current currency rate. Uses conv()
 conv .5 in cm                 #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
 due [-h] /path/1/ /path/2/    #like du, but show statistics on file extentions instead of subdirs
 xcat file                     #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
 freq file                     #reads file(s) or stdin and view counts of each byte 0-255
 ccmd grep string /huge/file   #caches stdout+stderr for 15 minutes (default) for much faster results later
 ccmd "sleep 2;echo hello"     #slow first time. Note the quotes!
 ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
 z2z [-pvk1-9oe -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
                               #-k keep org file, -o overwrite, 1-9 compression degree, -e for xz does "extreme"
                               #compressions, very slow. For some data types this reduces size significantly
                               #2xz and 2bz2 depends on xz and bzip2 being installed on system
 2xz                           #same as z2z with -t xz
 2bz2                          #same as z2z with -t bz2
 2gz                           #same as z2z with -t gz

 rttop
 trunc file(s)
 wipe file(s)

=head3 z2z

=head3 2xz

=head3 2bz2

=head3 2gz

The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C<z2z> with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.

 z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files

Converts (recompresses) files from one compression type to another. For instance from .gz to .bz2
Keeps uid, gid, mode (chmod) and mtime.

Tools.pm  view on Meta::CPAN

  my(%c,%b,$cnt,$bts,%xtime);
  my $zext=$o{z}?'(\.(z|Z|gz|bz2|xz|rz|kr|lrz|rz))?':'';
  $o{E}||=11;
  my $r=qr/(\.[^\.\/]{1,$o{E}}$zext)$/i;
  my $qrexcl=exists$o{e}?qr/$o{e}/:0;
  #TODO: ought to work: tar cf - .|tar tvf -|due
  my $x=$o{M}?9:$o{C}?10:$o{A}?8:9;
  if(-p STDIN or @Due_fake_stdin){
    die "due: can not combine STDIN and args\n" if @argv;
    my $stdin=join"",map"$_\n",@Due_fake_stdin; #test
    open(local *STDIN, '<', \$stdin) or die "ERR: $! $?\n" if $stdin;
    my $rl=qr/(^| )\-[rwx\-sS]{9}\s+(?:\d )?(?:[\w\-]+(?:\/|\s+)[\w\-]+)\s+(\d+)\s+.*?([^\/]*\.[\w,\-]+)?$/;
    my $MorP=$o{M}||$o{C}||$o{A}||$o{P}?"due: -M, -C, -A and -P not yet implemented for STDIN unless list of filenames only\n":0;
    while(<STDIN>){
      chomp;
      next if /\/$/;
      my($f,$sz,$xtime)=(/$rl/?($3,$2):-f$_?($_,(stat)[7,$x]):next);
      #   1576142    240 -rw-r--r--   1 root     root       242153 april  4  2016 /opt/wine-staging/share/wine/wine.inf
      my $ext=$f=~$r?$1:'';
      $ext=lc($ext) if $o{i};
      $cnt++;    $c{$ext}++;
      $bts+=$sz; $b{$ext}+=$sz;
      defined $xtime and $xtime{$ext}.=",$xtime" or die $MorP if $MorP;
    }
  }
  else { #hm DRY
    @argv=('.') if !@argv;
    File::Find::find({follow=>0, wanted =>
      sub {
        return if !-f$_;
        return if $qrexcl and defined $File::Find::name and $File::Find::name=~$qrexcl;
        my($sz,$xtime)=(stat($_))[7,$x];
        my $ext=m/$r/?$1:'';
        $ext=lc($ext) if $o{i};
        $cnt++;    $c{$ext}++;
        $bts+=$sz; $b{$ext}+=$sz;
        $xtime{$ext}.=",$xtime" if $o{M} || $o{C} || $o{A} || $o{P};
	1;
      } },@argv);
  }
  my($f,$s)=$o{k}?("%14.2f kb",sub{$_[0]/1024})
           :$o{K}?("%14.2f Kb",sub{$_[0]/1000})
           :$o{m}?("%14.2f mb",sub{$_[0]/1024**2})
           :$o{h}?("%14s",     sub{bytes_readable($_[0])})
           :      ("%14d b",   sub{$_[0]});
  my @e=$o{a}?(sort(keys%c))
       :$o{c}?(sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c)
       :      (sort{$b{$a}<=>$b{$b} or $a cmp $b}keys%c);
  my $perc=!$o{M}&&!$o{C}&&!$o{A}&&!$o{P}?sub{""}:
    sub{
      my @p=$o{P}?(10,50,90):(50);
      my @m=@_>0 ? do {grep$_, split",", $xtime{$_[0]}}
                 : do {grep$_, map {split","} values %xtime};
      my @r=percentile(\@p,@m);
      @r=(min(@m),@r,max(@m)) if $o{M}||$o{C}||$o{A};
      @r=map int($_), @r;
      my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
      @r=map tms($_,$fmt), @r;
      "  ".join(" ",@r);
  };
  my $width=max( 10, grep $_, map length($_), @e );
  @e=@e[-10..-1] if $o{t} and @e>10; #-t tail
  printf("%-*s %8d $f %7.2f%%%s\n",$width,$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
  printf("%-*s %8d $f %7.2f%%%s\n",$width,"Sum",$cnt,&$s($bts),100,&$perc());
}
sub cmd_resubst {
  my %o;
  my $zo="123456789e";
  my @argv=opts("f:t:vno:gi$zo",\%o,@_);
  if(exists$o{t}){ $o{t}=~s,\\,\$, } else { $o{t}='' }
  my($i,$tc,$tbfr,$tbto)=(0,0,0,0);
  for my $file (@argv){
      my $zopt=join" ",map"-$_",grep$o{$_},split//,$zo;
      my $oext=$o{o}?$o{o}:$file=~/\.(gz|bz2|xz)$/i?$1:'';
      my $open_out_pre=$oext?"|".openstr_prog({qw/gz gzip bz2 bzip2 xz xz/}->{lc($oext)})." $zopt":'';
      my $open_out="$open_out_pre > $file.tmp$$";
      my $open_in=openstr($file);
      #      die srlz(\%o,'o','',1);
      open my $I, $open_in  or croak"ERR: open $open_in failed. $! $?\n";
      open my $O, $open_out or croak"ERR: open $open_out failed. $! $?\n";
      my $c=0;
      my $mod=join"",grep$o{$_},qw(g i);
      eval"while(<\$I>){ \$c+=s/\$o{f}/$o{t}/$mod;print \$O \$_ }";
      $tc+=$c;
      close($I);close($O);
      chall($file,"$file.tmp$$") or croak"ERR: chall $file\n" if !$o{n};
      my($bfr,$bto)=(-s$file,-s"$file.tmp$$");
      unlink $file or croak"ERR: cant rm $file\n";
      my $newfile=$o{o}?repl($file,qr/\.(gz|bz2|xz)$/i,".$oext"):$file;
      rename("$file.tmp$$",$newfile) or croak"ERR: rename $file.tmp$$ -> $newfile failed\n";
      if($o{v}){
	my $pr=$bfr?100*$bto/$bfr:0;
	printf "%*d/%d %*s %7d =>%8d b (%2d%%) %s\n",
	  length(0+@argv), ++$i, 0+@argv, -15, "$tc/$c", $bfr, $bto, $pr, $file;
	$tbfr+=$bfr;
	$tbto+=$bto;
      }
  }
  if($o{v} and @argv>1){
      printf "Replaces: %d  Bytes before: %d  After: %d   Change: %.1f%%\n",
        $tc, $tbfr, $tbto, $tbfr?100*($tbto-$tbfr)/$tbfr:0
  }
  $tc;
}
sub cmd_xcat {
  for my $fn (@_){
    my $os=openstr($fn);
    open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
    #binmode($FH);#hm?
    print while <$FH>;
    close($FH);
  }
}
sub cmd_freq {
  my(@f,$i);
  map $f[$_]++, unpack("C*",$_) while <>;
  my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE  CHAR   COUNT","---- ----- -------");
  my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-å",146,"DOS-Æ",157,"DOS-Ø",143,"DOS-Å",map{($_," ")}0..31);
  printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
  my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
}
sub cmd_deldup {
  cmd_finddup('-d',@_);
}
sub cmd_finddup {
  # http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
  # die "todo: finddup not ready yet"
  my %o;
  my @argv=opts("ak:dhsnqv0P:FMRp",\%o,@_); $o{P}=1024*8 if!defined$o{P}; $o{k}='' if!defined$o{k};
  croak"ERR: cannot combine -a with -d, -s or -h" if $o{a} and $o{d}||$o{s}||$o{h};
  require File::Find;
  @argv=map{
      my @f;
      if(-d$_){ File::Find::find({follow=>0,wanted=>sub{return if !-f$_;push@f,$File::Find::name;1}},$_) }
      else    { @f=($_) }
      @f;
  }@argv;
  my %md5sum;
  my $md5sum=sub{$md5sum{$_[0]}=md5sum($_[0]) if!defined$md5sum{$_[0]}}; #memoize
  my $md5sum_1st_part=sub{
      open my $fh, "<", $_[0] or die "ERR: Could not read $_[0]";
      binmode($fh);
      my $buf; read($fh,$buf,$o{P});
      close($fh);
      md5sum(\$buf);
  };
  my @checks=( #todo: stat()[0,1] (or[0,1,7]?) and diff filename => no need for md5, is hardlink! just linux?
      sub{-s$_[0]},
      sub{-s$_[0]<=$o{P}?md5sum($_[0]):&$md5sum_1st_part($_[0])},
      sub{md5sum($_[0])}
  );
  pop @checks if $o{M}; #4tst
  my $i=0;
  my %s=map{($_=>++$i)}@argv; #sort

Tools.pm  view on Meta::CPAN

            ? "  ETA:".sprintf("%-8s",sec_readable(eta('z2z',$bsf,$sum)-time_fp()))." $str"
	    : "   TA: 0s $str"
	  if $sum>1e6;
        $str="$i/".@argv." $str";
      }
      print "$str $new\n";
    }
  }
  if($o{v} and @argv>1){
      my $bytes=$o{h}?'':'bytes ';
      my $str=
        sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
	  0+@argv,
	  time_fp()-$start,
	  (map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
	  100*$sumnew/$sum;
      $str=~s,\((\d),(+$1,;
      print $str;
  }
}

=head2 args

Parses command line options and arguments:

 my %opt;
 my @argv=Acme::Tools::args('i:nJ123',\%opt,@ARGV);   #returns remaining command line elements after C<-o ptions> are parsed into C<%opt>.

Uses C<Getopt::Std::getopts()>. First arg names the different one char
options and an optional C<:> behind the letter or digit marks that the
switch takes an argument.

=cut

sub args {
    my $switches=shift;
    my $hashref=shift;
    my $re_sw='^([a-z0-9]:?)+$';
    croak "ERR: args: first arg $switches dont match $re_sw\n" if $switches !~ /$re_sw/i;
    croak "ERR: second arg to args() not hashref\n" if ref($hashref) ne 'HASH';
    local @ARGV=@_;
    require Getopt::Std;
    Getopt::Std::getopts($switches => $hashref);
    (@ARGV);
}

sub opts {
    my($def, $hashref, @a)=@_;
    @a=@ARGV if @_<=2;
    my %def=map{/(\w)(:?)/;($1=>$2?2:1)}$def=~/(\w:?)/g;
    my $o1=join"",grep$def{$_}==1,sort keys%def;
    my $o= join"",                sort keys%def;
    my @r;
    while(@a){
	my $a=shift(@a);
	if($a=~/^-([$o1])([$o].*)$/){
	    unshift@a,"-$1","-$2";
	}
	elsif($a=~/^-(\w)(.*)$/){
	    my $d=$def{$1}//0;
	    push@{$$hashref{$1}},$d==1 && length($2) ? croak"opt -$1 has no arg (is $2 here)"
		                :$d==1               ? 1
				:$d==2 && length($2) ? $2
				:$d==2               ? shift(@a)
				:croak"unknown opt -$1";
	}
	elsif($a eq '--'){
	    last;
	}
	else {
	    push @r, $a;
	}
    }
    $_=join",",@$_ for values %$hashref;
    (@r,@a)
}

#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp -
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp Tools.pm
sub cmd_zsize {
  my %o;
  my @argv=opts("heEpts",\%o,@_);
  my $stdin=!@argv || join(",",@argv) eq '-';
  @argv=("/tmp/acme-tools.$$.stdin") if $stdin;
  writefile($argv[0],join("",<STDIN>)) if $stdin;
  my @prog=grep qx(which $_), qw(gzip bzip2 xz zstd brotli);
  for my $f (@argv){
      my $sf=-s$f;
      print "--- $f does not exists\n" and next if !-e$f;
      print "--- $f is not a file\n" and next if !-f$f;
      print "--- $f ($sf b) is not readable\n" and next if !-r$f;
      print "--- $sf b  ".bytes_readable($sf)."  ".($stdin?"-":$f)."\n";
      next if !$sf;
      my(@t,@s);
      for my $prog (@prog){
          next if !qx(which $prog);
          my @l=1..9;
          push @l,map"e$_",1..9 if $prog eq 'xz' and $o{e};
          @l=map"e$_",1..9      if $prog eq 'xz' and $o{E};
          @l=map 10+$_,@l       if $prog eq 'zstd';
          @l=map"q $_",3..11    if $prog eq 'brotli';
          printf "%-6s",$prog;
          push @t, $prog, [] if $o{t};
          push @s, $prog, [] if $o{p} and $o{s};
          for my $l (@l){ #level
              my $t=time_fp();
              my $b=qx(cat $f|$prog -$l|wc -c);
              push@{$t[-1]},time_fp()-$t if $o{t};
              push@{$s[-1]},$b           if $o{p} and $o{s};
              $o{p} ? printf("%9.1f%% ",100*$b/$sf)
             :$o{h} ? printf("%10s ",bytes_readable($b))
             :        printf("%10d ",$b);
          }
          print "\n";
      }
      while(@s){
          printf "%-6s",shift@s;
          $o{h}?printf("%10s ",bytes_readable($_)):printf("%10d ",$_) for @{shift@s}; print "\n";
      }
      while(@t){
          printf "%-6s",shift@t;
          printf "%9.3fs ",$_ for @{shift@t}; print "\n";



( run in 0.641 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )