Acme-Tools

 view release on metacpan or  search on metacpan

Tools.pm  view on Meta::CPAN

our %EXPORT_TAGS = ( all => [ qw() ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
our @EXPORT = qw(
  min
  max
  mins
  maxs
  sum
  avg
  geomavg
  harmonicavg
  stddev
  rstddev
  median
  percentile
  $Resolve_iterations
  $Resolve_last_estimate
  $Resolve_time
  resolve
  resolve_equation
  conv
  rank
  rankstr
  egrep
  eqarr
  sorted
  sortedstr
  sortby
  subarrays
  pushsort
  pushsortstr
  binsearch
  binsearchstr
  random
  random_gauss
  big
  bigi
  bigf
  bigr
  bigscale
  nvl
  repl
  replace
  decode
  decode_num
  between
  btw
  curb
  bound
  log10
  log2
  logn
  distinct
  in
  in_num
  uniq
  union
  union_all
  minus
  minus_all
  intersect
  intersect_all
  not_intersect
  mix
  zip
  sim
  sim_perm
  subarr
  subhash
  hashtrans
  zipb64
  zipbin
  unzipb64
  unzipbin
  gzip
  gunzip
  bzip2
  bunzip2
  ipaddr
  ipnum
  ipnum_ok
  iprange_ok
  in_iprange
  webparams
  urlenc
  urldec
  ht2t
  chall
  makedir
  qrlist
  ansicolor
  ccn_ok
  KID_ok
  writefile
  readfile
  readdirectory
  basename
  dirname
  wipe
  username
  range
  globr
  permutations
  perm
  permute
  permute_continue
  trigram
  sliding
  chunks
  chars
  cart
  reduce
  int2roman
  roman2int
  num2code
  code2num
  dec2bin
  dec2hex
  dec2oct
  bin2dec
  bin2hex
  bin2oct
  hex2dec
  hex2bin
  hex2oct
  oct2dec
  oct2bin
  oct2hex
  base
  gcd
  lcm
  pivot
  tablestring
  upper
  lower
  trim
  rpad
  lpad
  cpad
  dserialize
  serialize
  srlz
  cnttbl
  nicenum
  bytes_readable
  sec_readable
  distance
  tms
  s2t
  easter
  time_fp
  timems
  sleep_fp
  sleeps
  sleepms
  sleepus
  sleepns
  eta
  sys
  recursed
  md5sum
  pwgen
  which
  read_conf
  openstr
  printed
  ldist
  $Re_isnum
  isnum
  part
  parth
  parta
  a2h
  h2a
  refa
  refh
  refs
  refaa
  refah
  refha
  refhh
  pushr
  popr
  shiftr
  unshiftr
  splicer
  keysr
  valuesr
  eachr
  joinr
  pile
  aoh2sql
  aoh2xls
  base64
  unbase64
  opts
  ed
  changed
  $Edcursor
  brainfu
  brainfu2perl
  brainfu2perl_optimized
  bfinit
  bfsum
  bfaddbf
  bfadd

Tools.pm  view on Meta::CPAN


 my @even = range(1000,2000,2);      # even numbers between 1000 and 2000
 my @odd  = range(1001,2001,2);

 my $dice = random(1,6);
 my $color = random(['red','green','blue','yellow','orange']);

 pushr $arrayref[$num], @stuff;      # push @{ $arrayref[$num] }, @stuff ... popr, shiftr, unshiftr

 print 2**200;       # 1.60693804425899e+60
 print big(2)**200;  # 1606938044258990275541962092341162602522202993782792835301376

 ...and much more.

=encoding utf8

=head1 ABSTRACT

About 120 more or less useful perl subroutines lumped together and exported into your namespace.

=head1 DESCRIPTION

Subs created and collected since the mid-90s.

=head1 INSTALLATION

 sudo cpan Acme::Tools
 sudo cpanm Acme::Tools   # after: sudo apt-get install cpanminus make   # for Ubuntu 12.04

Or to get the very newest:

 git clone https://github.com/kjetillll/Acme-Tools.git
 cd Acme-Tools
 perl Makefile.PL
 make test
 sudo make install

=head1 EXPORT

Almost every sub, about 90 of them.

Beware of namespace pollution. But what did you expect from an Acme module?

=head1 NUMBERS

=head2 num2code

See L</code2num>

=head2 code2num

C<num2code()> convert numbers (integers) from the normal decimal system to some arbitrary other number system.
That can be binary (2), oct (8), hex (16) or others.

Example:

 print num2code(255,2,"0123456789ABCDEF");  # prints FF
 print num2code( 14,2,"0123456789ABCDEF");  # prints 0E

...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
...and 14 are converted to 0E, with leading 0 because of the second argument 2.

Example:

 print num2code(1234,16,"01")

Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.

To convert back:

 print code2num("0000010011010010","01");  #prints 1234

C<num2code()> can be used to compress numeric IDs to something shorter:

 $chars="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";
 print num2code("241274432",5,$chars);     # prints EOOv0
 print code2num("EOOv0",$chars);           # prints 241274432

=cut

#Math::BaseCnv

sub num2code {
  return num2code($_[0],0,$_[1]) if @_==2;
  my($num,$digits,$validchars,$start)=@_;
  my $l=length($validchars);
  my $key;
  $digits||=9e9;
  no warnings;
  croak if $num<$start;
  $num-=$start;
  for(1..$digits){
    $key=substr($validchars,$num%$l,1).$key;
    $num=int($num/$l);
    last if $digits==9e9 and !$num;
  }
  croak if $num>0;
  return $key;
}

sub code2num {
  my($code,$validchars,$start)=@_; $start=0 if!defined$start;
  my $l=length($validchars);
  my $num=0;
  $num=$num*$l+index($validchars,$_) for split//,$code;
  return $num+$start;
}

=head2 base

Numbers in any number system of base between 2 and 36. Using capital letters A-Z for base higher than 10.

 base(2,15)                 # 1111  2 --> binary
 base(8,4096)               # 10000 8 --> octal
 base(10,4096)              # 4096 of course
 base(16,254)               # FE   16 --> hex
 base(16,254.3)             # FE   16 --> hex, can not handle decimal numbers (yet...todo)
 base(36,123456)            # FE   16 --> hex, can not handle decimal numbers (yet...todo)
 base(36,1234567891011)     # FR5HUHC3  base36 using all 0-9 and A-Z as digits, 10+26=36
 base(37,1)                 # die with message 'base not 2-36'
 base($x,0)                 # 0

Tools.pm  view on Meta::CPAN

sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
#hm sub gcd { my($a,$b)=@_; ($a,$b)=($b,$a%$b) while $b; $a }

=head2 lcm

C<lcm()> finds the Least Common Multiple of two or more numbers (integers).

B<Input:> two or more positive numbers (integers)

B<Output:> an integer number

Example: C< 2/21 + 1/6 = 4/42 + 7/42 = 11/42>

Where 42 = lcm(21,6).

B<Example:>

  print lcm(45,120,75);   # prints 1800

Because the factors are:

  45 = 2^0 * 3^2 * 5^1
 120 = 2^3 * 3^1 * 5^1
  75 = 2^0 * 3^1 * 5^2

Take the bigest power of each primary number (2, 3 and 5 here).
Which is 2^3, 3^2 and 5^2. Multiplied this is 8 * 9 * 25 = 1800.

 sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }

Seems to works with L<Math::BigInt> as well: (C<lcm> of all integers from 1 to 200)

 perl -MAcme::Tools -MMath::BigInt -le'print lcm(map Math::BigInt->new($_),1..200)'

 337293588832926264639465766794841407432394382785157234228847021917234018060677390066992000

=cut

sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }

=head2 resolve

Resolves an equation by Newtons method.

B<Input:> 1-6 arguments. At least one argument.

First argument: must be a coderef to a subroutine (a function)

Second argument: if present, the target, f(x)=target. Default 0.

Third argument: a start position for x. Default 0.

Fourth argument: a small delta value. Default 1e-4 (0.0001).

Fifth argument: a maximum number of iterations before resolve gives up
and carps. Default 100 (if fifth argument is not given or is
undef). The number 0 means infinite here.  If the derivative of the
start position is zero or close to zero more iterations are typically
needed.

Sixth argument: A number of seconds to run before giving up.  If both
fifth and sixth argument is given and > 0, C<resolve> stops at
whichever comes first.

B<Output:> returns the number C<x> for C<f(x)> = 0

...or equal to the second input argument if present.

B<Example:>

The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.

The result of C<resolve> will depend on the start position:

 print resolve(sub{ $_**2 - 4*$_ - 21 });                     # -3 with $_ as your x
 print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 });        # -3 more elaborate call
 print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3);    # 7  with start position 3
 print "Iterations: $Acme::Tools::Resolve_iterations\n";      # 3 or larger, about 10-15 is normal

The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
to the last number of iterations C<resolve> used. Also if C<resolve> dies (carps).

The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
set to the last estimate. This number will often be close to the solution and can
be used even if C<resolve> dies (carps).

B<BigFloat-example:>

If either second, third or fourth argument is an instance of L<Math::BigFloat>, so will the result be:

 use Acme::Tools;
 my $equation = sub{ $_ - 1 - 1/$_ };
 my $gr1 = resolve( $equation, 0,      1  ); #
 my $gr2 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
 bigscale(50);
 my $gr3 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2

 print 1/2 + sqrt(5)/2, "\n";
 print "Golden ratio 1: $gr1\n";
 print "Golden ratio 2: $gr2\n";
 print "Golden ratio 3: $gr3\n";

Output:

 1.61803398874989
 Golden ratio 1: 1.61803398874989
 Golden ratio 2: 1.61803398874989484820458683436563811772029300310882395927211731893236137472439025
 Golden ratio 3: 1.6180339887498948482045868343656381177203091798057610016490334024184302360920167724737807104860909804

See:

L<http://en.wikipedia.org/wiki/Newtons_method>

L<Math::BigFloat>

L<http://en.wikipedia.org/wiki/Golden_ratio>

=cut

our $Resolve_iterations;
our $Resolve_last_estimate;
our $Resolve_time;

#sub resolve(\[&$]@) {
#sub resolve(&@) { <=0.17
#todo: perl -MAcme::Tools -le'print resolve(sub{$_[0]**2-9431**2});print$Acme::Tools::Resolve_iterations'
#todo: perl -MAcme::Tools -le'sub d{5.3*1.0094**$_[0]-10.2*1.0072**$_[0]} print resolve(\&d)' #err, pop norway vs sweden
#todo: perl -MAcme::Tools -le' print resolve(sub{5.3*1.0094**$_[0]-10.2*1.0072**$_[0]})' #err, pop norway vs sweden
#    =>Div by zero: df(x) = 0 at n'th iteration, n=0, delta=0.0001, fx=CODE(0xc81d470) at -e line 1
#todo: ren solve?
sub resolve {
  my($f,$goal,$start,$delta,$iters,$sec)=@_;
  $goal=0      if!defined$goal;
  $start=0     if!defined$start;
  $delta=1e-4  if!defined$delta;
  $iters=100   if!defined$iters;
  $sec=0       if!defined$sec;
  $iters=13e13 if $iters==0;
  croak "Iterations ($iters) or seconds ($sec) can not be a negative number" if $iters<0 or $sec<0;
  $Resolve_iterations=undef;
  $Resolve_last_estimate=undef;
  croak "Should have at least 1 argument, a coderef" if !@_;
  croak "First argument should be a coderef" if ref($f) ne 'CODE';

  my @x=($start);
  my $time_start=$sec>0?time_fp():undef;
  my $ds=ref($start) eq 'Math::BigFloat' ? Math::BigFloat->div_scale() : undef;
  my $fx=sub{
    local$_=$_[0];
    my $fx=&$f($_);
    if($fx=~/x/ and $fx=~/^[ \(\)\.\d\+\-\*\/x\=\^]+$/){
      $fx=~s/(\d)x/$1*x/g;
      $fx=~s/\^/**/g;
      $fx=~s/^(.*)=(.*)$/($1)-($2)/;
      $fx=~s,x,\$_,g;
      $f=eval"sub{$fx}";
      $fx=&$f($_);
    }
    $fx
  };
  #warn "delta=$delta\n";
  my $n=0;
  while($n<=$iters-1){
    my $fd= &$fx($x[$n]+$delta*0.5) - &$fx($x[$n]-$delta*0.5);
    $fd   = &$fx($x[$n]+$delta*0.7) - &$fx($x[$n]-$delta*0.3) if $fd==0;# and warn"wigle 1\n";
    $fd   = &$fx($x[$n]+$delta*0.2) - &$fx($x[$n]-$delta*0.8) if $fd==0;# and warn"wigle 2\n";
    croak "Div by zero: df(x) = $x[$n] at n'th iteration, n=$n, delta=$delta, fx=$fx" if $fd==0;
    $x[$n+1]=$x[$n]-(&$fx($x[$n])-$goal)/($fd/$delta);
    $Resolve_last_estimate=$x[$n+1];
    #warn "n=$n  fd=$fd  x=$x[$n+1]\n";
    $Resolve_iterations=$n;
    last if $n>3 and $x[$n+1]==$x[$n] and $x[$n]==$x[$n-1];
    last if $n>4 and $x[$n]!=0 and abs(1-$x[$n+1]/$x[$n])<1e-13; #sub{3*$_+$_**4-12}
    last if $n>3 and ref($x[$n+1]) eq 'Math::BigFloat' and substr($x[$n+1],0,$ds) eq substr($x[$n],0,$ds); #hm
    croak "Could not resolve, perhaps too little time given ($sec), iteratons=$n"
      if $sec>0 and ($Resolve_time=time_fp()-$time_start)>$sec;
    #warn "$n: ".$x[$n+1]."\n";
    $n++;
  }
  croak "Could not resolve, perhaps too few iterations ($iters)" if @x>=$iters;
  return $x[-1];
}

=head2 resolve_equation

This prints 2:

 print resolve_equation "x + 13*(3-x) = 17 - x"

A string containing at least one x is converted into a perl function.
Then x is found by using L<resolve>. The string conversion is done by
replacing every x with $_ and if a C< = > char is present it converts
C< leftside = rightside > into C< (leftside) - (rightside) = 0 > which
is the default behaviour of L<resolve>.

=cut

sub resolve_equation { my $e=shift;resolve(sub{$e},@_)}

=head2 conv

Converts between:

=over 4

=item * units of measurement

=item * number systems

=item * currencies

=back

B<Examples:>

 print conv( 2000, "meters", "miles" );  #prints 1.24274238447467
 print conv( 2.1, 'km', 'm');            #prints 2100
 print conv( 70,"cm","in");              #prints 27.5590551181102
 print conv( 4,"USD","EUR");             #prints 3.20481552905431 (depending on todays rates)
 print conv( 4000,"b","kb");             #prints 3.90625 (1 kb = 1024 bytes)
 print conv( 4000,"b","Kb");             #prints 4       (1 Kb = 1000 bytes)
 print conv( 1000,"mb","kb");            #prints 1024000
 print conv( 101010,"bin","roman");      #prints XLII
 print conv( "DCCXLII","roman","oct");   #prints 1346

B<Units, types of measurement and currencies supported by C<conv> are:>

Note: units starting with the symbol _ means that all metric
prefixes from yocto 10^-24 to yotta 10^+24 is supported, so _m means
km, cm, mm, µm and so on. And _N means kN, MN GN and so on.

Note2: Many units have synonyms: m, meter, meters ...

 acceleration: g, g0, m/s2, mps2

 angle:        binary_degree, binary_radian, brad, deg, degree, degrees,
               gon, grad, grade, gradian, gradians, hexacontade, hour,
               new_degree, nygrad, point, quadrant, rad, radian, radians,
               sextant, turn

 area:         a, ar, are, ares, bunder, ca, centiare, cho, cm2,
               daa, decare, decares, deciare, dekar,
               djerib, m2, dunam, dönöm, earths, feddan, ft2, gongqing, ha
               ha, hectare, hectares, hektar, jerib, km2, m2, manzana,
               mi2, mm2, mu, qing, rai, sotka,
               sqcm, sqft, sqkm, sqm, sqmi, sqmm
               stremmata, um2, µm2

 bytes:        Eb, Gb, Kb, KiB, Mb, Pb, Tb, Yb, Zb, b, byte,
               kb, kilobyte,  mb, megabyte,
               gb, gigabyte,  tb, terabyte,
               pb, petabyte,  eb, exabyte,
               zb, zettabyte, yb, yottabyte

 charge:       As, C, _e, coulomb, e

 current:      A, _A, N/m2

 energy:       BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
               cal, calorie, calories, eV, electronvolt, BeV,
               erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh, MWh, GWh, TWh
               kcal, kilocalorie, kilocalories,
               newtonmeter, newtonmeters, th, thermie

 force:        N, _N, dyn, dyne, dynes, lb, newton

 length:       NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
               in, inch, inches, km, league, lightyear, ls, ly,
               m, meter, meters, mi, mil, mile, miles,
               nautical mile, nautical miles, nmi,
               parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm, angstrom

 mass:         Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
               grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
               lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
               pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
               pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey

 mileage:      mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km

 money:        AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
               COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
               ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
               NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
               SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR,      BTC, LTC, mBTC, XBT
               Currency rates are automatically updated from the net
               at least every 24h since last update (on linux/cygwin).

 numbers:      dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
               gr, great_gross, small_gross  (not supported: decimal numbers)

 power:        BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
               W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy

 pressure:     N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr

 radioactivity: Bq, becquerel, curie

 speed:        _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
               kn, knot, knots, kt, kts, mach, machs, c, fps, ft/s, ftps

 temperature:  C, F, K, celsius, fahrenheit, kelvin

 time:         _s, biennium, century, d, day, days, decade, dy, fortnight,
               h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
               millennium, min, minute, minutes, mo, moment, mon, month,
               olympiad, quarter, s, season, sec, second, seconds, shake,
               tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
               year, years, ygregorian, yjulian, ysideral, ytropical

 volume:        l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
                gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
                pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
                floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
                container (or container20), container40, container40HC, container45HC

See: L<http://en.wikipedia.org/wiki/Units_of_measurement>

=cut

#TODO:  @arr2=conv(\@arr1,"from","to")         # should be way faster than:
#TODO:  @arr2=map conv($_,"from","to"),@arr1
#TODO:  conv(123456789,'b','h'); # h converts to something human-readable

our %conv=(
	 length=>{
		  m       => 1,
		  _m      => 1,
		  meter   => 1,
		  meters  => 1,
		  metre   => 1,
		  metres  => 1,
		  km      => 1000,
		  mil     => 10000,                   #scandinavian #also: inch/1000!
		  in      => 0.0254,
		  inch    => 0.0254,
		  inches  => 0.0254,
		  ft      => 0.0254*12,               #0.3048 m
		  feet    => 0.0254*12,               #0.3048 m
		  yd      => 0.0254*12*3,             #0.9144 m
		  yard    => 0.0254*12*3,             #0.9144 m
		  yards   => 0.0254*12*3,             #0.9144 m
		  fathom  => 0.0254*12*3*2,           #1.8288 m
		  fathoms => 0.0254*12*3*2,           #1.8288 m
		  chain   => 0.0254*12*3*22,          #20.1168 m
		  chains  => 0.0254*12*3*22,          #20.1168 m
		  furlong => 0.0254*12*3*22*10,       #201.168 m
		  furlongs=> 0.0254*12*3*22*10,       #201.168 m
		  mi      => 0.0254*12*3*22*10*8,     #1609.344 m
		  mile    => 0.0254*12*3*22*10*8,     #1609.344 m
		  miles   => 0.0254*12*3*22*10*8,
		  league  => 0.0254*12*3*22*10*8*3,   #4828.032 m
		  leagues => 0.0254*12*3*22*10*8*3,   #4828.032 m
		  yard_imp           => 0.914398416,
		  yard_imperical     => 0.914398416,
                  NM                 => 1852,           #nautical mile
                  nmi                => 1852,           #nautical mile
                  'nautical mile'    => 1852,
                  'nautical miles'   => 1852,
		  micron             => 1e-6,
		  microns            => 1e-6,
		  micrometre         => 1e-6,
		  micrometres        => 1e-6,
                  'Ã…'                => 1e-10,
                  'ångstrøm'         => 1e-10,
                  'angstrom'         => 1e-10,
		  fm                 => 1e-15,
		  fermi              => 1e-15, #in honour of Enrico Fermi
		  fermis             => 1e-15, #in honour of Enrico Fermi
		  femtometer         => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
		  femtometre         => 1e-15,
		  femtometers        => 1e-15, #derived from "femten" (=fifteen in Norwegian and Danish)
		  femtometres        => 1e-15,
		  attometer          => 1e-18, #derived from "atten/atton" (=eighteen)
		  attometre          => 1e-18,
		  attometers         => 1e-18, #derived from "atten/atton" (=eighteen)
		  attometres         => 1e-18,
		  ly                 => 299792458*3600*24*365.25,
		  lightyear          => 299792458*3600*24*365.25, # = 9460730472580800 by def
		  ls                 => 299792458,      #light-second
                  au                 => 149597870700,   # by def: meters earth to sun
                  astronomical_unit  => 149597870700,
                 'astronomical unit' => 149597870700,
                  pc                 => 149597870700*648000/$PI, #3.0857e16 = 3.26156 ly
                 _pc                 => 149597870700*648000/$PI,
                  parsec             => 149597870700*648000/$PI,
		  attoparsec         => 149597870700*648000/$PI/1e18,
		  apc                => 149597870700*648000/$PI/1e18,
		  planck             => 1.61619997e-35, #planck length
		  #Norwegian (old) lengths:
		  tomme         => 0.0254,
		  tommer        => 0.0254,
		  fot           => 0.0254*12,               #0.3048m
		  alen          => 0.0254*12*2,             #0.6096m
		  favn          => 0.0254*12*2*3,           #1.8288m
		  kvart         => 0.0254*12*2/4,           #0.1524m a quarter alen
                  #--https://upload.wikimedia.org/wikipedia/commons/e/eb/English_length_units_graph.svg
                  twip          => 0.0254 / 6 / 12 / 20,
                  point         => 0.0254 / 6 / 12,
                  pica          => 0.0254 / 6,
                  line          => 0.0254 / 12,
		  thou          => 0.0254 / 1000,
                  barleycorn    => 0.0254 / 3,
                  poppyseed     => 0.0254 / 3 / 4,
                  finger        => 0.0254 / 6 / 12 * 63,
                  palm          => 0.0254 * 3,
                  digit         => 0.0254 * 3 / 4,
                  nail          => 0.0254 * 3 / 4 * 3,
                  rack          => 0.0254 * 1.75,
                  stick         => 0.0254 * 2,
                  hand          => 0.0254 * 2 * 2,
                  foot          => 0.0254 * 2 * 2 * 3,
                  shaftment     => 0.0254 * 3 * 2,
                  span          => 0.0254 * 3 * 3,
                  ell           => 0.0254 * 3 * 3 * 5,
                  pace          => 0.0254 * 3 * 2 * 5,
                  step          => 0.0254 * 3 * 2 * 5,
                  cubit         => 0.0254 * 3 * 2 * 3,
                  rod           => 0.0254 * 3 * 2 * 3 * 11,
                  link          => 0.0254 * 3 * 2 * 3 * 11 / 25,
		  yard          => 0.0254 * 2 * 2 * 3 * 3,
                  grade         => 0.0254 * 3 * 2 * 5 * 2,
                  rope          => 0.0254 * 3 * 2 * 5 * 2 * 4,
                  skein         => 0.0254 * 3 * 3 * 5 * 96,                   # 96 ell
                  fathom        => 0.0254 * 2 * 2 * 3 * 3 * 2,                # 2 yard
                  spindle       => 0.0254 * 3 * 3 * 5 * 96 * 120,             # 120 skein
                  gunter_chain  => 0.0254 * 2 * 2 * 3 * 3 * 2 * 11,           # 11 fathom
                  ramsden_chain => 0.0254 * 3 * 2 * 5 * 2 * 4 * 5,            # 5 rope
                  shackle       => 0.0254 * 2 * 2 * 3 * 3 * 2 * 15,           # 15 fathom
                  cable         => 0.0254 * 2 * 2 * 3 * 3 * 2 * 100,          # 100 fathom
                  furlong       => 0.0254 * 2 * 2 * 3 * 3 * 2 * 11 * 10,      # 10 gunter_chain, 220 yard
                  roman_mile    => 0.0254 * 3 * 2 * 5 * 2 * 4 * 5 * 50,       # 50 ramsden_chain
                  statute_mile  => 0.0254 * 2 * 2 * 3 * 3 * 2 * 11 * 10 * 8,  # 8 furlong
                  nautic_mile   => 0.0254 * 2 * 2 * 3 * 3 * 2 * 100 * 10,     # 10 cable
                  league        => 0.0254 * 2 * 2 * 3 * 3 * 2 * 100 * 10 * 5, # 5 nautic_mile
		  siriometer    => 149597870700*1e6,                          # 1 million astronomical units
		 },
	 mass  =>{ #https://en.wikipedia.org/wiki/Unit_conversion#Mass
		  g            => 1,
		  _g           => 1,
		  gram         => 1,
		  grams        => 1,
		  kilo         => 1000,
		  kilos        => 1000,
		  t            => 1000000,
		  tonn         => 1000000,
		  tonne        => 1000000,

Tools.pm  view on Meta::CPAN

		  square    => 100*(0.0254*12)**2, #100 square feet ~ 9.29 m2
		  morgen    => 0.856532 * 10000,   #0.856532 hectares
		  bornholm  => 588.36e6,           #area of danish island bornholm, 588km2
		  texas     => 695670e6,           #area of texas, 695 670 square km
        	 },
	 volume=>{
		  m3            => 1,                #1000 L
		  l             => 0.001,
		  L             => 0.001,
		  _L            => 0.001,
		  _l            => 0.001,
		  liter         => 0.001,
		  liters        => 0.001,
		  litre         => 0.001,
		  litres        => 0.001,
		  gal           => 231 * 0.0254**3, #3.785411784 L = 0.003785411784 m3, #231 cubic inches
		  gallon        => 231 * 0.0254**3,
		  gallons       => 231 * 0.0254**3,
		  gallon_us     => 231 * 0.0254**3, #231 cubic inches
		  gallon_wine   => 231 * 0.0254**3, #queen anne's gallon
		  gallon_ale    => 282 * 0.0254**3, #beer
		  gallon_corn   => 268.8*0.0254**3, #corn, or winchester gallon
		  gallon_uk     => 4.54609/1000,    #constant 4.54609 from definition
		  gallon_imp    => 4.54609/1000,    #imperial
		  gallon_us_dry => 4.40488377086/1000, # ~ 9.25**2*pi*2.54**3/1000 L
		  #hogshead, gill, pail, jigger, jackpot, The Science of Measurement - A Historical Survey (Klein)
		  cm3       => 0.01**3,               #0.001 L
                  in3       => 0.0254**3,             #0.016387064 L
                  ft3       => (0.0254*12)**3,
		  tablespoon=> 3.785411784/256,       #14.78676478125 mL
		  tsp       => 3.785411784/256/3,     #4.92892159375 mL
		  teaspoon  => 3.785411784/256/3,     #4.92892159375 mL
                  floz      => 3.785411784/128,       #fluid ounce US
                  floz_uk   => 4.54609/160,           #fluid ounce UK
                  pint      => 4.54609/8000,          #0.56826125 L
                  pint_uk   => 4.54609/8000,
                  pint_imp  => 4.54609/8000,
                  pint_us   => 3.785411784/8000,      #0.473176473
		  quart     => 4.54609/4000,             #2*pint
		  pottle    => 4.54609/2000,             #2*quart = gallon_uk/2
		 #therm     => 2.74,                #? 100000BTUs?   (!= thermie)
		 #thm       => 2.74,                #?               (!= th)
                  fat       => 42*231*2.54**3/1e6,
                  bbl       => 42*231*2.54**3/1e6,  #oil barrel ~159 liters https://en.wikipedia.org/wiki/Barrel_(unit)
		  Mbbl      => 42*231*2.54**3/1e3,  #mille (thousand) oil barrels, M er mille her, ikke mega!
		  MMbbl     => 42*231*2.54**3,      #mille mille (million) oil barrels
		  drum      => 0.208,               #208 L
		  container     => 33.1e3,  #container20
		  container20   => 33.1e3,
		  container40   => 67.5e3,
		  container40HC => 75.3e3,
		  container45HC => 86.1e3,
		  firkin        => 282*0.0254**3 * 8, #8 gallon_ale
		  #Norwegian:
                  meterfavn => 2 * 2 * 0.6,           #fire wood/ved 2.4 m3
                  storfavn  => 2 * 2 * 3,             #fire wood/ved 12 m3
		 },
	 time  =>{
		  s           => 1,
		  _s          => 1,
		  sec         => 1,
		  second      => 1,
		  seconds     => 1,
		  m           => 60,
		  min         => 60,
		  minute      => 60,
		  minutes     => 60,
		  h           => 60*60,
		  hr          => 60*60,
		  hour        => 60*60,
		  hours       => 60*60,
		  d           => 60*60*24,
		  dy          => 60*60*24,
		  day         => 60*60*24,
		  days        => 60*60*24,
		  w           => 60*60*24*7,
		  week        => 60*60*24*7,
		  weeks       => 60*60*24*7,
		  mo	      => 60*60*24 * 365.2425/12,
		  mon	      => 60*60*24 * 365.2425/12,
		  month	      => 60*60*24 * 365.2425/12,
		  quarter     => 60*60*24 * 365.2425/12 * 3, #3 months
		  season      => 60*60*24 * 365.2425/12 * 3, #3 months
		  y           => 60*60*24 * 365.2425, # 365+97/400    #97 leap yers in 400 years
		  year        => 60*60*24 * 365.2425,
		  years       => 60*60*24 * 365.2425,
		  yjulian     => 60*60*24 * 365.25,   # 365+1/4
		  y365        => 60*60*24 * 365,      # finance/science
		  ycommon     => 60*60*24 * 365,      # finance/science
		  ygregorian  => 60*60*24 * 365.2425, # 365+97/400
		 #ygaussian   => 365+(6*3600+9*60+56)/(24*3600),  # 365+97/400
                  ytropical   => 60*60*24 * 365.24219,
                  ysideral    => 365.256363004,
		  ySI         => 60*60*24*365.25, #31556925.9747
                  decade      =>   10 * 60*60*24*365.2425,
                  biennium    =>    2 * 60*60*24*365.2425,
                  triennium   =>    3 * 60*60*24*365.2425,
                  olympiad    =>    4 * 60*60*24*365.2425,
                  lustrum     =>    5 * 60*60*24*365.2425,
                  indiction   =>   15 * 60*60*24*365.2425,
		  jubilee     =>   50 * 60*60*24*365.2425,
		  century     =>  100 * 60*60*24*365.2425,
		  millennium  => 1000 * 60*60*24*365.2425,
                  shake       => 1e-8,
                  moment      => 3600/40,  #1/40th of an hour, used by Medieval Western European computists
		  ke          => 864,      #1/100th of a day, trad Chinese, 14m24s
		  fortnight   => 14*24*3600,
                  tp          => 5.3910632e-44,  #planck time, time for ligth to travel 1 planck length
		  nanocentury =>  100 * 60*60*24*365.2425 / 1e9,   #3.156 ~ pi seconds, response time limit (usability)
		  warhol      => 15*60,                            #"fifteen minutes of fame"
		 },
          speed=>{
                 'm/s'      => 1,
                '_m/s'      => 1,
                  mps       => 1,
                  mph       => 1609.344/3600,
                 'mi/h'     => 1609.344/3600,
                  kmh       => 1/3.6,
                  kmph      => 1/3.6,
                 'km/h'     => 1/3.6,
                  kmt       => 1/3.6, # t=time(=hour) or temps (scandinavian and french and dutch)
                 'km/t'     => 1/3.6,
 		  kt        => 1852/3600,
 		  kts       => 1852/3600,
 		  kn        => 1852/3600,
 		  knot      => 1852/3600,
 		  knots     => 1852/3600,
 		  knop      => 1852/3600,    #scandinavian
		  c         => 299792458,    #speed of light, exact due to definition of meter
		  mach      => 340.3,        #speed of sound
		  machs     => 340.3,
                  fps       => 0.3048, #0.0254*12
                  ftps      => 0.3048,
                 'ft/s'     => 0.3048,
                 },
	  acceleration=>{
                 'm/s2'     => 1,
                 'mps2'     => 1,
                  g         => 9.80665,
                  g0        => 9.80665,
                  #0-100kmh or ca 0-60 mph x seconds...
                 },
         temperature=>{  #http://en.wikipedia.org/wiki/Temperature#Conversion
                  C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
                 },
         radioactivity=>{
                  Bq          => 1,
		  becquerel   => 1,
		  curie       => 3.7e10,
                 },
         current=> {
                  A     => 1,
                  _A    => 1,
                 'N/m2' => 2e-7,
	         },
         charge=>{
                  e       => 1,
                  _e      => 1,
                  C       => 6.24150964712042e+18,
                  coulomb => 6.24150964712042e+18,
                  As      => 6.24150964712042e+18,
                 #Faraday unit of charge ???
                 },
         power=> {
                  W        => 1,
 		  _W       => 1,
                 'J/s'     => 1,
                  Jps      => 1,
                  hp       => 746,
                  hk       => 746,        #hestekrefter (norwegian, scandinavian)
		  PS       => 746/1.014,  #pferdestärken
		 'kWh/yr'  => 1000    * 3600/(24*365), #kWh annually
                  Whpy     =>           3600/(24*365), #kWh annually
                  kWhpy    => 1000    * 3600/(24*365), #kWh annually
                  MWhpy    => 1000**2 * 3600/(24*365), #kWh annually
                  GWhpy    => 1000**3 * 3600/(24*365), #kWh annually
                  TWhpy    => 1000**4 * 3600/(24*365), #kWh annually
		  BTU      => 1055.05585262/3600,                    #
		  BTUph    => 1055.05585262/3600,
		 'BTU/h'   => 1055.05585262/3600,
		 'BTU/s'   => 1055.05585262,
		 'ftlb/s'  => 746/550,
		 'ftlb/min'=> 746/550/60,
                 },
         energy=>{
                   joule        => 1,
                   J            => 1,
                   _J           => 1,
                   Ws           => 1,
                   Wps          => 1,
                  'W/s'         => 1,
                   Nm           => 1,
                   newtonmeter  => 1,
                   newtonmeters => 1,
                   Wh           => 3600,             #3600 J (joules)
                   kWh          => 3600000,          #3.6 million J
                   MWh          => 3600000000,       #3.6 billion J
                   GWh          => 3600000000000,    #3.6 trillion J
                   TWh          => 3600000000000000, #3600 trillion J
                   cal          => 4.1868,           # ~ 3600/860
		   calorie      => 4.1868,

Tools.pm  view on Meta::CPAN

          numbers =>{
	    dec=>1,hex=>1,bin=>1,oct=>1,roman=>1,      des=>1,#des: spelling error in v0.15-0.16
            dusin=>1,dozen=>1,doz=>1,dz=>1,gross=>144,gr=>144,gro=>144,great_gross=>12*144,small_gross=>10*12,
          }
	);
our $conv_prepare_time=0;
our $conv_prepare_money_time=0;
sub conv_prepare {
  my %b    =(da  =>1e+1, h    =>1e+2, k    =>1e+3, M     =>1e+6,          G   =>1e+9, T   =>1e+12, P    =>1e+15, E   =>1e+18, Z    =>1e+21, Y    =>1e+24, H    =>1e+27);
  my %big  =(deca=>1e+1, hecto=>1e+2, kilo =>1e+3, mega  =>1e+6,          giga=>1e+9, tera=>1e+12, peta =>1e+15, exa =>1e+18, zetta=>1e+21, yotta=>1e+24, hella=>1e+27);
  my %s    =(d   =>1e-1, c    =>1e-2, m    =>1e-3,'µ'    =>1e-6, u=>1e-6, n   =>1e-9, p   =>1e-12, f    =>1e-15, a   =>1e-18, z    =>1e-21, y    =>1e-24);
  my %small=(deci=>1e-1, centi=>1e-2, milli=>1e-3, micro =>1e-6,          nano=>1e-9, pico=>1e-12, femto=>1e-15, atto=>1e-18, zepto=>1e-21, yocto=>1e-24);
  # myria=> 10000              #obsolete
  # demi => 1/2, double => 2   #obsolete
  # lakh => 1e5, crore => 1e7  #south	asian
  my %x = (%s,%b);
  for my $type (keys%conv) {
    for(grep/^_/,keys%{$conv{$type}}) {
      my $c=$conv{$type}{$_};
      delete$conv{$type}{$_};
      my $unit=substr($_,1);
      $conv{$type}{$_.$unit}=$x{$_}*$c for keys%x;
    }
  }
  $conv_prepare_time=time();
}

our $Currency_rates_url = 'http://calthis.com/currency-rates';
our $Currency_rates_expire = 6*3600;
sub conv_prepare_money {
  eval {
    require LWP::Simple;
    my $td=$^O=~/^(?:linux|cygwin)$/?"/tmp":"/tmp"; #hm wrong!
    my $fn="$td/acme-tools-currency-rates.data";
    if( !-e$fn  or  time() - (stat($fn))[9] >= $Currency_rates_expire){
      LWP::Simple::getstore($Currency_rates_url,"$fn.$$.tmp"); # get ... see getrates.cmd
      die "nothing downloaded" if !-s"$fn.$$.tmp";
      rename "$fn.$$.tmp",$fn;
      chmod 0666,$fn;
    }
    my $d=readfile($fn);
    my %r=$d=~/^\s*([A-Z]{3}) +(\d+\.\d+)\b/gm;
    $r{lc($_)}=$r{$_} for keys%r;
    #warn serialize([minus([sort keys(%r)],[sort keys(%{$conv{money}})])],'minus'); #ARS,AED,COP,BWP,LVL,BHD,NPR,LKR,QAR,KWD,LYD,SAR,KZT,CLP,IRR,VEF,TTD,OMR,MUR,BND
    #warn serialize([minus([sort keys(%{$conv{money}})],[sort keys(%r)])],'minus'); #LTC,I44,BTC,BYR,TWI,NOK,XDR
    $conv{money}={%{$conv{money}},%r} if keys(%r)>20;
  };
  carp "conv: conv_prepare_money (currency conversion automatic daily updated rates) - $@\n" if $@;
  $conv{money}{"m$_"}=$conv{money}{$_}/1000 for qw/BTC XBT/;
  $conv_prepare_money_time=time();
  1; #not yet
}

sub conv {
  my($num,$from,$to)=@_;
  croak "conf requires 3 args" if @_!=3;
  conv_prepare() if !$conv_prepare_time;
  my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
  my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
  my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
  my @type=intersect(@types);
  push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
  push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
              ."to $to ("  .(join(",",@{$types[1]})||'?').") has no known common dimension (unit type).\n" if @type<1;
  croak join"\n",map"conv: $_",@err if @err;
  my $type=$type[0];
  conv_prepare_money()        if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
  return conv_temperature(@_) if $type eq 'temperature';
  return conv_numbers(@_)     if $type eq 'numbers';
  my $c=$conv{$type};
  my($cf,$ct)=@{$conv{$type}}{$from,$to};
  my $r= $cf>0 && $ct<0 ? -$ct/$num/$cf
       : $cf<0 && $ct>0 ? -$cf/$num/$ct
       :                   $cf*$num/$ct;
  #  print STDERR "$num $from => $to    from=$ff  to=$ft  r=$r\n";
  return $r;
}

sub conv_temperature { #http://en.wikipedia.org/wiki/Temperature#Conversion
  my($t,$from,$to)=(shift(),map uc(substr($_,0,1)),@_);
  $from=~s/K/C/ and $t-=273.15;
 #$from=~s/R/F/ and $t-=459.67; #rankine
  return $t if $from eq $to;
  {CK=>sub{$t+273.15},
   FC=>sub{($t-32)*5/9},
   CF=>sub{$t*9/5+32},
   FK=>sub{($t-32)*5/9+273.15},
  }->{$from.$to}->();
}

sub conv_numbers {
  my($n,$fr,$to)=@_;
  my $dec=$fr eq 'dec'                    ? $n
         :$fr eq 'hex'                    ? hex($n)
         :$fr eq 'oct'                    ? oct($n)
         :$fr eq 'bin'                    ? oct("0b$n")
         :$fr =~ /^(dusin|dozen|doz|dz)$/ ? $n*12
         :$fr =~ /^(gross|gr|gro)$/       ? $n*144
         :$fr eq 'great_gross'            ? $n*12*144
         :$fr eq 'small_gross'            ? $n*12*10
         :$fr eq 'skokk'                  ? $n*60           #norwegian unit
         :$fr eq 'roman'                  ? roman2int($n)
         :$fr eq 'des'                    ? $n
         :croak "Conv from $fr not supported yet";
  my $ret=$to eq 'dec'                    ? $dec
         :$to eq 'hex'                    ? sprintf("%x",$dec)
         :$to eq 'oct'                    ? sprintf("%o",$dec)
         :$to eq 'bin'                    ? sprintf("%b",$dec)
         :$to =~ /^(dusin|dozen|doz|dz)$/ ? $dec/12
         :$to =~ /^(gross|gr|gro)$/       ? $dec/144
         :$to eq 'great_gross'            ? $dec/(12*144)
         :$to eq 'small_gross'            ? $dec/(12*10)
         :$to eq 'skokk'                  ? $dec/60
         :$to eq 'roman'                  ? int2roman($dec)
         :$to eq 'des'                    ? $dec
         :croak "Conv to $to not suppoerted yet";
  $ret;
}
#http://en.wikipedia.org/wiki/Norwegian_units_of_measurement


=head2 bytes_readable

Converts a number of bytes to something human readable.

Input 1: a number

Input 2: optionally the number of decimals if >1000 B. Default is 2.

Output: a string containing:

the number with a B behind if the number is less than 1000

the number divided by 1024 with two decimals and "kB" behind if the number is less than 1024*1000

the number divided by 1048576 with two decimals and "MB" behind if the number is less than 1024*1024*1000

the number divided by 1073741824 with two decimals and "GB" behind if the number is less than 1024*1024*1024*1000

the number divided by 1099511627776 with two decimals and "TB" behind otherwise

Examples:

 print bytes_readable(999);                              # 999 B
 print bytes_readable(1000);                             # 1000 B
 print bytes_readable(1001);                             # 0.98 kB
 print bytes_readable(1024);                             # 1.00 kB
 print bytes_readable(1153433.6);                        # 1.10 MB
 print bytes_readable(1181116006.4);                     # 1.10 GB
 print bytes_readable(1209462790553.6);                  # 1.10 TB
 print bytes_readable(1088516511498.24*1000);            # 990.00 TB
 print bytes_readable(1088516511498.24*1000, 3);         # 990.000 TB
 print bytes_readable(1088516511498.24*1000, 1);         # 990.0 TB

=cut

sub bytes_readable {
  my $bytes=shift();
  my $d=shift()||2; #decimals
  return undef if !defined $bytes;
  return "$bytes B"                         if abs($bytes) <= 2** 0*1000; #bytes
  return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) <  2**10*1000; #kilobyte
  return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) <  2**20*1000; #megabyte
  return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) <  2**30*1000; #gigabyte
  return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) <  2**40*1000; #terrabyte
  return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
}

=head2 sec_readable

Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.

 print sec_readable( 0 );           # 0s
 print sec_readable( 0.0123 );      # 0.0123s
 print sec_readable(-0.0123 );      # -0.0123s
 print sec_readable( 1.23 );        # 1.23s
 print sec_readable( 1 );           # 1s
 print sec_readable( 9.87 );        # 9.87s
 print sec_readable( 10 );          # 10s
 print sec_readable( 10.1 );        # 10.1s
 print sec_readable( 59 );          # 59s
 print sec_readable( 59.123 );      # 59.1s
 print sec_readable( 60 );          # 1m 0s
 print sec_readable( 60.1 );        # 1m 0s
 print sec_readable( 121 );         # 2m 1s
 print sec_readable( 131 );         # 2m 11s
 print sec_readable( 1331 );        # 22m 11s
 print sec_readable(-1331 );        # -22m 11s
 print sec_readable( 13331 );       # 3h 42m
 print sec_readable( 133331 );      # 1d 13h
 print sec_readable( 1333331 );     # 15d 10h
 print sec_readable( 13333331 );    # 154d 7h
 print sec_readable( 133333331 );   # 4yr 82d
 print sec_readable( 1333333331 );  # 42yr 91d

=cut

sub sec_readable {
  my $s=shift();
  my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
   !defined$s     ? undef
  :!length($s)    ? ''
  :$s<0           ? '-'.sec_readable(-$s)
  :$s<60 && int($s)==$s
                  ? $s."s"
  :$s<60          ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
  :$s<3600        ? int($s/60)."m " .($s%60)        ."s"
  :$s<24*3600     ? int($s/$h)."h " .int(($s%$h)/60)."m"
  :$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
  :                 int($s/$y)."yr ".int(($s%$y)/$d)."d";
}

=head2 int2roman

Converts integers to roman numbers.

B<Examples:>

 print int2roman(1234);   # prints MCCXXXIV
 print int2roman(1971);   # prints MCMLXXI

(Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)

 I = 1
 V = 5
 X = 10
 L = 50
 C = 100     (centum)
 D = 500
 M = 1000    (mille)

See also L<Roman>.

See L<http://en.wikipedia.org/wiki/Roman_numbers> for more.

=head2 roman2int

 roman2int("MCMLXXI") == 1971

=cut

#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
#see also t/17_roman.t sub int2roman_old
sub int2roman {
    my $n=shift;
    !defined$n  ? undef
  : !length($n) ? ""
  : $n<0        ? "-".int2roman(-$n)
  : int($n)!=$n ? croak"int2roman: $n is not an integer"
#  : $] >= 5.014 ?        #s///r modifier introduced in perl v5.14
#        ("I" x $n)
#        =~s,I{1000},M,gr #unnecessary, but speedup for n>1000
#        =~s,I{100},C,gr  #unnecessary, but speedup for n>100
#        =~s,I{10},X,gr   #unnecessary, but speedup for n>10
#        =~s,IIIII,V,gr
#        =~s,IIII,IV,gr
#        =~s,VV,X,gr
#        =~s,VIV,IX,gr
#        =~s,XXXXX,L,gr
#        =~s,XXXX,XL,gr
#        =~s,LL,C,gr
#        =~s,LXL,XC,gr
#        =~s,CCCCC,D,gr

Tools.pm  view on Meta::CPAN

      }
    }
  }
  return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
  print "l=$l max=$max\n";
  $ne="9" x $l;
  print log($n),"\n";
  my $st=sub{print "status: ".($te/$ne)."   n=$n   ".($n/$te*$ne)."\n"};
  while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
  while($te/$n/$ne<0.99){ &$st(); $te*=10 }
  &$st();
  while(1){
    my $d=gcd($te,$ne); print "gcd=$d\n";
    last if $d==1;
    $te/=$d; $ne/=$d;
  }
  &$st();
  wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
}

=head2 isnum

B<Input:> String to be tested on this regexp:

C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>

If no argument is given isnum checks C<< $_ >>.

B<Output:> True or false (1 or 0)

 use Acme::Tools;
 my @e=('   +32.12354E-21  ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
 print isnum()       ? 'num' : 'str' for @e;  #prints num for every element except the last two
 print $_=~$Re_isnum ? 'num' : 'str' for @e;  #same but slighhly faster

=cut

our $Re_isnum     =qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
our $Re_isnum_wolz=qr/^ \s* [\-\+]? (?: ([1-9]\d*|0)?\.\d+ | [1-9]\d* | 0 ) (?:[eE][\-\+]?\d+)?\s*$/x; #without leading zero
sub isnum {(@_?$_[0]:$_)=~$Re_isnum}

=head2 between

Input: Three arguments.

Returns: Something I<true> if the first argument is numerically between the two next. Uses Perls C<< < >>, C<< >= >> and C<< <= >> operators.

=head2 btw

Like L<between> but instead of assuming numbers it checks all three input args
and does alphanumerical comparisons (with Perl operators C<lt>, C<ge> and C<le>) if any of the
three input args don't look like a number or look like a number but with
one or more leading zeros.

 btw(1,1,10)             #true numeric order since all three looks like number according to =~$Re_isnum
 btw(1,'02',13)          #true leading zero in '02' leads to alphabetical order
 btw(10, 012,10)         #true leading zero here means oct number, 012 = 10 (8*1+2), so 10 is btw 10 and 10
 btw('003', '02', '09')  #false because '003' lt '02'
 btw('a', 'b', 'c')      #false because 'a' lt 'b'
 btw('a', 'B', 'c')      #true because upper case letters comes before lower case ones in the "ascii alphabet"
 btw('a', 'c', 'B')      #true, btw() and between switches from and to if the first is > the second
 btw( -1, -2, 1)         #true
 btw( -1, -2, 0)         #true

Both between and btw returns C<undef> if any of the three input args are C<undef> (not defined).
If you're doing only numerical comparisons, using C<between> is faster than C<btw>.

=cut

sub between {
  my($test ,$fom, $tom)=@_;
  return if !defined$test or !defined$fom or !defined$tom;
  $fom < $tom ? $test >= $fom && $test <= $tom : $test >= $tom && $test <= $fom;
}

sub btw {
  my($test,$fom,$tom)=@_;
  return if !defined$test or !defined$fom or !defined$tom;
  $fom =~ $Re_isnum_wolz &&
  $tom =~ $Re_isnum_wolz &&
  $test=~ $Re_isnum_wolz
  ? $fom <  $tom ? $test >= $fom && $test <= $tom : $test >= $tom && $test <= $fom
  : $fom lt $tom ? $test ge $fom && $test le $tom : $test ge $tom && $test le $fom 
}

=head2 curb

B<Input:> Three arguments: value, minumum, maximum.

B<Output:> Returns the value if its between the given minumum and maximum.
Returns minimum if the value is less or maximum if the value is more.
Changes the variable if 1st arg is a scalarref.

 my $enthusiasm = 11;
 print curb( $enthusiasm, 1, 20 );      # prints 11, within bounds
 print curb( $enthusiasm, 1, 10 );      # prints 10
 print curb( $enthusiasm, 20, 100 );    # prints 20
 print curb(\$enthusiasm, 1, 10 );      # prints 10 and sets $enthusiasm = 10
 print $enthusiasm;                     # prints 10

=cut

sub curb {
  my($val,$min,$max)=@_;
  # todo: undef min|max => dont curb min|max
  croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
  return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
  $val < $min ? $min :
  $val > $max ? $max :
                $val;
}
sub bound { curb(@_) }

=head2 log10

=head2 log2

=head2 logn

 print log10(1000);                  # prints 3
 print log10(10000*sqtr(10));        # prints 4.5

Tools.pm  view on Meta::CPAN

  if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
  if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
  $s=~s,^\s+|(?<=\s)\s+|\s+$,,g if defined $s;
  $s;
}

sub rpad {
  my($s,$l,$p)=@_;
  $p=' ' if @_<3 or !length($p);
  $s.=$p while length($s)<$l;
  substr($s,0,$l);
}

sub lpad {
  my($s,$l,$p)=@_;
  $p=' ' if @_<3 or !length($p);
  $l<length($s)
  ? substr($s,0,$l)
  : substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
}

sub cpad {
  my($s,$l,$p)=@_;
  $p=' ' if @_<3 or !length($p);
  my $ls=length($s);
  return substr($s,0,$l) if $l<$ls;
  $p=$p x (($l-$ls+2)/length($p));
  substr($p, 0, ($l-$ls  )/2) . $s .
  substr($p, 0, ($l-$ls+1)/2);
}

sub cpad_old {
  my($s,$l,$p)=@_;
  $p=' ' if !length($p);
  return substr($s,0,$l) if $l<length($s);
  my $i=0;
  while($l>length($s)){
    my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
    $i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
    $i+=1/2;
  }
  $s;
}

=head2 trigram

B<Input:> A string (i.e. a name). And an optional x (see example 2)

B<Output:> A list of this strings trigrams (See examlpe)

B<Example 1:>

 print join ", ", trigram("Kjetil Skotheim");

Prints:

 Kje, jet, eti, til, il , l S,  Sk, Sko, kot, oth, the, hei, eim

B<Example 2:>

Default is 3, but here 4 is used instead in the second optional input argument:

 print join ", ", trigram("Kjetil Skotheim", 4);

And this prints:

 Kjet, jeti, etil, til , il S, l Sk,  Sko, Skot, koth, othe, thei, heim

C<trigram()> was created for "fuzzy" name searching. If you have a database of many names,
addresses, phone numbers, customer numbers etc. You can use trigram() to search
among all of those at the same time. If the search form only has one input field.
One general search box.

Store all of the trigrams of the trigram-indexed input fields coupled
with each person, and when you search, you take each trigram of you
query string and adds the list of people that has that trigram. The
search result should then be sorted so that the persons with most hits
are listed first. Both the query strings and the indexed database
fields should have a space added first and last before C<trigram()>-ing
them.

This search algorithm is not includes here yet...

C<trigram()> should perhaps have been named ngram for obvious reasons.

=head2 sliding

Same as trigram (except there is no default width). Works also with arrayref instead of string.

Example:

 sliding( ["Reven","rasker","over","isen"], 2 )

Result:

  ( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )

=head2 chunks

Splits strings and arrays into chunks of given size:

 my @a = chunks("Reven rasker over isen",7);
 my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);

Resulting arrays:

 ( 'Reven r', 'asker o', 'ver ise', 'n' )
 ( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )

=head2 chars

 chars("Tittentei");     # ('T','i','t','t','e','n','t','e','i')

=cut

sub trigram { sliding($_[0],$_[1]||3) }

sub sliding {
  my($s,$w)=@_;
  return map substr($s,$_,$w),   0..length($s)-$w  if !ref($s);
  return map [@$s[$_..$_+$w-1]], 0..@$s-$w         if ref($s) eq 'ARRAY';
}

sub chunks {
  my($s,$w)=@_;
  return $s=~/(.{1,$w})/g                                      if !ref($s);
  return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w  if ref($s) eq 'ARRAY';
}

sub chars { split//, shift }

=head2 repl

Synonym for replace().

=head2 replace

Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.

Works as C<replace()> in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C<replace()> accepts more than three arguments.

Examples:

 print replace("water","ater","ine");  # Turns water into wine
 print replace("water","ater");        # w
 print replace("water","at","eath");   # weather
 print replace("water","wa","ju",
                       "te","ic",
                       "x","y",        # No x is found, no y is returned
                       'r$',"e");      # Turns water into juice. 'r$' says that the r it wants
                                       # to change should be the last letters. This reveals that
                                       # second, fourth, sixth and so on argument is really regexs,
                                       # not normal strings. So use \ (or \\ inside "") to protect
                                       # the special characters of regexes. You probably also
                                       # should write qr/regexp/ instead of 'regexp' if you make
                                       # use of regexps here, just to make it more clear that
                                       # these are really regexps, not strings.

 print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
 print replace('JACK and JUE','J');      # prints ACK and UE
 print replace("abc","a","b","b","c");   # prints ccc           (not bcc)

If the first argument is a reference to a scalar variable, that variable is changed "in place".

Example:

 my $str="test";
 replace(\$str,'e','ee','s','S');
 print $str;                         # prints teeSt

=cut

sub replace { repl(@_) }
sub repl {
  my $str=shift;
  return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
 #return ? if ref($str) eq 'ARRAY';
 #return ? if ref($str) eq 'HASH';
  while(@_){
    my($fra,$til)=(shift,shift);
    defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
  }
  return $str;
}

=head1 ARRAYS

=head2 subarr

The equivalent of C<substr> on arrays or C<splice> without changing the array.
Input: 1) array or arrayref, 2) offset and optionally 3) length. Without a
third argument, subarr returns the rest of the array.

 @top10    = subarr( @array, 0, 10);   # first 10
 @last_two = subarr( @array, -2, 2);   # last 2
 @last_two = subarr( $array_ref, -2);  # also last 2
 @last_six = subarr $array_ref, -6;    # parens are optional

The same can be obtained from C<< @array[$from..$to] >> but that dont work the
same way with negative offsets and boundary control of length.

=cut

#Todo: sjekk paastand over


#sub subarr(+$;$) { #perl>=5.14        # t/35_subarr.t
sub subarr { #perl<5.14
  my($a,$o,$l)=@_;
  $o=@$a+$o if $o<0;
  $o=0      if $o<0;
  $o=@$a-1  if $o>@$a-1;

Tools.pm  view on Meta::CPAN

  croak     if $l<0;
  $l=@$a-$o if $l>@$a-$o;
  @$a[$o..$o+$l-1];
}

=head2 min

Returns the smallest number in a list. Undef is ignored.

 @lengths=(2,3,5,2,10,undef,5,4);
 $shortest = min(@lengths);   # returns 2

Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C<max()>, except of course C<< > >> is used instead.

 min(3,4,5)       # 3
 min(3,4,5,undef) # 3
 min(3,4,5,'')    # returns the empty string

=head2 max

Returns the largest number in a list. Undef is ignored.

 @heights=(123,90,134,undef,132);
 $highest = max(@heights);   # 134

=head2 mins

Just as L</min>, except for strings.

 print min(2,7,10);          # 2
 print mins("2","7","10");   # 10
 print mins(2,7,10);         # 10

=head2 maxs

Just as L</mix>, except for strings.

 print max(2,7,10);          # 10
 print maxs("2","7","10");   # 7
 print maxs(2,7,10);         # 7

=cut

sub min  {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
sub max  {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }

=head2 zip

B<Input:> Two or more arrayrefs. A number of equal sized arrays
containing numbers, strings or anything really.

B<Output:> An array of those input arrays zipped (interlocked, merged) into each other.

 print join " ", zip( [1,3,5], [2,4,6] );               # 1 2 3 4 5 6
 print join " ", zip( [1,4,7], [2,5,8], [3,6,9] );      # 1 2 3 4 5 6 7 8 9

Example:

zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:

 my @media = qw/CD DVD VHS LP Blueray/;
 my @count = qw/20 12  2   4  3/;
 my %count = zip(\@media,\@count);                 # or zip( [@media], [@count] )
 print "I got $count{DVD} DVDs\n";                 # I got 12 DVDs

Dies (croaks) if the two lists are of different sizes

...or any input argument is not an array ref.

=cut

sub zip {
  my @t=@_;
  ref($_) ne 'ARRAY' and croak "ERROR: zip should have arrayrefs as arguments" for @t;
  @{$t[$_]} != @{$t[0]} and croak "ERROR: zip should have equal sized arrays" for 1..$#t;
  my @res;
  for my $i (0..@{$t[0]}-1){
    push @res, $$_[$i] for @t;
  }
  return @res;
}

=head2 sim

B<Input:> Two or more strings

B<Output:> A number 0 - 1 indicating the similarity between two strings.

Requires L<String::Similarity> where the real magic happens.

 sim("Donald Duck", "Donald E. Knuth");    # returns 0.615
 sim("Kalle Anka", "Kalle And")'           # returns 0.842
 sim("Kalle Anka", "Kalle Anka");          # returns 1
 sim("Kalle Anka", "kalle anka");          # returns 0.8
 sim(map lc, "Kalle Anka", "kalle anka");  # returns 1

Todo: more doc

=cut

#Todo:
#peat -le'print join", ",sim("GskOk",[zip([qw(Gsk_ok Vgdoknr Personnavn Adferdkode Ordenkode G_kok)],[0..5])],0.7,0.127)'
#Use of uninitialized value in subroutine entry at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2365.
#Use of uninitialized value $simlikest in numeric ge (>=) at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2366.
#Use of uninitialized value in subroutine entry at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2365.
#Use of uninitialized value $simnestlikest in numeric ge (>=) at /usr/local/share/perl/5.22.1/Acme/Tools.pm line 2372.

sub sim {
  require String::Similarity;
  my($str,@r)=@_;
  return String::Similarity::similarity(@_) if @r==1; #to param
  my($min,$mindiff);
  if(ref($r[0]) eq 'ARRAY'){
    ($min,$mindiff)=@r[1,2];
    @r=@{$r[0]};
  }
  $min=0 if!defined$min;
  my($simlikest,$simnestlikest,$likest,$idlikest)=(-1,-1);
  for(@r){

Tools.pm  view on Meta::CPAN

=head2 binsearch

Returns the position of an element in a numerically sorted array. Returns undef if the element is not found.

B<Input:> Two, three or four arguments

B<First argument:> the element to find. Usually a number.

B<Second argument:> a reference to the array to search in. The array
should be sorted in ascending numerical order (se exceptions below).

B<Third argument:>  Optional. Default false.

If true, whether result I<not found> should return undef or a fractional position.

If the third argument is false binsearch returns undef if the element is not found.

If the third argument is true binsearch returns 0.5 plus closest position below the searched value.

Returns C< last position + 0.5 > if the searched element is greater than all elements in the sorted array.

Returns C< -0.5 > if the searched element is less than all elements in the sorted array.

Fourth argument: Optional. Default C<< sub { $_[0] <=> $_[1] } >>.

If present, the fourth argument is either:

=over 4

=item * a code-ref that alters the way binsearch compares two elements, default is C<< sub{$_[0]<=>$_[1]} >>

=item * a string that works as a hash key (column name), see example below

=back

B<Examples:>

 binsearch(10,[5,10,15,20]);                                # 1
 binsearch(10,[20,15,10,5],undef,sub{$_[1]<=>$_[0]});       # 2 search arrays sorted numerically in opposite order
 binsearch("c",["a","b","c","d"],undef,sub{$_[0]cmp$_[1]}); # 2 search arrays sorted alphanumerically
 binsearchstr("b",["a","b","c","d"]);                       # 1 search arrays sorted alphanumerically

 my @data=(  map {  {num=>$_, sqrt=>sqrt($_), square=>$_**2}  }
             grep !$_%7, 1..1000000   );
 my $i = binsearch( {num=>913374}, \@data, undef, sub {$_[0]{num} <=> $_[1]{num}} );
 my $i = binsearch( {num=>913374}, \@data, undef, 'num' );                           #same as previous line
 my $found_hashref = defined $i ? $data[$i] : undef;

=head2 binsearchstr

Same as binsearch except that the arrays is sorted alphanumerically
(cmp) instead of numerically (<=>) and the searched element is a
string, not a number. See L</binsearch>.

=cut

our $Binsearch_steps;
our $Binsearch_maxsteps=100;
sub binsearch {
  my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
  croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
  croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
  if(defined $cmpsub and !ref($cmpsub)){
      my $key=$cmpsub;
      $cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
  }
  return $insertpos ? -0.5 : undef if !@$aref;
  my($min,$max)=(0,$#$aref);
  $Binsearch_steps=0;
  while (++$Binsearch_steps <= $Binsearch_maxsteps) {
    my $middle=int(($min+$max+0.5)/2);
    my $middle_value=$$aref[$middle];

    #croak "binsearch got non-sorted array" if !$cmpsub and $$aref[$min]>$$aref[$min]
    #                                       or  $cmpsub and &$cmpsub($$aref[$min],$$aref[$min])>0;

    if(   !$cmpsub and $search < $middle_value
    or     $cmpsub and &$cmpsub($search,$middle_value) < 0  ) {      #print "<\n";
      $max=$min, next                   if $middle == $max and $min != $max;
      return $insertpos ? $middle-0.5 : undef if $middle == $max;
      $max=$middle;
    }
    elsif( !$cmpsub and $search > $middle_value
    or      $cmpsub and &$cmpsub($search,$middle_value) > 0 ) {      #print ">\n";
      $min=$max, next                   if $middle == $min and $max != $min;
      return $insertpos ? $middle+0.5 : undef if $middle == $min;
      $min=$middle;
    }
    else {                                                           #print "=\n";
      return $middle;
    }
  }
  croak "binsearch exceded $Binsearch_maxsteps steps";
}

sub binsearchfast { # binary search routine finds index just below value
  my ($x,$v)=@_;
  my ($klo,$khi)=(0,$#{$x});
  my $k;
  while (($khi-$klo)>1) {
    $k=int(($khi+$klo)/2);
    if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
  }
  return $klo;
}


sub binsearchstr {binsearch(@_[0..2],sub{$_[0]cmp$_[1]})}

=head2 rank

B<Input:> Two or three arguments. N and an arrayref for the list to look at.

In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.

In array context: Returns the n smallest numbers in an array.

To return the n(th) largest number(s) instead of smallest, just negate n.

An optional third argument can be a sub that is used to compare the elements of the input array.

Examples:

 my $second_smallest = rank(2, [11,12,13,14]);  # 12
 my @top10           = rank(-10, [1..100]);     # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
 my $max             = rank(-1, [101,102,103,102,101]); #103
 my @contest         = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
 my $second          = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob

=head2 rankstr

Just as C<rank> but sorts alphanumerically (strings, cmp) instead of numerically.

=cut

sub rank {
  my($rank,$aref,$cmpsub)=@_;
  if($rank<0){
    $cmpsub||=sub{$_[0]<=>$_[1]};
    return rank(-$rank,$aref,sub{0-&$cmpsub});
  }
  my @sort;
  local $Pushsort_cmpsub=$cmpsub;
  for(@$aref){
    pushsort @sort, $_;
    pop @sort if @sort>$rank;
  }
  return wantarray ? @sort : $sort[$rank-1];
}
sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}

=head2 egrep

Extended grep.

Works like L<grep> but with more insight: local vars $i, $n, $prev, $next, $prevr and $nextr are available:

$i is the current index, starts with 0, ends with the length of the input array minus one

$n is the current element number, starts with 1, $n = $i + 1

$prev is the previous value (undef if current is first)

$next is the next value (undef if current is last)

$prevr is the previous value, rotated so that the previous of the first element is the last element

$nextr is the next value, rotated so that the next of the last element is the first element

$_ is the current value, just as with Perls built-in grep

 my @a = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);  # 1..20
 my @r = egrep { $_ % 3 == 0 } @a;  # @r is 3, 6, 9, 12, 15, 18. Plain grep could have been used here
 my @r = egrep { $i==1 or $next==12 or $prev==14 } @a;  # @r is now 2, 11, 15

 my @a=2..44;
 egrep { $prev =~/4$/ or $next =~/2$/ } @a;  # 5, 11, 15, 21, 25, 31, 35, 41
 egrep { $prevr=~/4$/ or $nextr=~/2$/ } @a;  # 2, 5, 11, 15, 21, 25, 31, 35, 41, 44
 egrep { $i%7==0 } @a;                       # 2, 9, 16, 23, 30, 37, 44
 egrep { $n%7==0 } @a;                       # 8, 15, 22, 29, 36, 43

=cut

sub egrep (&@) {
    my($code,$i,$package)=(shift,-1,(caller)[0]);
    my %h=map{($_=>"${package}::$_")}qw(i n prev next prevr nextr);
    no strict 'refs';
    grep {

Tools.pm  view on Meta::CPAN

  @person=({Rank=>1,Name=>'Amy'}, {Rank=>2,Name=>'Paula'}, {Rank=>3,Name=>'Ruth'});
  print "Persons are sorted" if sorted @person, sub{$_[0]{Rank}<=>$_[1]{Rank}};

=head2 sortedstr

Return true if the input array is I<alpha>numerically sorted.

  @a=(1..10);      print "array is sorted" if sortedstr @a; #false
  @a=("01".."10"); print "array is sorted" if sortedstr @a; #true

=cut

sub sorted (\@@) {
  my($a,$cmpsub)=@_;
  if($cmpsub){ &$cmpsub($$a[$_],$$a[$_+1])>0 and return 0 for 0..$#$a-1 }
  else       { $$a[$_] > $$a[$_+1]           and return 0 for 0..$#$a-1 }
  return 1;
}
#sub sortedstr { sorted(@_,sub{$_[0]cmp$_[1]}) }
sub sortedstr { $_[$_] gt $_[$_+1] and return 0 for 0..$#$_-1; return 1 }

sub sortby {
    my($arr,@by)=@_;
    die if grep/^-/,@by; #hm 4now todo! - dash meaning descending order
    my $pattern=join(" ",map"%-40s",@by);#hm 4now bad, cant handle numeric sort
    map$$_[0],
    sort{$$a[1]cmp$$b[1]}
    map[$_,sprintf($pattern,@$_{@by})],
    @$arr;
}


=head2 subarrays

Returns all 2^n-1 combinatory subarrays of an array where each element
of input array participates or not. Note: The empty array is not among
the returned arrayrefs unless an empty input is given.

 my @a = subarrays( 'a', 'b', 'c' );         # same as:
 my @a = ( ['a'    ],
           [    'b'],
           ['a','b'],
           [        'c'],
           ['a',    'c'],
           [    'b','c'],
           ['a','b','c'] );

 sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 } #implemented as

=cut

sub subarrays { map { my $n = 2*$_; [ grep {($n/=2)%2} @_ ] } 1 .. 2**@_-1 }

=head2 part

B<Input:> A code-ref and a list

B<Output:> Two array-refs

Like C<grep> but returns the false list as well. Partitions a list
into two lists where each element goes into the first or second list
whether the predicate (a code-ref) is true or false for that element.

 my( $odd, $even ) = part {$_%2} (1..8);
 print for @$odd;   #prints 1 3 5 7
 print for @$even;  #prints 2 4 6 8

(Works like C< partition() > in the Scala programming language)

=head2 parth

Like C<part> but returns any number of lists. Not just two. Sort of like I<group by> in SQL.

B<Input:> A code-ref and a list

B<Output:> A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.

 my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
 print serialize(\%hash);

Result:

 %hash = (  T=>['These','the','this'],
            A=>['are','array'],
            O=>['of'],
            W=>['words']  )

=head2 parta

Like L<parth> but returns an array of lists where the predicate returns an index number.

 my @a = parta { length } qw/These are the words of this array/;

Result:

 @a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )

Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.

=cut

sub part  (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
sub parth (&@) { my($c,%r)=(shift);       push @{ $r{ &$c     } }, $_ for @_; %r }
sub parta (&@) { my($c,@r)=(shift);       push @{ $r[ &$c     ] }, $_ for @_; @r }
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map

=head2 refa

=head2 refh

=head2 refs

=head2 refaa

=head2 refah

=head2 refha

=head2 refhh

Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes

Examples:

  my $ref_to_array  = [1,2,3];
  my $ref_to_hash   = {1,100,2,200,3,300};
  my $ref_to_scalar = \"String";
  print "arrayref"  if ref($ref_to_array)  eq 'ARRAY';  #true
  print "hashref"   if ref($ref_to_hash)   eq 'HASH';   #true
  print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
  print "arrayref"  if refa($ref_to_array);             #also true, without: eq 'ARRAY'
  print "hashref"   if refh($ref_to_hash);              #also true, without: eq 'HASH'
  print "scalarref" if refs($ref_to_scalar);            #also true, without: eq 'SCALAR'

  my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
  my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
  my $ref_to_hash_of_arrays  = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
  my $ref_to_hash_of_hashes  = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };

  print "aa"  if refaa($ref_to_array_of_arrays);         #true
  print "ah"  if refah($ref_to_array_of_hashes);         #true
  print "ha"  if refha($ref_to_hash_of_arrays);          #true
  print "hh"  if refhh($ref_to_hash_of_hashes);          #true

=cut

sub refa  { ref($_[0]) eq 'ARRAY'  ? 1                         : ref($_[0]) ? 0 : undef }
sub refh  { ref($_[0]) eq 'HASH'   ? 1                         : ref($_[0]) ? 0 : undef }
sub refs  { ref($_[0]) eq 'SCALAR' ? 1                         : ref($_[0]) ? 0 : undef }
sub refaa { ref($_[0]) eq 'ARRAY'  ? refa($_[0][0])            : ref($_[0]) ? 0 : undef }
sub refah { ref($_[0]) eq 'ARRAY'  ? refh($_[0][0])            : ref($_[0]) ? 0 : undef }
sub refha { ref($_[0]) eq 'HASH'   ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
sub refhh { ref($_[0]) eq 'HASH'   ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }


=head2 pushr

=head2 popr

=head2 shiftr

=head2 unshiftr

=head2 splicer

=head2 keysr

=head2 valuesr

=head2 eachr

=head2 joinr

In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:

 my $person={name=>'Gaga', array=>[1,2,3]};
 push    $person{array}  , 4;  #works in perl 5.12-5.22 but not before and after
 push @{ $person{array} }, 4;  #works in all perl5 versions
 pushr   $person{array}  , 4;  #use Acme::Tools and this should work in perl >= 5.8
 popr    $person{array};       #returns 4

=cut

sub pushr    { push    @{shift()}, @_ } # ?    ($@)
sub popr     { pop     @{shift()}     }
sub shiftr   { shift   @{shift()}     }
sub unshiftr { unshift @{shift()}, @_ }
sub splicer  { @_==1 ? splice( @{shift()} )
              :@_==2 ? splice( @{shift()}, shift() )
              :@_==3 ? splice( @{shift()}, shift(), shift() )
              :@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : croak }
sub keysr    { ref($_[0]) eq 'HASH' ? keys(%{shift()}) : keysr({@{shift()}})  } #hm sort(keys%{shift()}) ?
sub valuesr  { values( %{shift()} )    }
sub eachr    { ref($_[0]) eq 'HASH'  ? each(%{shift()})
             #:ref($_[0]) eq 'ARRAY' ? each(@{shift()})  # perl 5.8.8 cannot compile each on array! eval?

Tools.pm  view on Meta::CPAN

 print "Median = " . percentile(50, 1,2,3,4);   # 2.5

This:

 @data=(11, 5, 3, 5, 7, 3, 1, 17, 4, 2, 6, 4, 12, 9, 0, 5);
 @p = map percentile($_,@data), (25, 50, 75);

Is the same as this:

 @p = percentile([25, 50, 75], @data);

But the latter is faster, especially if @data is large since it sorts
the numbers only once internally.

B<Example:>

Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992

Average (or mean) is 143

Median is 15.5 (which is the average of 9 and 22 who both equally lays in the middle)

The 25-percentile is 6.25 which are between 6 and 7, but closer to 6.

The 75-percentile is 46.5, which are between 39 and 49 but close to 49.

Linear interpolation is used to find the 25- and 75-percentile and any
other x-percentile which doesn't fall exactly on one of the numbers in
the set.

B<Interpolation:>

As you saw, 6.25 are closer to 6 than to 7 because 25% along the set of
the twelve numbers is closer to the third number (6) than to he fourth
(7). The median (50-percentile) is also really interpolated, but it is
always in the middle of the two center numbers if there are an even count
of numbers.

However, there is two methods of interpolation:

Example, we have only three numbers: 5, 6 and 7.

Method 1: The most common is to say that 5 and 7 lays on the 25- and
75-percentile. This method is used in Acme::Tools.

Method 2: In Oracle databases the least and greatest numbers
always lay on the 0- and 100-percentile.

As an argument on why Oracles (and others?) definition is not the best way is to
look at your data as for instance temperature measurements.  If you
place the highest temperature on the 100-percentile you are sort of
saying that there can never be a higher temperatures in future measurements.

A quick non-exhaustive Google survey suggests that method 1 here is most used.

The larger the data sets, the less difference there is between the two methods.

B<Extrapolation:>

In method one, when you want a percentile outside of any possible
interpolation, you use the smallest and second smallest to extrapolate
from. For instance in the data set C<5, 6, 7>, if you want an
x-percentile of x < 25, this is below 5.

If you feel tempted to go below 0 or above 100, C<percentile()> will
I<die> (or I<croak> to be more precise)

Another method could be to use "soft curves" instead of "straight
lines" in interpolation. Maybe B-splines or Bezier curves. This is not
used here.

For large sets of data Hoares algorithm would be faster than the
simple straightforward implementation used in C<percentile()>
here. Hoares don't sort all the numbers fully.

B<Differences between the two main methods described above:>

 Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992

 Percentile    Method 1                      Method 2
               (Acme::Tools::percentile      (Oracle)
               and others)
 ------------- ----------------------------- ---------
 0             -2                            1
 1             -1.61                         1.33
 25            6.25                          6.75
 50 (median)   15.5                          15.5
 75            46.5                          41.5
 99            1372.19                       943.93
 100           1429                          992

Found like this:

 perl -MAcme::Tools -le 'print for percentile([0,1,25,50,75,99,100], 1,4,6,7,8,9,22,24,39,49,555,992)'

And like this in Oracle-databases:

 select
   percentile_cont(0.00) within group(order by n) per0,
   percentile_cont(0.01) within group(order by n) per1,
   percentile_cont(0.25) within group(order by n) per25,
   percentile_cont(0.50) within group(order by n) per50,
   percentile_cont(0.75) within group(order by n) per75,
   percentile_cont(0.99) within group(order by n) per99,
   percentile_cont(1.00) within group(order by n) per100
 from (
   select 0+regexp_substr('1,4,6,7,8,9,22,24,39,49,555,992','[^,]+',1,i) n
   from dual,(select level i from dual connect by level <= 12)
 );

(Oracle also provides a similar function: C<percentile_disc> where I<disc>
is short for I<discrete>, meaning no interpolation is taking
place. Instead the closest number from the data set is picked.)

=cut

sub percentile {
  my(@p,@t,@ret);
  if(ref($_[0]) eq 'ARRAY'){ @p=@{shift()} }
  elsif(not ref($_[0]))    { @p=(shift())  }
  else{croak()}
  @t=@_;
  return if !@p;
  croak if !@t;
  @t=sort{$a<=>$b}@t;
  push@t,$t[0] if @t==1;
  for(@p){
    croak if $_<0 or $_>100;
    my $i=(@t+1)*$_/100-1;
    push@ret,
      $i<0       ? $t[0]+($t[1]-$t[0])*$i:
      $i>$#t     ? $t[-1]+($t[-1]-$t[-2])*($i-$#t):
      $i==int($i)? $t[$i]:
                   $t[$i]*(int($i+1)-$i) + $t[$i+1]*($i-int($i));
  }
  return @p==1 ? $ret[0] : @ret;
}

=head1 RANDOM

=head2 random

B<Input:> One or two arguments.

B<Output:>

If two integer arguments: returns a random integer between the integers in argument one and two.

If the first argument is an arrayref: returns a random member of that array without changing the array.

If the first argument is an arrayref and there is a second arg: return that many random members of that array

If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash

If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash

If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.

B<Examples:>

 $dice=random(1,6);                                      # 1, 2, 3, 4, 5 or 6
 $dice=random([1..6]);                                   # same as previous
 @dice=random([1..6],10);                                # 10 dice tosses
 $dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2});     # weighted dice with 6 being twice as likely as the others
 @dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10);  # 10 weighted dice tosses
 print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
 print random(2);                                        # prints 0, 1 or 2
 print 2**random(7);                                     # prints 1, 2, 4, 8, 16, 32, 64 or 128
 @dice=map random([1..6]), 1..10;                        # as third example above, but much slower
 perl -MAcme::Tools -le 'print for random({head=>0.499,tail=>0.499,edge=>0.002},10000);' | sort | uniq -c

=cut

sub random {
  my($from,$to)=@_;
  my $ref=ref($from);
  if($ref eq 'ARRAY'){
    my @r=map $$from[rand@$from], 1..$to||1;
    return @_>1?@r:$r[0];
  }
  elsif($ref eq 'HASH') {
    my @k=keys%$from;
    my $max;do{no warnings 'uninitialized';$_>$max and $max=$_ or $_<0 and croak"negative weight" for values%$from};
    my @r=map {my$r;1 while $$from{$r=$k[rand@k]}<rand($max);$r} 1..$to||1;
    return @_>1?@r:$r[0];
  }
  ($from,$to)=(0,$from) if @_==1;
  ($from,$to)=($to,$from) if $from>$to;
  return int($from+rand(1+$to-$from));
}
#todo?: https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution

=head2 random_gauss

Returns an pseudo-random number with a Gaussian distribution instead
of the uniform distribution of perls C<rand()> or C<random()> in this
module.  The algorithm is a variation of the one at
L<http://www.taygeta.com/random/gaussian.html> which is both faster
and better than adding a long series of C<rand()>.

Uses perls C<rand> function internally.

B<Input:> 0 - 3 arguments.

First argument: the average of the distribution. Default 0.

Second argument: the standard deviation of the distribution. Default 1.

Third argument: If a third argument is present, C<random_gauss>
returns an array of that many pseudo-random numbers. If there is no
third argument, a number (a scalar) is returned.

B<Output:> One or more pseudo-random numbers with a Gaussian distribution. Also known as a Bell curve or Normal distribution.

Example:

 my @I=random_gauss(100, 15, 100000);         # produces 100000 pseudo-random numbers, average=100, stddev=15

Tools.pm  view on Meta::CPAN


=back

Note that an input-array which COINCIDENTLY SOME TIMES has one element
(but more other times), and that element is an array-ref, you will
probably not get the expected result.

To check distribution:

 perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n

The letters a-z should occur around 1000 times each.

Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)

 perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'

(Uses L</cart>, which is not a typo, see further down here)

Note: C<List::Util::shuffle()> is approximately four times faster. Both respects the Perl built-in C<srand()>.

=cut

sub mix {
  if(@_==1 and ref($_[0]) eq 'ARRAY'){ #just one arg, and its ref array
    my $r=$_[0];
    push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
    return $r;
  }
  else{
    my@e=@_;
    push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
    return @e;
  }
}

=head2 pwgen

Generates random passwords.

B<Input:> 0-n args

* First arg: length of password(s), default 8

* Second arg: number of passwords, default 1

* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!

* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:

 sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}

...meaning the password should:
* start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
* should contain at least one char from each of the groups lower, upper, digit and special char

To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
own C<rand()> internally.

C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
Whenever one of the two limits is reached, a first generates a croak.

Examples:

 my $pw=pwgen();             # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
 my $pw=pwgen(12);           # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
 my @pw=pwgen(0,10);         # 10 random 8 chars passwords, containing the same possible chars
 my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z

 pwgen(3);                                # dies, defaults require chars in each of 4 group (see above)
 pwgen(5,1,'A-C0-9',  qr/^\D{3}\d{2}$/);  # a 5 char string starting with three A, B or Cs and endring with two digits
 pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above

Examples of adding additional requirements to the default ones:

 my @pwreq = ( qr/^[A-C]/ );
 pwgen(8,1,'','',@pwreq);    # use defaults for allowed chars and the standard requirements
                             # but also demand that the password must start with A, B or C

 push @pwreq, sub{ not /[a-z]{3}/i };
 pwgen(8,1,'','',@pwreq);    # as above and in addition the password should not contain three
                             # or more consecutive letters (to avoid "offensive" words perhaps)

=cut

our $Pwgen_max_sec=0.01;     #max seconds/password before croak (for hard to find requirements)
our $Pwgen_max_trials=10000; #max trials/password  before croak (for hard to find requirements)
our $Pwgen_sec=0;            #seconds used in last call to pwgen()
our $Pwgen_trials=0;         #trials in last call to pwgen()
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
sub pwgen {
  my($len,$num,$chars,@req)=@_;
  $len||=8;
  $num||=1;
  $chars||='A-Za-z0-9,-./&%_!';
  $req[0]||=\&pwgendefreq if !$_[2];
  $chars=~s/([$_])-([$_])/join("","$1".."$2")/eg  for ('a-z','A-Z','0-9');
  my($c,$t,@pw,$d)=(length($chars),time_fp());
  ($Pwgen_trials,$Pwgen_sec)=(0,0);
  TRIAL:
  while(@pw<$num){
    croak "pwgen timeout after $Pwgen_trials trials"
      if ++$Pwgen_trials   >= $Pwgen_max_trials
      or ($d=time_fp()-$t) >  $Pwgen_max_sec*$num
            and $d!~/^\d+$/; #jic int from time_fp
    my $pw=join"",map substr($chars,rand($c),1),1..$len;
    for my $r (@req){
      if   (ref($r) eq 'CODE'  ){ local$_=$pw; &$r()    or next TRIAL }
      elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
      else                      { croak "pwgen: invalid req type $r ".ref($r) }
    }
    push@pw,$pw;
  }
  $Pwgen_sec=time_fp()-$t;
  return $pw[0] if $num==1;
  return @pw;
}

# =head1 veci
#
# Perls C<vec> takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
#
# This limitation is removed with C<veci> (vec improved, but much slower)
#
# The third argument still needs to be 32 or lower (or possibly 64 or lower).
#
# =cut
#
# sub vecibs ($) {
#   my($s,$o,$b,$new)=@_;
#   if($b=~/^(1|2|4|8|16|32|64)$/){
#     return vec($s,$o,$b)=$new if @_==4;
#     return vec($s,$o,$b);
#   }
#   my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
#   my $ob=int($o*$b/$bb);
#   my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
#   $v & (2**$b-1)
# }


=head1 SETS

=head2 distinct

Returns the values of the input list, sorted alfanumerically, but only
one of each value. This is the same as L</uniq> except uniq does not
sort the returned list.

Example:

 print join(", ", distinct(4,9,3,4,"abc",3,"abc"));    # 3, 4, 9, abc
 print join(", ", distinct(4,9,30,4,"abc",30,"abc"));  # 30, 4, 9, abc       note: alphanumeric sort

=cut

sub distinct { sort keys %{{map {($_,1)} @_}} }

=head2 in

Returns I<1> (true) if first argument is in the list of the remaining arguments. Uses the perl-operator C<< eq >>.

Otherwise it returns I<0> (false).

 print in(  5,   1,2,3,4,6);         # 0
 print in(  4,   1,2,3,4,6);         # 1
 print in( 'a',  'A','B','C','aa');  # 0
 print in( 'a',  'A','B','C','a');   # 1

I guess in perl 5.10 or perl 6 you could use the C<< ~~ >> operator instead.

=head2 in_num

Just as sub L</in>, but for numbers. Internally uses the perl operator C<< == >> instead of C< eq >.

 print in(5000,  '5e3');          # 0
 print in(5000,   5e3);           # 1 since 5e3 is converted to 5000 before the call
 print in_num(5000, 5e3);         # 1
 print in_num(5000, '+5.0e03');   # 1

=cut

sub in     { no warnings 'uninitialized'; my $val=shift; $_ eq $val and return 1 for @_; return 0 }
sub in_num { no warnings 'uninitialized'; my $val=shift; $_ == $val and return 1 for @_; return 0 }

=head2 union

Input: Two arrayrefs. (Two lists, that is)

Output: An array containing all elements from both input lists, but no element more than once even if it occurs twice or more in the input.

Example, prints 1,2,3,4:

 perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])'              # 1,2,3,4

=cut

sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
=head2 minus

Input: Two arrayrefs.

Output: An array containing all elements in the first input array but not in the second.

Example:

 perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'

Output is C<< five 1 2 >>.

=cut

sub minus {
  my %seen;
  my %notme=map{($_=>1)}@{$_[1]};
  grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
}

=head2 intersect

Input: Two arrayrefs

Output: An array containing all elements which exists in both input arrays.

Example:

 perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'      # 4 3 five

Output: C<< 4 3 five >>

=cut

sub intersect {
  my %first=map{($_=>1)}@{$_[0]};
  my %seen;
  return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
}

=head2 not_intersect

Input: Two arrayrefs

Output: An array containing all elements member of just one of the input arrays (not both).

Example:

 perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'

The output is C<< 1 2 >>.

=cut

sub not_intersect {
  my %code;
  my %seen;
  for(@{$_[0]}){$code{$_}|=1}
  for(@{$_[1]}){$code{$_}|=2}
  return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
}

=head2 uniq

Input:    An array of strings (or numbers)

Output:   The same array in the same order, except elements which exists earlier in the list.

Same as L</distinct> but distinct sorts the returned list, I<uniq> does not.

Example:

 my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
 print join " ", uniq @t;                          # prints  7 2 3 4 1 5 x xx 07

Beware of using C<sort> like the following because sort will see C<uniq>
as the subroutine for comparing elements! Which you most likely didnt mean.
This has nothing to do with the way uniq is implemented. It's Perl's C<sort>.

 print sort uniq('a','dup','z','dup');  # will return this four element array: a dup z dup
 print sort(uniq('a','dup','z','dup')); # better, probably what you meant
 print distinct('a','dup','z','dup'));  # same, distinct includes alphanumeric sort

=cut

sub uniq(@) { my %seen; grep !$seen{$_}++, @_ }

=head1 HASHES

=head2 subhash

Copies a subset of keys/values from one hash to another.

B<Input:> First argument is a reference to a hash. The rest of the arguments are a list of the keys of which key/value-pair you want to be copied.

B<Output:> The hash consisting of the keys and values you specified.

Example:

 %population = ( Norway=>5000000, Sweden=>9500000, Finland=>5400000,
                 Denmark=>5600000, Iceland=>320000,
                 India => 1.21e9, China=>1.35e9, USA=>313e6, UK=>62e6 );

 %scandinavia = subhash( \%population , 'Norway', 'Sweden', 'Denmark' ); # this and
 %scandinavia = (Norway=>5000000,Sweden=>9500000,Denmark=>5600000);      # this is the same

 print "Population of $_ is $scandinavia{$_}\n" for keys %scandinavia;

...prints the populations of the three scandinavian countries.

Note: The values are NOT deep copied when they are references. (Use C<< Storable::dclone() >> to do that).

Note2: For perl versions >= 5.20 subhashes (hash slices returning keys as well as values) is built in like this:

 %scandinavia = %population{'Norway','Sweden','Denmark'};

Tools.pm  view on Meta::CPAN

B<Output:> The binary compressed representation of that input string.

C<gzip()> is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
distros. Except C<gzip()> does this in-memory. (Both using the C-library C<zlib>).

 writefile( "file.gz", gzip("some string") );

=head2 gunzip

B<Input:> A binary compressed string or a reference to such a string. I.e. something returned from
C<gzip()> earlier or read from a C<< .gz >> file.

B<Output:> The original larger non-compressed string. Text or binary.

C<gunzip()> is a wrapper for Compress::Zlib::memGunzip()

 print gunzip( gzip("some string") );   #some string

=head2 bzip2

Same as L</gzip> and L</gunzip> except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.

Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.

See also L<Compress::Bzip2>, C<man Compress::Bzip2>, C<man bzip2>, C<man bunzip2>.

 writefile( "file.bz2", bzip2("some string") );
 print bunzip2( bzip2("some string") );   #some string

=head2 bunzip2

Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L</bzip2>.

=cut

sub gzip    { my $s=shift; eval"require Compress::Zlib"  if !$INC{'Compress/Zlib.pm'};  croak "Compress::Zlib not found"  if $@; Compress::Zlib::memGzip(    ref($s)?$s:\$s ) }
sub gunzip  { my $s=shift; eval"require Compress::Zlib"  if !$INC{'Compress/Zlib.pm'};  croak "Compress::Zlib not found"  if $@; Compress::Zlib::memGunzip(  ref($s)?$s:\$s ) }
sub bzip2   { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip(   ref($s)?$s:\$s ) }
sub bunzip2 { my $s=shift; eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }

=head1 NET, WEB, CGI-STUFF

=head2 ipaddr

B<Input:> an IP-number

B<Output:> either an IP-address I<machine.sld.tld> or an empty string
if the DNS lookup didn't find anything.

Example:

 perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")'  # prints www.uio.no

Uses perls C<gethostbyaddr> internally.

C<ipaddr()> memoizes the results internally (using the
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
particular IP number might take some time.

Some few DNS loopups can take several seconds.
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
probably turn off hostname lookups in their logs and just log IP numbers by using
C<HostnameLookups Off> in Apache C<httpd.conf> and then use I<ipaddr> afterwards if necessary.

=cut

our %IPADDR_memo;
sub ipaddr {
  my $ipnr=shift;
  #hm, NOTE: The 2 parameter on the next code line is not 2 for all OSes,
  #but seems to work in Linux and HPUX. Den correct way is to use the
  #AF_INET constant in the Socket or the IO::Socket package.
  return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
}

=head2 ipnum

C<ipnum()> does the opposite of C<ipaddr()>

Does an attempt of converting an IP address (hostname) to an IP number.
Uses DNS name servers via perls internal C<gethostbyname()>.
Return empty string (undef) if unsuccessful.

 print ipnum("www.uio.no");   # prints 129.240.13.152

Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.

=cut

our %IPNUM_memo;
sub ipnum {
  my $ipaddr=shift;
  #croak "No $ipaddr" if !length($ipaddr);
  return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
  my $h=gethostbyname($ipaddr);
  #croak "No ipnum for $ipaddr" if !$h;
  return if !defined $h;
  my $ipnum = join(".",unpack("C4",$h));
  $IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
  return $IPNUM_memo{$ipaddr};
}

our $Ipnum_errmsg;
our $Ipnum;
sub ipnum_ok {
    my $ipnum=shift;
    $Ipnum=undef;
    eval{
      die "malformed ipnum $ipnum\n" if not $ipnum=~/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
      die "invalid ipnum $ipnum\n"   if grep$_>255,$1,$2,$3,$4;
      $Ipnum=$1*256**3 + $2*256**2 + $3*256 + $4;
    };
    my$r=($Ipnum_errmsg=$@) ? 0 : 1;
    $r
}
our $Iprange_errmsg;
our $Iprange_start;
sub iprange_ok {
    my $iprange=shift;
    $Iprange_start=undef;
    my($r,$m);

Tools.pm  view on Meta::CPAN


Input: a string

Output: the same string URL encoded so it can be sent in URLs or POST requests.

In URLs (web addresses) certain characters are illegal. For instance I<space> and I<newline>.
And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C<?>, C<&>.

These illegal and special chars needs to be encoded to be sent in
URLs.  This is done by sending them as C<%> and two hex-digits. All
chars can be URL encodes this way, but it's necessary just on some.

Example:

 $search="Østdal, Åge";
 my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
 print $url;

Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>

=cut

sub urlenc {
  my $str=shift;
  $str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
  return $str;
}

=head2 urldec

Opposite of L</urlenc>.

Example, this returns 'C< ø>'. That is space and C<< ø >>.

 urldec('+%C3')

=cut

sub urldec {
  my $str=shift;
  $str=~s/\+/ /gs;
  $str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
  return $str;
}

=head2 ht2t

C<ht2t> is short for I<html-table to table>.

This sub extracts an html-C<< <table> >>s and returns its C<< <tr>s >>
and C<< <td>s >> as an array of arrayrefs. And strips away any html
inside the C<< <td>s >> as well.

 my @table = ht2t($html,'some string occuring before the <table> you want');

Input: One or two arguments.

First argument: the html where a C<< <table> >> is to be found and converted.

Second argument: (optional) If the html contains more than one C<<
<table> >>, and you do not want the first one, applying a second
argument is a way of telling C<ht2t> which to capture: the one with this word
or string occurring before it.

Output: An array of arrayrefs.

C<ht2t()> is a quick and dirty way of scraping (or harvesting as it is
also called) data from a web page. Look too L<HTML::Parse> to do this
more accurate.

Example:

 use Acme::Tools;
 use LWP::Simple;
 my $url = "http://en.wikipedia.org/wiki/List_of_countries_by_population";
 for( ht2t( get($url), "Countries" ) ) {
   my($rank, $country, $pop) = @$_;
   $pop =~ s/,//g;
   printf "%3d | %-32s | %9d\n", @$_ if $pop>0;
 }

Output:

  1 | China                            | 1367740000
  2 | India                            | 1262090000
  3 | United States                    | 319043000
  4 | Indonesia                        | 252164800
  5 | Brazil                           | 203404000

...and so on.

=cut

sub ht2t {
  my($f,$s,$r)=@_; 1>@_||@_>3 and croak; $s='' if @_==1;
  $f=~s,.*?($s).*?(<table.*?)</table.*,$2,si;
  my $e=0;$e++ while index($f,$s=chr($e))>=$[;
  $f=~s/<t(d|r|h).*?>/\l$1$s/gsi;
  $f=~s/\s*<.*?>\s*/ /gsi;
  my @t=split("r$s",$f);shift @t;
  $r||=sub{s/&(#160|nbsp);/ /g;s/&amp;/&/g;s/^\s*(.*?)\s*$/$1/s;
	   s/(\d) (\d)/$1$2/g if /^[\d \.\,]+$/};
  for(@t){my @r=split/[dh]$s/;shift@r;$_=[map{&$r;$_}@r]}
  @t;
}

=head1 FILES, DIRECTORIES

=head2 writefile

Justification:

Perl needs three or four operations to make a file out of a string:

 open my $FILE, '>', $filename  or die $!;
 print $FILE $text;
 close($FILE);

This is way simpler:

 writefile($filename,$text);

Tools.pm  view on Meta::CPAN


=cut

sub wipe {
  my($file,$times,$keep)=@_;
  $times||=3;
  croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
  my $size=-s$file;
  open my $WIFH, '+<', $file or croak "ERROR: Unable to open $file: $!\n";
  binmode($WIFH);
  for(1..$times){
    my $block=chr(int(rand(256))) x 1024;#hm
    for(0..($size/1024)){
      seek($WIFH,$_*1024,0);
      print $WIFH $block;
    }
  }
  close($WIFH);
  $keep || unlink($file);
}

=head2 chall

Does chmod + utime + chown on one or more files.

Returns the number of files of which those operations was successful.

Mode, uid, gid, atime and mtime are set from the array ref in the first argument.

The first argument references an array which is exactly like an array returned from perls internal C<stat($filename)> -function.

Example:

 my @stat=stat($filenameA);
 chall( \@stat,       $filenameB, $filenameC, ... );  # by stat-array
 chall( $filenameA,   $filenameB, $filenameC, ... );  # by file name

Copies the chmod, owner, group, access time and modify time from file A to file B and C.

See C<perldoc -f stat>, C<perldoc -f chmod>, C<perldoc -f chown>, C<perldoc -f utime>

=cut


sub chall {
  my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks )
    = ref($_[0]) ? @{shift()} : stat(shift());
  my $successful=0;
  for(@_){ chmod($mode,$_) && utime($atime,$mtime,$_) && chown($uid,$gid,$_) && $successful++ }
  return $successful;
}

=head2 makedir

Input: One or two arguments.

Works like perls C<mkdir()> except that C<makedir()> will create nesessary parent directories if they dont exists.

First input argument: A directory name (absolute, starting with C< / > or relative).

Second input argument: (optional) permission bits. Using the normal C<< 0777^umask() >> as the default if no second input argument is provided.

Example:

 makedir("dirB/dirC")

...will create directory C<dirB> if it does not already exists, to be able to create C<dirC> inside C<dirB>.

Returns true on success, otherwise false.

C<makedir()> memoizes directories it has checked for existence before (trading memory and for speed).
Thus directories removed during running the script is not discovered by makedir.

See also C<< perldoc -f mkdir >>, C<< man umask >>

=cut

our %MAKEDIR;

sub makedir {
  my($d,$p,$dd)=@_;
  $p=0777^umask() if !defined$p;
  (
  $MAKEDIR{$d} or -d$d or mkdir($d,$p) #or croak("mkdir $d, $p")
  or ($dd)=($d=~m,^(.+)/+([^/]+)$,) and makedir($dd,$p) and mkdir($d,$p) #or die;
  ) and ++$MAKEDIR{$d};
}

=head2 md5sum

B<Input:> a filename (or a scalar ref to a string, see below)

B<Output:> a string of 32 hexadecimal chars from 0-9 or a-f.

Example, the md5sum gnu/linux command without options could be implementet like this:

 use Acme::Tools;
 print eval{ md5sum($_)."  $_\n" } || $@ for @ARGV;

This sub requires L<Digest::MD5>, which is a core perl-module since
version 5.?.?  It does not slurp the files or spawn new processes.

If the input argument is a scalar ref then the MD5 of the string referenced is returned in hex.

=cut

sub md5sum {
  require Digest::MD5;
  my $fn=shift;
  return Digest::MD5::md5_hex($$fn) if ref($fn) eq 'SCALAR';
  croak "md5sum: $fn is a directory (no md5sum)" if -d $fn;
  open my $FH, '<', $fn or croak "Could not open file $fn for md5sum() $!";
  binmode($FH);
  my $r = eval { Digest::MD5->new->addfile($FH)->hexdigest };
  croak "md5sum on $fn failed ($@)\n" if $@;
  $r;
}

=head2 which

Returns the first executable program in $ENV{PATH} paths (split by : colon) with the given name.

 echo $PATH
 perl -MAcme::Tools -le 'print which("gzip")'      # maybe prints /bin/gzip

=head2 read_conf

B<First argument:> A file name or a reference to a string with settings in the format described below.

B<Second argument, optional:> A reference to a hash. This hash will have the settings from the file (or stringref).
The hash do not have to be empty beforehand.

Returns a hash with the settings as in this examples:

 my %conf = read_conf('/etc/your/thing.conf');
 print $conf{sectionA}{knobble};  #prints ABC if the file is as shown below
 print $conf{sectionA}{gobble};   #prints ZZZ, the last gobble
 print $conf{switch};             #prints OK here as well, unsectioned value
 print $conf{part2}{password};    #prints oh:no= x

File use for the above example:

 switch:    OK       #before first section, the '' (empty) section
 [sectionA]
 knobble:   ABC
 gobble:    XYZ      #this gobble is overwritten by the gobble on the next line
 gobble:    ZZZ
 [part2]
 password:  oh:no= x  #should be better
 text:      { values starting with { continues
              until reaching a line with }

Everything from # and behind is regarded comments and ignored. Comments can be on any line.
To keep a # char, put a \ in front of it.

A C< : > or C< = > separates keys and values.  Spaces at the beginning or end of lines are
ignored (after removal of #comments), as are any spaces before and after : and = separators.

Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
internal spaces and tabs, but not at the beginning or end.

Multi-line values must start and end with { and }. Using { and } keep spaces at the start
or end in both one-line and multi-line values.

Sections are marked with C<< [sectionname] >>.  Section names, keys and values is case
sensitive. C<Key:values> above the first section or below and empty C<< [] >> is placed
both in the empty section in the returned hash and as top level key/values.

C<read_conf> can be a simpler alternative to the core module L<Config::Std> which has
its own hassles.

 $Acme::Tools::Read_conf_empty_section=1;        #default 0 (was 1 in version 0.16)
 my %conf = read_conf('/etc/your/thing.conf');
 print $conf{''}{switch};                        #prints OK with the file above
 print $conf{switch};                            #prints OK here as well

=cut

our $Read_conf_empty_section=0;
sub read_conf {
  my($fn,$hr)=(@_,{});
  my $conf=ref($fn)?$$fn:readfile($fn);
  $conf=~s,\s*(?<!\\)#.*,,g;
  my($section,@l)=('',split"\n",$conf);
  while(@l) {
    my $l=shift@l;
    if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
      $section=$1;
      $$hr{$1}||={};
    }
    elsif( $l=~/^\s*([^\:\=]+?)\s*[:=]\s*(.*?)\s*$/ ) {
      my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
      my $v=&$ml($2);
      $$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
      $$hr{$1}=$v if !length($section);
    }
  }
  %$hr;
}
#  my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
#    s,<INCLUDE ([^>]+)>,"".readfile(&$incfn($1)),eg; #todo


=head2 openstr

                                            # returned from openstr:
  open my $FH, openstr("fil.txt")  or die;  # fil.txt
  open my $FH, openstr("fil.gz")   or die;  # zcat fil.gz |
  open my $FH, openstr("fil.bz2")  or die;  # bzcat fil.bz2 |
  open my $FH, openstr("fil.xz")   or die;  # xzcat fil.xz |
  open my $FH, openstr(">fil.txt") or die;  # > fil.txt
  open my $FH, openstr(">fil.gz")  or die;  # | gzip > fil.gz
  open my $FH, openstr(">fil.bz2") or die;  # | bzip2 > fil.bz2
  open my $FH, openstr(">fil.xz")  or die;  # | xz    > fil.bz2

Environment variable PATH is used. So in the examples above, /bin/gzip
is returned instead of gzip if /bin is the first directory in
$ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
/usr/local/bin is added to PATH in openstr(). They are checked even if
PATH is empty.

See also C<writefile()> and C<readfile()> for automatic compression and decompression using C<openstr>.

=cut

our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
our $Magic_openstr=1;
sub openstr_prog { @Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found" }
sub openstr {
  my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
  return $fn if !$ext or !$Magic_openstr;
  $fn =~ /^\s*>/
      ?  "| ".(openstr_prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/   }->{lc($ext)})).$fn
      :        openstr_prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
}

=head2 printed

Redirects C<print> and C<printf> from STDOUT to a string which is returned.

 my $p = printed { print "hello!" };     # now $p eq 'hello!'
 my $p = printed { some_sub() };         # now $p contains whatever was printed by some_sub() and the subs call from it

=cut

sub printed (&) { my $s; open(local *STDOUT, '>', \$s) or croak "ERR: $! $?"; shift->(); $s } #todo catch stderr also?

#todo: sub stdin{}
#todo: sub stdout{}
#todo: sub stderr{}
#todo: sub stdouterr{}

=head1 TIME FUNCTIONS

=head2 tms

Timestring, works somewhat like the Gnu/Linux C<date> command and Oracle's C<to_char()>

Converts timestamps to more readable forms of time strings.

Converts seconds since I<epoch> and time strings on the form C<YYYYMMDD-HH24:MI:SS> to other forms.

B<Input:> One, two or three arguments.

B<First argument:> A format string.

B<Second argument: (optional)> An epock C<time()> number or a time
string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
version of hours) or YYYYMMDD. Uses the current C<time()> if the
second argument is missing.

TODO: Formats with % as in C<man date> (C<%Y%m%d> and so on)

B<Third argument: (optional> True or false. If true and first argument
is eight digits: Its interpreted as a date like YYYYMMDD time string,
not an epoch time.  If true and first argument is six digits its
interpreted as a date like DDMMYY (not YYMMDD!).

B<Output:> a date or clock string on the wanted form.

B<Examples:>

Prints C<< 3. july 1997 >> if thats the dato today:

  perl -MAcme::Tools -le 'print timestr("D. month YYYY")'

  print tms("HH24:MI");              # prints 23:55 if thats the time now
  tms("HH24:MI",time());             # ...same,since time() is the default
  tms("HH:MI",time()-5*60);          # 23:50 if that was the time 5 minutes ago
  tms("HH:MI",time()-5*60*60);       # 18:55 if thats the time 5 hours ago
  tms("Day Month Dth YYYY HH:MI");   # Saturday July 1st 2004 23:55    (big S, big J)
  tms("Day D. Month YYYY HH:MI");    # Saturday 8. July 2004 23:55     (big S, big J)
  tms("DAY D. MONTH YYYY HH:MI");    # SATURDAY 8. JULY 2004 23:55     (upper)
  tms("dy D. month YYYY HH:MI");     # sat 8. july 2004 23:55          (small s, small j)
  tms("Dy DD. MON YYYY HH12:MI am"); # Sat 08. JUL 2004 11:55 pm       (HH12, am becomes pm if after 12)
  tms("DD-MON-YYYY");                # 03-MAY-2004                     (mon, english)

The following list of codes in the first argument will be replaced:

  YYYY    Year, four digits
  YY      Year, two digits, i.e. 04 instead of 2004
  yyyy    Year, four digits, but nothing if its the current year
  YYYY|HH:MI  Year if its another year than the current, a time in hours and minutes elsewise
  MM      Month, two digits. I.e. 08 for August
  DD      Day of month, two digits. I.e. 01 (not 1) for the first day in a month
  D       Day of month, one digit. I.e. 1 (not 01)
  HH      Hour. From 00 to 23.
  HH24    Same as HH.
  HH12    12 becomes 12 (never 00), 13 becomes 01, 14 02 and so on.
          Note: 00 after midnight becomes 12 (am). Tip: always include the code
          am in a format string that uses HH12.
  MI      Minutt. Fra 00 til 59.
  SS      Sekund. Fra 00 til 59.
  am      Becomes am or pm
  pm      Same
  AM      Becomes AM or PM (upper case)
  PM      Same

  Month   The full name of the month in English from January to December
  MONTH   Same in upper case (JANUARY)
  month   Same in lower case (january)
  Mont    Jan Feb Mars Apr May June July Aug Sep Oct Nov Dec
  Mont.   Jan. Feb. Mars Apr. May June July Aug. Sep. Oct. Nov. Dec. (always four chars)
  Mon     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec            (always three chars)

  Day     The full name of the weekday. Sunday to Saturday
  Dy      Three letters: Sun Mon Tue Wed Thu Fri Sat
  DAY     Upper case
  DY      Upper case
  Dth     1st 2nd 3rd 4th 5th ... 11th 12th ... 20th 21st 22nd 23rd 24th ... 30th 31st

  WW      Week number of the year 01-53 according to the ISO8601-definition (which most countries uses)
  WWUS    Week number of the year 01-53 according to the most used definition in the USA.
          Other definitions also exists.

  epoch   Converts a time string from YYYYMMDD-HH24:MI:SS, YYYYMMDD-HH24:MI:SS, YYYYMMDDTHH:MI:SS,
          YYYY-MM-DDTHH:MI:SS or YYYYMMDD to the number of seconds since January 1st 1970.
          Commonly known as the Unix epoch.

  JDN     Julian day number. Integer. The number of days since the day starting at noon on January 1 4713 BC
  JD      Same as JDN but a float accounting for the time of day

B<Third argument:> (optional) Is_date. False|true, default false. If true, the second argument is
interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).

=cut

#Se også L</tidstrk> og L</tidstr>

our $Tms_pattern;
our %Tms_str=
	  ('MÃ…NED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
		          'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
	   'MÃ¥ned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
		          'August','September','Oktober','November','Desember'],
	   'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
		          'august','september','oktober','november','desember'],
	   'MÃ…NE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
	   'MÃ¥ne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
	   'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
	   'MÃ…NE'  => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
	   'MÃ¥ne'  => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
	   'måne'  => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
	   'MÃ…N'   => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
	   'MÃ¥n'   => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
	   'mån'   => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
	   'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
		          'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
	   'Month' => [4, 'January','February','March','April','May','June','July',
		          'August','September','October','November','December'],
	   'month' => [4, 'january','february','march','april','may','june','july',
		          'august','september','october','november','december'],
	   'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
	   'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
	   'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
	   'MONT'  => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
	   'Mont'  => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
	   'mont'  => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
	   'MON'   => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
	   'Mon'   => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
	   'mon'   => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
	   'DAY'   => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
	   'Day'   => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
	   'day'   => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
	   'DY'    => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
	   'Dy'    => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
	   'dy'    => [6, 'sun','mon','tue','wed','thu','fri','sat'],
	   'DAG'   => [6, 'SØNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LØRDAG'],
	   'Dag'   => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
	   'dag'   => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
	   'DG'    => [6, 'Søn','MAN','TIR','ONS','TOR','FRE','LØR'],
	   'Dg'    => [6, 'SØn','Man','Tir','Ons','Tor','Fre','Lør'],
	   'dg'    => [6, 'søn','man','tir','ons','tor','fre','lør'],
	   );
my $_tms_inited=0;
sub tms_init {
  return if $_tms_inited++;
  for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
    $Tms_str{$_}=$Tms_str{replace($_,"aa","Ã¥","AA","Ã…")};
  }
  $Tms_pattern=join("|",map{quotemeta($_)}
		        sort{length($b)<=>length($a)}
			keys %Tms_str);
  #without sort "måned" could be "mared" because "mån"=>"mar"

Tools.pm  view on Meta::CPAN

     $s=~s/\bOkt\b/Oct/i;         $s=~s/\bokt\b/oct/i;         $s=~s/\bOKT\b/OCT/i;
     $s=~s/\bDes/Dec/;            $s=~s/\bdes/dec/;            $s=~s/\bDES/DEC/;
     $s=~s/\bFebruar\b/February/; $s=~s/\bfebruar\b/february/; $s=~s/\bFEBRUAR\b/FEBRUARY/;
     $s=~s/\bjuli\b/July/i;
     $s=~s/\bjuni\b/June/i;
  }
  elsif($s           =~ /^[19]\d{9}$/){ $s=localtime($s)      } #hm, make faster
  elsif($s           =~ /^[19]\d{12}$/
    and int($s/1000) =~ /^[19]\d{9}$/){ $s=localtime($s/1000) } #hm
  elsif($s=~/^((?:17|18|19|20|21)\d\d)(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])$/){#hm
      $s="$1-$2-$3T00:00:00";
  }
  return Date::Parse::str2time($s)                  if !@_;
  return tms(Date::Parse::str2time($s),shift(@_))   if 0+@_ == 1;
  return map tms(Date::Parse::str2time($s),$_), @_;
}

sub date_ok {
  my($y,$m,$d)=@_;
  return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
  return 0 if $y!~/^\d\d\d\d$/;
  return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
  return 1;
}

sub weeknum {
  return weeknum(tms('YYYYMMDD')) if @_<1;
  return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
  my($year,$month,$day)= @_;
  eval{
    if(@_<2){
      if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
      elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
      else {die}
    }
    elsif(@_!=3){croak}
    croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
  };
  croak "ERROR: Wrong args Acme::Tools::weeknum(".join(",",@_).")" if $@;
  use integer;#heltallsdivisjon
  my $y=$year+4800-(14-$month)/12;
  my $j=$day+(153*($month+(14-$month)/12*12-3)+2)/5+365*$y+$y/4-$y/100+$y/400-32045;
  my $d=($j+31741-$j%7)%146097%36524%1461;
  return (($d-$d/1460)%365+$d/1460)/7+1;
}

#perl -MAcme::Tools -le 'print "$_ ".tms($_."0501","day",1) for 2015..2026'

sub tms {
  return undef if @_>1 and not defined $_[1]; #time=undef => undef
  if(@_==1){
    my @lt=localtime();
    $_[0] eq 'YYYY'     and return 1900+$lt[5];
    $_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
    $_[0] =~ $Re_isnum  and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
  }
  my($format,$time,$is_date)=@_;
  $time=time_fp() if !defined$time;
  ($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
  my @lt=localtime($time);
  #todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
  #todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
  #      Fri Nov 13 18:00:00 CET 2015
  #date --date="next Friday"  #--date or -d
  #date --date="last friday"
  #date --date="2 days ago"
  #date --date="yesterday" #or tomorrow
  #date --date="-1 day"  #date --date='10 week'

  if( $is_date ){
    my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
    $time=totime(&$yy2c($1)."$1$2$3")."000000" if $time=~/^(\d\d)(\d\d)(\d\d)$/;
    $time=totime("$1$2${3}000000")             if $time=~/^((?:18|19|20)\d\d)(\d\d)(\d\d)$/; #hm 18-20?
  }
  else {
    $time = yyyymmddhh24miss_time("$1$2$3$4$5$6") #yyyymmddhh24miss_time ???
      if $time=~/^((?:19|20|18)\d\d)          #yyyy
                  (0[1-9]|1[012])             #mm
                  (0[1-9]|[12]\d|3[01]) \-?   #dd
                  ([01]\d|2[0-3])       \:?   #hh24
                  ([0-5]\d)             \:?   #mi
                  ([0-5]\d)             $/x;  #ss
  }
  tms_init() if !$_tms_inited;
  return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]) if !$format;
  my %p=('%'=>'%',
	 a=>'Dy',
	 A=>'Day',
	 b=>'Mon',
	 b=>'Month',
	 c=>'Dy Mon D HH:MI:SS YYYY',
	 C=>'CC',
	 d=>'DD',
	 D=>'MM/DD/YY',
	 e=>'D',
	 F=>'YYYY-MM-DD',
        #G=>'',
	 h=>'Month', H=>'HH24', I=>'HH12',
	 j=>'DoY', #day of year
	 k=>'H24', _H=>'H24',
	 l=>'H12', _I=>'H12',
	 m=>'MM', M=>'MI',
	 n=>"\n",
	#N=>'NS', #sprintf%09d,1e9*(time_fp()-time()) #000000000..999999999
	 p=>'AM', #AM|PM upper (yes, opposite: date +%H%M%S%P%p)
	 P=>'am', #am|pm lower
	 S=>'SS',
	 t=>"\t",
	 T=>'HH24:MI:SS',
	 u=>'DoW',  #day of week 1..7, 1=mon 7=sun
	 w=>'DoW0', #day of week 0..6, 1=mon 0=sun
	#U=>'WoYs', #week num of year 00..53, sunday as first day of week
	#V=>'UKE',  #ISO week num of year 01..53, monday as first day of week
	#W=>'WoYm', #week num of year 00..53, monday as first day of week, not ISO!
	#x=>$ENV{locale's date representation}, #e.g. MM/DD/YY
	#X=>$ENV{locale's time representation}, #e.g. HH/MI/SS
	 y=>'YY',
	 Y=>'YYYY',
	#z=>'TZHHMI', #time zone hour minute e.g. -0430
	#':z'=>'TZHH:MI',
	#'::z'=>'TZHH:MI:SS',

Tools.pm  view on Meta::CPAN

  $format=~s,M/                ,               ($lt[4]+1).'/'   ,gxe;
  $format=~s,/M                ,           '/'.($lt[4]+1)       ,gxe;
  $format=~s/DD                / sprintf("%02d",$lt[3])         /gxe;
  $format=~s/d0w|dow0          / $lt[6]                         /gxei;
  $format=~s/dow               / $lt[6]?$lt[6]:7                /gxei;
  $format=~s/d0y|doy0          / $lt[7]                         /gxei; #0-364 (365 leap)
  $format=~s/doy               / $lt[7]+1                       /gxei; #1-365 (366 leap)
  $format=~s/D(?![AaGgYyEeNn]) / $lt[3]                         /gxe;  #EN pga desember og wednesday
  $format=~s/dd                / sprintf("%d",$lt[3])           /gxe;
  $format=~s/hh12|HH12         / sprintf("%02d",$lt[2]<13?$lt[2]||12:$lt[2]-12)/gxe;
  $format=~s/HH24|HH24|HH|hh   / sprintf("%02d",$lt[2])         /gxe;
  $format=~s/MI                / sprintf("%02d",$lt[1])         /gxei;
  $format=~s{SS\.([1-9])      }{ sprintf("%0*.$1f",3+$1,$lt[0]+(repl($time,qr/^[^\.]+/)||0)) }gxei;
  $format=~s/SS(?:\.0)?        / sprintf("%02d",$lt[0])         /gxei;
  $format=~s/(?:am|pm|apm|xm)  / $lt[2]<13 ? 'am' : 'pm'        /gxe;
  $format=~s/(?:AM|PM|APM|XM)  / $lt[2]<13 ? 'AM' : 'PM'        /gxe;
  $format=~s/WWI|WW            / sprintf("%02d",weeknum($time)) /gxei;
  $format=~s/W                 / weeknum($time)                 /gxei;
  $format;
}

=head2 easter

Input: A year (a four digit number)

Output: array of two numbers: day and month of Easter Sunday that year. Month 3 means March and 4 means April.

 sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
             (($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }

...is a "golfed" version of Oudins algorithm (1940) L<http://astro.nmsu.edu/~lhuber/leaphist.html>
(see also http://www.smart.net/~mmontes/ec-cal.html )

Valid for any Gregorian year. Dates repeat themselves after 70499183
lunations = 2081882250 days = ca 5699845 years. However, our planet will
by then have a different rotation and spin time...

Example:

 ( $day, $month ) = easter( 2012 ); # $day == 8 and $month == 4

Example 2:

 my @e=map sprintf("%02d%02d", reverse(easter($_))), 1800..300000;
 print "First: ".min(@e)." Last: ".max(@e)."\n"; # First: 0322 Last: 0425

Note: The Spencer Jones formula differs Oudins used in C<easter()> in some years
before 1498. However, in that period the Julian calendar with a different formula was
used anyway. Countries introduced the current Gregorian calendar between 1583 and 1926.

=cut

sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
             (($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }


=head2 time_fp

No input arguments.

Return the same number as perls C<time()> except with decimals (fractions of a second, _fp as in floating point number).

 print time_fp(),"\n";
 print time(),"\n";

Could write:

 1116776232.38632

...if that is the time now.

Or just:

 1116776232

...from perl's internal C<time()> if C<Time::HiRes> isn't installed and available.


=cut

sub time_fp {  # {return 0+gettimeofday} is just as well?
    eval{ require Time::HiRes } or return time();
    my($sec,$mic)=Time::HiRes::gettimeofday();
    return $sec+$mic/1e6; #1e6 not portable?
}

sub timems {
    eval{ require Time::HiRes } or return time();
    my($sec,$mic)=Time::HiRes::gettimeofday();
    return $sec*1000+$mic/1e3;
}

=head2 sleep_fp

sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:

 sleep_fp(0.020);  # sleeps for 20 milliseconds

Sub sleep_fp do a C<require Time::HiRes>, thus it might take some
extra time the first call. To avoid that, add C<< use Time::HiRes >>
to your code. Sleep_fp should not be trusted for accuracy to more than
a tenth of a second. Virtual machines tend to be less accurate (sleep
longer) than physical ones. This was tested on VMware and RHEL
(Linux). See also L<Time::HiRes>.

=head2 sleeps

=head2 sleepms

=head2 sleepus

=head2 sleepns

 sleep_fp(0.020);   #sleeps for 20 milliseconds
 sleeps(0.020);     #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
 sleepms(20);       #sleeps for 20 milliseconds
 sleepus(20000);    #sleeps for 20000 microseconds = 20 milliseconds
 sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds

=cut

sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
sub sleeps   { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
sub sleepms  { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
sub sleepus  { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
sub sleepns  { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }

=head2 eta

Estimated time of arrival (ETA).

 for(@files){
    ...do work on file...
    my $eta = eta( ++$i, 0+@files ); # file now, number of files
    print "" . localtime($eta);
 }

TODO: eta is borken and out of wack, good idea?: http://en.wikipedia.org/wiki/Kalman_filter

=head2 etahhmm

 ...NOT YET

=cut

our %Eta;
our $Eta_forgetfulness=2;
sub eta {
  my($id,$pos,$end,$time_fp)=( @_==2 ? (join(";",caller()),@_) : @_ );
  $time_fp||=time_fp();
  my $a=$Eta{$id}||=[];
  push @$a, [$pos,$time_fp];
  @$a=@$a[map$_*2,0..@$a/2] if @$a>40;  #hm 40
  splice(@$a,-2,1) if @$a>1 and $$a[-2][0]==$$a[-1][0]; #same pos as last
  return undef if @$a<2;
  my @eta;
  for(2..@$a){
    push @eta, $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-$_][1])/($$a[-1][0]-$$a[-$_][0]);
  }
  my($sum,$sumw,$w)=(0,0,1);
  for(@eta){
    $sum+=$w*$_;
    $sumw+=$w;
    $w/=$Eta_forgetfulness;
  }
  my $avg=$sum/$sumw;
  return $avg;
#  return avg(@eta);
 #return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
  1;
}

=head2 sleep_until

sleep_until(0.5) sleeps until half a second has passed since the last
call to sleep_until. This example starts the next job excactly ten
seconds after the last job started even if the last job lasted for a
while (but not more than ten seconds):

 for(@jobs){
   sleep_until(10);
   print localtime()."\n";
   ...heavy job....
 }

Might print:

 Thu Jan 12 16:00:00 2012
 Thu Jan 12 16:00:10 2012
 Thu Jan 12 16:00:20 2012

...and so on even if the C<< ...heavy job... >>-part takes more than a
second to complete. Whereas if sleep(10) was used, each job would
spend more than ten seconds in average since the work time would be
added to sleep(10).

Note: sleep_until() will remember the time of ANY last call of this sub,
not just the one on the same line in the source code (this might change
in the future). The first call to sleep_until() will be the same as
sleep_fp() or Perl's own sleep() if the argument is an integer.

=cut

our $Time_last_sleep_until;
sub sleep_until {
  my $s=@_==1?shift():0;
  my $time=time_fp();
  my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
  $Time_last_sleep_until=time;
  sleep_fp($sleep) if $sleep>0;
}

my %thr;
sub throttle {
  my($times,$mintime,$what)=@_;
  $what||=join(":",@{[caller(1)]}[3,2]);
  $thr{$what}||=[];
  my $thr=$thr{$what};
  push @$thr,time_fp();
  return if @$thr<$times;
  my $since=$$thr[-1]-shift(@$thr);
  my $sleep=$since<$mintime?$mintime-$since:0;
  sleep_fp($sleep);
  return $sleep;
}

=head2 leapyear

B<Input:> A year. A four digit number.

B<Output:> True (1) or false (0) of whether the year is a leap year or
not. (Uses current calendar even for periods before leapyears was used).

 print join(", ",grep leapyear($_), 1900..2014)."\n";

 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956,
 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012

Note: 1900 is not a leap year, but 2000 is. Years divided by 100 is a leap year only
if it can be divided by 400.

=cut

sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool

#http://rosettacode.org/wiki/Levenshtein_distance#Perl
our %ldist_cache;
sub ldist {
  my($s,$t,$l) = @_;
  return length($t) if !$s;
  return length($s) if !$t;
  %ldist_cache=() if !$l and 1000<0+%ldist_cache;
  $ldist_cache{$s,$t} ||=
  do {
    my($s1,$t1) = ( substr($s,1), substr($t,1) );
    substr($s,0,1) eq substr($t,0,1)
      ? ldist($s1,$t1)
      : 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
  };
}

=head1 OTHER

=head2 nvl

The I<no value> function (or I<null value> function)

C<nvl()> takes two or more arguments. (Oracles nvl-function take just two)

Returns the value of the first input argument with length() > 0.

Return I<undef> if there is no such input argument.

In perl 5.10 and perl 6 this will most often be easier with the C< //
> operator, although C<nvl()> and C<< // >> treats empty strings C<"">
differently. Sub nvl here considers empty strings and undef the same.

=cut

sub nvl {
  return $_[0] if defined $_[0] and length($_[0]) or @_==1;
  return $_[1] if @_==2;
  return nvl(@_[1..$#_]) if @_>2;
  return undef;
}

=head2 decode_num

See L</decode>.

=head2 decode

C<decode()> and C<decode_num()> works just as Oracles C<decode()>.

C<decode()> and C<decode_num()> accordingly uses perl operators C<eq> and C<==> for comparison.

Examples:

 my $a=123;
 print decode($a, 123,3,  214,4, $a);     # prints 3
 print decode($a, 123=>3, 214=>4, $a);    # prints 3, same thing since => is synonymous to comma in Perl

The first argument is tested against the second, fourth, sixth and so on,
and then the third, fifth, seventh and so on is
returned if decode() finds an equal string or number.

In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.

More examples:

 my $a=123;
 print decode($a, 123=>3, 214=>7, $a);              # also 3,  note that => is synonym for , (comma) in perl
 print decode($a, 122=>3, 214=>7, $a);              # prints 123
 print decode($a,  123.0 =>3, 214=>7);              # prints 3
 print decode($a, '123.0'=>3, 214=>7);              # prints nothing (undef), no last argument default value here
 print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b

Sort of:

 decode($string, %conversion, $default);

The last argument is returned as a default if none of the keys in the keys/value-pairs matched.

A more perl-ish and often faster way of doing the same:

 {123=>3, 214=>7}->{$a} || $a                       # (beware of 0)

=cut

sub decode {
  croak "Must have a mimimum of two arguments" if @_<2;
  my $uttrykk=shift;
  if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
  else               { !defined shift    and return shift or shift for 1..@_/2 }
  return shift;
}

sub decode_num {
  croak "Must have a mimimum of two arguments" if @_<2;
  my $uttrykk=shift;
  if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
  else               { !defined shift    and return shift or shift for 1..@_/2 }
  return shift;
}

=head2 qrlist

Input: An array of values to be used to test againts for existence.

Output: A reference to a regular expression. That is a C<qr//>

The regex sets $1 if it match.

Example:

  my @list=qw/ABc XY DEF DEFG XYZ/;
  my $filter=qrlist("ABC","DEF","XY.");         # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
  my @filtered= grep { $_ =~ $filter } @list;   # returns DEF and XYZ, but not XYZ because the . char is taken literally

Note: Filtering with hash lookups are WAY faster.

Source:

Tools.pm  view on Meta::CPAN

 'JCB'                          2131 eller 1800          15

And should perhaps have had:

 'enRoute'                      2014 eller 2149          15

...but that card uses either another control algorithm or no control
digits at all. So C<enRoute> is never returned here.

If the control digits is valid, but the input does not match anything in the column C<starts on>, 1 is returned.

(This is also the same control digit mechanism used in Norwegian KID numbers on payment bills)

The first digit in a credit card number is supposed to tell what "industry" the card is meant for:

 MII Digit Value             Issuer Category
 --------------------------- ----------------------------------------------------
 0                           ISO/TC 68 and other industry assignments
 1                           Airlines
 2                           Airlines and other industry assignments
 3                           Travel and entertainment
 4                           Banking and financial
 5                           Banking and financial
 6                           Merchandizing and banking
 7                           Petroleum
 8                           Telecommunications and other industry assignments
 9                           National assignment

...although this has no meaning to C<Acme::Tools::ccn_ok()>.

The first six digits is I<Issuer Identifier>, that is the bank
(probably). The rest in the "account number", except the last digits,
which is the control digit. Max length on credit card numbers are 19
digits.

=cut

sub ccn_ok {
    my $ccn=shift(); #credit card number
    $ccn=~s/\D+//g;
    if(KID_ok($ccn)){
	return "MasterCard"                   if $ccn=~/^5[1-5]\d{14}$/;
	return "Visa"                         if $ccn=~/^4\d{12}(?:\d{3})?$/;
	return "American Express"             if $ccn=~/^3[47]\d{13}$/;
	return "Discover"                     if $ccn=~/^6011\d{12}$/;
	return "Diners Club / Carte Blanche"  if $ccn=~/^3(?:0[0-5]\d{11}|[68]\d{12})$/;
	return "JCB"                          if $ccn=~/^(?:3\d{15}|(?:2131|1800)\d{11})$/;
	return 1;
    }
    #return "enRoute"                        if $ccn=~/^(?:2014|2149)\d{11}$/; #ikke LUHN-krav?
    return 0;
}

=head2 KID_ok

Checks if a norwegian KID number has an ok control digit.

To check if a customer has typed the number correctly.

This uses the  LUHN algorithm (also known as mod-10) from 1960 which is also used
internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.

The algorithm, as described in Phrack (47-8) (a long time hacker online publication):

 "For a card with an even number of digits, double every odd numbered
 digit and subtract 9 if the product is greater than 9. Add up all the
 even digits as well as the doubled-odd digits, and the result must be
 a multiple of 10 or it's not a valid card. If the card has an odd
 number of digits, perform the same addition doubling the even numbered
 digits instead."

B<Input:> A KID-nummer. Must consist of digits 0-9 only, otherwise a die (croak) happens.

B<Output:>

- Returns undef if the input argument is missing.

- Returns 0 if the control digit (the last digit) does not satify the LUHN/mod-10 algorithm.

- Returns 1 if ok

B<See also:> L</ccn_ok>

=cut

sub KID_ok {
  croak "Non-numeric argument" if $_[0]=~/\D/;
  my @k=split//,shift or return undef;
  my $s;$s+=pop(@k)+[qw/0 2 4 6 8 1 3 5 7 9/]->[pop@k] while @k;
  $s%10==0?1:0;
}



=head2 range

B<Input:>

One or more numeric arguments:

First: x (first returned element)

Second: y (up to y but not including y)

Third: step, default 1. The step between each returned element

If a fourth, fifth and so on arguments are given, they change the step for each returned element. As first derivative, second derivative.

B<Output:>

If one argument: returns the array C<(0 .. x-1)>

If two arguments: returns the array C<(x .. y-1)>

If three arguments: The default step is 1. Use a third argument to use a different step.

B<Examples:>

 print join ",", range(11);         # prints 0,1,2,3,4,5,6,7,8,9,10  (but not 11)
 print join ",", range(2,11);       # 2,3,4,5,6,7,8,9,10             (but not 11)
 print join ",", range(11,2,-1);    # 11,10,9,8,7,6,5,4,3
 print join ",", range(2,11,3);     # 2,5,8
 print join ",", range(11,2,-3);    # 11,8,5
 print join ",", range(11,2,+3);    # prints nothing

 print join ", ",range(2,11,1,0.1);       # 2, 3, 4.1, 5.3, 6.6, 8, 9.5   adds 0.1 to step each time
 print join ", ",range(2,11,1,0.1,-0.01); # 2, 3, 4.1, 5.29, 6.56, 7.9, 9.3, 10.75

Note: In the Python language and others, C<range> is a build in iterator (a
generator), not an array. This saves memory for large sets and sometimes time.
Use C<range> in L<List::Gen> to get a similar lazy generator in Perl.

=cut

sub range {
  return _range_accellerated(@_) if @_>3;  #see below
  my($x,$y,$jump)=@_;
  return (  0 .. $x-1 ) if @_==1;
  return ( $x .. $y-1 ) if @_==2;
  croak "Wrong number of arguments or jump==0" if @_!=3 or $jump==0;
  my @r;
  if($jump>0){  while($x<$y){ push @r, $x; $x+=$jump } }
  else       {  while($x>$y){ push @r, $x; $x+=$jump } }
  return @r;
}

#jumps derivative, double der., trippled der usw
sub _range_accellerated {
  my($x,$y,@jump)=@_;
  my @r;
  my $test = $jump[0]>=0 ? sub{$x<$y} : sub{$x>$y};
  while(&$test()){
    push @r, $x;
    $x+=$jump[0];
    $jump[$_-1]+=$jump[$_] for 1..$#jump;
  }
  return @r;
}

=head2 globr

Works like and uses Perls builtin C<< glob() >> but adds support for ranges
with C<< {from..to} >> and C<< {from..to..step} >>. Like brace expansion in bash.

Examples:

 my @arr = glob  "X{a,b,c,d}Z";         # @arr now have four elements: XaZ XbZ XcZ XdZ

Tools.pm  view on Meta::CPAN

 my @arr = globr "{01..12..3}b";        # 01b 04b 07b 10b

=cut

sub globr($) {
  my $p=shift;
  $p=~s{
    \{(-?\w+)\.\.(-?\w+)(\.\.(-?\d+))?\}
  }{
    my $i=0;
    my @r=$1 le $2 ? ($1..$2) : reverse($2..$1);
    @r=grep !($i++%$4),@r if $4;
    "{" . join(",",@r) . "}"
  }xeg;
  glob $p;
}

=head2 permutations

How many ways (permutations) can six people be placed around a table:

 One person:          one way
 Two persons:         two ways  (they can swap places)
 Three persons:         6
 Four persons:         24
 Five persons:        120
 Six  persons:        720

The formula is C<x!> where the postfix unary operator C<!>, also known as I<faculty> is defined as:
C<x! = x * (x-1) * (x-2) ... * 1>. Example: C<5! = 5 * 4 * 3 * 2 * 1 = 120>.Run this to see the 100 first C<< n! >>

 perl -MAcme::Tools -le'$i=big(1);print "$_!=",$i*=$_ for 1..100'

  1!  = 1
  2!  = 2
  3!  = 6
  4!  = 24
  5!  = 120
  6!  = 720
  7!  = 5040
  8!  = 40320
  9!  = 362880
 10!  = 3628800
 .
 .
 .
 100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000

C<permutations()> takes a list and return a list of arrayrefs for each
of the permutations of the input list:

 permutations('a','b');     #returns (['a','b'],['b','a'])

 permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
                            #         ['b','a','c'],['b','c','a'],
                            #         ['c','a','b'],['c','b','a'])

Up to five input arguments C<permutations()> is probably as fast as it
can be in this pure perl implementation (see source). For more than
five, it could be faster. How fast is it now: Running with different
n, this many time took that many seconds:

 n   times    seconds
 -- ------- ---------
  2  100000      0.32
  3  10000       0.09
  4  10000       0.33
  5  1000        0.18
  6  100         0.27
  7  10          0.21
  8  1           0.17
  9  1           1.63
 10  1          17.00

If the first argument is a coderef, that sub will be called for each permutation and the return from those calls with be the real return from C<permutations()>. For example this:

 print for permutations(sub{join"",@_},1..3);

...will print the same as:

 print for map join("",@$_), permutations(1..3);

...but the first of those two uses less RAM if 3 has been say 9.
Changing 3 with 10, and many computers hasn't enough memory
for the latter.

The examples prints:

 123
 132
 213
 231
 312
 321

If you just want to say calculate something on each permutation,
but is not interested in the list of them, you just don't
take the return. That is:

 my $ant;
 permutations(sub{$ant++ if $_[-1]>=$_[0]*2},1..9);

...is the same as:

 $$_[-1]>=$$_[0]*2 and $ant++ for permutations(1..9);

...but the first uses next to nothing of memory compared to the latter. They have about the same speed.
(The examples just counts the permutations where the last number is at least twice as large as the first)

C<permutations()> was created to find all combinations of a persons
name. This is useful in "fuzzy" name searches with
L<String::Similarity> if you can not be certain what is first, middle
and last names. In foreign or unfamiliar names it can be difficult to
know that.

=cut

#TODO: see t/test_perm.pl and t/test_perm2.pl

sub permutations {
  my $code=ref($_[0]) eq 'CODE' ? shift() : undef;
  $code and @_<6 and return map &$code(@$_),permutations(@_);

Tools.pm  view on Meta::CPAN

	  my $line=$row_start_line+$_;
	  my $txt=shift(@cell);
	  $txt='' if !defined$txt;
	  $txt=sprintf("%*s",$width[$y]-1,$txt) if length($txt)>0 && !$left[$y] && ($x>0 || $no_header_line);
	  $tabout[$line].=$txt;
	  if($y==$j){
	    $tabout[$line]=~s/\s+$//;
	  }
	  else{
	    my $wider;
	       $wider = $txt=~/<input.+type=text.+size=(\d+)/i?1+$1:0;
	    $txt=~s/<[^>]+>//g;
	    $txt=~s/&gt;/>/g;
	    $txt=~s/&lt;/</g;
	    $tabout[$line].= ' ' x ($width[$y]-length($txt)-$wider);
	  }
	}
      }
    }
    $row_start_line+=$height[$x];

    #--lage streker?
    if(not $no_header_line){
      if($x==0){
	for my $y (0..$j){
	  next if $remove_empty && !$not_empty[$y];
	  $tabout[$row_start_line].=('-' x ($width[$y]-1))." ";
	}
	$row_start_line++;
	@header=("",@tabout);
      }
      elsif(
	    $x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
	    and $x+1<@$tab
	    and !$no_header_line
	    )
      {
	push(@tabout,@header);
	$row_start_line+=@header;
	$header_last=1;
      }
      else{
	$header_last=0;
      }
    }
  }#for x
  return join("\n",@tabout)."\n";
}

=head2 serialize

Returns a data structure as a string. See also C<Data::Dumper>
(serialize was created long time ago before Data::Dumper appeared on
CPAN, before CPAN even...)

B<Input:> One to four arguments.

First argument: A reference to the structure you want.

Second argument: (optional) The name the structure will get in the output string.
If second argument is missing or is undef or '', it will get no name in the output.

Third argument: (optional) The string that is returned is also put
into a created file with the name given in this argument.  Putting a
C<< > >> char in from of the filename will append that file
instead. Use C<''> or C<undef> to not write to a file if you want to
use a fourth argument.

Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
The default is infinite (some big number) so no extra newlines are output.

B<Output:> A string containing the perl-code definition that makes that data structure.
The input reference (first input argument) can be to an array, hash or a string.
Those can contain other refs and strings in a deep data structure.

Limitations:

- Code refs are not handled (just returns C<sub{die()}>)

- Regex, class refs and circular recursive structures are also not handled.

B<Examples:>

  $a = 'test';
  @b = (1,2,3);
  %c = (1=>2, 2=>3, 3=>5, 4=>7, 5=>11);
  %d = (1=>2, 2=>3, 3=>\5, 4=>7, 5=>11, 6=>[13,17,19,{1,2,3,'asdf\'\\\''}],7=>'x');
  print serialize(\$a,'a');
  print serialize(\@b,'tab');
  print serialize(\%c,'c');
  print serialize(\%d,'d');
  print serialize(\("test'n roll",'brb "brb"'));
  print serialize(\%d,'d',undef,1);

Prints accordingly:

 $a='test';
 @tab=('1','2','3');
 %c=('1','2','2','3','3','5','4','7','5','11');
 %d=('1'=>'2','2'=>'3','3'=>\'5','4'=>'7','5'=>'11','6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}]);
 ('test\'n roll','brb "brb"');
 %d=('1'=>'2',
 '2'=>'3',
 '3'=>\'5',
 '4'=>'7',
 '5'=>'11',
 '6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}],
 '7'=>'x');

Areas of use:

- Debugging (first and foremost)

- Storing arrays and hashes and data structures of those on file, database or sending them over the net

- eval earlier stored string to get back the data structure

Be aware of the security implications of C<eval>ing a perl code string
stored somewhere that unauthorized users can change them! You are
probably better of using L<YAML::Syck> or L<Storable> without
enabling the CODE-options if you have such security issues.
More on decompiling Perl-code: L<Storable> or L<B::Deparse>.

=head2 dserialize

Debug-serialize, dumping data structures for you to look at.

Same as C<serialize()> but the output is given a newline every 80th character.
(Every 80th or whatever C<$Acme::Tools::Dserialize_width> contains)

=cut

our $Dserialize_width=80;
sub _kallstack { my $tilbake=shift||0; my @c; my $ret; $ret.=serialize(\@c,"caller$tilbake") while @c=caller(++$tilbake); $ret }
sub dserialize{join "\n",serialize(@_)=~/(.{1,$Dserialize_width})/gs}
sub serialize {
  no warnings;
  my($r,$name,$filename,$level)=@_;
  my @r=(undef,undef,($level||0)-1);
  if($filename){
    open my $fh, '>', $filename or croak("FEIL: could not open file $filename\n" . _kallstack());
    my $ret=serialize($r,$name,undef,$level);
    print $fh "$ret\n1;\n";
    close($fh);
    return $ret;
  }
  if(ref($r) eq 'SCALAR'){
    return "\$$name=".serialize($r,@r).";\n" if $name;
    return "undef" unless defined $$r;
    my $ret=$$r;
    $ret=~s/\\/\\\\/g;
    $ret=~s/\'/\\'/g;
    return "'$ret'";
  }
  elsif(ref($r) eq 'ARRAY'){
    return "\@$name=".serialize($r,@r).";\n" if $name;
    my $ret="(";
    for(@$r){
      $ret.=serialize(\$_,@r).",";
      $ret.="\n" if $level>=0;
    }
    $ret=~s/,$//;
    $ret.=")";
    $ret.=";\n" if $name;
    return $ret;
  }
  elsif(ref($r) eq 'HASH'){
    return "\%$name=".serialize($r,@r).";\n" if $name;
    my $ret="(";
    for(sort keys %$r){
      $ret.=serialize(\$_,@r)."=>".serialize(\$$r{$_},@r).",";
      $ret.="\n" if $level>=0;
    }
    $ret=~s/,$//;
    $ret.=")";
    $ret.=";\n" if $name;
    return $ret;
  }
  elsif(ref($$r) eq 'ARRAY'){
    return "\@$name=".serialize($r,@r).";\n" if $name;
    my $ret="[";

Tools.pm  view on Meta::CPAN

    elsif($c eq '}') { $m=0 }
    elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
    elsif($c eq '""'){ &$add('"') }
    else             { croak "ed: Unknown cmd '$c'\n" }
    push @m, $c if $m and $c ne '{';
    #warn serialize([$c,$m,$cs],'d');
  }
  $Edcursor=$p;
  $s;
}

=head2 changed

 while(<>){
    my $line=$_;
    print "\n" if changed(/^\d\d\d\d-\d\d-(\d\d)/);
    print "\n" if changed(substr($_,8,2));
 }

Returns undef, 0 or 1. Undef if its the first time C<changed> is
called on that perl line. 0 if not the first time and the parameters
differ from the last call on that line. 1 if not the first time and
the parameters is the exact same as they where on the previous call on
that line of perl source code.

=cut

our %Changed_lastval;
sub changed {
    my $now=join($;,@_);
    my $key=join($;,caller());
    my $e=exists $Changed_lastval{$key};
    if($e){
	my $last=$Changed_lastval{$key};
	return 0 if  defined $last and  defined $now and $last eq $now
                 or !defined $last and !defined $now;
    }
    $Changed_lastval{$key}=$now;
    return $e?1:undef;
}

#todo: sub unbless eller sub damn
#todo: ..se også: use Data::Structure::Util qw/unbless/;
#todo: ...og: Acme::Damn sin damn()
#todo? sub swap($$) http://www.idg.no/computerworld/article242008.ece
#todo? catal
#todo?
#void quicksort(int t, int u) int i, m; if (t >= u) return; swap(t, randint(t, u)); m = t; for (i = t + 1; i <= u; i++) if (x[i] < x[t]) swap(++m, i); swap(t, m) quicksort(t, m-1); quicksort(m+1, u);


=head1 JUST FOR FUN

=head2 brainfu

B<Input:> one or two arguments

First argument: a string, source code of the brainfu
language. String containing the eight charachters + - < > [ ] . ,
Every other char is ignored silently.

Second argument: if the source code contains commas (,) the second
argument is the input characters in a string.

B<Output:> The resulting output from the program.

Example:

 print brainfu(<<"");  #prints "Hallo Verden!\n"
 ++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
 .>----------.+++++++++++++.--------------.+.+++++++++.>+.>.

See L<http://en.wikipedia.org/wiki/Brainfuck>

=head2 brainfu2perl

Just as L</brainfu> but instead it return the perl code to which the
brainfu code is translated. Just C<< eval() >> this perl code to run.

Example:

 print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');

Prints this string:

 my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
 ++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 ++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 ++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
 while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
 --$c;--$b[$c];--$b[$c];--$b[$c];out;$o;

=head2 brainfu2perl_optimized

Just as L</brainfu2perl> but optimizes the perl code. The same
example as above with brainfu2perl_optimized returns this equivalent
but shorter perl code:

 $b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
 while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;

=cut

sub brainfu { eval(brainfu2perl(@_)) }

sub brainfu2perl {
  my($bf,$inp)=@_;
  my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
  $perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
  $perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/</?'--$c;':/\./?'out;':/\,/?'inp;':'',split//,$bf).'$o;';
  $perl;
}

sub brainfu2perl_optimized {
  my $perl=brainfu2perl(@_);
  $perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
  1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
  $perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
  $perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
  $perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
  $perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;

Tools.pm  view on Meta::CPAN

Or this:

 my $bf=bfinit('filename.bf');

Is the same as:

 use Storable qw(store retrieve);
 my $bf=retrieve('filename.bf');

=head2 bfclone

Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)

This:

 my $bfc = bfclone($bf);

Works just as:

 use Storable;
 my $bfc=Storable::dclone($bf);

=head2 Object oriented interface to bloom filters

 use Acme::Tools;
 my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
 print ref($bf),"\n";                           # prints Acme::Tools::BloomFilter
 $bf->add(@keys);
 $bf->check($keys[0]) and print "ok\n";         # prints ok
 $bf->grep(\@keys)==@keys and print "ok\n";     # prints ok
 $bf->store('filename.bf');
 my $bf2=bfretrieve('filename.bf');
 $bf2->check($keys[0]) and print "ok\n";        # still ok

 $bf2=$bf->clone();

To instantiate a previously stored bloom filter:

 my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );

The o.o. interface has the same methods as the C<bf...>-subs without the
C<bf>-prefix in the names. The C<bfretrieve> is not available as a
method, although C<bfretrieve>, C<Acme::Tools::bfretrieve> and
C<Acme::Tools::BloomFilter::retrieve> are synonyms.

=head2 Internals and speed

The internal hash-functions are C<< md5( "$key$salt" ) >> from L<Digest::MD5>.

Since C<md5> returns 128 bits and most medium to large sized bloom
filters need only a 32 bit hash function, the result from md5() are
split (C<unpack>-ed) into 4 parts 32 bits each and are treated as if 4
hash functions was called at once (speedup). Using different salts to
the key on each md5 results in different hash functions.

Digest::SHA512 would have been even better since it returns more bits,
if it werent for the fact that it's much slower than Digest::MD5.

String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:

 time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6'       #5.56 sec
 time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6'           #2.79 sec, faster but not per bit
 time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)

Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.

=head2 Theory and math behind bloom filters

L<http://www.internetmathematics.org/volumes/1/4/Broder.pdf>

L<http://blogs.sun.com/jrose/entry/bloom_filters_in_a_nutshell>

L<http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html>

See also Scaleable Bloom Filters: L<http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf> (not implemented in Acme::Tools)

...and perhaps L<http://intertrack.naist.jp/Matsumoto_IEICE-ED200805.pdf>

=cut

sub bfinit {
  return bfretrieve(@_)                             if @_==1;
  return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
  return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
  require Digest::MD5;
  @_%2&&croak "Arguments should be a hash of equal number of keys and values";
  my %arg=@_;
  my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
  my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
  croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
  croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
  croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
  croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
  my $bf={error_rate    => 0.001,  #default p
	  capacity      => 100000, #default n
          min_hashfuncs => 1,
          max_hashfuncs => 100,
	  counting_bits => 1,      #default: not counting filter
	  adaptive      => 0,
	  %arg,                    #arguments
	  key_count     => 0,
	  overflow      => {},
	  version       => $Acme::Tools::VERSION,
	 };
  croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
  @$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
  @$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
  $$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x   new empty filter
  $$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
               :$$bf{filterlength}<=2**32/4 ? "N*"
               :                              "Q*";
  bfadd($bf,@{$arg{keys}}) if $arg{keys};
  return $bf;
}
sub bfaddbf {
  my($bf,$bf2)=@_;
  my $differror=join"\n",
    map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
    grep $$bf{$_} ne $$bf2{$_},
    qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
  croak $differror if $differror;
  croak "Can not add adaptive bloom filters" if $$bf{adaptive};
  my $count=$$bf{key_count}+$$bf2{key_count};

Tools.pm  view on Meta::CPAN


 Wrote executable /usr/local/bin/conv
 Wrote executable /usr/local/bin/due
 Wrote executable /usr/local/bin/xcat
 Wrote executable /usr/local/bin/freq
 Wrote executable /usr/local/bin/deldup
 Wrote executable /usr/local/bin/ccmd
 Wrote executable /usr/local/bin/z2z
 Wrote executable /usr/local/bin/2gz
 Wrote executable /usr/local/bin/2gzip
 Wrote executable /usr/local/bin/2bz2
 Wrote executable /usr/local/bin/2bzip2
 Wrote executable /usr/local/bin/2xz
 Wrote executable /usr/local/bin/resubst

Examples of commands then made available:

 conv 1 USD EUR                #might show 0.88029 if thats the current currency rate. Uses conv()
 conv .5 in cm                 #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
 due [-h] /path/1/ /path/2/    #like du, but show statistics on file extentions instead of subdirs
 xcat file                     #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
 freq file                     #reads file(s) or stdin and view counts of each byte 0-255
 ccmd grep string /huge/file   #caches stdout+stderr for 15 minutes (default) for much faster results later
 ccmd "sleep 2;echo hello"     #slow first time. Note the quotes!
 ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
 z2z [-pvk1-9oe -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
                               #-k keep org file, -o overwrite, 1-9 compression degree, -e for xz does "extreme"
                               #compressions, very slow. For some data types this reduces size significantly
                               #2xz and 2bz2 depends on xz and bzip2 being installed on system
 2xz                           #same as z2z with -t xz
 2bz2                          #same as z2z with -t bz2
 2gz                           #same as z2z with -t gz

 rttop
 trunc file(s)
 wipe file(s)

=head3 z2z

=head3 2xz

=head3 2bz2

=head3 2gz

The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C<z2z> with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.

 z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files

Converts (recompresses) files from one compression type to another. For instance from .gz to .bz2
Keeps uid, gid, mode (chmod) and mtime.

 -p              Show a progress meter using the pv program if installed
 -k              Keeps original file
 -v              Verbose, shows info on degree of compression and file
                 number if more than one file is being converted
 -o              Overwrites existing result file, otherwise stop with error msg
 -1 .. -9        Degree of compression, -1 fastest .. -9 best
 -e              With -t xz (or 2xz) passes -e to xz (-9e = extreme compression)

 -L rate         With -p. Slow down, ex:  -L 200K  means 200 kilobytes per second
 -D sec          With -p. Only turn on progress meter (pv) after x seconds
 -i sec          With -p. Info update rate
 -l              With -p. Line mode
 -I              With -p. Show ETA as time of arrival as well as time left
 -q              With -p. Quiet. Useful with -L to limit rate, but no output

The options -L -D -i -l -I -q implicitly turns on -p. Those options are passed
through to pv. See: man pv.

=head3 due

Like C<du> command but views space used by file extentions instead of dirs. Options:

 due [-options] [dirs] [files]
 due -h          View bytes "human readable", i.e. C<8.72 MB> instead of C<9145662 b> (bytes)
 due -k | -m     View bytes in kilobytes | megabytes (1024 | 1048576)
 due -K          Like -k but uses 1000 instead of 1024
 due -z          View two extentions if .z .Z .gz .bz2 .rz or .xz (.tar.gz, not just .gz)
 due -M          Also show min, medium and max date (mtime) of files, give an idea of their age
 due -C          Like -M, but create time instead (ctime)
 due -A          Like -M, but access time instead (atime)
 due -P          Also show 10, 50 (medium) and 90 percentile of file date
 due -MP         Both -M and -P, shows min, 10p, 50p, 90p and max
 due -a          Sort output alphabetically by extention (default order is by size)
 due -c          Sort output by number of files
 due -i          Ignore case, .GZ and .gz is the same, output in lower case
 due -t          Adds time of day to -M and -P output
 due -e 'regex'  Exclude files (full path) matching regex. Ex: due -e '\.git'
 TODO: due -l    TODO: Exclude hardlinks (dont count "same" file more than once, "man du")
 ls -l | due     Parses output of ls -l, find -ls, tar tvf for size+filename and reports
 find | due      List of filenames from stdin produces same as just command 'due'
 ls | due        Reports on just files in current dir without recursing into subdirs

=head3 finddup

Find duplicate files. Three steps to speed this up in case of many
large files: 1) Find files of same size, 2) of those: find files with
the same first 8 kilobytes, 3) of those: find duplicate files by
finding the MD5sums of the whole files.

 finddup [-d -s -h] paths/ files/* ...  #reports (+deletes with -d) duplicate files
                                        #-s for symlinkings dups, -h for hardlink
 finddup <files>    # print duplicate files, <files> might be filenames and directories
 finddup -a <files> # print duplicate files, also print the first file
 finddup -d <files> # delete duplicate files, use -v to also print them before deletion
 finddup -s <files> # make symbolic links of duplicate files
 finddup -h <files> # make hard links of duplicate files
 finddup -v ...     # verbose, print before -d, -s or -h
 finddup -n -d <files>  # dry run: show rm commands without actually running them
 finddup -n -s <files>  # dry run: show ln commands to make symlinks of duplicate files todo:NEEDS FIX!
 finddup -n -h <files>  # dry run: show ln commands to make hard links of duplicate files
 finddup -q ...         # quiet
 finddup -k o           # keep oldest with -d, -s, -h, consider newer files duplicates
 finddup -k n           # keep newest with -d, -s, -h, consider older files duplicates
 finddup -k O           # same as -k o, just use access time instead of modify time
 finddup -k N           # same as -k n, just use access time instead of modify time
 finddup -0 ...         # use ascii 0 instead of the normal \n, for xargs -0
 finddup -P n           # use n bytes from start of file in 1st md5 check (default 8192)
 finddup -p             # view progress in last and slowest of the three steps

Default ordering of files without C<-k n> or C<-k o> is the order they
are mentioned on the command line. For directory args the order might be

Tools.pm  view on Meta::CPAN

}
sub cmd_xcat {
  for my $fn (@_){
    my $os=openstr($fn);
    open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
    #binmode($FH);#hm?
    print while <$FH>;
    close($FH);
  }
}
sub cmd_freq {
  my(@f,$i);
  map $f[$_]++, unpack("C*",$_) while <>;
  my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE  CHAR   COUNT","---- ----- -------");
  my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-å",146,"DOS-Æ",157,"DOS-Ø",143,"DOS-Å",map{($_," ")}0..31);
  printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
  my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
}
sub cmd_deldup {
  cmd_finddup('-d',@_);
}
sub cmd_finddup {
  # http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
  # die "todo: finddup not ready yet"
  my %o;
  my @argv=opts("ak:dhsnqv0P:FMRp",\%o,@_); $o{P}=1024*8 if!defined$o{P}; $o{k}='' if!defined$o{k};
  croak"ERR: cannot combine -a with -d, -s or -h" if $o{a} and $o{d}||$o{s}||$o{h};
  require File::Find;
  @argv=map{
      my @f;
      if(-d$_){ File::Find::find({follow=>0,wanted=>sub{return if !-f$_;push@f,$File::Find::name;1}},$_) }
      else    { @f=($_) }
      @f;
  }@argv;
  my %md5sum;
  my $md5sum=sub{$md5sum{$_[0]}=md5sum($_[0]) if!defined$md5sum{$_[0]}}; #memoize
  my $md5sum_1st_part=sub{
      open my $fh, "<", $_[0] or die "ERR: Could not read $_[0]";
      binmode($fh);
      my $buf; read($fh,$buf,$o{P});
      close($fh);
      md5sum(\$buf);
  };
  my @checks=( #todo: stat()[0,1] (or[0,1,7]?) and diff filename => no need for md5, is hardlink! just linux?
      sub{-s$_[0]},
      sub{-s$_[0]<=$o{P}?md5sum($_[0]):&$md5sum_1st_part($_[0])},
      sub{md5sum($_[0])}
  );
  pop @checks if $o{M}; #4tst
  my $i=0;
  my %s=map{($_=>++$i)}@argv; #sort
  my %f=map{($_=>[$_])}@argv; #also weeds out dupl params
  for my $c (@checks){
    my @f=map @{$f{$_}}, sort keys %f;
    if($o{p} and $c eq $checks[-1]){ #view progress for last check, todo: eta() is wacky here! everywhere?
      my $sum=@f?sum(map -s$_,@f):0;
      my($corg,$cnt,$cntmb,$mb)=($c,0,0,$sum/1e6);
      $c=sub{
	  $cntmb+=(-s$_[0])/1e6;
	  my $eol=++$cnt==@f?"\n":"\r";
	  print STDERR sprintf("%d/%d files checked (%d%%), %d/%d MB (%d%%), ETA in %d sec       $eol",
			       $cnt, 0+@f, 100*$cnt/@f, $cntmb, $mb, 100*$cntmb/$mb,
			       curb(nvl(eta($cnt,0+@f),time)-time(),0,1e7));
	  &$corg(@_)
      };
    }
    my %n; push @{$n{&$c($_)}}, $_ for @f;
    delete @n{grep@{$n{$_}}<2,keys%n};
    %f=%n;
  }
  return %f if $o{F};
  my@r=sort{$s{$$a[0]}<=>$s{$$b[0]}}values%f;
  my $si={qw(o 9 n 9 O 8 N 8)}->{$o{k}}; #stat index: 9=mtime, 8=atime
  my $sort=lc$o{k} eq 'o' ? sub{sprintf"%011d%9d",     (stat($_[0]))[$si],$s{$_[0]}}
          :lc$o{k} eq 'n' ? sub{sprintf"%011d%9d",1e11-(stat($_[0]))[$si],$s{$_[0]}}
          :                 sub{sprintf     "%9d",                        $s{$_[0]}};
  @$_=map$$_[1],sort{$$a[0]cmp$$b[0]}map[&$sort($_),$_],@$_ for @r;
  my %of; #dup of
  for my $r (@r){
      $of{$_}=$$r[0] for @$r[1..$#$r];
  }
  my $nl=$o{0}?"\x00":"\n";
  my $print=sub{$o{q} or print $_[0]};
  my $do=sub{ $o{v} && &$print("$_[0]$nl"); qx($_[0]) };
  my $go=sub{ $o{n} ? &$print("$_[0]$nl") : &$do($_[0]) };
  &$print(join$nl,map join("",map"$_$nl",@$_),@r) and return if $o{a};
  @r=map@$_[1..$#$_],@r;
  return @r if $o{R}; #hm
  unlink@r                              if $o{d}||$o{s}||$o{h} and !$o{n}; #delete duplicates
  map &$go(qq(rm "$_")             ),@r if $o{d}&& $o{n}; #delete duplicates, dryrun
  map &$go(qq(ln    "$of{$_}" "$_")),@r if $o{h}; #replace duplicates with hardlink
  map &$go(qq(ln -s "$of{$_}" "$_")),@r if $o{s}; #replace duplicates with symlink,
                                                  #todo: BUG! abc/def/file -> ghi/file should be abc/def/file -> ../../ghi/file
  return if $o{q} or $o{n};    #quiet or dryrun
  &$print("$_$nl") for @r;
}
#http://stackoverflow.com/questions/11900239/can-i-cache-the-output-of-a-command-on-linux-from-cli
our $Ccmd_cache_dir='/tmp/acme-tools-ccmd-cache';
our $Ccmd_cache_expire=15*60;  #default 15 minutes
sub cmd_ccmd {
  require Digest::MD5;
  my $cmd=join" ",@_;
  my $d="$Ccmd_cache_dir/".username();
  makedir($d);
  my $md5=Digest::MD5::md5_hex($cmd);
  my($fno,$fne)=map"$d/cmd.$md5.std$_","out","err";
  my $too_old=sub{time()-(stat(shift))[9] >= $Ccmd_cache_expire};
  unlink grep &$too_old($_), <$d/*.std???>;
  sys("($cmd) > $fno 2> $fne") if !-e$fno or &$too_old($fno);
  print STDOUT "".readfile($fno);
  print STDERR "".readfile($fne);
}

sub cmd_trunc { die "todo: trunc not ready yet"} #truncate a file, size 0, keep all other attr

#todo:   wipe -n 4 filer*   #virker ikke! tror det er args() eller opts() som ikke virker
sub cmd_wipe  {
  my %o;
  my @argv=opts("n:k0123456789",\%o,@_);
  die if 1<grep exists$o{$_},'n',0..9;
  $o{$_} and $o{n}=$_ for 0..9;

Tools.pm  view on Meta::CPAN

#todo: sub cmd_7z
#todo: .tgz same as .tar.gz (but not .tbz2/.txz)
sub cmd_z2z {
  my %o;
  my $pvopts="L:D:i:lIq";
  my @argv=opts("pt:kvhon123456789es:$pvopts",\%o,@_);
  my $t=repl(lc$o{t},qw/gzip gz bzip2 bz2/);
  die "due: unknown compression type $o{t}, known are gz, bz2 and xz" if $t!~/^(gz|bz2|xz)$/;
  $o{p}=1 if!defined$o{p} and grep$pvopts=~/$_/,keys%o;
  delete $o{e} if $o{e} and $o{t} ne 'xz' and warn "-e available only for type xz\n";
  my $sum=sum(map -s$_,@argv);
  print "Converting ".@argv." files, total ".bytes_readable($sum)."\n" if $o{v} and @argv>1;
  my $cat='cat';
  if($o{p}){ if(which('pv')){ $cat='pv' } else { warn repl(<<"",qr/^\s+/) } }
    due: pv for -p not found, install with sudo yum install pv, sudo apt-get install pv or similar

  $o{$_} and $o{$_}=" " for qw(l q); #still true, but no cmd arg for:
  $o{I} and $o{I}="-pterb";
  exists$o{$_} and $cat=~s,pv,pv -$_ $o{$_}, for $pvopts=~/(\w)/g; #warn "cat: $cat\n";

  my $sumnew=0;
  my $start=time_fp();
  my($i,$bsf)=(0,0);#bytes so far
  $Eta{'z2z'}=[];eta('z2z',0,$sum);
  #@argv=map$$_[1],sort{$$a[0]cmp$$b[0]}map{[$opt{
  for(@argv){
    my $new=$_; $new=~s/(\.(gz|bz2|xz))?$/.$t/i or die;
    my $ext=defined($2)?lc($2):'';
    my $same=/^$new$/; $new.=".tmp" if $same; die if $o{k} and $same;
    next if !-e$_ and warn"$_ do not exists\n";
    next if !-r$_ and warn"$_ is not readable\n";
    next if -e$new and !$o{o} and warn"$new already exists, skipping (use -o to overwrite)\n";
    my $unz={qw/gz gunzip bz2 bunzip2 xz unxz/}->{$ext}||'';
    #todo: my $cntfile="/tmp/acme-tools-z2z-wc-c.$$";
    #todo: my $cnt="tee >(wc -c>$cntfile)" if $ENV{SHELL}=~/bash/ and $o{v}; #hm dash vs bash
    my $z=  {qw/gz gzip   bz2 bzip2   xz xz/}->{$t};
    $z.=" -$_" for grep$o{$_},1..9,'e';
    $z.=" -$_ $o{$_}" for grep exists$o{$_},'L';
    my $cmd=qq($cat "$_"|$unz|$z>"$new");
     #todo: "$cat $_|$unz|$cnt|$z>$new";
    #cat /tmp/kontroll-linux.xz|unxz|tee >(wc -c>/tmp/p)|gzip|wc -c;cat /tmp/p
    $cmd=~s,\|+,|,g; #print "cmd: $cmd\n";
    sys($cmd);
    chall($_,$new) or croak("$0 cannot chmod|chown|touch $new") if !$o{n};
    my($szold,$sznew)=map{-s$_}($_,$new);
    $bsf+=-s$_;
    unlink $_ if !$o{k};
    rename($new, replace($new,qr/.tmp$/)) or die if $same;
    if($o{v}){
      $sumnew+=$sznew;
      my $pr=sprintf"%0.1f%%",$szold?100*$sznew/$szold:0;
      #todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
      #todo: $o{h} ? printf("%6.1f%%  %9s => %9s => %9s %s\n",      $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
      #todo:       : printf("%6.1f%% %11d b  => %11d b => %11 b  %s\n",$pr,$szold,$szuncmp,$sznew,$_)
      my $str= $o{h}
      ? sprintf("%-7s %9s => %9s",       $pr,(map bytes_readable($_),$szold,$sznew))
      : sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
      if(@argv>1){
	$i++;
	$str=$i<@argv
            ? "  ETA:".sprintf("%-8s",sec_readable(eta('z2z',$bsf,$sum)-time_fp()))." $str"
	    : "   TA: 0s $str"
	  if $sum>1e6;
        $str="$i/".@argv." $str";
      }
      print "$str $new\n";
    }
  }
  if($o{v} and @argv>1){
      my $bytes=$o{h}?'':'bytes ';
      my $str=
        sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
	  0+@argv,
	  time_fp()-$start,
	  (map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
	  100*$sumnew/$sum;
      $str=~s,\((\d),(+$1,;
      print $str;
  }
}

=head2 args

Parses command line options and arguments:

 my %opt;
 my @argv=Acme::Tools::args('i:nJ123',\%opt,@ARGV);   #returns remaining command line elements after C<-o ptions> are parsed into C<%opt>.

Uses C<Getopt::Std::getopts()>. First arg names the different one char
options and an optional C<:> behind the letter or digit marks that the
switch takes an argument.

=cut

sub args {
    my $switches=shift;
    my $hashref=shift;
    my $re_sw='^([a-z0-9]:?)+$';
    croak "ERR: args: first arg $switches dont match $re_sw\n" if $switches !~ /$re_sw/i;
    croak "ERR: second arg to args() not hashref\n" if ref($hashref) ne 'HASH';
    local @ARGV=@_;
    require Getopt::Std;
    Getopt::Std::getopts($switches => $hashref);
    (@ARGV);
}

sub opts {
    my($def, $hashref, @a)=@_;
    @a=@ARGV if @_<=2;
    my %def=map{/(\w)(:?)/;($1=>$2?2:1)}$def=~/(\w:?)/g;
    my $o1=join"",grep$def{$_}==1,sort keys%def;
    my $o= join"",                sort keys%def;
    my @r;
    while(@a){
	my $a=shift(@a);
	if($a=~/^-([$o1])([$o].*)$/){
	    unshift@a,"-$1","-$2";
	}
	elsif($a=~/^-(\w)(.*)$/){
	    my $d=$def{$1}//0;
	    push@{$$hashref{$1}},$d==1 && length($2) ? croak"opt -$1 has no arg (is $2 here)"
		                :$d==1               ? 1
				:$d==2 && length($2) ? $2
				:$d==2               ? shift(@a)
				:croak"unknown opt -$1";
	}
	elsif($a eq '--'){
	    last;
	}
	else {
	    push @r, $a;
	}
    }
    $_=join",",@$_ for values %$hashref;
    (@r,@a)
}

#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp -
#cat Tools.pm|perl -I. /usr/local/bin/zsize -tp Tools.pm
sub cmd_zsize {
  my %o;
  my @argv=opts("heEpts",\%o,@_);
  my $stdin=!@argv || join(",",@argv) eq '-';
  @argv=("/tmp/acme-tools.$$.stdin") if $stdin;
  writefile($argv[0],join("",<STDIN>)) if $stdin;
  my @prog=grep qx(which $_), qw(gzip bzip2 xz zstd brotli);
  for my $f (@argv){
      my $sf=-s$f;
      print "--- $f does not exists\n" and next if !-e$f;
      print "--- $f is not a file\n" and next if !-f$f;
      print "--- $f ($sf b) is not readable\n" and next if !-r$f;
      print "--- $sf b  ".bytes_readable($sf)."  ".($stdin?"-":$f)."\n";
      next if !$sf;
      my(@t,@s);
      for my $prog (@prog){
          next if !qx(which $prog);
          my @l=1..9;
          push @l,map"e$_",1..9 if $prog eq 'xz' and $o{e};
          @l=map"e$_",1..9      if $prog eq 'xz' and $o{E};

Tools.pm  view on Meta::CPAN

# memoize_memcached         http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
# hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root

# sub mycrc32 {  #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/  eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
#  my ($input, $init_value, $polynomial) = @_;
#  $init_value = 0 unless (defined $init_value);
#  $polynomial = 0xedb88320 unless (defined $polynomial);
#  my @lookup_table;
#  for (my $i=0; $i<256; $i++) {
#    my $x = $i;
#    for (my $j=0; $j<8; $j++) {
#      if ($x & 1) {
#        $x = ($x >> 1) ^ $polynomial;
#      } else {
#        $x = $x >> 1;
#      }
#    }
#    push @lookup_table, $x;
#  }
#  my $crc = $init_value ^ 0xffffffff;
#  foreach my $x (unpack ('C*', $input)) {
#    $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
#  }
#  $crc = $crc ^ 0xffffffff;
#  return $crc;
# }
#
# $maybe_valid_utf8 =~                   # https://stackoverflow.com/questions/11709410/regex-to-detect-invalid-utf-8-string
# m/\A(
#     [\x09\x0A\x0D\x20-\x7E]            # ASCII, or rather: [\x00-\x7F]
#   | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
#   |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
#   | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
#   |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
#   |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
#   | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
#   |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
#  )*\z/x;

=head1 HISTORY

Release history

 0.27  Feb 2020   Small fixes for some platforms

 0.26  Jan 2020   Convert subs: base bin2dec bin2hex bin2oct dec2bin dec2hex dec2oct
                  hex2bin hex2dec hex2oct oct2bin oct2dec oct2hex
                  Array subs: joinr perm permute permute_continue pile sortby subarrays
                  Other subs: btw in_iprange ipnum_ok iprange_ok opts s2t

 0.24  Feb 2019   fixed failes on perl 5.16 and older

 0.23  Jan 2019   Subs: logn, egrep, which. More UTF-8 "oriented" (lower, upper, ...)
                  Commands: zsize, finddup, due (improved), conv (improved, [MGT]?Wh
                  and many more units), due -M for stdin of filenames.

 0.22  Feb 2018   Subs: subarr, sim, sim_perm, aoh2sql. command: resubst

 0.21  Mar 2017   Improved nicenum() and its tests

 0.20  Mar 2017   Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
                  throttle timems refa refaa refah refh refha refhh refs
                  eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
                  Commands: 2bz2 2gz 2xz z2z

 0.172 Dec 2015   Subs: curb openstr pwgen sleepms sleepnm srlz tms username
                   self_update install_acme_command_tools
                  Commands: conv due freq wipe xcat (see "Commands")

 0.16  Feb 2015   bigr curb cpad isnum parta parth read_conf resolve_equation
                  roman2int trim. Improved: conv (numbers currency) range ("derivatives")

 0.15  Nov 2014   Improved doc
 0.14  Nov 2014   New subs, improved tests and doc
 0.13  Oct 2010   Non-linux test issue, resolve. improved: bloom filter, tests, doc
 0.12  Oct 2010   Improved tests, doc, bloom filter, random_gauss, bytes_readable
 0.11  Dec 2008   Improved doc
 0.10  Dec 2008

=head1 SEE ALSO

L<https://github.com/kjetillll/Acme-Tools>

=head1 AUTHOR

Kjetil Skotheim, E<lt>kjetil.skotheim@gmail.comE<gt>

=head1 COPYRIGHT

2008-2020, Kjetil Skotheim

=head1 LICENSE

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=cut



( run in 1.015 second using v1.01-cache-2.11-cpan-39bf76dae61 )