Acme-Tools
view release on metacpan or search on metacpan
distinct
in
in_num
uniq
union
union_all
minus
minus_all
intersect
intersect_all
not_intersect
mix
zip
sim
sim_perm
subarr
subhash
hashtrans
zipb64
zipbin
unzipb64
Output: C<< 4 3 five >>
=cut
sub intersect {
my %first=map{($_=>1)}@{$_[0]};
my %seen;
return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
}
=head2 not_intersect
Input: Two arrayrefs
Output: An array containing all elements member of just one of the input arrays (not both).
Example:
perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
The output is C<< 1 2 >>.
=cut
sub not_intersect {
my %code;
my %seen;
for(@{$_[0]}){$code{$_}|=1}
for(@{$_[1]}){$code{$_}|=2}
return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
}
=head2 uniq
Input: An array of strings (or numbers)
sub tablestring {
my $tab=shift;
my %o=$_[0] ? %{shift()} : ();
my $remove_empty = $o{remove_empty_columns};
my $no_multiline_space = $o{no_multiline_space};
my $nodup = $o{nodup}||0;
my $no_header_line = $o{no_header_line};
my $pagesize = exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
my $left_force = $o{left};
my(@width,@left,@height,@not_empty,@nodup);
my $head=1;
my $i=0;
my $j;
for(@$tab){
$j=0;
$height[$i]=0;
my $nodup_rad=$nodup;
if(ref($_) eq 'ARRAY'){
for(@$_){
my $cell=$_;
$width[$j]||=0;
if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
$cell=$nodup==1?"":$nodup;
$nodup[$i][$j]=1;
}
else{
my $height=0;
my $wider;
no warnings;
$not_empty[$j]=1 if !$head && length($cell)>0;
for(split("\n",$cell)){
$wider=/<input.+type=text.+size=(\d+)/i?$1:0; #hm
s/<[^>]+>//g;
$height++;
s/>/>/g;
s/</</g;
$width[$j]=length($_)+1+$wider if length($_)+1+$wider>$width[$j];
$left[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$head;
}
if( $height>1 && !$no_multiline_space){
else{
$height[$i]=1;
$no_header_line=1;
}
$head=0;
$i++;
}
$i=$#height;
$j=$#width;
if($i==0 or $left_force) { @left=map{1}(0..$j) }
else { for(0..$j){ $left[$_]=1 if !$not_empty[$_] } }
my @tabout;
my $row_start_line=0;
my @header;
my $header_last;
for my $x (0..$i){
if($$tab[$x] eq '-'){
my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
$tabout[$row_start_line]=join(" ",map {$tegn[$_] x ($width[$_]-1)} (0..$j));
}
else{
for my $y (0..$j){
next if $remove_empty && !$not_empty[$y];
no warnings;
my @cell = !$header_last&&$nodup&&$nodup[$x][$y]
? ($nodup>0?():((" " x (($width[$y]-length($nodup))/2)).$nodup))
: split("\n",$$tab[$x][$y]);
for(0..($height[$x]-1)){
my $line=$row_start_line+$_;
my $txt=shift(@cell);
$txt='' if !defined$txt;
$txt=sprintf("%*s",$width[$y]-1,$txt) if length($txt)>0 && !$left[$y] && ($x>0 || $no_header_line);
$tabout[$line].=$txt;
}
}
}
}
$row_start_line+=$height[$x];
#--lage streker?
if(not $no_header_line){
if($x==0){
for my $y (0..$j){
next if $remove_empty && !$not_empty[$y];
$tabout[$row_start_line].=('-' x ($width[$y]-1))." ";
}
$row_start_line++;
@header=("",@tabout);
}
elsif(
$x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
and $x+1<@$tab
and !$no_header_line
)
Same as C<bfcheck> except it returns the keys that exists in the bloom filter
@found = bfgrep($bf, @keys); # or ...
@found = bfgrep($bf, \@keys); # better, uses less memory if @keys is large, or ...
@found = grep bfcheck($bf,$_), @keys; # same but slower
=head2 bfgrepnot
Same as C<bfgrep> except it returns the keys that do NOT exists in the bloom filter:
@not_found = bfgrepnot($bf, @keys); # or ...
@not_found = bfgrepnot($bf, \@keys); # better, uses less memory if @keys is large, or ...
@not_found = grep !bfcheck($bf,$_), @keys); # same but slower
=head2 bfdelete
Deletes from a counting bloom filter.
To enable deleting be sure to initialize the bloom filter with the
numeric C<counting_bits> argument. The number of bits could be 2 or 3*)
for small filters with a small capacity (a small number of keys), but
setting the number to 4 ensures that even very large filters with very
small error rates would not overflow.
=cut
sub bfinit {
return bfretrieve(@_) if @_==1;
return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
require Digest::MD5;
@_%2&&croak "Arguments should be a hash of equal number of keys and values";
my %arg=@_;
my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
my $bf={error_rate => 0.001, #default p
capacity => 100000, #default n
min_hashfuncs => 1,
max_hashfuncs => 100,
counting_bits => 1, #default: not counting filter
adaptive => 0,
%arg, #arguments
t/02_general.t view on Meta::CPAN
#--union
ok( join( ",", union([1,2,3],[2,3,3,4,4]) ) eq '1,2,3,4' );
#--minus
ok( join( " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] ) ) eq 'five 1 2' );
#--intersect
ok( join(" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )) eq '4 3 five' );
#--not_intersect
ok( join( " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )) eq '1 2' );
#--subhash
my %pop = ( Norway=>4800000, Sweeden=>8900000, Finland=>5000000,
Denmark=>5100000, Iceland=>260000, India => 1e9 );
ok_ref({subhash(\%pop,qw/Norway Sweeden Denmark/)},
{Denmark=>5100000,Norway=>4800000,Sweeden=>8900000}, 'subhash');
#--hashtrans
my%h = ( 1 => {a=>33,b=>55},
2 => {a=>11,b=>22},
( run in 0.252 second using v1.01-cache-2.11-cpan-0a987023a57 )