AI-MicroStructure

 view release on metacpan or  search on metacpan

bin/leo  view on Meta::CPAN

re="${re//oe/$onetwo}"
re="${re//ss/$onetwo}"

# have to use perl for grepping because of umlauts

q=$(lynx -dump -nolist  'http://dict.leo.org/'$from$to'?lp='$from$to'&lang='$from'&searchLoc=0&cmpType=strict&sectHdr=on&spellToler=on&search='$search'&relink=on' | perl -n -e "print if /$re/i")
#perl -MHTML::Entities  -le ' print \$q,encode_entities(\$q),\n';

#echo "$q";

perl  -MEncode -le "print decode('utf-8','$q');";

bin/micro-dict  view on Meta::CPAN


stop=$(perl -MAI::MicroStructure::WordBlacklist -E  "my \$s=AI::MicroStructure::WordBlacklist::getStopWords('de'); my @s = keys %\$s; print join('|',@s);")
IFS=$'\n';

$cmd $1 |   tr A-Z a-z |                # Convert to lowercase.
        tr ' ' '_' |             # New: change spaces to newlines.
       #tr -cd '\012[a-z][0-9]' |   #  Get rid of everything
                                    #+ non-alphanumeric (in orig. script).
        tr -c '\012a-z'  '\012' |   #  Rather than deleting non-alpha
        egrep -v '^#' |              # Delete lines starting with hashmark.
        egrep -v "^[ ]*([A-Za-z][A-Za-z]|[A-Za-z])$" | egrep -v "^$" | egrep -v -i "^ (denkbarer|ganze|bez|ver�ffentlichtes|uns�gliches|ungew�hnliche|vollstaendig|erstem|Inf.|titel|unsaeglichem|beforehand|denkbares|yours|contains|gedurft|seithe...


 stop=$(perl -MAI::MicroStructure::WordBlacklist -E  "my \$s=AI::MicroStructure::WordBlacklist::getStopWords('de'); my @s = keys %\$s; print join('|',@s);")

 cat /tmp/micro-dict.tmp | sort -n | egrep -v "^.*.[\ ].*.[1-9][\:][\ ][\ ]($stop)";


 #if [ !  "$(echo  "$stop" | egrep -i zzzzzzzzzzzz)" ]; then  echo cool; fi


bin/micro-relation-test  view on Meta::CPAN

             compress_threshold => 100_000,
             compress_ratio => 0.9,
             compress_methods => [ \&IO::Compress::Gzip::gzip,
                                   \&IO::Uncompress::Gunzip::gunzip ],
             max_failures => 3,
             failure_timeout => 2,
             ketama_points => 150,
             nowait => 1,
             hash_namespace => 1,
             serialize_methods => [ \&Storable::freeze, \&Storable::thaw ],
             utf8 => ($^V ge v5.8.1 ? 1 : 0),
             max_size => 512 * 1024,
         });





sub  decruft  {
  my($file)  =  @_;
  my($cruftSet)  =  q{%§&|#[^+*(]),'";};

bin/micro-sense  view on Meta::CPAN

#print Dumper [split("sense",`wn $line  -coorn`)];
push @{$data->{'search'}},grep{/[a-z|A-Z]/}split("\n|,",lc `micro-wnet $line words`) unless(!$words);
#push @{$data->{'coordinate'}},map{$_=trim($_); @_=grep(/[A-Z]|[0-9]|[a-z]/,map{$_=trim($_)}split("\n|=>",$_))}split(/Sense/,`wn $line -coorn`) unless(!$words);
#shift @{$data->{'coordinate'}} &&  shift @{$data->{'search'}} for(0..1);
#@{$data->{'search'}};
END{

 $data = {"rows"=>$data,"senses"=>[sort keys %{$data->{'senses'}}]};


   my $utf8_encoded_json_text = encode_json($data);

  if($debug == 1) {
    print Dumper $data,$wn;
  }


  print $utf8_encoded_json_text;

}

bin/micro-wiki  view on Meta::CPAN

#!/usr/bin/perl -w
use utf8;
use File::Basename;
use Data::Printer;
use Data::Dumper;
use Parallel::Iterator qw( iterate );
use Env qw/PWD/;
use JSON::XS;
use HTML::Strip;
use AI::MicroStructure::Util;
use WWW::Wikipedia;
use LWP::UserAgent;
use HTML::SimpleLinkExtor;
use URI::Escape qw( uri_unescape );
our $e = HTML::SimpleLinkExtor->new;

binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';


my $state  = AI::MicroStructure::Util::config();

my @CWD    = $state->{cwd};
our $config = $state->{cfg};


die("require a argument") unless($ARGV[0]);

bin/micro-wiki  view on Meta::CPAN

}

sub URLEncode {
my $theURL = $_[0];
$theURL =~ s/([\W])/"%" . uc(sprintf("%2.2x",ord($1)))/eg;
return $theURL;
}

sub smartdecode {
use URI::Escape qw( uri_unescape );
use utf8;
my $x = my $y = uri_unescape($_[0]);
return $x if utf8::decode($x);
return $y;
}

sub imgTranslate {

my ($idx,$url) = @_;

print $idx;
print $url;
if($url){

bin/micro-wiki  view on Meta::CPAN

      my $wiki = WWW::Wikipedia->new();
      my $hs = HTML::Strip->new();

      my $result = $wiki->search(ucfirst $url);
      if (defined($result) && $result->text() ) {

      my $clean_text = $hs->parse($result->text() );
      $hs->eof;

      require HTML::SimpleLinkExtor;
      no warnings 'utf8';
      my $e = HTML::SimpleLinkExtor->new();
      $e->parse($response->decoded_content);


      my @all_links = $e->links;
      my @tags= map{$_=lc($_); $_=~s/\)|\/wiki\///g; $_=~s/ /_/g; $_=[split("_\\(",$_)] }grep {/([(].+?[)]|$url)/}@all_links ;# $result->related();
      my @audio = grep{/^(\/\/|upload|http).*.(mp3|wave|ogg|OGG|WAVE|MP3)$/}@all_links;
      my @pdf = grep{/^(\/\/|upload|http).*.(pdf|PDF)$/}@all_links;
      my @book = grep{/books.google/i}@all_links;
         foreach(@tags){

bin/remote.pl  view on Meta::CPAN

    $str;
}

sub tr_accent {
    my $str = shift;
    $str =~ tr{ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ}
              {AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy};
    return $str;
}

my %utf2asc = (
    "\xc3\x89" => 'E',
    "\xc3\xa0" => 'a',
    "\xc3\xa1" => 'a',
    "\xc3\xa9" => 'e',
    "\xc3\xaf" => 'i',
    "\xc3\xad" => 'i',
    "\xc3\xb6" => 'o',
    "\xc3\xb8" => 'o',
    "\xc5\xa0" => 'S',
    "\x{0160}" => 'S',
    # for pokemons
    "\x{0101}"     => 'a',
    "\x{012b}"     => 'i',
    "\x{014d}"     => 'o',
    "\x{016b}"     => 'u',
    "\xe2\x99\x80" => 'female',
    "\xe2\x99\x82" => 'male',
    "\x{2640}"     => 'female',
    "\x{2642}"     => 'male',
);
my $utf_re = qr/(@{[join( '|', sort keys %utf2asc )]})/;

sub tr_utf8_basic {
    my $str = shift;
    $str =~ s/$utf_re/$utf2asc{$1}/go;
    return $str;
}

1;

package main;

use Data::Printer;
my $ps = AI::MicroStructure::KnowHow->new();
my @go = $ps->remote_Knowledge();

bin/remote.pl  view on Meta::CPAN

__PACKAGE__->init();

our %Remote = (
    source => {
        female => 'http://en.wikipedia.org/wiki/Knowledge_of_female_porn_stars',
        male   => 'http://en.wikipedia.org/wiki/Knowledge_of_male_porn_stars'
    },
    extract => sub {
        return
            map { AI::MicroStructure::RemoteKnowledge::tr_accent($_) }
            map { AI::MicroStructure::RemoteKnowledge::tr_utf8_basic($_) }
            grep { ! /^Knowledge_|_Groups$/ }
            map { s/[-\s']/_/g; s/[."]//g; $_ }
            $_[0]
            =~ m{^<li>(?:<[^>]*>)?(.*?)(?:(?: ?[-,(<]| aka | see ).*)?</li>}mig
    },
    ,
);

1;
package main;

lib/AI/MicroStructure.pm  view on Meta::CPAN

use File::Spec;
use File::Glob;
use Data::Dumper;
use Data::Printer;
use AI::MicroStructure::Util;
use Carp qw(croak);
our $absstructdir = "";
our $structdir = "";
our $VERSION = '0.20';
our $Structure = 'any'; # default structure
our $CODESET = 'utf8';
our $LANG = '';
our %MICRO;
our %MODS;
our %ALIEN;
our $str = "[A-Z]";
our $special = "any";
our $search;
our $data={};
our $item="";
our @items;

lib/AI/MicroStructure/RemoteList.pm  view on Meta::CPAN

    $str;
}

sub tr_accent {
    my $str = shift;
    $str =~ tr{ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ}
              {AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy};
    return $str;
}

my %utf2asc = (
    "æ"        => 'ae',
    "Æ"        => 'AE',
    "\xc5\xa0" => 'S',
    "\x{0160}" => 'S',
    # for pokemons
    "\x{0101}"     => 'a',
    "\x{012b}"     => 'i',
    "\x{014d}"     => 'o',
    "\x{016b}"     => 'u',
    "\xe2\x99\x80" => 'female',
    "\xe2\x99\x82" => 'male',
    "\x{2640}"     => 'female',
    "\x{2642}"     => 'male',
);
my $utf_re = qr/(@{[join( '|', sort keys %utf2asc )]})/;

sub tr_utf8_basic {
    my $str = shift;
    $str =~ s/$utf_re/$utf2asc{$1}/go;
    return $str;
}

1;

__END__

=head1 NAME

AI::MicroStructure::RemoteList - Retrieval of a remote source for a structure

lib/AI/MicroStructure/RemoteList.pm  view on Meta::CPAN

=item tr_nonword( $str )

Return a copy of C<$str> with all non-word characters turned into
underscores (C<_>).

=item tr_accent( $str )

Return a copy of C<$str> will all iso-8859-1 accented characters turned
into basic ASCII characters.

=item tr_utf8_basic( $str )

Return a copy of C<$str> with some of the utf-8 accented characters turned
into basic ASCII characters. This is very crude, but I didn't to bother
and depend on the proper module to do that.

=back

=head1 AUTHOR

'santex'  << <santex@cpan.org> >>.

=head1 SEE ALSO

lib/AI/MicroStructure/WordBlacklist.pm  view on Meta::CPAN

p particular particularly per perhaps placed please plus possible presumably probably provides
q que quite qv r rather rd re really reasonably regarding regardless regards relatively respectively right
s said same saw say saying says second secondly see seeing seem seemed seeming seems seen self selves sensible sent serious seriously seven several shall she should shouldnt since six so some somebody somehow someone something sometime sometimes some...
t take taken tell tends th than thank thanks thanx that thats thats the their theirs them themselves then thence there thereafter thereby therefore therein theres theres thereupon these they theyd theyll theyre theyve think third this thorough thorou...
u un under unfortunately unless unlikely until unto up upon us use used useful uses using usually uucp
v value various very via viz vs
w want wants was wasnt way we wed welcome well well went were were werent weve what whatever whats when whence whenever where whereafter whereas whereby wherein wheres whereupon wherever whether which while whither who whoever whole whom whos whose w...
x
y yes yet you youd youll your youre yours yourself yourselves youve
z zero
air anr arm cke ckg ckl duo ged gie hie ihk las len lie llt los lte max mmt mvv opa rer rkt sap spr str ung utf zoo zuf zuh abo awo bef beh bmw cks daf del ern geh gen hne hof hrl lle lze mer nau ner ost pfe ppg pur rde rin rke sen tee tvf uni vri wu...
i me my myself we our ours ourselves you your yours yourself
yourselves he him his himself she her hers herself it its
itself they them their theirs themselves what which who whom
this that these those am is are was were be been being have has
had having do does did doing would should could ought i'm
you're he's she's it's we're they're i've you've we've they've
i'd you'd he'd she'd we'd they'd i'll you'll he'll she'll we'll
they'll isn't aren't wasn't weren't hasn't haven't hadn't
doesn't don't didn't won't wouldn't shan't shouldn't can't
cannot couldn't mustn't let's that's who's what's here's

lib/AI/MicroStructure/WordBlacklist.pm  view on Meta::CPAN

anderweitigen anderweitiger anderweitiges anerkannt anerkannte anerkannter anerkanntes anfangen anfing angefangen angesetze angesetzt angesetzten angesetzter ans anscheinend ansetzen ansonst ansonsten anstatt anstelle arbeiten auch auf aufgehört aufg...
bessere besserem besseren besserer besseres bestehen besteht bestenfalls bestimmt bestimmte bestimmtem bestimmten bestimmter bestimmtes betraechtlich betraechtliche betraechtlichem betraechtlichen betraechtlicher betraechtliches betreffend betreffend...
diesseitiges diesseits dinge dir direkt direkte direkten direkter doch doppelt dort dorther dorthin dran drauf drei dreißig drin dritte drueber drum drunter drüber du dunklen durch durchaus durchweg durchwegs durfte durften dürfen dürfte eben ebenfal...
entsprechender entsprechendes entweder er ergo ergänze ergänzen ergänzte ergänzten erhalten erhielt erhielten erhält erneut erst erste erstem ersten erster erstere ersterem ersteren ersterer ersteres erstes eröffne eröffnen eröffnet eröffnete eröffne...
häufige häufigem häufigen häufiger häufigere häufigeren häufigerer häufigeres höchst höchstens ich igitt ihm ihn ihnen ihr ihre ihrem ihren ihrer ihres ihretwegen im immer immerhin immerwaehrend immerwaehrende immerwaehrendem immerwaehrenden immerwae...
jeglichen jeglicher jegliches jemals jemand jene jenem jenen jener jenes jenseitig jenseitigem jenseitiger jenseits jetzt jährig jährige jährigem jährigen jähriges kaeumlich kam kann kannst kaum kein keine keinem keinen keiner keinerlei keines keines...
naechste naemlich nahm naturgemaess naturgemaeß naturgemäss naturgemäß natürlich neben nebenan nehmen nein neu neue neuem neuen neuer neuerdings neuerlich neuerliche neuerlichem neuerlicher neuerliches neues neulich neun nicht nichts nichtsdestotrotz...
seine seinem seinen seiner seines seit seitdem seite seiten seither selbe selben selber selbst selbstredend selbstredende selbstredendem selbstredenden selbstredender selbstredendes seltsamerweise senke senken senkt senkte senkten setzen setzt setzte...
unmaßgeblichem unmaßgeblichen unmaßgeblicher unmaßgebliches unmoeglich unmoegliche unmoeglichem unmoeglichen unmoeglicher unmoegliches unmöglich unmögliche unmöglichen unmöglicher unnötig uns unsaeglich unsaegliche unsaeglichem unsaeglichen unsaeglic...
vollends vollstaendig vollstaendige vollstaendigem vollstaendigen vollstaendiger vollstaendiges vollständig vollständige vollständigem vollständigen vollständiger vollständiges vom von vor voran vorbei vorgestern vorher vorherig vorherige vorherigem ...
würde würden während währenddessen wär wäre wären x übel über überall überallhin überaus überdies überhaupt übermorgen üblicherweise übrig übrigens z.B. zahlreich zahlreichem zahlreicher zB zb. zehn zeitweise zeitweisem zeitweisen zeitweiser ziehen z...
return \%stoplist;
}
}




1;




( run in 0.282 second using v1.01-cache-2.11-cpan-4d50c553e7e )