AI-MicroStructure
view release on metacpan or search on metacpan
bin/micro-relation-test view on Meta::CPAN
compress_threshold => 100_000,
compress_ratio => 0.9,
compress_methods => [ \&IO::Compress::Gzip::gzip,
\&IO::Uncompress::Gunzip::gunzip ],
max_failures => 3,
failure_timeout => 2,
ketama_points => 150,
nowait => 1,
hash_namespace => 1,
serialize_methods => [ \&Storable::freeze, \&Storable::thaw ],
utf8 => ($^V ge v5.8.1 ? 1 : 0),
max_size => 512 * 1024,
});
sub decruft {
my($file) = @_;
my($cruftSet) = q{%ç&|#[^+*(]),'";};
bin/micro-sense view on Meta::CPAN
#print Dumper [split("sense",`wn $line -coorn`)];
push @{$data->{'search'}},grep{/[a-z|A-Z]/}split("\n|,",lc `micro-wnet $line words`) unless(!$words);
#push @{$data->{'coordinate'}},map{$_=trim($_); @_=grep(/[A-Z]|[0-9]|[a-z]/,map{$_=trim($_)}split("\n|=>",$_))}split(/Sense/,`wn $line -coorn`) unless(!$words);
#shift @{$data->{'coordinate'}} && shift @{$data->{'search'}} for(0..1);
#@{$data->{'search'}};
# Exit-time reporter: wraps whatever has been collected in $data into a
# { rows, senses } structure, serialises it with encode_json and prints it.
# When $debug equals 1 the wrapped structure and $wn are dumped first.
END {
    # Sorted names of the entries stored under the 'senses' key.
    my @sense_names = sort keys %{$data->{'senses'}};

    # The previous $data becomes the "rows" member of the new top-level hash.
    $data = { "rows" => $data, "senses" => \@sense_names };

    my $json_out = encode_json($data);

    print Dumper $data,$wn if $debug == 1;

    print $json_out;
}
bin/micro-wiki view on Meta::CPAN
#!/usr/bin/perl -w
use utf8;
use File::Basename;
use Data::Printer;
use Data::Dumper;
use Parallel::Iterator qw( iterate );
use Env qw/PWD/;
use JSON::XS;
use HTML::Strip;
use AI::MicroStructure::Util;
use WWW::Wikipedia;
use LWP::UserAgent;
use HTML::SimpleLinkExtor;
use URI::Escape qw( uri_unescape );
# Package-global HTML::SimpleLinkExtor instance.
our $e = HTML::SimpleLinkExtor->new;
# Write wide characters to both standard output streams as UTF-8.
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';
# State hash returned by AI::MicroStructure::Util::config(); its 'cwd' and
# 'cfg' entries are read below.
my $state = AI::MicroStructure::Util::config();
# NOTE(review): this stores the single value of $state->{cwd} as the only
# element of @CWD; if 'cwd' holds an array reference it is not flattened --
# confirm this is intended.
my @CWD = $state->{cwd};
our $config = $state->{cfg};
# A first command-line argument is mandatory. Note that a false value such as
# "0" is rejected as well, because the check is on truthiness.
die("require a argument") unless($ARGV[0]);
bin/micro-wiki view on Meta::CPAN
}
# Percent-encode a string for use inside a URL.
#
# Every character other than an ASCII letter, digit or underscore is replaced
# by "%XX" (two upper-case hex digits). Character strings (those carrying
# Perl's internal UTF-8 flag) are converted to UTF-8 bytes first, so a
# non-ASCII character becomes one "%XX" escape per byte instead of an invalid
# escape such as "%263A", and accented letters are no longer passed through
# unescaped. Plain byte strings are escaped byte by byte, exactly as before.
#
# Param:   $_[0]  text to encode
# Returns: the encoded copy (the caller's variable is not modified);
#          undef is returned unchanged.
sub URLEncode {
    my $theURL = $_[0];
    return $theURL unless defined $theURL;

    # Work on bytes: ord() of a wide character would not fit into "%XX".
    utf8::encode($theURL) if utf8::is_utf8($theURL);

    # Explicit ASCII class instead of \W, whose meaning depends on whether
    # the string is flagged as characters or bytes.
    $theURL =~ s/([^A-Za-z0-9_])/sprintf("%%%02X", ord($1))/eg;
    return $theURL;
}
# Undo the percent-escapes in the given string and, when the resulting bytes
# are valid UTF-8, return them decoded to characters; otherwise return the
# unescaped bytes as they are.
sub smartdecode {
    use URI::Escape qw( uri_unescape );
    use utf8;

    my ($escaped) = @_;
    my $raw       = uri_unescape($escaped);

    # utf8::decode works in place and reports success, so try it on a copy.
    my $decoded = $raw;
    return utf8::decode($decoded) ? $decoded : $raw;
}
sub imgTranslate {
my ($idx,$url) = @_;
print $idx;
print $url;
if($url){
bin/micro-wiki view on Meta::CPAN
my $wiki = WWW::Wikipedia->new();
my $hs = HTML::Strip->new();
my $result = $wiki->search(ucfirst $url);
if (defined($result) && $result->text() ) {
my $clean_text = $hs->parse($result->text() );
$hs->eof;
require HTML::SimpleLinkExtor;
no warnings 'utf8';
my $e = HTML::SimpleLinkExtor->new();
$e->parse($response->decoded_content);
my @all_links = $e->links;
my @tags= map{$_=lc($_); $_=~s/\)|\/wiki\///g; $_=~s/ /_/g; $_=[split("_\\(",$_)] }grep {/([(].+?[)]|$url)/}@all_links ;# $result->related();
my @audio = grep{/^(\/\/|upload|http).*.(mp3|wave|ogg|OGG|WAVE|MP3)$/}@all_links;
my @pdf = grep{/^(\/\/|upload|http).*.(pdf|PDF)$/}@all_links;
my @book = grep{/books.google/i}@all_links;
foreach(@tags){
bin/remote.pl view on Meta::CPAN
"\x{012b}" => 'i',
"\x{014d}" => 'o',
"\x{016b}" => 'u',
"\xe2\x99\x80" => 'female',
"\xe2\x99\x82" => 'male',
"\x{2640}" => 'female',
"\x{2642}" => 'male',
);
my $utf_re = qr/(@{[join( '|', sort keys %utf2asc )]})/;
# Return a copy of the given string in which every match of $utf_re has been
# replaced by the value stored for the matched text in %utf2asc.
sub tr_utf8_basic {
    my ($text) = @_;
    $text =~ s{$utf_re}{$utf2asc{$1}}go;
    return $text;
}
1;
package main;
use Data::Printer;
bin/remote.pl view on Meta::CPAN
__PACKAGE__->init();
# Remote description for this script: a 'source' map of page URLs keyed by
# category ('female' / 'male') and an 'extract' callback that turns page text
# into a list of item names.
# NOTE(review): the "Knowledge_of_..." page names, and the /^Knowledge_/ filter
# below, look like the product of a global rename -- verify that these URLs
# still resolve.
our %Remote = (
source => {
female => 'http://en.wikipedia.org/wiki/Knowledge_of_female_porn_stars',
male => 'http://en.wikipedia.org/wiki/Knowledge_of_male_porn_stars'
},
# extract( $content ): returns the list of names found in $content.
# The chain below is evaluated bottom-up:
#   1. the match captures the leading text of every "<li>...</li>" line
#      (after one optional opening tag), stopping at the first '-', ',', '('
#      or '<' (optionally preceded by a space) or at " aka " / " see ";
#   2. hyphens, whitespace and apostrophes become '_', dots and double
#      quotes are removed;
#   3. entries starting with "Knowledge_" or ending with "_Groups" are dropped;
#   4. tr_utf8_basic and then tr_accent are applied to each remaining entry.
extract => sub {
return
map { AI::MicroStructure::RemoteKnowledge::tr_accent($_) }
map { AI::MicroStructure::RemoteKnowledge::tr_utf8_basic($_) }
grep { ! /^Knowledge_|_Groups$/ }
map { s/[-\s']/_/g; s/[."]//g; $_ }
$_[0]
=~ m{^<li>(?:<[^>]*>)?(.*?)(?:(?: ?[-,(<]| aka | see ).*)?</li>}mig
},
,
);
1;
package main;
lib/AI/MicroStructure.pm view on Meta::CPAN
use File::Spec;
use File::Glob;
use Data::Dumper;
use Data::Printer;
use AI::MicroStructure::Util;
use Carp qw(croak);
our $absstructdir = "";
our $structdir = "";
our $VERSION = '0.20';
our $Structure = 'any'; # default structure
our $CODESET = 'utf8';
our $LANG = '';
our %MICRO;
our %MODS;
our %ALIEN;
our $str = "[A-Z]";
our $special = "any";
our $search;
our $data={};
our $item="";
our @items;
lib/AI/MicroStructure/RemoteList.pm view on Meta::CPAN
"\x{012b}" => 'i',
"\x{014d}" => 'o',
"\x{016b}" => 'u',
"\xe2\x99\x80" => 'female',
"\xe2\x99\x82" => 'male',
"\x{2640}" => 'female',
"\x{2642}" => 'male',
);
my $utf_re = qr/(@{[join( '|', sort keys %utf2asc )]})/;
# Return a copy of the given string in which every match of $utf_re has been
# replaced by the value stored for the matched text in %utf2asc.
sub tr_utf8_basic {
    my ($text) = @_;
    $text =~ s{$utf_re}{$utf2asc{$1}}go;
    return $text;
}
1;
__END__
=head1 NAME
lib/AI/MicroStructure/RemoteList.pm view on Meta::CPAN
=item tr_nonword( $str )
Return a copy of C<$str> with all non-word characters turned into
underscores (C<_>).
=item tr_accent( $str )
Return a copy of C<$str> with all ISO-8859-1 accented characters turned
into basic ASCII characters.
=item tr_utf8_basic( $str )
Return a copy of C<$str> with some of the utf-8 accented characters turned
into basic ASCII characters. This is very crude, but I didn't want to bother
with depending on the proper module to do that.
=back
=head1 AUTHOR
'santex' C<< <santex@cpan.org> >>.
( run in 0.521 second using v1.01-cache-2.11-cpan-49f99fa48dc )