AI-MicroStructure
view release on metacpan or search on metacpan
bin/micro-wiki view on Meta::CPAN
my $ua = LWP::UserAgent->new;
my $content ;
my $response ="";
my @book = ();
$response = $ua->get(sprintf("%s%s",$config->{wikipedia}, ucfirst($url)));
my $doc={};
my $linkdata={};
my $wiki = WWW::Wikipedia->new();
my $hs = HTML::Strip->new();
my $result = $wiki->search(ucfirst $url);
if (defined($result) && $result->text() ) {
my $clean_text = $hs->parse($result->text() );
$hs->eof;
require HTML::SimpleLinkExtor;
no warnings 'utf8';
my $e = HTML::SimpleLinkExtor->new();
$e->parse($response->decoded_content);
my @all_links = $e->links;
my @tags= map{$_=lc($_); $_=~s/\)|\/wiki\///g; $_=~s/ /_/g; $_=[split("_\\(",$_)] }grep {/([(].+?[)]|$url)/}@all_links ;# $result->related();
my @audio = grep{/^(\/\/|upload|http).*.(mp3|wave|ogg|OGG|WAVE|MP3)$/}@all_links;
my @pdf = grep{/^(\/\/|upload|http).*.(pdf|PDF)$/}@all_links;
my @book = grep{/books.google/i}@all_links;
foreach(@tags){
if($_->[1] && $_->[1]!~/\W/){
$doc->{tags}->{$_->[1]}->{$_->[0]} = $doc->{tags}->{$_->[1]}->{$_->[0]} ? $doc->{tags}->{$_->[1]}->{$_->[0]} +1:1;
}
}
$doc->{image}=[map{$_="http:$_"; }grep{/[1-9][0-9][0-9]px/}$e->img];
$doc->{cat}=[grep{/^.*.(Kategory|Category)+?/}@all_links];
$doc->{cat}=[sort grep{!/(category|wikipedia|article|page|List.*.of)/i}map{$a=$_; $a =~ s/^.*.://g; $_=$a;}@{$doc->{cat}}];
$doc->{list}=[grep{/List.*.of_/}@all_links];
$doc->{list}=[sort map{$a=$_; $a=~ s/^.*.List/List/g; $_=$a;}@{$doc->{list}}];
$doc->{book}= [@book] unless(!@book);
$doc->{pdf}= [@pdf] unless(!@pdf);
$doc->{audio}= [@audio] unless(!@audio);
$doc->{related} = quantify $result->related();
$doc->{links} = [sort grep{/http/}@all_links];
#eval '$couchdb->store("$url" ,$doc)' or warn "error: $@\n";
# createJsonFile($url,$doc);
p $doc;
return $doc;
}
}
# Package-level list; nothing in the visible code reads or fills it.
our @out = ();

# Driver: run the lookup once for every true-valued command-line argument
# and keep what call() returns under that argument's own key in $result.
for my $urlx (@ARGV) {
    next unless $urlx;    # skip empty / false arguments
    $result->{$urlx} = call(0, $urlx);
}

1;
__DATA__
( run in 0.319 second using v1.01-cache-2.11-cpan-0c5ce583b80 )