AI-MicroStructure

 view release on metacpan or  search on metacpan

bin/micro-wiki  view on Meta::CPAN


# HTTP client shared by the rest of the script; page fetches go through it.
my $ua = LWP::UserAgent->new();

# File-level working variables.
#   @book     - starts out as an empty list
#   $response - starts out as an empty string and is later overwritten with
#               the value returned by $ua->get(...)
#   $content  - declared without a value (undef)
my @book;
my $response = '';
my $content;


      $response  = $ua->get(sprintf("%s%s",$config->{wikipedia}, ucfirst($url)));

      my $doc={};
      my $linkdata={};




      my $wiki = WWW::Wikipedia->new();
      my $hs = HTML::Strip->new();

      my $result = $wiki->search(ucfirst $url);
      if (defined($result) && $result->text() ) {

      my $clean_text = $hs->parse($result->text() );
      $hs->eof;

      require HTML::SimpleLinkExtor;
      no warnings 'utf8';
      my $e = HTML::SimpleLinkExtor->new();
      $e->parse($response->decoded_content);


      my @all_links = $e->links;
      my @tags= map{$_=lc($_); $_=~s/\)|\/wiki\///g; $_=~s/ /_/g; $_=[split("_\\(",$_)] }grep {/([(].+?[)]|$url)/}@all_links ;# $result->related();
      my @audio = grep{/^(\/\/|upload|http).*.(mp3|wave|ogg|OGG|WAVE|MP3)$/}@all_links;
      my @pdf = grep{/^(\/\/|upload|http).*.(pdf|PDF)$/}@all_links;
      my @book = grep{/books.google/i}@all_links;
         foreach(@tags){
            if($_->[1] && $_->[1]!~/\W/){
              $doc->{tags}->{$_->[1]}->{$_->[0]} = $doc->{tags}->{$_->[1]}->{$_->[0]} ? $doc->{tags}->{$_->[1]}->{$_->[0]} +1:1;
            }
         }

          $doc->{image}=[map{$_="http:$_"; }grep{/[1-9][0-9][0-9]px/}$e->img];
          $doc->{cat}=[grep{/^.*.(Kategory|Category)+?/}@all_links];
          $doc->{cat}=[sort grep{!/(category|wikipedia|article|page|List.*.of)/i}map{$a=$_; $a =~ s/^.*.://g; $_=$a;}@{$doc->{cat}}];
          $doc->{list}=[grep{/List.*.of_/}@all_links];
          $doc->{list}=[sort map{$a=$_; $a=~ s/^.*.List/List/g; $_=$a;}@{$doc->{list}}];
          
      
          $doc->{book}= [@book] unless(!@book);
          $doc->{pdf}= [@pdf] unless(!@pdf);
          $doc->{audio}= [@audio] unless(!@audio);
          $doc->{related} = quantify $result->related();
          $doc->{links} = [sort grep{/http/}@all_links];
          
        


         
        #eval '$couchdb->store("$url" ,$doc)' or warn "error: $@\n";

      # createJsonFile($url,$doc);
    p $doc;

    return $doc;

}


}



# Command-line driver.
#
# Every true-valued command-line argument is passed to call() (with a
# leading 0 argument), and whatever call() returns is stored in $result
# under that argument as the key.  Arguments that are false in Perl's
# boolean sense (empty string, "0") are skipped.
#
# NOTE(review): $result and call() are not defined in this part of the
# file; presumably $result is a hash reference declared earlier - confirm.
our @out = ();

for my $urlx (@ARGV) {
    next unless $urlx;

    $result->{$urlx} = call(0, $urlx);
}



1;

__DATA__



( run in 0.319 second using v1.01-cache-2.11-cpan-0c5ce583b80 )