KeywordsSpider

 view release on metacpan or  search on metacpan

lib/KeywordsSpider.pm  view on Meta::CPAN

package KeywordsSpider;

use KeywordsSpider::Core 'find_origin';
use Modern::Perl;
use Parallel::ForkManager;

# Query-parameter names treated as noise. _remove_garbage loops over this
# list in order and strips every "<name>=..." occurrence from a referrer
# string.
my @unwanted_params = qw/
  referer
  ref
  sesid
  hash
  ssid
/;

# File-scoped state shared by the subs in this package; the code that reads
# and updates these is not visible in this section.
# NOTE(review): presumably a running count of spidered sites and a registry
# of links already seen -- confirm against the callers.
my $spidered_websites = 0;
my %links = ();

# NOTE(review): presumably the most recently resolved origin and a regex
# matching its domain -- confirm. The initial '^$' looks like a pattern that
# matches only the empty string, i.e. "no domain known yet".
my $old_origin = '';
my $old_origin_domain = '^$';

# NOTE(review): general-purpose counter; its meaning is not visible in this
# section -- confirm against the callers.
my $COUNT = 0;

# Conditionally emits a diagnostic message.
#
# Arguments:
#   $message - text to print; printed verbatim, no newline is appended
#   $enabled - flag; the message is printed only when this is true
#
# Prints to the currently selected output handle and always returns
# nothing (bare return).
sub debug {
  my ($message, $enabled) = @_;

  print "$message" if $enabled;

  return;
}

# Normalizes a raw URL string taken from input.
#
# Steps:
#   1. removes one trailing newline (chomp),
#   2. strips a single leading and a single trailing single quote,
#   3. returns the literal placeholder 'null' unchanged,
#   4. prepends 'http://' unless the string already starts with an
#      http:// or https:// scheme.
#
# Arguments:
#   $url - the raw URL string
#
# Returns the normalized URL string.
sub _normalize_url {
  my $url = shift;

  chomp($url);
  # The patterns are anchored, so each can match at most once; no /g needed.
  $url =~ s/^'//;
  $url =~ s/'$//;

  # 'null' is a placeholder, not a URL: pass it through untouched.
  if ( $url =~ /^null$/ ) {
    return $url;
  }

  # Only a leading scheme counts. An unanchored /http/ test would skip
  # hosts such as "httpbin.org" or URLs that merely carry "http" in their
  # path or query string. Schemes are case-insensitive, hence /i.
  if ( $url !~ m{\Ahttps?://}i ) {
    $url = 'http://' . $url;
  }

  return $url;
}

sub _remove_garbage {
  my $referrer = shift;
  foreach (@unwanted_params) {
    if ( $referrer =~ /$_/ ) {
      my $temp = $_;
      $referrer =~ s/${temp}=.*&//g;
      $referrer =~ s/${temp}=.*$//g;
    }
  }

  #probably a hash
  $referrer =~ s/=.{32}&/=&/g;
  $referrer =~ s/=.{32}$/=/g;



( run in 1.108 second using v1.01-cache-2.11-cpan-437f7b0c052 )