KeywordsSpider
view release on metacpan or search on metacpan
lib/KeywordsSpider.pm view on Meta::CPAN
package KeywordsSpider;
use KeywordsSpider::Core 'find_origin';
use Modern::Perl;
use Parallel::ForkManager;
# Query-string parameter names that _remove_garbage() loops over: for each
# name found in a referrer string it deletes the matching "name=..." text.
my @unwanted_params = qw/
referer
ref
sesid
hash
ssid
/;
# File-scoped state shared by the subs of this package. The code that reads
# and updates the variables below is not visible next to these declarations,
# so the notes on them are assumptions based on names and initial values.
# Presumably the number of websites spidered so far -- TODO confirm.
my $spidered_websites = 0;
# Presumably the links collected while spidering; key/value layout is not
# shown here -- verify against the code that fills it.
my %links = ();
# Presumably the origin handled most recently (empty string = none yet).
my $old_origin = '';
# NOTE(review): the initial value '^$' looks like a regex source that matches
# only the empty string, i.e. this is probably used as a pattern -- confirm.
my $old_origin_domain = '^$';
# General-purpose counter; what it counts is not visible here -- TODO confirm.
my $COUNT = 0;
# Print a diagnostic message on the currently selected output handle, but
# only when the debug flag is true.
#
# Parameters:
#   $message - text to print; it is printed verbatim, no newline is appended.
#   $enabled - any true value switches the output on.
# Returns nothing (bare return) in both cases.
sub debug {
    my ($message, $enabled) = @_;

    # Guard clause: stay silent unless debugging was requested.
    return unless $enabled;

    print "$message";
    return;
}
# Normalise a raw URL string.
#
# Steps, in order:
#   1. remove one trailing newline (chomp);
#   2. remove one leading and one trailing single quote;
#   3. pass the literal placeholder "null" through unchanged;
#   4. prefix "http://" unless the value already starts with an
#      "http://" or "https://" scheme (checked case-insensitively).
#
# Parameters:
#   $url - raw URL string, optionally quoted and/or newline-terminated.
# Returns:
#   the normalised URL string, or "null" unchanged.
sub _normalize_url {
    my $url = shift;

    chomp($url);
    $url =~ s/^'//;
    $url =~ s/'$//;

    # "null" is returned as-is instead of being turned into a URL.
    return $url if $url =~ /^null$/;

    # Look for a real scheme prefix rather than "http" anywhere in the
    # string: the unanchored test left values such as "httpbin.org/get" or
    # "example.com/http-guide" without a scheme, and being case-sensitive it
    # turned "HTTP://host" into "http://HTTP://host".
    if ( $url !~ m{^https?://}i ) {
        $url = 'http://' . $url;
    }

    return $url;
}
sub _remove_garbage {
my $referrer = shift;
foreach (@unwanted_params) {
if ( $referrer =~ /$_/ ) {
my $temp = $_;
$referrer =~ s/${temp}=.*&//g;
$referrer =~ s/${temp}=.*$//g;
}
}
#probably a hash
$referrer =~ s/=.{32}&/=&/g;
$referrer =~ s/=.{32}$/=/g;
( run in 1.108 second using v1.01-cache-2.11-cpan-437f7b0c052 )