TAPORlib
view release on metacpan or search on metacpan
lib/TAPORlib.pm view on Meta::CPAN
next if /<\s*applet\b/im ;
# These are seldom-used tags, or tags that seldom have URLs in them
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*bgsound\b/im ; # Microsoft only
s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*blockquote\b/im ;
s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*del\b/im ;
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*embed\b/im ; # Netscape only
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bimagemap\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*fig\b/im ; # HTML 3.0
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*h[1-6]\b/im ; # HTML 3.0
s/(<[^>]*\bprofile\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*head\b/im ;
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*hr\b/im ; # HTML 3.0
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\blongdesc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*iframe\b/im ;
s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*ins\b/im ;
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*layer\b/im ;
s/(<[^>]*\bhref\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\burn\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*link\b/im ;
s/(<[^>]*\burl\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*meta\b/im ; # Netscape only
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*note\b/im ; # HTML 3.0
s/(<[^>]*\busemap\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bcodebase\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bdata\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\barchive\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bclassid\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bname\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*object\b/im ;
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bimagemap\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*overlay\b/im ; # HTML 3.0
s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*q\b/im ;
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
s/(<[^>]*\bfor\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*script\b/im ;
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*select\b/im ; # HTML 3.0
s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/ $1 . &full_url($2,$url) /ime,
next if /<\s*ul\b/im ; # HTML 3.0
} # foreach (@body)
my ($commonline) = undef;
foreach (@body) {$commonline = $commonline . $_ . ">";}
substr($commonline,-1) = "";
return $commonline;
}
###############################################################################
sub full_url{
my($link,$url)= @_ ;
my $oldlink=$link;
my ($baseurl,$relurl,$relurl2) = GetRelativeUrls($url);
if($link=~ m|^(http://)|i) {goto exit_full_url;}
if($link=~ m|^(mailto:)|i) {goto exit_full_url;}
if($link=~ m|^(javascript:)|i) {goto exit_full_url;}
if($link=~ m|^(#)|i) {goto exit_full_url;}
$link=~ s|^/|$baseurl/|i;
$link=~ s|^\./|$relurl/|i;
$link=~ s|^\.\./|$relurl2/|i;
if(!($link=~ m|^(http://)|i)) {$link = "$relurl/$link";}
exit_full_url:
return $link;
}
###############################################################################
sub GetRelativeUrls {
my($url) = @_;
$url =~ m|^(http://)([^/\?\r\n]*)|i;
my $host = $2;
### http://adm.ict.nsc.ru/rus/docs/perl/ - http://adm.ict.nsc.ru
### http://www.irtel.ru/ - http://www.irtel.ru
### http://www.irtel.ru - http://www.irtel.ru
my $baseurl = "http://$host";
### http://adm.ict.nsc.ru/rus/docs/perl/ - http://adm.ict.nsc.ru/rus/docs/perl
### http://www.irtel.ru/ - http://www.irtel.ru
( run in 1.370 second using v1.01-cache-2.11-cpan-df04353d9ac )