Net-ICQV5

 view release on metacpan or  search on metacpan

demos/client/SNlib.pl  view on Meta::CPAN

            next if /<\s*applet\b/im ;


        # These are seldom-used tags, or tags that seldom have URLs in them

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*bgsound\b/im ;  # Microsoft only

        s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
            next if /<\s*blockquote\b/im ;

        s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
            next if /<\s*del\b/im ;

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*embed\b/im ;    # Netscape only

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
        s/(<[^>]*\bimagemap\s*=\s*["']?)([^\s"'>]*)/   $1 . &full_url($2) /ime,
            next if /<\s*fig\b/im ;      # HTML 3.0

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*h[1-6]\b/im ;   # HTML 3.0

        s/(<[^>]*\bprofile\s*=\s*["']?)([^\s"'>]*)/    $1 . &full_url($2) /ime,
            next if /<\s*head\b/im ;

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*hr\b/im ;       # HTML 3.0

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
        s/(<[^>]*\blongdesc\s*=\s*["']?)([^\s"'>]*)/   $1 . &full_url($2) /ime,
            next if /<\s*iframe\b/im ;

        s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
            next if /<\s*ins\b/im ;

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*layer\b/im ;

        s/(<[^>]*\bhref\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
        s/(<[^>]*\burn\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*link\b/im ;

        s/(<[^>]*\burl\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*meta\b/im ;     # Netscape only

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*note\b/im ;     # HTML 3.0

        s/(<[^>]*\busemap\s*=\s*["']?)([^\s"'>]*)/     $1 . &full_url($2) /ime,
        s/(<[^>]*\bcodebase\s*=\s*["']?)([^\s"'>]*)/   $1 . &full_url($2) /ime,
        s/(<[^>]*\bdata\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
        s/(<[^>]*\barchive\s*=\s*["']?)([^\s"'>]*)/    $1 . &full_url($2) /ime,
        s/(<[^>]*\bclassid\s*=\s*["']?)([^\s"'>]*)/    $1 . &full_url($2) /ime,
        s/(<[^>]*\bname\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
            next if /<\s*object\b/im ;

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
        s/(<[^>]*\bimagemap\s*=\s*["']?)([^\s"'>]*)/   $1 . &full_url($2) /ime,
            next if /<\s*overlay\b/im ;  # HTML 3.0

        s/(<[^>]*\bcite\s*=\s*["']?)([^\s"'>]*)/       $1 . &full_url($2) /ime,
            next if /<\s*q\b/im ;

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
        s/(<[^>]*\bfor\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*script\b/im ;

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*select\b/im ;   # HTML 3.0

        s/(<[^>]*\bsrc\s*=\s*["']?)([^\s"'>]*)/        $1 . &full_url($2) /ime,
            next if /<\s*ul\b/im ;       # HTML 3.0

	}   # foreach (@body)

	local($commonline)="";
	foreach $line (@body) {$commonline="$commonline$line>";}
	substr($commonline,-1)="";

	return $commonline;
}
###############################################################################
sub full_url{
    	local($link)= @_ ;

	$oldlink=$link;

	if($link=~ m|^(http://)|i) {goto exit_full_url;}
	if($link=~ m|^(mailto:)|i) {goto exit_full_url;}
	if($link=~ m|^(javascript:)|i) {goto exit_full_url;}
	if($link=~ m|^(#)|i) {goto exit_full_url;}

	$link=~ s|^/|$baseurl/|i;
	$link=~ s|^\./|$relurl/|i;
	$link=~ s|^\.\./|$relurl2/|i;

	if(!($link=~ m|^(http://)|i)) {$link = "$relurl/$link";}

exit_full_url:

	open(FILE,">>$logfile");
	print FILE "'$oldlink' -> '$link'\n";
	close(FILE);

	return $link;
}
###############################################################################
sub GetRelativeUrls {
	local($url) = @_;

#open(FILE,">>$logfile");

#print FILE "\n";
#print FILE "URL: $url\n";

$url     =~ m|^(http://)([^/\?\r\n]*)|i;
$host    = $2;
#print FILE "HOST: $host\n";



( run in 1.851 second using v1.01-cache-2.11-cpan-39bf76dae61 )