Alvis-Convert

 view release on metacpan or  search on metacpan

lib/Alvis/Wikipedia/Templates.pm  view on Meta::CPAN

	       'ns:3'=>'User_talk',
	       'ns:User_talk'=>'User_talk',
	       'ns:4'=>'Wikipedia',
	       'ns:Project'=>'Wikipedia',
	       'ns:5'=>'Wikipedia_talk',
	       'ns:Project_talk'=>'Wikipedia_talk',
	       'ns:6'=>'Image',
	       'ns:Image'=>'Image',
	       'ns:7'=>'Image_talk',
	       'ns:Image_talk'=>'Image_talk',
	       'ns:8'=>'MediaWiki',
	       'ns:MediaWiki'=>'MediaWiki',
	       'ns:9'=>'MediaWiki_talk',
	       'ns:MediaWiki_talk'=>'MediaWiki_talk',
	       'ns:10'=>'Template',
	       'ns:Template'=>'Template',
	       'ns:11'=>'Template_talk',
	       'ns:Template_talk'=>'Template_talk',
	       'ns:12'=>'Help',
	       'ns:Help'=>'Help',
	       'ns:13'=>'Help_talk',
	       'ns:Help_talk'=>'Help_talk',
	       'ns:14'=>'Category',
	       'ns:Category'=>'Category',
	       'ns:15'=>'Category_talk',
	       'ns:Category_talk'=>'Category_talk',
	       'ns:100'=>'Portal',
	       'ns:101'=>'Portal_talk',
	       'SITENAME'=>'wikipedia',
	       'SERVER'=>'http://en.wikipedia.org',
	       'SERVERNAME'=>'en.wikipedia.org',
	       'localurl:'=>'/wiki/',
	       'localurle:'=>'/wiki/',
	       'fullurl:'=>'http://en.wikipedia.org/wiki/'
	       );

#########################################################################
#
#   Error message stuff
#
#########################################################################

my $ErrStr;
my ($ERR_OK,
    $ERR_PARSER,
    $ERR_NORM,
    $ERR_UNK_TEMPL,
    $ERR_PARAM,
    $ERR_NO_TEXT,
    $ERR_NO_TITLE,
    $ERR_NO_NAMESPACE,
    $ERR_STORE,
    $ERR_UNDEF_DUMP,
    $ERR_RETRIEVE
    )=(0..10);
my %ErrMsgs=($ERR_OK=>"",
	     $ERR_PARSER=>"Unable to instantiate Alvis::Wikipedia::WikitextParser.",
	     $ERR_NORM=>"Title normalization failed.",
	     $ERR_UNK_TEMPL=>"Unrecognized template name.",
	     $ERR_PARAM=>"Application of a parameter pattern failed.",
	     $ERR_NO_TEXT=>"Undefined text to expand",
	     $ERR_NO_TITLE=>"Undefined title to expand",
	     $ERR_NO_NAMESPACE=>"Undefined namespace to expand",
	     $ERR_STORE=>"Storable::store() failed.",
	     $ERR_UNDEF_DUMP=>"Trying to dump when there are no definitions.",
	     $ERR_RETRIEVE=>"Storable::retrieve() failed."
	    );

sub _set_err_state
{
    my $self=shift;
    my $errcode=shift;
    my $errmsg=shift;

    if (!defined($errcode))
    {
	confess("set_err_state() called with an undefined argument.");
    }

    if (exists($ErrMsgs{$errcode}))
    {
	if ($errcode==$ERR_OK)
	{
	    $self->{errstr}="";
	}
	else
	{
	    $self->{errstr}.=" " . $ErrMsgs{$errcode};
	    if (defined($errmsg))
	    {
		$self->{errstr}.=" " . $errmsg;
	    }
	}
    }
    else
    {
	confess("Internal error: set_err_state() called with an " .
		"unrecognized argument ($errcode).")
    }
}

sub errmsg
{
    my $self=shift;

    return $self->{errstr};
}

sub clearerr
{
    my $self=shift;

    $self->{errstr}="";
}

#######################################################################
#
#  Public methods
#
#######################################################################

sub new
{

lib/Alvis/Wikipedia/Templates.pm  view on Meta::CPAN

    my $self=shift;
    my $f=shift;

    if (!defined($self->{defs}))
    {
	$self->_set_err_state($ERR_UNDEF_DUMP);
	return 0;
    }

    my %defs=%{$self->{defs}};
    if (store(\%defs,$f))
    {
	return 1;
    }
    else
    {
	$self->_set_err_state($ERR_STORE,"File:\"$f\".");
	return 0;
    }
}

sub load
{
    my $self=shift;
    my $f=shift;

    my $defs=retrieve($f);
    if (!defined($defs))
    {
	$self->_set_err_state($ERR_RETRIEVE,"File:\"$f\".");
	return 0;
    }
    my %defs=%$defs;
    $self->{defs}=\%defs;

    return 1;
}

sub add
{
    my $self=shift;
    my $name=shift;
    my $def=shift;

    my $norm_name=$self->{parser}->normalize_title($name);
    if (!defined($norm_name))
    {
	$self->_set_err_state($ERR_NORM,"Name:\"$name\".");
	return 0;
    }

    $def=~s/<noinclude>.*?<\/noinclude>//sgo;
    $def=~s/<\/?includeonly>//sgo;
    
    $self->{defs}{$norm_name}=$def;

    return 1;
}

#
#  expand_for_real:  do we try to expand the templates for real
#                    (messy and error-prone) or do we simply replace
#                    with a list of the parameter values?
#
sub expand 
{
    my $self=shift;
    my $namespace=shift;
    my $title=shift;
    my $text=shift;
    my $expand_for_real=shift;

    if (!defined($namespace))
    {
	$self->_set_err_state($ERR_NO_NAMESPACE);
	return undef;
    }
    if (!defined($title))
    {
	$self->_set_err_state($ERR_NO_TITLE);
	return undef;
    }
    if (!defined($text))
    {
	$self->_set_err_state($ERR_NO_TEXT);
	return undef;
    }

    $self->{currNamespace}=$namespace;
    $self->{currTitle}=$title;
    $self->{nofExpansions}=0;

    warn "TRANSCLUDING...\n" if $DEBUG;
    my $expanded_text=$self->_transclude($text,$expand_for_real);
    warn "DONE TRANSCLUDING\n" if $DEBUG;

    return $expanded_text;
}

#
#  expand_for_real:  do we try to expand the templates for real
#                    (messy and error-prone) or do we simply replace
#                    with a list of the parameter values?
#
sub _transclude
{
    my $self=shift;
    my $text=shift;
    my $expand_for_real=shift;

    $self->{higherLevelExpandedNames}={};
    while ($text=~/(([^\{])?\{\{([ %!\"\$\&\'\(\)\*,\-\.\/0-9:;=\?\@A-Z\\\^_\`a-z\~\x80-\xFF\n]*)(\|.*?)?\}\})/sgo)
    {
	$self->{thisLevelExpandedNames}={};
	# Safeguard against malevolent templates 
	if (length($text)>$self->{maxExpandedTextSize} || 
	    $self->{nofExpansions}>$self->{maxNofExpansions})
	{
	    warn "Excessive expansion stopped for \"$self->{currNamespace}:$self->{currTitle}\"" .
		". Length of the text to expand: " .
		length($text) . ", # of expansions: " . $self->{nofExpansions};
		return $text;
	}

	warn "BEFORE VARIABLE SUBSITUTION\n" if $DEBUG;
	# Variable substitution
	$text=~s/(\{\{([ %!\"\$\&\'\(\)\*,\-\.\/0-9:;=\?\@A-Z\\\^_\`a-z\~\x80-\xFF\n]*?)\}\})/$self->_substitute_variable($1,$2)/sgeo;
	warn "TEXT AFTER VARIABLE SUBSTITUTION:$text\n" if $DEBUG;
	
	# Template substitution
	$text=~s/(([^\{])?\{\{([ %!\"\$\&\'\(\)\*,\-\.\/0-9:;=\?\@A-Z\\\^_\`a-z\~\x80-\xFF\n]*)(\|[^\{]*?)?\}\})/$self->_substitute_template($1,$2,$3,$4,$expand_for_real)/sgeo;
	
	warn "TEXT AFTER TEMPLATE SUBSTITUTION:$text\n" if $DEBUG;
    
	for my $name (keys %{$self->{thisLevelExpandedNames}})
	{
	    $self->{higherLevelExpandedNames}->{$name}=1;
	}
    }

    return $text;

}

sub _substitute_variable
{
    my $self=shift;
    my $text=shift;
    my $var_name=shift;
    
    if ($var_name=~/^(localurle?|fullurl):(.*)$/isgo)
    {
	my ($var,$rest)=($1,$2);
	if ($rest)
	{
	    return $VarSubst{$var} . $rest;
	}
	else
	{
	    return $VarSubst{$var};
	}
    }
    elsif (exists($VarSubst{$var_name}))
    {
	return $VarSubst{$var_name};
    }
    elsif ($var_name=~/^(subst|int):/isgo)
    {
	$var_name=~s/^(subst|int)://isgo;
    }
    elsif ($var_name=~/^(FULL)?PAGENAMEE?$/)
    {
	return $self->{currTitle};
    }
    elsif ($var_name=~/^NAMESPACEE?$/)
    {
	return $self->{currNamespace};
    }
    elsif ($var_name=~/^(__NOTOC__|__FORCETOC__|__TOC__|__NOEDITSECTION__|__START__|CURRENT(MONTH|MONTHNAME|MONTHNAMEGEN|MONTHABBREV|DAY|DAYNAME|YEAR|TIME)|NUMBEROFARTICLES|NUMBEROFFILES|PAGENAMEE|NAMESPACE|__END__|thumbnail|thumb|right|left|none|cen...
    {
	return "$var_name";
    }
    else
    {
	return $text;
    }
}

sub _substitute_template
{
    my $self=shift;
    my $orig_text=shift;
    my $pre_context=shift;
    my $name=shift;
    my $params=shift;
    my $expand_for_real=shift;

    my $found=0;

    my $expanded_text;
    my %arg_assignments=(); 

    $name=$self->{parser}->normalize_title($name);

    warn "substitute_template():" if $DEBUG;
    warn "PRE:\"$pre_context\"\n" if $DEBUG;
    warn "NAME:\"$name\"\n" if $DEBUG;
    warn "PARAMS:\"$params\"\n" if $DEBUG;

    # Don't parse {{{}}} because that's only for template arguments
    if (defined($pre_context) && $pre_context eq '{') 
    {
	warn "{ PRE-CONTEXT\n" if $DEBUG;
	return $orig_text;
    }

    # Ok, now expand if it's a template
    
    # Do we know this template or don't we care anyway?
    if (($name && exists($self->{defs}{$name})) || !$expand_for_real) 
    {
	warn "TEMPLATE $name FOUND\n" if $DEBUG;

	$found=1;
	
	if (defined($pre_context))
	{
	    $expanded_text=$pre_context;
	}

	#
        # Not recommended atm .. the bloody syntax seems to keep
        # on changing with each new server alpha version
	#
	if ($expand_for_real)
	{
	    $expanded_text.=$self->{defs}{$name};
	    warn "TEXT AFTER ADDING EXPANSION:$expanded_text\n" if $DEBUG;
	    
	    if (defined($params))
	    {
		# Collect the parameter assignments 
		my @actual_args=$self->_get_template_call_args($params);
		my $index=1;
		for my $arg (@actual_args)
		{
		    my $eq_pos=index($arg,'=');
		    if ($eq_pos<0) 
		    {
			warn "Adding actual arg \'$index\', value \'$arg\'\n" if $DEBUG;
			$arg_assignments{$index++}=$arg;
		    } 
		    else 
		    {
			$name=substr($arg,0,$eq_pos);
			$name=~s/^\s+//;
			$name=~s/\s+$//;
			my $value=substr($arg,$eq_pos+1);
			$value=~s/^\s+//;
			$value=~s/\s+$//;
			
			warn "Adding actual arg \'$name\', value \'$value\'\n" if $DEBUG;
			$arg_assignments{$name}=$value;
		    }
		}
	    }
	    
	    # Keep track of expanded names
	    $self->{thisLevelExpandedNames}{$name}=1;

	    # Substitute actual parameter values 
	    while ($expanded_text=~/(\{\{\{([ %!\"\$\&\'\(\)\*,\-\.\/0-9:;=\?\@A-Z\\\^_\`a-z\~\x80-\xFF\n]*?)(\|[^\{]*?)?\}\}\})/sgo)
	    {
		$expanded_text=~s/(\{\{\{([ %!\"\$\&\'\(\)\*,\-\.\/0-9:;=\?\@A-Z\\\^_\`a-z\~\x80-\xFF\n]*?)(\|[^\{]*?)?\}\}\})/$self->_substitute_param_value($1,$2,$3,\%arg_assignments)/sgeo;
		warn "TEXT AFTER PARAMETER VALUE SUBSTITUTION:$expanded_text\n" if $DEBUG;
	    }
	    
	    # If the template begins with a table or block-level
	    # element, it should be treated as beginning a new line.
	    if (defined($pre_context) && $pre_context!~/\n/ && $expanded_text=~/^(\{\||:|;|\#|\*)/) 
	    {
		warn "ADDING NEWLINE PRE-CONTEXT\n" if $DEBUG;
		$expanded_text="\n" . $expanded_text;
	    }
	    # remove comments
	    $expanded_text=~s/<!--.*?-->//isgo;   

	}
	else  # play it safe -- shouldn't matter much for search engine
              # purposes
	{
	    if (defined($params))
	    {
		# Collect the parameter assignments 
		my @actual_args=$self->_get_template_call_args($params);
		my $index=1;
		for my $arg (@actual_args)
		{
		    my $eq_pos=index($arg,'=');
		    if ($eq_pos<0) 
		    {
			warn "Adding actual arg \'$index\', value \'$arg\'\n" if $DEBUG;
			$arg_assignments{$index++}=$arg;
		    } 
		    else 
		    {
			$name=substr($arg,0,$eq_pos);
			$name=~s/^\s+//;
			$name=~s/\s+$//;
			my $value=substr($arg,$eq_pos+1);
			$value=~s/^\s+//;
			$value=~s/\s+$//;
			
			warn "Adding actual arg \'$name\', value \'$value\'\n" if $DEBUG;
			$arg_assignments{$name}=$value;
		    }
		}
	    }
	    #
	    # Simply insert the parameter values as a list
	    #
	    $expanded_text.="\n";
	    for my $p (keys %arg_assignments)
	    {
		$expanded_text.="*$arg_assignments{$p}\n";
	    }
	    # If the template begins with a table or block-level
	    # element, it should be treated as beginning a new line.
	    if (defined($pre_context) && $pre_context!~/\n/ && $expanded_text=~/^(\{\||:|;|\#|\*)/) 
	    {
		warn "ADDING NEWLINE PRE-CONTEXT\n" if $DEBUG;
		$expanded_text="\n" . $expanded_text;
	    }
	    # remove comments
	    $expanded_text=~s/<!--.*?-->//isgo if defined($expanded_text);   
	    $expanded_text.="\n";
	    $expanded_text.="----\n"; # to cause a logical section break 

	    return $expanded_text;
	}
    }
    
    if (!$found) 
    {
	warn "AT END. NOT FOUND\n" if $DEBUG;
	#
	# Have to safeguard against retrying this
	#
	return $pre_context . "UNKNOWN_TEMPLATE_$name" if $DEBUG;
    } 
    else 
    {
	$self->{nofExpansions}++;

	warn "AT END. FOUND.\n" if $DEBUG;
	return $expanded_text;
    }
}

#
# Triple brace replacement -- used for template arguments
#
sub _substitute_param_value
{
    my $self=shift;
    my $orig_text=shift;
    my $param_name=shift;
    my $default=shift;
    my $arg_assignments=shift;

    $param_name=~s/^\s+//go;
    $param_name=~s/\s+$//go;
    if (!defined($default))
    {
	$default="";
    }
    else
    {
	$default=substr($default,1); # lose the |
    }

    warn "PARAM VALUE SUBST ENTRY.\n" if $DEBUG;
    warn "ORIG TEXT:$orig_text\n" if $DEBUG;
    warn "PARAM:$param_name\n" if $DEBUG;
    warn "DEFAULT:$default\n" if $DEBUG;
    warn "ARG ASSIGNMENTS:\n" if $DEBUG;
    for my $name (keys(%$arg_assignments))
    {
	warn "\t$name -> $arg_assignments->{$name}\n" if $DEBUG;
    }

    my $subst;
    
    if (exists($arg_assignments->{$param_name})) 
    {
	warn "PARAM HAS BEEN ASSIGNED TO\n" if $DEBUG;
	$subst=$arg_assignments->{$param_name};
    }
    else
    {
	warn "USING DEFAULT\n" if $DEBUG;
	$subst=$default;
    }
    warn "SUBSTITUTE:\"$orig_text\"->\"$subst\"\n" if $DEBUG;
    
    return $subst;
}

#
# Get the actual call argument values. Watch out for bl***y piped links.
#
sub _get_template_call_args
{
    my $self=shift;
    my $args_str=shift;



( run in 0.432 second using v1.01-cache-2.11-cpan-5b529ec07f3 )