Alvis-Convert

 view release on metacpan or  search on metacpan

lib/Alvis/Wikipedia/XMLDump.pm  view on Meta::CPAN

	    }

	}
    }
    else
    {
	confess("Internal error: set_err_state() called with an " .
		"unrecognized argument ($errcode).")
    }
}

#
# Resets the error string kept in the object's errstr field to the
# empty string.
#
sub clearerr
{
    my ($self)=@_;

    # Kept as the final expression so the sub's implicit return value
    # stays the result of this assignment.
    $self->{errstr}="";
}

#
# Returns the current contents of the object's errstr field.
#
sub errmsg
{
    my ($self)=@_;

    return $self->{errstr};
}

##########################################################################
#
# Public methods
#
##########################################################################

#
# Constructor.
#
# May be invoked on the class name or on an existing instance (the
# class is then taken from the instance).  All remaining arguments are
# passed on unchanged to _init(), which stores them as object fields.
#
# After _init() the following helper objects are created and stored,
# in this order:
#
#     variables         Alvis::Wikipedia::Variables
#     parser            Alvis::Wikipedia::WikitextParser
#     canDocConverter   Alvis::Canonical
#     catGraph          Alvis::Wikipedia::CatGraph
#
# Because they are assigned after _init() has run, values passed by the
# caller under these four keys are overwritten.
#
# Returns the new object, or undef as soon as one of the helper
# constructors returns undef.
#
# NOTE(review): on failure the error code is recorded on $self, which
# is then discarded.  Unless _set_err_state() also records it somewhere
# else, the caller has no way of reading the reason -- confirm.
#
sub new
{
    my $proto=shift;

    # Accept both Class->new(...) and $obj->new(...).
    my $class=ref($proto)||$proto;
    my $self=bless({},$class);

    $self->_set_err_state($ERR_OK);

    $self->_init(@_);

    $self->{variables}=Alvis::Wikipedia::Variables->new();
    unless (defined($self->{variables}))
    {
        $self->_set_err_state($ERR_VAR);
        return undef;
    }

    $self->{parser}=Alvis::Wikipedia::WikitextParser->new();
    unless (defined($self->{parser}))
    {
        $self->_set_err_state($ERR_PARSER);
        return undef;
    }

    $self->{canDocConverter}=Alvis::Canonical->new(convertCharEnts=>1,
                                                   convertNumEnts=>1,
                                                   sourceEncoding=>'utf8');
    unless (defined($self->{canDocConverter}))
    {
        $self->_set_err_state($ERR_CAN_DOC_CONV);
        return undef;
    }

    $self->{catGraph}=Alvis::Wikipedia::CatGraph->new();
    unless (defined($self->{catGraph}))
    {
        $self->_set_err_state($ERR_CAT_GRAPH);
        return undef;
    }

    return $self;
}

#
# Sets the default values of the object's option fields and then
# overrides/extends them with the caller's arguments.
#
# Arguments (after the object itself): a flat list of key/value pairs.
# Every pair is copied into the object as is; the keys are not checked
# against the list of defaults set below.
#
sub _init
{
    my $self=shift;

    $self->{expandTemplates}=0;
    $self->{outputFormat}=$OUTPUT_HTML;
    $self->{skipRedirects}=0;
    $self->{categoryWord}='Category';
    $self->{templateWord}='Template';
    $self->{dumpCategoryData}=1;
    $self->{dumpTemplateData}=1;
    $self->{catGraphDumpF}='CatGraph.storable';
    $self->{templateDumpF}='Templates.storable';

    # A plain truth test on the array: defined(@array) is a fatal
    # compile-time error since Perl 5.22 (and was deprecated long
    # before that).
    if (@_)
    {
        my %args=@_;
        # Hash slice assignment; keys() and values() return their
        # elements in the same order for an unmodified hash.
        @$self{ keys %args }=values(%args);
    }
}

#
# opts: hash with fields
#
#     namespaces              ref to a list of namespace identifiers whose
#                             records to extract
#     expandTemplates         flag for true template expansion
#     templateDumpF           template dump file
#     outputFormat            format for result records ($OUTPUT_HTML,
#                             $OUTPUT_ALVIS),...
#     categoryWord            category namespace identifier (changes with
#                             language)
#     templateWord            template namespace identifier (changes with
#                             language)
#     rootCategory            root category identifier (changes with
#                             language)
#     date                    the date of the dump
#     dumpCatGraph            flag for dumping the category graph
#     catGraphDumpF           category graph dump file
#
sub extract_records
{
    my $self=shift;
    my $fd=shift;   # dump fd ref 



( run in 0.885 second using v1.01-cache-2.11-cpan-140bd7fdf52 )