Alvis-Convert
view release on metacpan or search on metacpan
lib/Alvis/Convert.pm view on Meta::CPAN
else
{
if (!defined($meta->get('baseURL')))
{
my $base_URL=$meta->get('url');
$base_URL=~s/\/[^\/]+?$/\//isgo;
$meta->set('baseURL',$base_URL);
}
}
my $alvisXML=
$self->{documentAssembler}->assemble({canDoc=>$can_doc,
meta=>$meta,
links=>$links,
origText=>$orig_txt});
if (!defined($alvisXML))
{
$self->_set_err_state($ERR_ASSEMBLE,
$self->{documentAssembler}->errmsg());
return undef;
}
push(@alvisXMLs,$alvisXML);
}
return \@alvisXMLs;
}
# Converts the records of an ainodump file.
#
# Opens $f with the ":raw" layer and hands the handle to the ainodump
# converter's process_dump(), together with the callback spec
# [\&_process_ainodump_doc,$self].
#
#   f    path of the ainodump file to read
#
# Returns 1 on success.  On failure the error state is set
# ($ERR_OPEN_AINODUMP when the file cannot be opened,
# $ERR_AINODUMP_PROCESS when process_dump() returns false) and 0 is
# returned.
#
sub ainodump
{
    my $self=shift;
    my $f=shift;

    # No meta needed -- one per record in the dump
    #
    # A lexical handle is used instead of a package-global bareword so that
    # separate calls (or separate objects) never share one handle.
    my $fh;
    if (!open($fh,"<:raw",$f))
    {
        $self->_set_err_state($ERR_OPEN_AINODUMP,
                              "File: \"$f\"");
        return 0;
    }

    # *{$fh} hands the converter a typeglob, the same kind of value the
    # previous bareword-based call passed.
    if (!$self->{ainodumpConverter}
              ->process_dump(*{$fh},
                             [\&_process_ainodump_doc,$self]))
    {
        $self->_set_err_state($ERR_AINODUMP_PROCESS,
                              "File: \"$f\"");
        # Release the handle on the failure path too; it used to stay open.
        close($fh);
        return 0;
    }

    close($fh);

    return 1;
}
#
# output_cb: [\&_output_wikipedia_article,$arg1,$arg2,...]
# will be called like this:
# _output_wikipedia_article($arg1,$arg2,...,
# $title,$output_format,
# $record_txt,$is_redir)
#
# where $output_format is a global defined in Alvis::Wikipedia::XMLDump
# as $OUTPUT_*
#
#
# progress_cb: [\&_wikipedia_progress,$arg1,$arg2,...] OPTIONAL
# will be called like this:
# _wikipedia_progress($arg1,$arg2,...,
# $prog_txt,$N,$n,$mess)
#
# where $N is the total number of records processed and $n the number of hits
#
# opts: a hash of options with these possible fields:
#
# namespaces ref to a list of namespace identifiers whose
# records to extract
# expandTemplates flag for true template expansion
# templateDumpF template dump file
# outputFormat format for result records
# ($Alvis::Wikipedia::XMLDump::OUTPUT_*)
# categoryWord category namespace identifier (changes with
# language)
# templateWord template namespace identifier (changes with
# language)
# rootCategory root category identifier (changes with
# language)
# date the date of the dump
# dumpCatGraph flag for dumping the category graph
# catGraphDumpF category graph dump file
#
sub wikipedia
{
    # Extracts records from the Wikipedia dump file $f by delegating to the
    # wikipedia converter's extract_records().
    #
    #   f            path of the dump file; opened with the ":utf8" layer
    #   output_cb    output callback spec, passed through unchanged
    #   opts         options hash, passed through unchanged
    #   progress_cb  optional progress callback spec, passed through unchanged
    #
    # Returns 1 on success.  On failure the error state is set
    # ($ERR_OPEN_WIKIPEDIA when the file cannot be opened,
    # $ERR_WIKIPEDIA_CONV when extract_records() returns false) and 0 is
    # returned.
    my $self=shift;
    my $f=shift;
    my $output_cb=shift;
    my $opts=shift;
    my $progress_cb=shift;

    # A lexical handle is used instead of a package-global bareword so that
    # separate calls (or separate objects) never share one handle.  It is
    # already a glob reference, which is what the converter was given before.
    my $fh;
    if (!open($fh,"<:utf8",$f))
    {
        $self->_set_err_state($ERR_OPEN_WIKIPEDIA,
                              "File: \"$f\"");
        return 0;
    }

    if (!$self->{wikipediaConverter}->extract_records($fh,
                                                      $output_cb,
                                                      $opts,
                                                      $progress_cb))
    {
        $self->_set_err_state($ERR_WIKIPEDIA_CONV,
                              "File: \"$f\"");
        # Release the handle on the failure path too; it used to stay open.
        close($fh);
        return 0;
    }

    close($fh);

    return 1;
}
sub set
{
my $self=shift;
( run in 0.679 second using v1.01-cache-2.11-cpan-39bf76dae61 )