Alvis-Convert

 view release on metacpan or  search on metacpan

lib/Alvis/Convert.pm  view on Meta::CPAN

	else
	{
	    if (!defined($meta->get('baseURL')))
	    {
		my $base_URL=$meta->get('url');
		$base_URL=~s/\/[^\/]+?$/\//isgo;
		$meta->set('baseURL',$base_URL);
	    }
	}
	
	my $alvisXML=
	    $self->{documentAssembler}->assemble({canDoc=>$can_doc,
						  meta=>$meta,
						  links=>$links,
						  origText=>$orig_txt});
	if (!defined($alvisXML))
	{
	    $self->_set_err_state($ERR_ASSEMBLE,
				  $self->{documentAssembler}->errmsg());
	    return undef;
	}
	push(@alvisXMLs,$alvisXML);
    }

    return \@alvisXMLs;
}

#
# ainodump($f): runs the ainodump converter over the dump file $f.
#
#   $f     path of the dump file; opened read-only with the :raw layer
#
# The open handle is given to $self->{ainodumpConverter}->process_dump()
# together with the callback spec [\&_process_ainodump_doc,$self].
#
# Returns 1 on success. On failure the error state is set
# ($ERR_OPEN_AINODUMP if the file cannot be opened, $ERR_AINODUMP_PROCESS
# if process_dump() returns false) and 0 is returned.
#
sub ainodump
{
    my $self=shift;
    my $f=shift;   

    # No meta needed -- one per record in the dump
    #
    # A lexical handle is used instead of a package-global bareword so
    # that nested or repeated calls cannot clobber each other's handle.
    my $fh;
    if (!open($fh,"<:raw",$f))
    {
	$self->_set_err_state($ERR_OPEN_AINODUMP,
			      "File: \"$f\"");
	return 0;
    }

    # The converter is still handed a glob (not a reference), exactly as
    # when the bareword handle was passed.
    if (!$self->{ainodumpConverter}
	->process_dump(*{$fh},
		       [\&_process_ainodump_doc,$self]))
    {
	$self->_set_err_state($ERR_AINODUMP_PROCESS,
			      "File: \"$f\"");
	# Release the handle on the failure path too; it is closed only
	# after the error state has been recorded.
	close($fh);
	return 0;
    }
    close($fh);
 
    return 1;
}

#
# wikipedia($f,$output_cb,$opts,$progress_cb): extracts records from the
# Wikipedia XML dump file $f using $self->{wikipediaConverter}.
#
# f:     path of the dump file; opened read-only with the :utf8 layer
#
# output_cb: [\&_output_wikipedia_article,$arg1,$arg2,...]
#               will be called like this:
#          _output_wikipedia_article($arg1,$arg2,...,
#                                    $title,$output_format,
#                                    $record_txt,$is_redir)
#
#  where $output_format is a global defined in Alvis::Wikipedia::XMLDump
#  as $OUTPUT_*
#
#
# progress_cb: [\&_wikipedia_progress,$arg1,$arg2,...]     OPTIONAL
#               will be called like this:
#          _wikipedia_progress($arg1,$arg2,...,
#                              $prog_txt,$N,$n,$mess)
#
#   where $N is the total number of records processed and $n the number of hits
#
# opts:  a hash of options with these possible fields:
#
#     namespaces              ref to a list of namespace identifiers whose
#                             records to extract
#     expandTemplates         flag for true template expansion
#     templateDumpF           template dump file
#     outputFormat            format for result records 
#                             ($Alvis::Wikipedia::XMLDump::OUTPUT_*)
#     categoryWord            category namespace identifier (changes with
#                             language)
#     templateWord            template namespace identifier (changes with
#                             language)
#     rootCategory            root category identifier (changes with
#                             language)
#     date                    the date of the dump
#     dumpCatGraph            flag for dumping the category graph
#     catGraphDumpF           category graph dump file
#
# Returns 1 on success. On failure the error state is set
# ($ERR_OPEN_WIKIPEDIA if the file cannot be opened, $ERR_WIKIPEDIA_CONV
# if extract_records() returns false) and 0 is returned.
#
sub wikipedia
{
    my $self=shift;
    my $f=shift;   
    my $output_cb=shift;  
    my $opts=shift;
    my $progress_cb=shift;

    # A lexical handle is used instead of a package-global bareword so
    # that nested or repeated calls cannot clobber each other's handle.
    # It is a glob reference, the same kind of value the converter
    # received before.
    my $fh;
    if (!open($fh,"<:utf8",$f))
    {
	$self->_set_err_state($ERR_OPEN_WIKIPEDIA,
			      "File: \"$f\"");
	return 0;
    }
    if (!$self->{wikipediaConverter}->extract_records($fh,
						      $output_cb,
						      $opts,
						      $progress_cb))
    {
	$self->_set_err_state($ERR_WIKIPEDIA_CONV,
			      "File: \"$f\"");
	# Release the handle on the failure path too; it is closed only
	# after the error state has been recorded.
	close($fh);
	return 0;
    }

    close($fh);

    return 1;
}

sub set
{
    my $self=shift;



( run in 0.679 second using v1.01-cache-2.11-cpan-39bf76dae61 )