Alvis-Convert

 view release on metacpan or  search on metacpan

bin/html2alvis  view on Meta::CPAN

	{
	    $html_txt=$C->read_HTML($html_entries{$base_name}{htmlF},
                                    $meta_txt);
	    if (!defined($html_txt))
	    {
		warn "Reading the HTML for basename \"$base_name\" failed. " .
		    $C->errmsg() if 
		    $Warnings;
		$C->clearerr();
		next;
	    }
	}
	else
	{
	     warn "No HTML file for basename \"$base_name\"." if 
		$Warnings;
	     next;
	}

	$alvisXML=$C->HTML($html_txt,$meta_txt);
	if (!defined($alvisXML))
	{
	    warn "Obtaining the Alvis version of the " .
		"\"$base_name\"'s HTML file failed. " . $C->errmsg() if 
		$Warnings;
	    $C->clearerr();
	    next;
	}

	if (!$C->output_Alvis([$alvisXML],$base_name))
	{
	    warn "Outputting the Alvis records for base name \"$base_name\" failed. " . $C->errmsg() if 
		$Warnings;
	    $C->clearerr();
	    next;
	}
    }

    return 1;
}

__END__

=head1 NAME
    
    html2alvis - HTML to Alvis XML converter
    
=head1 SYNOPSIS
    
    html2alvis [options] [source directory ...]

  Options:

    --html-ext                 HTML file identifying filename extension
    --meta-ext                 meta file identifying filename extension
    --out-dir                  output directory
    --N-per-out-dir            # of records per output directory
    --meta-encoding            the encoding of the meta files
    --html-encoding            the encoding of all HTML files
    --html-encoding-from-meta  take the encoding of the HTML files from
                               the meta files (attribute 'detected-charset')
    --[no]original             include original document?
    --help                     brief help message
    --man                      full documentation
    --[no]warnings             warnings output flag
    
=head1 OPTIONS
    
=over 8

=item B<--html-ext>

    Sets the HTML file identifying filename extension. 
    Default value: 'html'.

=item B<--meta-ext>

    Sets the meta file identifying filename extension.
    The meta file syntax is

          <feature name>\t<feature value>\n

    Special features are url, title, date, detectedCharSet.
    Default value: 'meta'.

=item B<--out-dir>

    Sets the output directory. Default value: '.'.

=item B<--N-per-out-dir>

    Sets the number of records per output directory. Default value: 1000.

=item B<--meta-encoding>

    Specifies the encoding of all meta files. Default value: 'iso-8859-1'.

=item B<--html-encoding>

    Specifies the encoding of all HTML files.
    Default: undef (meaning 'guess').

=item B<--html-encoding-from-meta>

    Specifies whether the encoding of an HTML file should be read from
    the corresponding meta file. If no information is given there,
    --html-encoding is used; if that is not given either, the encoding
    is guessed. Default: no.

=item B<--[no]original>

    Shall the original document be included in the output? Default
    value: yes.

=item B<--help>

    Prints a brief help message and exits.

=item B<--man>

    Prints the manual page and exits.



( run in 1.124 second using v1.01-cache-2.11-cpan-39bf76dae61 )