Alvis-Convert
view release on metacpan or search on metacpan
bin/html2alvis view on Meta::CPAN
{
$html_txt=$C->read_HTML($html_entries{$base_name}{htmlF},
$meta_txt);
if (!defined($html_txt))
{
warn "Reading the HTML for basename \"$base_name\" failed. " .
$C->errmsg() if
$Warnings;
$C->clearerr();
next;
}
}
else
{
warn "No HTML file for basename \"$base_name\"." if
$Warnings;
next;
}
$alvisXML=$C->HTML($html_txt,$meta_txt);
if (!defined($alvisXML))
{
warn "Obtaining the Alvis version of the " .
"\"$base_name\"'s HTML file failed. " . $C->errmsg() if
$Warnings;
$C->clearerr();
next;
}
if (!$C->output_Alvis([$alvisXML],$base_name))
{
warn "Outputting the Alvis records for base name \"$base_name\" failed. " . $C->errmsg() if
$Warnings;
$C->clearerr();
next;
}
}
return 1;
}
__END__
=head1 NAME
html2alvis - HTML to Alvis XML converter
=head1 SYNOPSIS
html2alvis [options] [source directory ...]
Options:
--html-ext HTML file identifying filename extension
--meta-ext meta file identifying filename extension
--out-dir output directory
--N-per-out-dir # of records per output directory
--meta-encoding the encoding of the meta files
--html-encoding the encoding of all HTML files
--html-encoding-from-meta take the encoding of the HTML files from
the meta files (attribute 'detected-charset')
--[no]original include original document?
--help brief help message
--man full documentation
--[no]warnings warnings output flag
=head1 OPTIONS
=over 8
=item B<--html-ext>
Sets the HTML file identifying filename extension.
Default value: 'html'.
=item B<--meta-ext>
Sets the meta file identifying filename extension.
The meta file syntax is
<feature name>\t<feature value>\n
Special features are url, title, date, detectedCharSet.
Default value: 'meta'.
=item B<--out-dir>
Sets the output directory. Default value: '.'.
=item B<--N-per-out-dir>
Sets the # of records per output directory. Default value: 1000.
=item B<--meta-encoding>
Specifies the encoding of all meta files. Default value: 'iso-8859-1'.
=item B<--html-encoding>
Specifies the encoding of all HTML files.
Default: undef (meaning the encoding is guessed).
=item B<--html-encoding-from-meta>
Specifies whether the encoding of an HTML file should be read from
the corresponding meta file. If no information is given there,
--html-encoding is used; if that is not given either, the encoding is guessed.
Default: no.
=item B<--[no]original>
Shall the original document be included in the output? Default
value: yes.
=item B<--help>
Prints a brief help message and exits.
=item B<--man>
Prints the manual page and exits.
( run in 1.124 second using v1.01-cache-2.11-cpan-39bf76dae61 )