EBook-Tools
view release on metacpan or search on metacpan
lib/EBook/Tools.pm view on Meta::CPAN
# It is very unlikely that split_metadata will be called twice
# from the same program, so undef all capture variables reclaim
# the memory. Just going out of scope will not necessarily do
# this.
undef(@metablocks);
undef(@guideblocks);
undef($_);
if(-z $htmlfile) {
debug(1,"split_metadata(): HTML has zero size.",
" Not replacing original.");
unlink($htmlfile);
}
else {
rename($htmlfile,$metahtmlfile)
or croak("split_metadata(): Failed to rename ",$htmlfile,
" to ",$metahtmlfile,"!\n");
}
if(-z $metafile) {
croak($subname,
"(): unable to remove empty output file '",$metafile,"'!")
if(! unlink($metafile) );
return;
}
return $metafile;
}
=head2 C<split_pre($htmlfile,$outfilebase)>
Splits <pre>...</pre> blocks out of a source HTML file into their own
separate HTML files including required headers. Each block will be
written to its own file following the naming format
C<$outfilebase-###.html>, where ### is a three-digit number beginning
at 001 and incrementing for each block found. If C<$outfilebase> is
not specified, it defaults to the basename of C<$htmlfile> with
"-pre-###.html" appended. The
Returns a list containing all filenames created.
=cut
sub split_pre {
my ($htmlfile,$outfilebase) = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
croak($subname,"(): no input file specified")
if(!$htmlfile);
my ($filebase,$filedir,$fileext);
my ($fh_html,$fh_htmlout,$fh_pre);
my $htmloutfile;
my @preblocks;
my @prefiles = ();
my $prefile;
my $count = 0;
my $htmlheader = <<'END';
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title></title>
</head>
<body>
END
($filebase,$filedir,$fileext) = fileparse($htmlfile,'\.\w+$');
$outfilebase = "$filebase-pre" if(!$outfilebase);
$htmloutfile = "$filebase-nopre.html";
open($fh_html,"<:raw",$htmlfile)
or croak($subname,"(): Failed to open '",$htmlfile,"' for reading!");
open($fh_htmlout,">:raw",$htmloutfile)
or croak($subname,"(): Failed to open '",$htmloutfile,"' for writing!");
local $/;
while(<$fh_html>)
{
(@preblocks) = /(<pre>.*?<\/pre>)/gisx;
last unless(@preblocks);
foreach my $pre (@preblocks)
{
$count++;
debug(1,"DEBUG: split_pre() splitting block ",
sprintf("%03d",$count));
$prefile = sprintf("%s-%03d.html",$outfilebase,$count);
if(-f $prefile)
{
rename($prefile,"$prefile.backup")
or croak("Unable to rename '",$prefile,
"' to '",$prefile,".backup'");
}
open($fh_pre,">:raw",$prefile)
or croak("Unable to open '",$prefile,"' for writing!");
print {*$fh_pre} $utf8xmldec;
print {*$fh_pre} $htmlheader,"\n";
print {*$fh_pre} $pre,"\n";
print {*$fh_pre} "</body>\n</html>\n";
close($fh_pre) or croak("Unable to close '",$prefile,"'!");
push @prefiles,$prefile;
}
s/(<pre>.*?<\/pre>)//gisx;
print {*$fh_htmlout} $_,"\n";
close($fh_htmlout)
or croak($subname,"(): Failed to close '",$htmloutfile,"'!");
rename($htmloutfile,$htmlfile)
or croak($subname,"(): Failed to rename '",$htmloutfile,"' to '",
$htmlfile,"'!");
}
return @prefiles;
}
=head2 C<strip_script(%args)>
Strips any <script>...</script> blocks out of a HTML file.
lib/EBook/Tools.pm view on Meta::CPAN
return 1;
}
=head2 C<system_result($caller,$retval,@syscmd)>
Checks the result of a system call and croak on failure with an
appropriate message. For this to work, it MUST be used as the line
immediately following the system command.
=head3 Arguments
=over
=item $caller
The calling function (used in output message)
=item $retval
The return value of the system command
=item @syscmd
The array passed to the system call
=back
=head3 Return Values
Returns 0 on success
Croaks on failure.
=cut
sub system_result {
my ($caller,$retval,@syscmd) = @_;
if ( ($CHILD_ERROR >> 8) == 0 ) {
return 0;
}
elsif ($CHILD_ERROR == -1) {
croak($caller," child failed to execute (ERRNO=",$ERRNO,"):\n ",
join(' ',@syscmd),"\n")
}
elsif ($CHILD_ERROR & 127) {
my $withcoredump = ($CHILD_ERROR & 128) ? 'with' : 'without';
croak($caller," child died with signal ",($CHILD_ERROR & 127)," ",
$withcoredump," coredump:\n ",join(' ',@syscmd),"\n");
}
else {
croak($caller," child exited with value ",$CHILD_ERROR >> 8,":\n ",
join(' ',@syscmd),"\n")
}
}
=head2 C<system_tidy_xhtml($infile,$outfile)>
Runs tidy on a XHTML file semi-safely (using a secondary file)
Converts HTML to XHTML if necessary
=head3 Arguments
=over
=item $infile
The filename to tidy
=item $outfile
The filename to use for tidy output if the safety condition to
overwrite the input file isn't met.
Defaults to C<infile-tidy.ext> if not specified.
=back
=head3 Global variables used
=over
=item $tidycmd
the location of the tidy executable
=item $tidyxhtmlerrors
the filename to use to output errors
=item $tidysafety
the safety factor to use (see CONFIGURABLE GLOBAL VARIABLES, above)
=back
=head3 Return Values
Returns the return value from tidy
=over
=item 0 - no errors
=item 1 - warnings only
=item 2 - errors
=item Dies horribly if the return value is unexpected
=back
=cut
sub system_tidy_xhtml {
my ($infile,$outfile) = @_;
my $retval;
croak("system_tidy_xhtml called with no input file") if(!$infile);
if(!$outfile)
( run in 0.604 second using v1.01-cache-2.11-cpan-2398b32b56e )