EBook-Tools

 view release on metacpan or  search on metacpan

lib/EBook/Tools.pm  view on Meta::CPAN

    # It is very unlikely that split_metadata will be called twice
    # from the same program, so undef all capture variables reclaim
    # the memory.  Just going out of scope will not necessarily do
    # this.
    undef(@metablocks);
    undef(@guideblocks);
    undef($_);

    if(-z $htmlfile) {
        debug(1,"split_metadata(): HTML has zero size.",
             "  Not replacing original.");
        unlink($htmlfile);
    }
    else {
        rename($htmlfile,$metahtmlfile)
            or croak("split_metadata(): Failed to rename ",$htmlfile,
                     " to ",$metahtmlfile,"!\n");
    }

    if(-z $metafile) {
        croak($subname,
              "(): unable to remove empty output file '",$metafile,"'!")
            if(! unlink($metafile) );
        return;
    }
    return $metafile;
}


=head2 C<split_pre($htmlfile,$outfilebase)>

Splits <pre>...</pre> blocks out of a source HTML file into their own
separate HTML files including required headers.  Each block will be
written to its own file following the naming format
C<$outfilebase-###.html>, where ### is a three-digit number beginning
at 001 and incrementing for each block found.  If C<$outfilebase> is
not specified, it defaults to the basename of C<$htmlfile> with
"-pre-###.html" appended.  The

Returns a list containing all filenames created.

=cut

sub split_pre {
    my ($htmlfile,$outfilebase) = @_;
    my $subname = ( caller(0) )[3];
    debug(2,"DEBUG[",$subname,"]");

    croak($subname,"(): no input file specified")
        if(!$htmlfile);

    my ($filebase,$filedir,$fileext);
    my ($fh_html,$fh_htmlout,$fh_pre);
    my $htmloutfile;
    my @preblocks;
    my @prefiles = ();
    my $prefile;
    my $count = 0;

    my $htmlheader = <<'END';
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title></title>
</head>
<body>
END

    ($filebase,$filedir,$fileext) = fileparse($htmlfile,'\.\w+$');
    $outfilebase = "$filebase-pre" if(!$outfilebase);
    $htmloutfile = "$filebase-nopre.html";

    open($fh_html,"<:raw",$htmlfile)
	or croak($subname,"(): Failed to open '",$htmlfile,"' for reading!");
    open($fh_htmlout,">:raw",$htmloutfile)
        or croak($subname,"(): Failed to open '",$htmloutfile,"' for writing!");

    local $/;
    while(<$fh_html>)
    {
	(@preblocks) = /(<pre>.*?<\/pre>)/gisx;
	last unless(@preblocks);

        foreach my $pre (@preblocks)
        {
            $count++;
            debug(1,"DEBUG: split_pre() splitting block ",
                  sprintf("%03d",$count));
            $prefile = sprintf("%s-%03d.html",$outfilebase,$count);
            if(-f $prefile)
            {
                rename($prefile,"$prefile.backup")
                    or croak("Unable to rename '",$prefile,
                             "' to '",$prefile,".backup'");
            }
            open($fh_pre,">:raw",$prefile)
                or croak("Unable to open '",$prefile,"' for writing!");
            print {*$fh_pre} $utf8xmldec;
            print {*$fh_pre} $htmlheader,"\n";
            print {*$fh_pre} $pre,"\n";
            print {*$fh_pre} "</body>\n</html>\n";
            close($fh_pre) or croak("Unable to close '",$prefile,"'!");
            push @prefiles,$prefile;
        }
	s/(<pre>.*?<\/pre>)//gisx;
	print {*$fh_htmlout} $_,"\n";
        close($fh_htmlout)
            or croak($subname,"(): Failed to close '",$htmloutfile,"'!");
        rename($htmloutfile,$htmlfile)
            or croak($subname,"(): Failed to rename '",$htmloutfile,"' to '",
                     $htmlfile,"'!");
    }
    return @prefiles;
}


=head2 C<strip_script(%args)>

Strips any <script>...</script> blocks out of a HTML file.

lib/EBook/Tools.pm  view on Meta::CPAN

    return 1;
}


=head2 C<system_result($caller,$retval,@syscmd)>

Checks the result of a system call and croak on failure with an
appropriate message.  For this to work, it MUST be used as the line
immediately following the system command.

=head3 Arguments

=over

=item $caller

The calling function (used in output message)

=item $retval

The return value of the system command

=item @syscmd

The array passed to the system call

=back

=head3 Return Values

Returns 0 on success

Croaks on failure.

=cut

sub system_result {
    my ($caller,$retval,@syscmd) = @_;

    if ( ($CHILD_ERROR >> 8) == 0 ) {
        return 0;
    }
    elsif ($CHILD_ERROR == -1) {
        croak($caller," child failed to execute (ERRNO=",$ERRNO,"):\n ",
              join(' ',@syscmd),"\n")
    }
    elsif ($CHILD_ERROR & 127) {
        my $withcoredump = ($CHILD_ERROR & 128) ? 'with' : 'without';
        croak($caller," child died with signal ",($CHILD_ERROR & 127)," ",
              $withcoredump," coredump:\n ",join(' ',@syscmd),"\n");
    }
    else {
        croak($caller," child exited with value ",$CHILD_ERROR >> 8,":\n ",
              join(' ',@syscmd),"\n")
    }
}


=head2 C<system_tidy_xhtml($infile,$outfile)>

Runs tidy on a XHTML file semi-safely (using a secondary file)

Converts HTML to XHTML if necessary

=head3 Arguments

=over

=item $infile

The filename to tidy

=item $outfile

The filename to use for tidy output if the safety condition to
overwrite the input file isn't met.

Defaults to C<infile-tidy.ext> if not specified.

=back

=head3 Global variables used

=over

=item $tidycmd

the location of the tidy executable

=item $tidyxhtmlerrors

the filename to use to output errors

=item $tidysafety

the safety factor to use (see CONFIGURABLE GLOBAL VARIABLES, above)

=back

=head3 Return Values

Returns the return value from tidy

=over

=item 0 - no errors

=item 1 - warnings only

=item 2 - errors

=item Dies horribly if the return value is unexpected

=back

=cut

sub system_tidy_xhtml {
    my ($infile,$outfile) = @_;
    my $retval;

    croak("system_tidy_xhtml called with no input file") if(!$infile);
    if(!$outfile)



( run in 0.604 second using v1.01-cache-2.11-cpan-2398b32b56e )