App-Basis-ConvertText2

 view release on metacpan or  search on metacpan

lib/App/Basis/ConvertText2.pm  view on Meta::CPAN

}

# ----------------------------------------------------------------------------
# rewrite the headers so that they are nice for the TOC
#
# Prefixes a heading with its section number and wraps the heading text in a
# named anchor.
#  parameters
#     head - opening heading tag, eg "<h2>"; the level is the digit after "<h"
#     txt  - text of the heading
#     tail - closing heading tag, eg "</h2>"
#  returns
#     "$head<a name='NUMBER_REF'>NUMBER $txt</a>$tail" where NUMBER is "1",
#     "1.2" or "1.2.3" for h2, h3 and h4, and REF is the lower-cased heading
#     text with whitespace turned into '_' and quotes/brackets removed.
#     Any other heading level gets an empty NUMBER.
#
# The counters are 'state' variables: numbering carries on across calls and is
# never reset by this sub.
sub _rewrite_hdrs {
    state $counters = { 2 => 0, 3 => 0, 4 => 0 };
    my ( $head, $txt, $tail ) = @_;

    # no digit after "<h" is treated as level 0, ie an un-numbered heading
    my ($lvl) = ( $head =~ /<h(\d)/i );
    $lvl //= 0;

    my $pre = '';
    if ( $lvl >= 2 && $lvl <= 4 ) {

        # a new heading restarts the numbering of every level below it
        $counters->{$_} = 0 for ( $lvl + 1 .. 4 );
        $counters->{$lvl}++;

        # h2 => "2", h3 => "2.3", h4 => "2.3.4"
        $pre = join( '.', map { $counters->{$_} } 2 .. $lvl );
    }

    # build the anchor reference from the heading text
    my $ref = $txt;
    $ref =~ s/\s/_/g;

    # remove things we don't like from the reference
    $ref =~ s/['"()\[\]<>]//g;

    return "$head<a name='$pre" . "_" . lc($ref) . "'>$pre $txt</a>$tail";
}

# ----------------------------------------------------------------------------
# use pandoc to parse markdown into nice HTML
# pandoc has extra features over and above markdown, eg syntax highlighting
# and tables
# pandoc must be in user path
#  parameters
#     input - text handed to pandoc as the child's stdin
#  returns the command's stdout when its exit_code is false, otherwise the
#  result of markdown() on the same input
# NOTE(review): -S, -R and --normalize look like options that were dropped in
# pandoc 2.0 - confirm which pandoc versions need to be supported

sub _pandoc_html {
    my ($input) = @_;

    my $pandoc_cmd = PANDOC . " --email-obfuscation=none -S -R --normalize -t html5 --highlight-style='kate'";
    my $resp       = execute_cmd(
        command     => $pandoc_cmd,
        timeout     => 30,
        child_stdin => $input,
    );

    # pass on anything pandoc wrote to stderr, whether or not it succeeded
    if ( $resp->{stderr} ) {
        debug( "Pandoc: " . $resp->{stderr} );
    }

    # a false exit_code means pandoc did the job
    unless ( $resp->{exit_code} ) {
        my $pandoc_html = $resp->{stdout};
        return $pandoc_html;
    }

    # pandoc failed, report it and fall back to markdown()
    debug( "ERROR", "Could not parse with pandoc, using markdown" );
    warn "Could not parse with pandoc, using markdown";
    my $fallback_html = markdown($input);

    return $fallback_html;
}

# ----------------------------------------------------------------------------
# use pandoc to convert a file into another format
# pandoc must be in user path
#  parameters
#     input   - name of the file pandoc should read
#     output  - name of the file pandoc should write (its -o argument)
#  returns 0 when the command's exit_code is false, otherwise 1

sub _pandoc_format {
    my ( $input, $output ) = @_;

    # single-quote both file names so that spaces or shell metacharacters in a
    # path cannot split or alter the command; an embedded ' becomes '\''
    # (assumes execute_cmd passes the command string through a shell, as the
    # single-quoted options used elsewhere in this file suggest - TODO confirm)
    my ( $in_arg, $out_arg ) = map {
        my $path = $_ // '';
        $path =~ s/'/'\\''/g;
        "'$path'";
    } ( $input, $output );

    my $resp = execute_cmd(
        command => PANDOC . " $in_arg -o $out_arg",
        timeout => 30,
    );

    # pass on anything pandoc wrote to stderr, whether or not it succeeded
    debug( "Pandoc: " . $resp->{stderr} ) if ( $resp->{stderr} );

    return 0 if ( !$resp->{exit_code} );

    debug( "ERROR", "Could not parse with pandoc" );
    return 1;
}

# ----------------------------------------------------------------------------
# convert_file
# convert the file to a different format from HTML
#  parameters
#     file    - file to re-convert
#     format  - format to convert to
#     pdfconvertor  - use prince/wkhtmltopdf rather than pandoc to convert to PDF

sub _convert_file {
    my $self = shift ;
    my ( $file, $format, $pdfconvertor ) = @_;

    # we work on the basis that pandoc should be in your PATH
    my $fmt_str = $format;
    my ( $outfile, $exit );

    $outfile = $file;
    $outfile =~ s/\.(\w+)$/.pdf/;

    # we can use prince to do PDF conversion, its faster and better, but not free for commercial use
    # you would have to ignore the P symbol on the resultant document
    if ( $format =~ /pdf/i && $pdfconvertor ) {
        my $cmd;

        if ( $pdfconvertor =~ /^prince/i ) {
            $cmd = PRINCE . " " ;
            $cmd.= "--pdf-title='$self->{replace}->{TITLE}' " if ($self->{replace}->{TITLE}) ;
            $cmd.= "--pdf-subject='$self->{replace}->{SUBJECT}' " if ($self->{replace}->{SUBJECT}) ;
            $cmd.= "--pdf-creator='$self->{replace}->{AUTHOR}' " if ($self->{replace}->{AUTHOR}) ;
            $cmd.= "--pdf-keywords='$self->{replace}->{KEYWORDS}' " if ($self->{replace}->{KEYWORDS}) ;
            $cmd .= " --media=print $file -o $outfile";
        }
        elsif ( $pdfconvertor =~ /^wkhtmltopdf/i ) {
            $cmd = WKHTML . " -q --print-media-type " ;
            $cmd.= "--title '$self->{replace}->{TITLE}' " if ($self->{replace}->{TITLE}) ;
            # do we want to specify the size
            $cmd .= "--page-size $self->{replace}->{PAGE_SIZE} " if( $self->{replace}->{PAGE_SIZE}) ;
            $cmd .= "$file $outfile";
        }
        else {
            warn "Unknown PDF converter ($pdfconvertor), using pandoc";

            # otherwise lets use pandoc to create the file in the other formats
            $exit = _pandoc_format( $file, $outfile );
        }
        if ($cmd) {
            my ( $out, $err );
            try {
                # say "$cmd" ;



( run in 1.113 second using v1.01-cache-2.11-cpan-39bf76dae61 )