App-Basis-ConvertText2
view release on metacpan or search on metacpan
lib/App/Basis/ConvertText2.pm view on Meta::CPAN
}
# ----------------------------------------------------------------------------
# rewrite the headers so that they are nice for the TOC
sub _rewrite_hdrs {
    # Running section counters for <h2>, <h3> and <h4>. They persist across
    # calls, so successive headings are numbered 1, 1.1, 1.1.1, 1.2, 2, ...
    state $counters = { 2 => 0, 3 => 0, 4 => 0 };

    # parameters
    #   head - opening heading tag, eg "<h2>"
    #   txt  - text of the heading
    #   tail - closing heading tag, eg "</h2>"
    # returns the heading with its text wrapped in a named anchor, the anchor
    # name and the visible text both being prefixed with the section number
    my ( $head, $txt, $tail ) = @_;

    my ($lvl) = ( $head =~ /<h(\d)/i );

    # headings outside h2..h4 (or a tag we do not recognise) are not numbered;
    # they get an empty prefix rather than an undefined one, which keeps the
    # output the same but avoids "uninitialized value" warnings
    my $pre = '';
    if ( defined $lvl && exists $counters->{$lvl} ) {

        # a new heading at this level restarts the numbering of every deeper level
        $counters->{$_} = 0 for grep { $_ > $lvl } keys %{$counters};
        $counters->{$lvl}++;

        # section number is the counters from h2 down to this level, eg "2.1.3"
        $pre = join( '.', map { $counters->{$_} } 2 .. $lvl );
    }

    # build the anchor name from the heading text: whitespace becomes "_"
    my $ref = $txt;
    $ref =~ s/\s/_/gsm;

    # remove things we don't like from the reference
    $ref =~ s/[\s'"\(\)\[\]<>]//g;

    return "$head<a name='$pre" . "_" . lc($ref) . "'>$pre $txt</a>$tail";
}
# ----------------------------------------------------------------------------
# use pandoc to parse markdown into nice HTML
# pandoc has extra features over and above markdown, eg syntax highlighting
# and tables
# pandoc must be in user path
sub _pandoc_html {
    # parameters
    #   input - the markdown text, fed to pandoc on its stdin
    # returns the HTML written by pandoc, or the result of markdown() on the
    # same text when pandoc exits with a non-zero code
    my ($markdown_text) = @_;

    my $cmd = PANDOC . " --email-obfuscation=none -S -R --normalize -t html5 --highlight-style='kate'";
    my $result = execute_cmd(
        command     => $cmd,
        timeout     => 30,
        child_stdin => $markdown_text
    );

    # pass on anything pandoc reported, even when it succeeded
    if ( $result->{stderr} ) {
        debug( "Pandoc: " . $result->{stderr} );
    }

    my $failed = $result->{exit_code} ? 1 : 0;
    if ($failed) {
        debug( "ERROR", "Could not parse with pandoc, using markdown" );
        warn "Could not parse with pandoc, using markdown";
    }

    # fall back to the plain markdown converter when pandoc could not do the job
    my $html = $failed ? markdown($markdown_text) : $result->{stdout};
    return $html;
}
# ----------------------------------------------------------------------------
# use pandoc to convert HTML into another format
# pandoc must be in user path
sub _pandoc_format {
    # parameters
    #   input  - path of the file to convert
    #   output - path of the file pandoc should write
    # returns 0 when pandoc exits with a zero exit code, 1 otherwise
    my ( $input, $output ) = @_;

    # The command is handed over as one string; the quoted options used by the
    # other pandoc/prince commands in this module suggest it is parsed by a
    # shell (NOTE(review): confirm against execute_cmd). Wrap each path in
    # single quotes, escaping embedded quotes, so that spaces or shell
    # metacharacters in a file name cannot split or alter the command.
    my $quote = sub {
        my ($arg) = @_;
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    my $resp = execute_cmd(
        command => PANDOC . " " . $quote->($input) . " -o " . $quote->($output),
        timeout => 30,
    );

    # pass on anything pandoc reported, even when it succeeded
    debug( "Pandoc: " . $resp->{stderr} ) if ( $resp->{stderr} );

    return 0 if !$resp->{exit_code};

    debug( "ERROR", "Could not parse with pandoc" );
    return 1;
}
# ----------------------------------------------------------------------------
# convert_file
# convert the file to a different format from HTML
# parameters
# file - file to re-convert
# format - format to convert to
# pdfconvertor - use prince/wkhtmltopdf rather than pandoc to convert to PDF
sub _convert_file {
my $self = shift ;
my ( $file, $format, $pdfconvertor ) = @_;
# we work on the is that pandoc should be in your PATH
my $fmt_str = $format;
my ( $outfile, $exit );
$outfile = $file;
$outfile =~ s/\.(\w+)$/.pdf/;
# we can use prince to do PDF conversion, its faster and better, but not free for commercial use
# you would have to ignore the P symbol on the resultant document
if ( $format =~ /pdf/i && $pdfconvertor ) {
my $cmd;
if ( $pdfconvertor =~ /^prince/i ) {
$cmd = PRINCE . " " ;
$cmd.= "--pdf-title='$self->{replace}->{TITLE}' " if ($self->{replace}->{TITLE}) ;
$cmd.= "--pdf-subject='$self->{replace}->{SUBJECT}' " if ($self->{replace}->{SUBJECT}) ;
$cmd.= "--pdf-creator='$self->{replace}->{AUTHOR}' " if ($self->{replace}->{AUTHOR}) ;
$cmd.= "--pdf-keywords='$self->{replace}->{KEYWORDS}' " if ($self->{replace}->{KEYWORDS}) ;
$cmd .= " --media=print $file -o $outfile";
}
elsif ( $pdfconvertor =~ /^wkhtmltopdf/i ) {
$cmd = WKHTML . " -q --print-media-type " ;
$cmd.= "--title '$self->{replace}->{TITLE}' " if ($self->{replace}->{TITLE}) ;
# do we want to specify the size
$cmd .= "--page-size $self->{replace}->{PAGE_SIZE} " if( $self->{replace}->{PAGE_SIZE}) ;
$cmd .= "$file $outfile";
}
else {
warn "Unknown PDF converter ($pdfconvertor), using pandoc";
# otherwise lets use pandoc to create the file in the other formats
$exit = _pandoc_format( $file, $outfile );
}
if ($cmd) {
my ( $out, $err );
try {
# say "$cmd" ;
( run in 1.113 second using v1.01-cache-2.11-cpan-39bf76dae61 )