App-Basis-ConvertText2
view release on metacpan or search on metacpan
lib/App/Basis/ConvertText2.pm view on Meta::CPAN
debug( "Pandoc: " . $resp->{stderr} ) if ( $resp->{stderr} );
if ( !$resp->{exit_code} ) {
$status = 0;
}
else {
debug( "ERROR", "Could not parse with pandoc" );
$status = 1;
}
return $status;
}
# ----------------------------------------------------------------------------
# _convert_file
# convert the HTML file to a different format
# parameters
#   file         - file to re-convert
#   format       - format to convert to, also used as the output file extension
#   pdfconvertor - use prince/wkhtmltopdf rather than pandoc to convert to PDF
# returns
#   the name of the converted file, or undef if the conversion failed
sub _convert_file {
    my $self = shift;
    my ( $file, $format, $pdfconvertor ) = @_;

    # we work on the basis that pandoc should be in your PATH

    # wrap a value in single quotes for the shell, escaping any embedded single
    # quote, so that document metadata such as "Kevin's report" cannot break
    # (or inject into) the command line
    my $quote = sub {
        my ($value) = @_;
        $value =~ s/'/'\\''/g;
        return "'$value'";
    };

    # PDF requests always get a lowercase .pdf extension; any other format is
    # used as the extension itself, so the output file name matches the format
    # that was asked for rather than always being .pdf
    my $ext = ( $format =~ /pdf/i ) ? 'pdf' : $format;
    $ext =~ s/^\.+//;    # tolerate callers passing '.pdf'

    my $outfile = $file;
    if ( !( $outfile =~ s/\.\w+$/.$ext/ ) ) {

        # nothing to swap, append the extension so the input is not overwritten
        $outfile .= ".$ext";
    }

    my $exit;

    # we can use prince to do PDF conversion, its faster and better, but not free for commercial use
    # you would have to ignore the P symbol on the resultant document
    if ( $format =~ /pdf/i && $pdfconvertor ) {
        my $cmd;
        my $meta = $self->{replace} || {};

        if ( $pdfconvertor =~ /^prince/i ) {
            $cmd = PRINCE . " ";
            $cmd .= "--pdf-title=" . $quote->( $meta->{TITLE} ) . " "       if ( $meta->{TITLE} );
            $cmd .= "--pdf-subject=" . $quote->( $meta->{SUBJECT} ) . " "   if ( $meta->{SUBJECT} );
            $cmd .= "--pdf-creator=" . $quote->( $meta->{AUTHOR} ) . " "    if ( $meta->{AUTHOR} );
            $cmd .= "--pdf-keywords=" . $quote->( $meta->{KEYWORDS} ) . " " if ( $meta->{KEYWORDS} );
            $cmd .= " --media=print " . $quote->($file) . " -o " . $quote->($outfile);
        }
        elsif ( $pdfconvertor =~ /^wkhtmltopdf/i ) {
            $cmd = WKHTML . " -q --print-media-type ";
            $cmd .= "--title " . $quote->( $meta->{TITLE} ) . " " if ( $meta->{TITLE} );

            # do we want to specify the size
            $cmd .= "--page-size " . $quote->( $meta->{PAGE_SIZE} ) . " " if ( $meta->{PAGE_SIZE} );
            $cmd .= $quote->($file) . " " . $quote->($outfile);
        }
        else {
            warn "Unknown PDF converter ($pdfconvertor), using pandoc";

            # otherwise lets use pandoc to create the file in the other formats
            $exit = _pandoc_format( $file, $outfile );
        }

        # $cmd is only set when one of the external PDF converters was chosen
        if ($cmd) {
            my ( $out, $err );
            try {
                ( $exit, $out, $err ) = run_cmd($cmd);
            }
            catch {
                $err  = "run_cmd($cmd) died - $_";
                $exit = 1;
            };

            # report anything the converter wrote as an error, or why it died
            debug( "ERROR", $err ) if ($err);
        }
    }
    else {
        # otherwise lets use pandoc to create the file in the other formats
        $exit = _pandoc_format( $file, $outfile );
    }

    # if we failed to convert, then clear the filename
    return $exit == 0 ? $outfile : undef;
}
# ----------------------------------------------------------------------------
=item parse
Parse the markup into HTML and return it; the HTML is also stored internally.
B<Parameter>
markdown text
=cut
sub parse {
my $self = shift;
my ($data) = @_;
die "Nothing to parse" if ( !$data );
my $id = md5_hex( encode_utf8($data) );
# my $id = md5_hex( $data );
$self->_set_md5id($id);
$self->_set_input($data);
my $cachefile = cachefile( $self->cache_dir, "$id.html" );
if ( -f $cachefile ) {
my $cache = path($cachefile)->slurp_utf8;
$self->{output} = $cache; # put cached item into output
}
else {
$self->{output} = ""; # blank the output
my @lines = split( /\n/, $data );
# process top 20 lines for keywords
# maybe replace this with some YAML processor?
for ( my $i = 0; $i < 20; $i++ ) {
## if there is no keyword separator then we must have done the keywords
last if ( $lines[$i] !~ /:/ );
# allow keywords to be :keyword or keyword:
my ( $k, $v ) = ( $lines[$i] =~ /^:?(\w+):?\s+(.*?)\s?$/ );
lib/App/Basis/ConvertText2.pm view on Meta::CPAN
=cut
# save_to_file
# convert the parsed HTML into the format implied by the extension of
# $filename and copy the result there
# parameters
#   filename     - destination file, its extension selects the output format
#   pdfconvertor - optional, passed through to _convert_file for PDF output
# returns
#   the (true) result of the Path::Tiny copy on success, 0 otherwise
# dies if parse has not been run yet
sub save_to_file {
    state $counter = 0;
    my $self = shift;
    my ( $filename, $pdfconvertor ) = @_;

    my ($format) = ( $filename =~ /\.(\w+)$/ );    # get last thing after a '.'
    if ( !$format ) {
        warn "Could not determine outpout file format, using PDF";

        # bare extension, the '.' is added wherever the format is used below
        $format = 'pdf';
    }

    # have we got the parsed data, check before the md5 id is needed
    if ( !$self->{output} ) {
        die "parse has not been run yet";
    }

    my $f  = $self->_md5id() . ".html";
    my $cf = cachefile( $self->cache_dir, $f );

    if ( !-f $cf ) {
        if ( !$self->use_cache() ) {

            # create a file name to store the output to, it keeps the .html
            # suffix so that the output name derived below differs from it
            $cf = "/tmp/" . get_program() . "$$." . $counter++ . ".html";
        }

        # either update the cache, or create temp file
        # spew_utf8 does the encoding itself, encoding first would double encode
        path($cf)->spew_utf8( $self->{output} );
    }

    my $outfile = $cf;
    $outfile =~ s/\.html$/.$format/i;

    # if the marked-up file is more recent than the converted one
    # then we need to convert it again
    if ( $format !~ /html/i ) {

        # as we can generate PDF using a number of convertors we should
        # always regenerate PDF output incase the convertor used is different
        if ( !-f $outfile || $format =~ /pdf/i || ( ( stat($cf) )[9] > ( stat($outfile) )[9] ) ) {
            $outfile = $self->_convert_file( $cf, $format, $pdfconvertor );

            # if we failed to convert, then clear the filename
            if ( !$outfile || !-f $outfile ) {
                $outfile = undef;
                debug( "ERROR", "failed to create output file from cached file $cf" );
            }
        }
    }

    my $status = 0;

    # now lets copy it to its final resting place
    if ($outfile) {
        try {
            $status = path($outfile)->copy($filename);
        }
        catch {
            say STDERR "$_ ";
            debug( "ERROR", "failed to copy $outfile to $filename" );
        };
    }

    return $status;
}
=back
=cut
# ----------------------------------------------------------------------------
1;
__END__
( run in 0.690 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )