App-Basis-ConvertText2

 view release on metacpan or  search on metacpan

lib/App/Basis/ConvertText2.pm  view on Meta::CPAN

    debug( "Pandoc: " . $resp->{stderr} ) if ( $resp->{stderr} );
    if ( !$resp->{exit_code} ) {
        $status = 0;
    }
    else {
        debug( "ERROR", "Could not parse with pandoc" );
        $status = 1;
    }

    return $status;
}

# ----------------------------------------------------------------------------
# _convert_file
# convert the HTML file to a different format
#  parameters
#     file    - file to re-convert
#     format  - format to convert to, also used as the extension of the
#               output file (a leading '.' is tolerated)
#     pdfconvertor  - use prince/wkhtmltopdf rather than pandoc to convert to PDF
#  returns
#     the name of the converted file, undef if the conversion failed

sub _convert_file {
    my $self = shift ;
    my ( $file, $format, $pdfconvertor ) = @_;

    # quote a value so that the shell sees it as exactly one word, the
    # metadata comes from the document and may well contain a ' character
    my $quote = sub {
        my ($value) = @_;
        $value =~ s/'/'\\''/g;
        return "'$value'";
    };

    # the output file sits next to the input one and differs only in its
    # extension, which has to follow the requested format; lowercased so that
    # PDF output keeps the .pdf name it always had
    ( my $ext = lc( $format // '' ) ) =~ s/^\.+//;
    $ext ||= 'pdf';

    my $outfile = $file;
    $outfile =~ s/\.(\w+)$/.$ext/;

    my ( $cmd, $exit );

    # we can use prince to do PDF conversion, its faster and better, but not free for commercial use
    # you would have to ignore the P symbol on the resultant document
    # NOTE(review): $file and $outfile are still passed to the shell unquoted,
    # as before, so paths containing spaces will not work
    if ( $format =~ /pdf/i && $pdfconvertor ) {
        if ( $pdfconvertor =~ /^prince/i ) {
            $cmd = PRINCE . " " ;
            for my $opt (
                [ '--pdf-title',    'TITLE' ],
                [ '--pdf-subject',  'SUBJECT' ],
                [ '--pdf-creator',  'AUTHOR' ],
                [ '--pdf-keywords', 'KEYWORDS' ]
                )
            {
                my ( $switch, $key ) = @{$opt};
                my $value = $self->{replace}->{$key};
                $cmd .= "$switch=" . $quote->($value) . " " if ($value);
            }
            $cmd .= " --media=print $file -o $outfile";
        }
        elsif ( $pdfconvertor =~ /^wkhtmltopdf/i ) {
            $cmd = WKHTML . " -q --print-media-type " ;
            my $title = $self->{replace}->{TITLE};
            $cmd .= "--title " . $quote->($title) . " " if ($title);

            # do we want to specify the size
            my $page_size = $self->{replace}->{PAGE_SIZE};
            $cmd .= "--page-size " . $quote->($page_size) . " " if ($page_size);
            $cmd .= "$file $outfile";
        }
        else {
            warn "Unknown PDF converter ($pdfconvertor), using pandoc";
        }
    }

    if ($cmd) {
        my ( $out, $err );
        try {
            ( $exit, $out, $err ) = run_cmd($cmd);
        }
        catch {
            $err  = "run_cmd($cmd) died - $_";
            $exit = 1;
        };

        # report whatever error text we got back, even if the command worked
        debug( "ERROR", $err ) if ($err);
    }
    else {
        # no dedicated converter requested (or we did not recognise it), so
        # we work on the basis that pandoc is in your PATH
        $exit = _pandoc_format( $file, $outfile );
    }

    # if we failed to convert, then clear the filename
    return $exit == 0 ? $outfile : undef;
}

# ----------------------------------------------------------------------------

=item parse

parse the markup into HTML and return it, HTML is also stored internally

B<Parameter>  
    markdown text

=cut

sub parse {
    my $self = shift;
    my ($data) = @_;

    die "Nothing to parse" if ( !$data );

    my $id = md5_hex( encode_utf8($data) );

    # my $id = md5_hex( $data );
    $self->_set_md5id($id);
    $self->_set_input($data);

    my $cachefile = cachefile( $self->cache_dir, "$id.html" );
    if ( -f $cachefile ) {
        my $cache = path($cachefile)->slurp_utf8;
        $self->{output} = $cache;    # put cached item into output
    }
    else {
        $self->{output} = "";        # blank the output

        my @lines = split( /\n/, $data );

        # process top 20 lines for keywords
        # maybe replace this with some YAML processor?
        for ( my $i = 0; $i < 20; $i++ ) {
            ## if there is no keyword separator then we must have done the keywords
            last if ( $lines[$i] !~ /:/ );

            # allow keywords to be :keyword or keyword:
            my ( $k, $v ) = ( $lines[$i] =~ /^:?(\w+):?\s+(.*?)\s?$/ );

lib/App/Basis/ConvertText2.pm  view on Meta::CPAN


=cut

# save the parsed output to a file, converting it on the way if the extension
# of the filename asks for something other than HTML
#  parameters
#     filename      - where to write to, its extension decides the format
#     pdfconvertor  - optional, passed on to _convert_file for PDF output
#  returns
#     true if the file was written, 0 otherwise
#  dies if parse has not been run first
sub save_to_file {
    state $counter = 0;
    my $self = shift;
    my ( $filename, $pdfconvertor ) = @_;

    # have we got the parsed data, without it nothing below makes sense
    if ( !$self->{output} ) {
        die "parse has not been run yet";
    }

    my ($format) = ( $filename =~ /\.(\w+)$/ );    # get last thing after a '.'
    if ( !$format ) {
        warn "Could not determine outpout file format, using PDF";

        # no leading '.', the regexp above never captures one either
        $format = 'pdf';
    }

    my $f  = $self->_md5id() . ".html";
    my $cf = cachefile( $self->cache_dir, $f );

    if ( !-f $cf ) {
        if ( !$self->use_cache() ) {

            # create a file name to store the output to, it needs the .html
            # extension so that the converted file gets a name of its own
            # below rather than being the HTML file itself
            $cf = "/tmp/" . get_program() . "$$." . $counter++ . ".html";
        }

        # either update the cache, or create temp file
        # spew_utf8 does the encoding, doing it here as well would encode
        # twice and parse would then read back garbage with slurp_utf8
        path($cf)->spew_utf8( $self->{output} );
    }

    my $outfile = $cf;
    $outfile =~ s/\.html$/.$format/i;

    # if the marked-up file is more recent than the converted one
    # then we need to convert it again
    if ( $format !~ /html/i ) {

        # as we can generate PDF using a number of convertors we should
        # always regenerate PDF output incase the convertor used is different
        if ( !-f $outfile || $format =~ /pdf/i || ( ( stat($cf) )[9] > ( stat($outfile) )[9] ) ) {
            $outfile = $self->_convert_file( $cf, $format, $pdfconvertor );

            # if we failed to convert, then clear the filename
            if ( !$outfile || !-f $outfile ) {
                $outfile = undef;
                debug( "ERROR", "failed to create output file from cached file $cf" );
            }
        }
    }

    my $status = 0;

    # now lets copy it to its final resting place
    if ($outfile) {
        try {
            $status = path($outfile)->copy($filename);
        }
        catch {
            say STDERR "$_ ";
            debug( "ERROR", "failed to copy $outfile to $filename" );
        };
    }
    return $status;
}

=back

=cut

# ----------------------------------------------------------------------------

1;

__END__



( run in 0.690 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )