App-Zapzi

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

  * (#1) Moved file slurp and other file operations to Path::Tiny

0.014     2014-05-10 18:35:44+07:00 Asia/Bangkok

  * Removed requirement for GD module and system library
  * Remove iframe tags when using HTML based transformers
  * Improved error reporting for transformers

0.013     2014-03-05 20:29:23+07:00 Asia/Bangkok

  * Remove font tag attributes when using HTMLExtractMain transformer
  * Ignore errors from Text::Markdown about matching braces, as
    processing can continue anyway
  * Validate article IDs passed in on the command line

0.012     2013-10-29 15:09:24 Asia/Hong_Kong

  * Added distributors to deliver eBooks after publication
  * Copy distributor - copies file to another directory eg an eReader
    connected via USB cable
  * Script distributor - runs a script with the eBook as parameter

MANIFEST  view on Meta::CPAN

t/lib/ZapziTestSchema.pm
t/release-distmeta.t
t/release-pod-coverage.t
t/release-pod-syntax.t
t/release-test-version.t
t/testfiles/bad-markdown.txt
t/testfiles/distribute-script-echo.pl
t/testfiles/distribute-script-error.pl
t/testfiles/empty.html
t/testfiles/empty.txt
t/testfiles/html-font.html
t/testfiles/html-fragment.html
t/testfiles/html-links.html
t/testfiles/html-no-title.html
t/testfiles/html-two-titles.html
t/testfiles/html-utf8.html
t/testfiles/sample.html
t/testfiles/sample.pm
t/testfiles/sample.txt
t/testfiles/sample.unknown
t/testfiles/ws-and-long-lines.txt

lib/App/Zapzi/Transformers/HTMLExtractMain.pm  view on Meta::CPAN

# Extract the main content of an HTML document and tidy it for eReaders.
#
# Arguments: the raw HTML text to process.
# Returns:   whatever HTML::ExtractMain::extract_main_html yields when asked
#            for output_type 'tree' (presumably an HTML::Element tree -
#            confirm against that module's docs). A false result is
#            returned as-is without any further processing.
sub _extract_html
{
    my ($self, $raw_html) = @_;

    my $main_tree = HTML::ExtractMain::extract_main_html(
        $raw_html,
        output_type => 'tree',
    );

    # Nothing usable was extracted, so there is nothing to clean up.
    return $main_tree unless $main_tree;

    # Post-process the extracted content in place: font attributes
    # first, then the (optional) link handling.
    $self->_remove_fonts($main_tree);
    $self->_optionally_deactivate_links($main_tree);

    return $main_tree;
}

# Strip every attribute from each <font> element found in $tree.
#
# The elements themselves (and their text) are kept; only the
# attributes go, because font styling seldom behaves as intended on
# eReaders - colours are meaningless on monochrome screens and the
# requested font families are unlikely to be installed.
sub _remove_fonts
{
    my ($self, $tree) = @_;

    my @font_elements = $tree->look_down(_tag => "font");

    foreach my $element (@font_elements)
    {
        foreach my $attr_name ($element->all_external_attr_names)
        {
            # Setting an attribute to undef is how it gets cleared here.
            $element->attr($attr_name, undef);
        }
    }
}

sub _optionally_deactivate_links
{
    my ($self, $tree) = @_;

    # Turn links into text if option was requested.

    my $option = App::Zapzi::UserConfig::get('deactivate_links');

t/05-transform.t  view on Meta::CPAN

    # Try an HTML file with two titles and leading/trailing whitespace
    $f = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-two-titles.html');
    ok( $f->fetch, 'Fetch HTML with two title tags' );
    $tx = App::Zapzi::Transform->new(raw_article => $f);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file' );
    is( $tx->title, 'Title 1',
        'Title selected from HTML extract with two title tags');

    # Try an HTML file with embedded font tags
    $f = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-font.html');
    ok( $f->fetch, 'Fetch HTML with font tags' );
    $tx = App::Zapzi::Transform->new(raw_article => $f);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file with font tags' );
    unlike( $tx->readable_text, qr/yellow/,
            'Font attributes removed from HTML');

    # Try an HTML file with links
    $f = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-links.html');
    ok( $f->fetch, 'Fetch HTML with links' );
    $tx = App::Zapzi::Transform->new(raw_article => $f);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file with links' );

t/testfiles/html-font.html  view on Meta::CPAN

    <title>Font tests </title>
    <meta name="description" content="A testing document for App::Zapzi">
    <meta name="author" content="Rupert Lane">
  </head>

  <body>

    <h1>Lorem ipsum</h1>

    <p>
      <font color="red">Lorem ipsum dolor sit amet, consectetur
      adipisicing elit, sed do eiusmod tempor incididunt ut labore et
      dolore magna aliqua.</font>

      <font color="yellow">Ut enim ad minim veniam, quis
      nostrud exercitation ullamco laboris nisi ut aliquip ex ea
      commodo consequat. Duis aute irure dolor in reprehenderit in
      voluptate velit esse cillum dolore eu fugiat nulla pariatur.</font>

      Excepteur sint occaecat cupidatat non proident, sunt in culpa
      qui officia deserunt mollit anim id est laborum.</p>

  </body>
</html>



( run in 0.736 second using v1.01-cache-2.11-cpan-5735350b133 )