App-Zapzi
view release on metacpan or search on metacpan
t/05-transform.t view on Meta::CPAN
ok( $tx->to_readable, 'Transform ws-and-long-lines.txt' );
ok( length($tx->title) <= 80, 'Length of title OK' );
like( $tx->title, qr/^This is an example/, 'Title without whitespace' );
}
# Run the plain 'HTML' transformer over t/testfiles/sample.html and check
# the result: the transform succeeds, the article body is kept, script and
# iframe elements are gone, the "Header!" text is still present, and the
# title is extracted.
sub test_html
{
    my $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/sample.html');
    ok( $article->fetch, 'Fetch HTML' );

    # Explicitly request the plain HTML transformer rather than letting
    # App::Zapzi::Transform pick one.
    my $tx = App::Zapzi::Transform->new(raw_article => $article,
                                        transformer => 'HTML');
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file' );

    like( $tx->readable_text, qr/<h1>Lorem/, 'Contents of HTML file OK' );

    # Match the tag opening only (as the iframe check does) so that a
    # surviving <script type="..."> or <script src="..."> is caught too;
    # a literal '<script>' would miss any tag carrying attributes.
    unlike( $tx->readable_text, qr/<script\b/,
            'Javascript stripped from HTML file' );
    unlike( $tx->readable_text, qr/<iframe/,
            'iframe stripped from HTML file' );

    like( $tx->readable_text, qr/Header!/,
          'Full HTML preserved with plain HTML transformer' );

    # NOTE(review): the expected title looks like mis-decoded UTF-8
    # (probably typographic quotes around HTML) -- confirm against the
    # <title> of t/testfiles/sample.html and this file's encoding.
    is( $tx->title, 'Sample âHTMLâ Document',
        'Title of HTML file OK with entity decoding' );
}
# Exercise HTML transformation without naming a transformer (except for the
# final empty-file case, which requests 'HTMLExtractMain' explicitly).
# NOTE(review): presumably App::Zapzi::Transform selects HTMLExtractMain
# for these HTML inputs by default -- confirm in App::Zapzi::Transform.
#
# Scenarios covered, in order:
#   * sample.html          - body kept, scripts and "Header!" text removed,
#                            title extracted
#   * html-no-title.html   - title falls back to something containing the
#                            file name
#   * html-two-titles.html - expected title is 'Title 1'
#   * html-font.html       - font attributes removed
#   * html-links.html      - links kept, removed, and kept again as the
#                            'deactivate_links' user config is toggled
#   * empty.html           - to_readable returns undef
sub test_html_extractmain
{
    my $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/sample.html');
    ok( $article->fetch, 'Fetch HTML' );

    my $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file' );
    like( $tx->readable_text, qr/<h1>Lorem/, 'Contents of HTML file OK' );

    # Match the tag opening only, so a <script> element carrying
    # attributes would also be detected.
    unlike( $tx->readable_text, qr/<script\b/,
            'Javascript stripped from HTML file' );
    unlike( $tx->readable_text, qr/Header!/,
            'Non-essential text stripped from HTML file' );

    # NOTE(review): the expected title looks like mis-decoded UTF-8
    # (probably typographic quotes around HTML) -- confirm against the
    # <title> of t/testfiles/sample.html and this file's encoding.
    is( $tx->title, 'Sample âHTMLâ Document',
        'Title of HTML file OK with entity decoding' );

    # Try an HTML file with no <title>
    $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-no-title.html');
    ok( $article->fetch, 'Fetch HTML' );

    $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file' );
    like( $tx->title, qr/html-no-title/,
          'Title set for HTML file without <title>' );

    # Try an HTML file with two titles and leading/trailing whitespace
    $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-two-titles.html');
    ok( $article->fetch, 'Fetch HTML with two title tags' );

    $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file' );
    is( $tx->title, 'Title 1',
        'Title selected from HTML extract with two title tags');

    # Try an HTML file with embedded font tags
    $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-font.html');
    ok( $article->fetch, 'Fetch HTML with font tags' );

    $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file with font tags' );
    unlike( $tx->readable_text, qr/yellow/,
            'Font attributes removed from HTML');

    # Try an HTML file with links.  The dot in the host name is escaped so
    # the pattern matches the literal URL rather than any character.
    $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/html-links.html');
    ok( $article->fetch, 'Fetch HTML with links' );

    $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file with links' );
    like( $tx->readable_text, qr|example\.com/some-link|,
          'Links present in HTML');

    # With deactivate_links switched on, the same article should no longer
    # contain the link target.
    ok( App::Zapzi::UserConfig::set('deactivate_links', 'Yes'),
        'Can set deactivate_links' );

    $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file with links' );
    unlike( $tx->readable_text, qr|example\.com/some-link|,
            'Links now not present in HTML');

    # Switch the option back off and verify the links reappear.
    ok( App::Zapzi::UserConfig::set('deactivate_links', 'No'),
        'Can set deactivate_links off' );

    $tx = App::Zapzi::Transform->new(raw_article => $article);
    isa_ok( $tx, 'App::Zapzi::Transform' );
    ok( $tx->to_readable, 'Transform sample HTML file with links' );
    like( $tx->readable_text, qr|example\.com/some-link|,
          'Links present in HTML again');

    # Try an empty HTML file, which should return error
    $article = App::Zapzi::FetchArticle->new(
        source => 't/testfiles/empty.html');
    ok( $article->fetch, 'Fetch HTML' );

    $tx = App::Zapzi::Transform->new(raw_article => $article,
                                     transformer => 'HTMLExtractMain');
    isa_ok( $tx, 'App::Zapzi::Transform' );
    is( $tx->to_readable, undef, 'Empty HTML file returns error' );
}
sub test_pod
{
my $f = App::Zapzi::FetchArticle->new(source => 't/testfiles/sample.pm');
ok( $f->fetch, 'Fetch POD' );
my $tx = App::Zapzi::Transform->new(raw_article => $f);
isa_ok( $tx, 'App::Zapzi::Transform' );
ok( $tx->to_readable, 'Transform sample file containing POD' );
like( $tx->readable_text, qr/TESTING/s,
'Headings in POD file OK' );
like( $tx->readable_text, qr/<p>POD is transformed/s,
'Text in POD file OK' );
is( $tx->title, 'sample.pm',
'Basename of file is used for POD title' );
# Try a named module
$f = App::Zapzi::FetchArticle->new(source => 'File::Basename');
ok( $f->fetch, 'Fetch POD from module' );
$tx = App::Zapzi::Transform->new(raw_article => $f);
isa_ok( $tx, 'App::Zapzi::Transform' );
ok( $tx->to_readable, 'Transform module containing POD' );
( run in 0.438 second using v1.01-cache-2.11-cpan-5735350b133 )