CPAN-MetaCurator
view release on metacpan or search on metacpan
scripts/tiddly2text.pl view on Meta::CPAN
#!/usr/bin/perl
use 5.36.0;
use open qw(:std :utf8);
use strict;
use warnings;
use warnings qw(FATAL utf8);
use Getopt::Long;
use HTML::Entities;
use HTML::TreeBuilder;
use Path::Tiny; # For path().
use Pod::Usage;
# ----------------------------------------------
# Convert the tiddlers stored in a TiddlyWiki HTML file into plain text, and
# cross-check the tiddler titles against the entries of the MainMenu tiddler.
#
# Parameters (passed as a hash):
#	in_file:  Path of the TiddlyWiki HTML file to read.
#	out_file: Path of the text file to write.
#
# Every titled <div> found under <div id="storeArea"> is treated as a tiddler,
# except those whose title matches DefaultTiddlers, SiteTitle or SiteSubtitle.
# For each tiddler other than MainMenu, the title, the reformatted text and an
# empty line are written to out_file. Mismatches between the MainMenu entries
# and the tiddler titles are reported on STDOUT.
#
# Returns 0, which the caller uses as the program's exit status.
# Dies if the storeArea <div> is missing, or if out_file can't be written.
sub process
{
	my(%options) = @_;
	my($root)    = HTML::TreeBuilder -> new();
	my($content) = path($options{in_file}) -> slurp_utf8;

	# Called in void context, decode_entities() rewrites its argument in place.

	decode_entities $content;

	$root -> parse_content($content);

	my($store) = $root -> look_down(_tag => 'div', id => 'storeArea');

	die "No <div id=\"storeArea\"> found in $options{in_file}\n" if (! defined $store);

	my(@div) = $store -> look_down(_tag => 'div');

	# A lexical handle, checked, so an unwritable output file is reported
	# immediately rather than as a stream of warnings from say().

	open(my $out, '>', $options{out_file}) || die "Can't open $options{out_file} for writing: $!\n";

	# Start with an empty menu so the cross-check below still runs when the
	# input has no MainMenu tiddler.

	my($main_menu) = [];

	my(%main_menu, %title);

	for my $div (@div)
	{
		my($title) = $div -> attr('title');

		next if ( (! defined $title) || ($title =~ /(?:DefaultTiddlers|SiteTitle|SiteSubtitle)/) );

		$title{$title} = 1;

		# Scoped to this tiddler, so one without children can't inherit the
		# text of the previous tiddler.

		my(@line);

		# Only the text of the last child is kept, as before.
		# NOTE(review): presumably each tiddler <div> has exactly 1 child - confirm.

		for my $child ($div -> content_list)
		{
			# content_list() returns plain strings for text nodes, and these
			# have no as_text() method.

			my($text) = ref($child) ? $child -> as_text : $child;

			$text =~ s/[\s]+/ /gs;     # Collapse each run of whitespace into 1 space.
			$text =~ s/ [oO] /\no /g;  # Start a new line at each ' o ' (or ' O ') marker.
			$text =~ s/ - /\n\t-/g;    # Start a new, tab-indented, line at each ' - ' marker.

			@line = ($text);
		}

		if ($title eq 'MainMenu')
		{
			# Menu entries look like [[Name]]. Strip the brackets, and ignore GettingStarted.

			$main_menu = [grep{! /GettingStarted/} map{s/^\[\[//; s/]]$//; $_} split(/ /, $line[0] // '')];
		}
		else
		{
			say {$out} $title;
			say {$out} @line;
			say {$out} '';
		}
	}

	# Buffered write errors only surface when the handle is closed.

	close($out) || die "Can't close $options{out_file}: $!\n";

	# NOTE(review): 'uaAD' is added to the menu entries here. Its purpose is
	# not visible in this script - confirm it is intended.

	@$main_menu = sort ('uaAD', @$main_menu);

	for my $entry (@$main_menu)
	{
		if (! $title{$entry})
		{
			say "In main menu, but no title: $entry";
		}
	}

	@main_menu{@$main_menu} = (1) x @$main_menu;

	for my $name (sort keys %title)
	{
		if (! $main_menu{$name})
		{
			say "In title, but no main menu: $name.";
		}
	}

	# Release the parse tree explicitly.

	$root -> delete;

	return 0;

} # End of process.
# ----------------------------------------------
# Main program: announce the script, read the command line options, then run
# process() and use its return value as the exit status.

say "tiddly2text.pl - Converts a TiddlyWiki HTML file into a text file\n";

# Defaults, each of which can be overridden on the command line.

my(%options);

$options{help}     = 0;
$options{in_file}  = 'data/in.html';
$options{out_file} = 'data/out.txt';

# Getopt::Long specifications, each mapped onto the option it sets.

my(%opts) =
(
	'help'       => \$options{help},
	'in_file=s'  => \$options{in_file},
	'out_file=s' => \$options{out_file},
);

# Data::Dumper is not loaded by this script, so calling Dumper() here would
# replace the intended message with an 'Undefined subroutine' error.
# List the accepted option specifications instead.

GetOptions(%opts) || die('Error in options. Options: ' . join(', ', sort keys %opts) );

if ($options{help} == 1)
{
	# pod2usage() exits by itself. Use status 0, since asking for help is not an error.

	pod2usage(-exitval => 0, -verbose => 1);
}

exit process(%options);
__END__
=pod

=head1 NAME

tiddly2text.pl - Converts a TiddlyWiki HTML file into a text file
( run in 0.792 second using v1.01-cache-2.11-cpan-39bf76dae61 )