App-ArticleWrap
view release on metacpan or search on metacpan
script/article-wrap view on Meta::CPAN
#!/usr/bin/env perl
=head1 Article Wrap
Wrap news articles or mail files. That is to say, don't wrap the header lines at
the top but do wrap the rest of the file. Also don't wrap "source code" which is
fenced by Markdown code fences: trible backticks. Don't wrap short lines (10
characters or less). Don't wrap lines with whitespace at the end. Don't wrap
empty lines.
This is useful when post-processing a message written using ed for posting using
tin, for example.
=cut
use Modern::Perl;
die "This filter wraps news posts.\n" if @ARGV;
binmode(STDIN, ':utf8');
binmode(STDOUT, ':utf8');
# headers
while (<STDIN>) {
chomp;
last if not $_; # end of headers
say; # header line
}
print "\n"; # empty line after headers
my $max = 72;
my $buffer;
my $prefix = '';
my $wrap = 1;
while (<STDIN>) {
chomp;
my ($new_prefix) = /([> ]*)/;
# empty lines don't get wrapped, nor lines with a space at the end, nor indented lines
my $empty = length() == 0 || /^$prefix\s*$/ || /\s$/ || /^\s/ || /^$prefix.{0,10}$/;
# ``` toggles wrap
$wrap = not $wrap if /^$prefix\s*```$/;
# end old paragraph with the old prefix if the prefix changed, an empty line,
# or not wrapping anymore
if ($buffer and ($new_prefix ne $prefix or $empty or not $wrap)) {
say $prefix . $buffer;
$buffer = '';
}
# print empty lines or not wrapped lines without stripping trailing whitespace
if ($empty or not $wrap) {
say $_;
next;
}
# continue old paragraph
$buffer .= " " if $buffer;
# strip the prefix
$prefix = $new_prefix;
$buffer .= substr($_, length($prefix));
# wrap what we have
while (length($buffer) > $max) {
# this is the max line + 1
my $test_line = substr($buffer, 0, $max - length($prefix) + 1);
# if there's a word that reaches into that last character, break before
if ($test_line =~ /(\s+(\S+)\S)$/) {
# $1 is the last word: strip it, print prefix and stripped line
say $prefix . substr($buffer, 0, $max - length($prefix) - length($1) + 1);
# the new buffer starts with the word just stripped
$buffer = substr($buffer, $max - length($prefix) - length($2));
} else {
# we know that there is no word at the boundary, so cut there
my $line = substr($buffer, 0, $max - length($prefix));
# strip trailing whitespace and print it
$line =~ s/\s+$//;
say $prefix . $line;
# the new buffer starts where we did the cut, strip leading whitespace
$buffer = substr($buffer, $max - length($prefix));
$buffer =~ s/^\s+//;
}
}
}
say $prefix . $buffer if $buffer;
( run in 1.258 second using v1.01-cache-2.11-cpan-5837b0d9d2c )