App-DocKnot
view release on metacpan or search on metacpan
lib/App/DocKnot/Spin/Text.pm view on Meta::CPAN
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_centered {
    my ($line) = @_;

    # Only lines that start with whitespace followed by some text qualify.
    if ($line =~ m{ \A (\s+) (.+) }xms) {
        my ($lead, $rest) = ($1, $2);

        # The text counts as centered when the leading whitespace, mirrored
        # on the right, plus the text comes within one character of 74.
        my $imbalance = 74 - length($rest) - 2 * length($lead);
        return if abs($imbalance) >= 2;

        # Require a leading margin of at least eight characters as measured
        # after untabify (presumably tab expansion; confirm against its
        # definition elsewhere in this file).
        return length(untabify($lead)) >= 8;
    }
    return;
}
# Whether a paragraph is a content listing.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_contents {
    my ($paragraph) = @_;

    # Every line must start (after optional whitespace) with a number made of
    # digits and periods, then a period or closing paren, then a space or tab.
    # Trailing whitespace after the last such line is allowed.
    my $listing = qr{ \A (?: \s* [\d.]+[.\)] [ \t] \N* \n)+ \s* \z }xms;
    return $paragraph =~ $listing;
}
# Whether a paragraph looks like a title and a description. Allows for
# multiple titles.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_description {
    my ($paragraph) = @_;

    # One or more title lines sharing one indent, followed by one or more
    # description lines sharing another indent.
    if (
        $paragraph =~ m{
            \A
            (\s*) \S \N* \n # title (1 is indent)
            (?: \1 \S \N* \n)* # possibly more than one
            (\s+) \S \N* \n # first line of description (2 is indent)
            (?: \2 \S \N* \n)* # subsequent lines
            \s* \z
        }xms
      )
    {
        my ($title_indent, $body_indent) = ($1, $2);

        # The description must be indented further than the title(s).
        return length($title_indent) < length($body_indent);
    }
    return;
}
# Whether a line is a digest divider.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_divider {
    my ($line) = @_;

    # A divider is exactly thirty hyphens, optionally followed by whitespace.
    my $divider = qr{ \A -{30} \s* \z }xms;
    return $line =~ $divider;
}
# Whether a line is an RFC 2822 header.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_header {
    my ($line) = @_;

    # A header is a field name made of word characters and hyphens, a colon,
    # at least one whitespace character, and then some non-newline content.
    #
    # Return the match result directly.  The previous "return if ..." form
    # returned nothing on a match and the false match result otherwise, so
    # this predicate could never be true.
    return $line =~ m{ \A [\w-]+: \s+ \N }xms;
}
# Whether a paragraph is a heading. This is all about heuristics and guesses,
# and there are a number of other things we could confuse for headings, so we
# have to be careful.
#
# If it's a single line and outdented from the baseline, it's probably a
# heading.
#
# If it's at the baseline, check to see if it looks like a heading and either
# it's in all caps or there is a rule underneath it. If we haven't seen a
# baseline, be more accepting about headings.
#
# If we're inside a contents block, be even more careful and disallow numbered
# things that look like a heading unless they're outdented.
#
# Unlike most of the classification functions, this is a regular method, since
# it needs access to the parsing state.
#
# $paragraph - Paragraph to classify
#
# Returns: True if a heading, false otherwise
sub _is_heading {
my ($self, $paragraph) = @_;
$paragraph = _unescape($paragraph);
my $indent = indent($paragraph);
my $nobase = !defined($self->{baseline});
my $outdented = defined($self->{baseline}) && $indent < $self->{baseline};
# Numbered lines inside the contents section are definitely not headings.
my $numbered = $paragraph =~ m{ \A [\d.]+[.\)] \s }xms;
return if !$outdented && $self->{contents} && $numbered;
# Outdented single lines are headings as long as they're either short or
# contain at least two words.
if ($outdented && lines($paragraph) == 1) {
return 1 if $paragraph =~ m{ \S \s \S }xms;
return 1 if length($paragraph) < 30;
}
# Indented lines are never headings.
return if defined($INDENT) && $indent > $INDENT;
# Lines of at most 31 characters ending in a word character or closing
# quote or paren are headings if they're underlined.
return 1 if $paragraph =~ m{
\A \s*
[ \w\"\(\),:./&-]{0,30} [\w\"\)] \s* \n
[-=~]+ \s*
\z
}xms;
# All-uppercase lines of at most 31 characters ending in an uppercase
# character, digit, or closing quote or paren are headings.
return 1 if $paragraph =~ m{
\A \s*
[ [:upper:]\d\"\(\),:./&-]{0,30} [[:upper:]\d\"\)]
\s* \n
\z
}xms;
# If there is no baseline, assume single lines of at most 34 characters
# with no unexpected characters are headings.
( run in 1.396 second using v1.01-cache-2.11-cpan-39bf76dae61 )