App-DocKnot

 view release on metacpan or  search on metacpan

lib/App/DocKnot/Spin/Text.pm  view on Meta::CPAN

    for my $line (@lines) {
        next if $line !~ m{ \S }xms;
        return if $line !~ m{ \A (?: \Q$bullet\E | \Q$space\E ) \S }xms;
        if ($line =~ m{ \A \Q$bullet\E }xms) {
            $bullets++;
        }
    }
    return $bullets > 1;
}

# Whether every line of a paragraph is a numbered item with a simple number:
# optional leading whitespace, one or two digits, a period or closing
# parenthesis, and a single space.  At least two such lines are required, and
# only whitespace may follow the last one.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_allnumbered {
    my ($paragraph) = @_;

    # The repeated group is deliberately non-capturing.  With a capturing
    # group, calling this function in list context would return the text of
    # the last numbered line instead of a plain true value.
    return $paragraph =~ m{
        \A
        (?: \s* \d\d?[.\)] [ ] \N* \n ){2,}
        \s* \z
    }xms;
}

# Whether a line consists solely of uppercase letters, digits, whitespace,
# and a small set of punctuation (double quote, parentheses, comma, colon,
# period, exclamation point, slash, question mark, and hyphen).  An empty
# line qualifies, since it contains nothing outside that set.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_allcaps {
    my $line = shift;

    # Look for any single character outside the permitted set.
    my $has_other = $line =~ m{ [^[:upper:]\d\s\"\(\),:.!/?-] }xms;
    return !$has_other;
}

# Whether a paragraph is deliberately broken into lines rather than being
# flowed text.  That is the case when, ignoring the final line, any line is
# shorter than 40 characters, more than half the lines are shorter than 60
# characters, or every line of the paragraph is a single run of
# non-whitespace (no internal space).  A one-line paragraph is never broken.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_broken {
    my ($paragraph) = @_;

    # Collapse all trailing whitespace into exactly one newline so that the
    # split below and the final pattern see a cleanly terminated paragraph.
    $paragraph =~ s{ \s* \z }{\n}xms;
    my @body = split(m{ \n }xms, $paragraph);
    return if scalar(@body) == 1;

    # The last line is allowed to be short without meaning anything.
    pop(@body);

    # One pass: bail out on the first very short line, and meanwhile count
    # the moderately short ones.
    my $under_sixty = 0;
    for my $text (@body) {
        my $width = length($text);
        return 1 if $width < 40;
        $under_sixty++ if $width < 60;
    }
    my $majority = int(scalar(@body) / 2) + 1;
    return 1 if $under_sixty >= $majority;

    # Otherwise, broken only if no line contains internal whitespace.
    return $paragraph =~ m{ \A (?: \s* \S+ [ \t]* \n )+ \z }xms;
}

# Whether a paragraph starts with a bullet marker: optional leading
# whitespace, then one of "-", "o", or "*", then a whitespace character.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_bullet {
    my ($paragraph) = @_;
    my $bullet_re = qr{ \A \s* [-o*] \s }xms;
    return $paragraph =~ $bullet_re;
}

# Whether a line is centered (in 74 columns).  The line must start with
# whitespace, and twice the length of that leading whitespace plus the length
# of the remaining text must be within one of 74.  In addition, the leading
# whitespace, after being passed through untabify (presumably tab expansion;
# defined elsewhere), must be at least 8 characters long so that accidentally
# centered paragraph lines are not caught by mistake.
#
# NOTE(review): the threshold enforced by the code is 8, although this
# comment formerly said 10; confirm which one is intended.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_centered {
    my ($line) = @_;
    if ($line =~ m{ \A (\s+) (.+) }xms) {
        my ($margin, $content) = ($1, $2);

        # A centered line has the same margin on both sides, give or take
        # one column.
        my $imbalance = 74 - length($content) - length($margin) * 2;
        return if abs($imbalance) >= 2;
        return length(untabify($margin)) >= 8;
    }
    return;
}

# Whether a paragraph is a content listing, meaning that every line starts
# (after optional whitespace) with a run of digits and periods ending in a
# period or closing parenthesis, followed by a space or tab.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_contents {
    my ($paragraph) = @_;
    my $listing_re = qr{ \A (?: \s* [\d.]+[.\)] [ \t] \N* \n)+ \s* \z }xms;
    return $paragraph =~ $listing_re;
}

# Whether a paragraph looks like a title followed by a description: one or
# more title lines sharing one indentation, then one or more description
# lines sharing another indentation that is strictly longer than the title's.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_description {
    my ($paragraph) = @_;
    my $matched = $paragraph =~ m{
        \A
        (\s*) \S \N* \n         # title (1 is indent)
        (?: \1 \S \N* \n)*      # possibly more than one
        (\s+) \S \N* \n         # first line of description (2 is indent)
        (?: \2 \S \N* \n)*      # subsequent lines
        \s* \z
    }xms;
    return if !$matched;

    # The shape matched; it only counts if the description is indented
    # further than the title.
    my ($title_indent, $body_indent) = ($1, $2);
    return length($title_indent) < length($body_indent);
}

# Whether a line is a digest divider: exactly 30 hyphens, optionally followed
# by whitespace, and nothing else.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_divider {
    my ($line) = @_;
    my $divider_re = qr{ \A -{30} \s* \z }xms;
    return $line =~ $divider_re;
}

# Whether a line is an RFC 2822 header.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_header {
    my ($line) = @_;
    return if $line =~ m{ \A [\w-]+: \s+ \N }xms;

lib/App/DocKnot/Spin/Text.pm  view on Meta::CPAN

# Returns: True if a heading, false otherwise
sub _is_heading {
    my ($self, $paragraph) = @_;
    $paragraph = _unescape($paragraph);
    my $indent = indent($paragraph);
    my $baseline = $self->{baseline};
    my $has_baseline = defined($baseline);
    my $is_outdented = $has_baseline && $indent < $baseline;

    # Inside the contents section, a paragraph that starts with a section
    # number is a contents entry rather than a heading, unless it is
    # outdented.
    if (!$is_outdented && $self->{contents}) {
        return if $paragraph =~ m{ \A [\d.]+[.\)] \s }xms;
    }

    # An outdented single line is a heading if it is short or if it contains
    # at least two words.
    if ($is_outdented && lines($paragraph) == 1) {
        return 1 if length($paragraph) < 30 || $paragraph =~ m{ \S \s \S }xms;
    }

    # Anything indented beyond $INDENT is never a heading.
    # NOTE(review): $INDENT is not set in this sub; presumably it is a
    # file-level variable.  The other checks here use $self->{baseline}, so
    # confirm that $INDENT is still maintained.
    return if defined($INDENT) && $indent > $INDENT;

    # An underlined line of at most 31 characters that ends in a word
    # character, closing quote, or closing paren is a heading.
    return 1 if $paragraph =~ m{
        \A \s*
        [ \w\"\(\),:./&-]{0,30} [\w\"\)] \s* \n
        [-=~]+ \s*
        \z
    }xms;

    # So is an all-uppercase line of at most 31 characters that ends in an
    # uppercase character, digit, closing quote, or closing paren.
    return 1 if $paragraph =~ m{
        \A \s*
        [ [:upper:]\d\"\(\),:./&-]{0,30} [[:upper:]\d\"\)]
        \s* \n
        \z
    }xms;

    # Without a baseline to compare against, fall back to treating any
    # single line of at most 34 characters with no unexpected characters as a
    # heading.
    return !$has_baseline && $paragraph =~ m{
        \A \s*
        [ \w\"\(\),:./&-]{0,33} [\w\"\)]
        \s* \n
        \z
    }xms;
}

# Whether a line is an expanded RCS/CVS Id string: optional whitespace, a
# dollar sign, "Id:", at least one character of content on the same line, a
# closing dollar sign, and optional trailing whitespace.
#
# $line - Line to classify
#
# Returns: True if so, false otherwise
sub _is_id {
    my ($line) = @_;
    my $id_re = qr{ \A \s* [\$]Id: \N+ [\$] \s* \z }xms;
    return $line =~ $id_re;
}

# Whether a paragraph should be treated as literal text, judged from its
# first line.  It is literal when that line contains internal whitespace
# that looks like manual alignment: two spaces after a character that does
# not normally end a sentence or phrase, three spaces in a row, or a tab, in
# each case followed by more non-whitespace text.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_literal {
    my ($paragraph) = @_;
    my $aligned_re = qr{
        \A [ \t]*
        \S \N*
        (?: [^.?!\"\)\]:*_\n] [ ] [ ] | [ ] [ ] [ ] | \t )
        \S
    }xms;
    return $paragraph =~ $aligned_re;
}

# Whether a paragraph is part of a numbered list, meaning that it starts
# (after optional whitespace) with a one- or two-digit number followed by a
# period or closing parenthesis and then whitespace.
#
# $paragraph - Paragraph to classify
#
# Returns: The number if the paragraph is a numbered list element
#          undef otherwise
sub _is_numbered {
    my ($paragraph) = @_;

    # A failed match in list context leaves $number undefined, so the
    # undefined value is returned as-is in that case.
    my ($number) = $paragraph =~ m{ \A \s* (\d\d?) [.\)] \s }xms;
    return $number;
}

# Whether a paragraph has inconsistent indentation, meaning that not every
# line starts with the same leading whitespace as the first line.  A leading
# list number or bullet on the first line is counted as indentation.
#
# $paragraph - Paragraph to classify
#
# Returns: True if so, false otherwise
sub _is_offset {
    my ($paragraph) = @_;

    # Blank out a leading number and then a leading bullet, replacing each
    # with the same number of spaces so that the text after it lines up with
    # the continuation lines.  The order matters: the bullet check sees the
    # result of the number check.
    my @markers = (
        qr{ \A (\s* (?: \d\d? ) [.\)] \s) }xms,
        qr{ \A (\s* [-*o] \s) }xms,
    );
    for my $marker (@markers) {
        $paragraph =~ s{$marker}{ q{ } x length($1) }xmse;
    }

    # The paragraph is offset unless every line repeats the first line's
    # indentation exactly.
    my $consistent
      = $paragraph =~ m{ \A (\s*) \S \N* \n (\1 \S \N* \n)* \s* \z }xms;
    return !$consistent;
}

# Whether a paragraph is quoted.  Requires the paragraph be at least two
# lines, since otherwise we cannot detect a common prefix.
#
# $paragraph - Paragraph to classify
#
# Returns: The quote character if it is quoted
#          undef otherwise
sub _is_quoted {
    my ($paragraph) = @_;
    return if $paragraph !~ m{
        \A \s*
        ([^\w\s\"\']) \s* \N* \n
        (?: \s* \1 \s* \N* \n )+



( run in 0.499 second using v1.01-cache-2.11-cpan-39bf76dae61 )