App-DocKnot

 view release on metacpan or  search on metacpan

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

    return $output;
}

# Pull bracketed arguments (and optionally a leading parenthesized formatting
# instruction) off the front of a string.
#
# $text        - Text to parse arguments from
# $count       - Number of [] arguments to extract; a negative value means
#                extract every consecutive [] argument that is present
# $want_format - If true, first look for a () formatting instruction and
#                extract it if present
#
# When $count is non-negative and an argument cannot be found, a warning is
# issued through $self->_warning and an empty string is used in its place.
#
# Returns: List of the following strings:
#            $format - Format without its parentheses, or the empty string if
#                      none was present; omitted entirely if !$want_format
#            $text   - The remaining unparsed text
#            @args   - The extracted arguments with their enclosing brackets
#                      removed
sub _extract {
    my ($self, $text, $count, $want_format) = @_;

    # Formatting instruction, only looked for when the caller asked for it.
    # extract_bracketed removes what it finds from $text as a side effect.
    my $format = q{};
    if ($want_format) {
        my $raw = extract_bracketed($text, '()') // q{};
        $format = $raw ? substr($raw, 1, -1) : $raw;
    }

    # Arguments.  Either consume everything available (negative count) or
    # exactly $count arguments, substituting an empty string with a warning
    # for each one that is missing.
    my @args;
    if ($count < 0) {
        while (1) {
            my $found = extract_bracketed($text, '[]');
            last if !defined($found);
            push(@args, substr($found, 1, -1));
        }
    } else {
        for my $position (1 .. $count) {
            my $found = extract_bracketed($text, '[]');
            if (!defined($found)) {
                # $@ holds the reason extract_bracketed gave for failing.
                $self->_warning("cannot find argument $position: $@");
                push(@args, q{});
                next;
            }
            push(@args, substr($found, 1, -1));
        }
    }

    # The format is only part of the result if it was requested.
    if (!$want_format) {
        return ($text, @args);
    }
    return ($format, $text, @args);
}

# Expand a macro invocation.
#
# Every substitution marker (a backslash followed by digits) in the macro
# definition is replaced with the corresponding argument, and the resulting
# text is then parsed as thread via _parse_context.
#
# $definition - Definition of the macro
# $block      - True if currently in block context
# @args       - The arguments to the macro
#
# Returns: List with the macro expansion and the block context flag, exactly
#          as returned by _parse_context
sub _macro {
    my ($self, $definition, $block, @args) = @_;

    # The function that expands a macro substitution marker.  Markers are
    # numbered from 1.  If the number of the marker does not correspond to an
    # argument of the macro (it is higher than the number of arguments, or it
    # is zero), emit two backslashes followed by the number instead of
    # substituting anything.  (We will have already warned about markers that
    # are too high when defining the macro.)
    #
    # The lower bound matters: without it, \0 would index $args[-1] and
    # silently substitute the last argument, or undef for a macro that takes
    # no arguments.
    #
    # NOTE(review): the doubled backslash is presumably there so that the
    # thread parse below renders a literal backslash followed by the number;
    # confirm against the handler for the \\ command.
    my $expand = sub {
        my ($n) = @_;
        if ($n < 1 || $n > scalar(@args)) {
            return "\\\\$n";
        }
        return $args[$n - 1];
    };

    # Replace the substitution markers in the macro definition.
    $definition =~ s{ \\(\d+) }{ $expand->($1) }xmsge;

    # Now parse the result as if it were input thread and return the results.
    return $self->_parse_context($definition, $block);
}

# Expand a single command into its output.  This function is mutually
# recursive with _parse_context and _macro.
#
# Three kinds of command are recognized, checked in this order: variable
# references (\=NAME), user-defined macros, and built-in commands looked up in
# %COMMANDS.
#
# $command - Name of the command
# $text    - Input text following the command
# $block   - True if currently in block context (if so, and if the command
#            doesn't generate its own container, it will need to be wrapped
#            in <p>)
#
# Returns: List with the following elements:
#            $output - Output from expanding the command
#            $block  - Whether the output is block context
#            $text   - Remaining unparsed text
sub _expand {
    my ($self, $command, $text, $block) = @_;

    # Variable references look like \=NAME and expand to the value of the
    # variable "NAME".  The result is always inline, and no text is consumed.
    if ($command =~ m{ \A = \w }xms) {
        my $name = substr($command, 1);
        if (!exists($self->{variable}{$name})) {
            $self->_warning("unknown variable \\=$name");
            return (q{}, 0, $text);
        }
        return ($self->{variable}{$name}, 0, $text);
    }

    # Macros are checked before built-in commands, so a macro shadows a
    # command of the same name.
    if (exists($self->{macro}{$command})) {
        my ($nargs, $body) = $self->{macro}{$command}->@*;

        # Pull the macro's arguments off the input, unless it takes none.
        my @macro_args;
        if ($nargs != 0) {
            ($text, @macro_args) = $self->_extract($text, $nargs, 0);
        }

        # Block context only survives if we were already in block context and
        # nothing but whitespace follows the macro invocation.  Otherwise the
        # macro runs inline.
        $block = $text =~ m{ \A \s* \z }xms if $block;

        # Substitute the arguments and parse the result.
        my ($expansion, $is_block)
          = $self->_macro($body, $block, @macro_args);

        # Parsing the expansion counted the newlines of the macro body a
        # second time, so take the newlines in the definition back off the
        # line number.
        #
        # The line number of errors inside macro arguments will therefore be
        # somewhat off if the macro definition itself contains newlines.
        # Avoiding that would require much more complex parsing and state
        # tracking.
        my $body_newlines = $body =~ tr{\n}{};
        $self->{input}[-1][2] -= $body_newlines;

        return ($expansion, $is_block, $text);
    }

    # Built-in commands.  Anything without an entry in the command table is
    # reported and expands to nothing.
    my $spec = $COMMANDS{$command};
    if (!ref($spec)) {
        $self->_warning("unknown command or macro \\$command");
        return (q{}, 1, $text);
    }
    my ($nargs, $handler, $want_format) = $spec->@*;

    # Extract the arguments.  Handlers that accept a formatting instruction
    # receive it as their first argument, ahead of the regular arguments.
    my ($rest, @leading, @cmd_args);
    if ($want_format) {
        (my $format, $rest, @cmd_args) = $self->_extract($text, $nargs, 1);
        @leading = ($format);
    } else {
        ($rest, @cmd_args) = $self->_extract($text, $nargs);
    }

    # Handlers return the block flag first and the output second; callers of
    # this function expect the opposite order.
    my ($is_block, $html) = $self->$handler(@leading, @cmd_args);
    return ($html, $is_block, $rest);
}

# This is the heart of the input parser.  Take a string of raw input, expand
# the commands in it, and format the results as HTML.  This function is
# mutually recursive with _expand and _macro.
#
# This function is responsible for maintaining the line number in the file
# currently being processed, for error reporting.  The strategy used is to
# increment the line number whenever a newline is seen in processed text.
# This means that newlines are not seen until the text containing them is
# parsed, which in turn means that every argument that may contain a newline
# must be parsed or must update the line number.  The counter that is updated
# is $self->{input}[-1][2].
#
# In block context, plain text and the output of inline commands are
# accumulated into a pending paragraph that is wrapped with _paragraph, while
# the output of block commands is appended to the output directly.  In inline
# context everything is simply concatenated.
#
# $text  - Input text to parse
# $block - True if the parse is done in a block context
#
# Returns: List of the following values:
#            $output - HTML output corresponding to $text
#            $block  - Whether the result is suitable for block level
#
## no critic (Subroutines::ProhibitExcessComplexity)
sub _parse_context {
    my ($self, $text, $block) = @_;

    # Check if there are any commands in the input.  If not, we have a
    # paragraph of regular text.  This is a fast path that skips the
    # tokenizing loop below entirely.
    if (index($text, q{\\}) == -1) {
        my $output = $text;

        # Update the line number.
        $self->{input}[-1][2] += $text =~ tr{\n}{};

        # If we are at block context, we need to make the text into a block
        # element, which means wrapping it in <p> tags.  Since that is a
        # top-level block construct, also close any open block structure.
        if ($block) {
            $output = $self->_border_end() . $self->_paragraph($output);
        }

        # Return the result.  The block flag is passed back unchanged.
        return ($output, $block);
    }

    # The output seen so far.
    my $output = q{};

    # Output required to close any open block-level constructs that we saw
    # prior to the text we're currently parsing.  It is obtained from
    # _border_end when a paragraph is started and emitted immediately before
    # that paragraph.
    my $border = q{};

    # Output with inline context that needs to be wrapped in <p> tags.
    my $paragraph = q{};

    # Leading whitespace that should be added to a created paragraph.  This is
    # only non-empty if $paragraph is empty.
    my $space = q{};

    # Whether we saw a construct not suitable for block level.
    my $nonblock = 0;

    # We have at least one command.  Parse the text into sections of regular
    # text and commands, expand the commands, and glue the results together as
    # HTML.
    #
    # If we are at block level, we have to distinguish between plain text and
    # inline commands, which have to be wrapped in paragraph tags, and
    # block-level commands, which shouldn't be.
    while ($text ne q{}) {
        my ($string, $command);

        # Extract text before the next command, or a command name (but none of
        # its arguments).  I think it's impossible for this regex to fail to
        # match as long as $text is non-empty, but do error handling just in
        # case.
        #
        # $string receives everything that was matched and removed from $text
        # (either a run of plain text or the backslash plus command name).
        # $command receives the command name without the backslash and is only
        # set when the command alternative matched.
        #
        # NOTE(review): the error branch relies on _fatal not returning;
        # otherwise $string would be undefined below.  Confirm.
        if ($text =~ s{ \A ( [^\\]+ | \\ ([\w=]+ | .) ) }{}xms) {
            ($string, $command) = ($1, $2);
        } else {
            my $context = substr($text, 0, 20);
            $context =~ s{ \n .* }{}xms;
            $self->_fatal(qq(unable to parse near "$context"));
        }

        # Update the line number.
        $self->{input}[-1][2] += $string =~ tr{\n}{};

        # If this is not a command, and we're not at the block level, just add
        # it verbatim to the output.
        #
        # If we are at the block level, pull off any leading space.  If there
        # is still remaining text, add it plus any accumulated whitespace to a
        # new paragraph.
        #
        # The plain text alternative of the regex above cannot contain a
        # backslash and the command alternative always starts with one, so the
        # presence of a backslash tells the two cases apart.
        if (index($string, q{\\}) == -1) {
            if ($block) {
                if ($string =~ s{ \A (\s+) }{}xms) {
                    $space .= $1;
                }
                if ($paragraph ne q{} || $string ne q{}) {
                    # Starting a new paragraph, so fetch whatever is needed to
                    # close the preceding block structure.
                    if ($paragraph eq q{}) {
                        $border = $self->_border_end();
                    }
                    $paragraph .= $space . $string;
                    $space = q{};
                }
            } else {
                $output .= $string;
                $nonblock = 1;
            }
        }

        # Otherwise, we have a command.  Expand that command, setting block
        # context if we haven't seen any inline content so far.  _expand also
        # consumes the arguments of the command and returns the text that
        # remains, which replaces $text.
        else {
            my ($result, $blocktag);
            ($result, $blocktag, $text)
              = $self->_expand($command, $text, $block && $paragraph eq q{});

            # If the result requires block context, output any pending
            # paragraph and then the result.  Otherwise, if we are already at
            # block context, start a new paragraph.  Otherwise, just append
            # the result to our output.
            if ($blocktag) {
                if ($block && $paragraph ne q{}) {
                    $output .= $border . $self->_paragraph($paragraph);
                    $border = q{};
                    $paragraph = q{};
                } else {
                    $output .= $space;
                }
                $output .= $result;
            } elsif ($block) {
                if ($paragraph eq q{}) {
                    $border = $self->_border_end();
                }
                $paragraph .= $space . $result;
                $nonblock = 1;
            } else {
                $output .= $result;
                $nonblock = 1;
            }

            # Any pending whitespace has now either been used or is dropped.
            $space = q{};
        }

        # If the next bit of unparsed text starts with a newline, extract it
        # and any following whitespace now.
        if ($text =~ s{ \A \n (\s*) }{}xms) {
            my $spaces = $1;

            # Update the line number: one for the newline that was matched
            # explicitly plus any further newlines in the trailing whitespace.
            $self->{input}[-1][2] += 1 + $spaces =~ tr{\n}{};

            # Add it to our paragraph if we're accumulating one; otherwise,
            # add it to the output, but only add the newline if we saw inline
            # elements or there is remaining text.  This suppresses some
            # useless blank lines.
            if ($paragraph ne q{}) {
                $paragraph .= "\n$spaces";
            } else {
                if ($text ne q{} || $nonblock) {
                    $output .= "\n";
                }
                $output .= $spaces;
            }
        }
    }

    # If there is any remaining paragraph text, wrap it in tags and append it
    # to the output.  If we were at block level, our output is always suitable
    # for block level.  Otherwise, it's suitable for block level only if all
    # of our output was from block commands.
    if ($paragraph ne q{}) {
        $output .= $border . $self->_paragraph($paragraph);
    }
    return ($output, $block || !$nonblock);
}
## use critic

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

Used primarily for quotations or license statements embedded in regular text.

=item \bullet[TEXT]

TEXT is formatted as an item in a bullet list.  This is like C<< <li> >>
inside C<< <ul> >> in HTML, but the surrounding list tags are inferred
automatically and handled correctly when multiple C<\bullet> commands are used
in a row.

Normally, TEXT is treated like a paragraph.  If used with a formatting
instruction of C<packed>, such as:

    \bullet(packed)[First item]

then the TEXT argument will not be treated as a paragraph and will not be
surrounded in C<< <p> >>.  No block commands should be used inside this type
of C<\bullet> command.  This variation will, on most browsers, not put any
additional whitespace around the line, which will produce better formatting
for bullet lists where each item is a single line.

=item \desc[HEADING][TEXT]

An element in a description list, where each item has a tag HEADING and an
associated body text of TEXT, like C<< <dt> >> and C<< <dd> >> in HTML.  As
with C<\bullet>, the C<< <dl> >> tags are inferred automatically.

=item \div[TEXT]

Does nothing except wrap TEXT in an HTML C<< <div> >> tag.  The only purpose
of this command is to use it with a formatting instruction to generate an HTML
C<class> attribute on the C<< <div> >> tag.

=item \h1[HEADING] .. \h6[HEADING]

Level one through level six headings, just like C<< <h1> >> .. C<< <h6> >> in
HTML.  If given an C<id> formatting instruction, such as:

    \h1(#anchor)[Heading]

then not only will an id attribute be added to the C<< <h1> >> container but
the text of the heading will also be enclosed in an C<< <a name> >> container
to ensure that C<#anchor> can be used as an anchor in a link in older browsers
that don't understand C<id> attributes.  This is special handling that only
works with C<\h1> through C<\h6>, not with other commands.

=item \heading[TITLE][STYLE]

Set the page title to TITLE and the style sheet to STYLE and emit the HTML
page header.  If a C<style-url> argument was given, that base URL will be
prepended to STYLE to form the URL for the style sheet; otherwise, STYLE will
be used verbatim as a URL except with C<.css> appended.

This command must come after any C<\id> or C<\rss> commands and may come after
commands that don't produce any output (such as macro definitions or
C<\include> of files that produce no output) but otherwise must be the first
command of the file.

=item \id[ID]

Sets the Subversion, CVS, or RCS revision number and time.  ID should be the
string C<< $Z<>Id$ >>, which will be expanded by Subversion, CVS, and RCS.
This string is embedded verbatim in an HTML comment near the beginning of the
generated output, and is used to determine last modified information for the
file (used by the C<\signature> command).

For this command to behave properly, it must be given before C<\heading>.

=item \include[FILE]

Include FILE after the current paragraph.  If multiple files are included in
the same paragraph, they're included in reverse order, but this behavior may
change in later versions and should not be relied on.  It's strongly
recommended to always put the C<\include> command in its own paragraph.  Don't
put C<\heading> or C<\signature> into an included file; the results won't be
correct.

=item \number[TEXT]

TEXT is formatted as an item in a numbered list, like C<< <li> >> inside C<<
<ol> >> in HTML.  As with C<\bullet> and C<\desc>, the surrounding tags are
inferred automatically.

As with C<\bullet>, a formatting instruction of C<packed> will omit the
paragraph tags around TEXT for better formatting with a list of short items.
See the description under C<\bullet> for more information.

=item \pre[TEXT]

Insert TEXT preformatted, preserving spacing and line breaks.  This uses the
HTML C<< <pre> >> tag, and therefore is normally also shown in a fixed-width
font by the browser.

When using C<\pre> inside indented blocks or lists, some care must be taken
with indentation whitespace.  Normally, the browser indents text inside
C<\pre> relative to the enclosing block, so you should only put as much
whitespace before each line in C<\pre> as those lines should be indented
relative to the enclosing text.  However B<lynx>, unfortunately, indents
relative to the left margin, so it's difficult to use indentation that looks
correct in both B<lynx> and other browsers.

=item \quote[TEXT][AUTHOR][CITATION]

Used for quotes at the top of a web page.

The whole text will be enclosed in a C<< <blockquote> >> tag with class
C<quote> for style sheets.  TEXT may be multiple paragraphs.  Any formatting
instruction given to C<\quote> will be used as the formatting instruction for
each paragraph in TEXT (so an C<id> is normally not appropriate).

If the formatting instruction is C<broken>, line breaks in TEXT will be
honored by inserting C<< <br> >> tags at the end of each line.  Use this for
poetry or other cases where line breaks are significant.

A final paragraph will then be added with class C<attribution> if the
formatting instruction is C<broken> or C<short> and class C<long-attrib>
otherwise.  This paragraph will contain the AUTHOR, a comma, and then
CITATION.  CITATION will be omitted if empty.

=item \rss[URL][TITLE]

Indicates that this page has a corresponding RSS feed at the URL URL.

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

of this command is to use it with a formatting instruction to generate an HTML
C<class> attribute on the C<< <span> >> tag.  For example, you might write:

    \class(red)[A style sheet can make this text red.]

and then use a style sheet that changes the text color for class C<red>.

=item \entity[CODE]

An HTML entity with code CODE.  This normally becomes C<&CODE;> or C<&#CODE;>
in the generated HTML, depending on whether CODE is entirely numeric.

Use C<\entity[91]> and C<\entity[93]> for unbalanced C<[> and C<]> characters,
respectively.

Thread source is UTF-8, so this command is normally only necessary to escape
unbalanced square brackets.

=item \image[URL][TEXT]

Insert an inline image.  TEXT is the alt text for the image (which will be
displayed on non-graphical browsers).  Height and width tags are added
automatically if the URL is a relative path name and the corresponding file
exists and is supported by the Perl module Image::Size.

=item \link[URL][TEXT]

Create a link to URL with link text TEXT.  Equivalent to C<< <a href> >>.

=item \release[PACKAGE]

If the C<versions> argument was provided, replaced with the latest release
date of PACKAGE.  The date will be in the UTC time zone, not the local time
zone.

=item \size[FILE]

Replaced with the size of FILE in B, KB, MB, GB, or TB as is most appropriate,
without decimal places.  The next largest unit is used if the value is larger
than 1024.  1024 is used as the scaling factor, not 1000.

=item \version[PACKAGE]

If the C<versions> argument was provided, replaced with the latest version of
PACKAGE.

=back

=head2 Defining Variables and Macros

One of the reasons to use thread instead of HTML is the ability to define new
macros on the fly.  If there are constructs that are used more than once in
the page, you can define a macro at the top of that page and then use it
throughout the page.

A variable can be defined with the command:

    \=[VARIABLE][VALUE]

where VARIABLE is the name that will be used (can only be alphanumerics plus
underscore) and VALUE is the value that string will expand into.  Any later
occurrence of C<\=VARIABLE> in the file will be replaced with VALUE.  For
example:

    \=[FOO][some string]

will cause any later occurrences of C<\=FOO> in the file to be replaced with
the text C<some string>.  Consider using this to collect external URLs for
links at the top of a page for easy updating.

A macro can be defined with the command:

    \==[NAME][NARGS][DEFINITION]

where NAME is the name of the macro (again consisting only of alphanumerics or
underscore), NARGS is the number of arguments that it takes, and DEFINITION is
the definition of the macro.

When the macro is expanded, any occurrence of C<\1> in the definition is
replaced with the first argument, any occurrence of C<\2> with the second
argument, and so forth, and then the definition with those substitutions is
parsed as thread, as if it were written directly in the source page.

For example:

    \==[bolddesc] [2] [\desc[\bold[\1]][\2]]

defines a macro C<\bolddesc> that takes the same arguments as the regular
C<\desc> command but always wraps the first argument, the heading, in C<<
<strong> >>.

=head1 AUTHOR

Russ Allbery <rra@cpan.org>

=head1 COPYRIGHT AND LICENSE

Copyright 1999-2011, 2013, 2021-2023 Russ Allbery <rra@cpan.org>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

=head1 SEE ALSO

L<docknot(1)>, L<App::DocKnot::Spin>, L<App::DocKnot::Spin::Sitemap>,
L<App::DocKnot::Spin::Versions>

This module is part of the App-DocKnot distribution.  The current version of
DocKnot is available from CPAN, or directly from its web site at
L<https://www.eyrie.org/~eagle/software/docknot/>.

=cut

# Local Variables:
# copyright-at-end-flag: t
# End:



( run in 0.710 second using v1.01-cache-2.11-cpan-5623c5533a1 )