App-DocKnot

 view release on metacpan or  search on metacpan

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

        }xms;
        $prefix .= $self->{space};

        # Collapse multiple whitespace-only lines into a single blank line.
        $prefix =~ s{ \n\s* \n\s* \n }{\n\n}xmsg;

        # Replace the output with added whitespace and clear saved whitespace.
        $output = $prefix . $body;
        $self->{space} = q{};
    }

    # Remove and save any trailing whitespace.
    if ($output =~ s{ \n (\s+) \z }{\n}xms) {
        $self->{space} = $1;
    }

    # Send the results to the output file.
    print_fh($self->{out_fh}, $self->{out_path}, $output);
    return;
}

# Abort processing with an error message tagged with the current input
# position.
#
# The position is taken from the top entry of the input stack kept in
# $self->{input}.  If that entry has no file path, "-" is reported in its
# place.
#
# $problem - Error message to report
#
# Throws: Text exception of the form "FILE:LINE: PROBLEM\n"
sub _fatal {
    my ($self, $problem) = @_;

    # Each stack entry is [paragraphs, path, line number]; only the path and
    # line number are needed here.
    my @frame  = $self->{input}[-1]->@*;
    my $file   = $frame[1] // q{-};
    my $lineno = $frame[2];
    die "$file:$lineno: $problem\n";
}

# Emit a warning tagged with a file and line number.
#
# When no file is passed in, the position comes from the top entry of the
# input stack in $self->{input}, with "-" standing in for a missing path.
# When a file is passed in explicitly, the line number is reported as 0.
#
# $problem - Warning message to report
# $file    - Optional path where the problem was seen, otherwise the current
#            input file is used
sub _warning {
    my ($self, $problem, $file) = @_;

    # An explicitly provided file has no associated line, so default to 0 and
    # only override it when the position is read from the input stack.
    my $lineno = 0;
    if (!defined($file)) {
        my @frame = $self->{input}[-1]->@*;
        $file   = $frame[1] // q{-};
        $lineno = $frame[2];
    }
    warn "$file:$lineno: $problem\n";
    return;
}

##############################################################################
# Basic parsing
##############################################################################

# Make a string safe for inclusion in HTML by replacing &, <, and > with the
# corresponding character entities.
#
# $string - Input string
#
# Returns: The string with those three characters escaped
sub _escape {
    my ($string) = @_;

    # Ampersands have to be handled first so that the entities introduced by
    # the later substitutions are not themselves escaped.
    for ($string) {
        s{ & }{&amp;}xmsg;
        s{ < }{&lt;}xmsg;
        s{ > }{&gt;}xmsg;
    }
    return $string;
}

# Wrap text in <p> tags, taking care with the surrounding newlines.
#
# As a special case, a paragraph whose entire content is a single <span> is
# turned into a <p> carrying the attributes of that <span> instead of being
# wrapped in an additional element.
#
# $text - Text to wrap
#
# Returns: Text wrapped in <p> tags
sub _paragraph {
    my ($self, $text) = @_;

    # Drop a leading newline together with any whitespace-only lines that
    # follow it, and terminate the paragraph with a newline if it lacks one.
    $text =~ s{ \A \n (\s*\n)* }{}xms;
    $text =~ s{ ( \S [ \t]* ) \z }{$1\n}xms;

    # Recognize a paragraph that is entirely enclosed in one <span>.  A
    # paragraph with nested \class commands contains more than one <span> and
    # is deliberately not matched, since lifting the attributes in that case
    # would require more HTML parsing than is worthwhile.
    my $lone_span = qr{
        \A                      # start of paragraph
        (\s*)                   # any whitespace (1)
        <span([^>]*)>           # span tag before any text with class (2)
        (?! .* <span)           # no second span tag
        (.*)                    # text of the paragraph (3)
        </span>                 # close span tag
        (\s*)                   # any whitespace (4)
        \z                      # end of paragraph without other text
    }xms;
    if (my ($before, $attributes, $content, $after) = $text =~ $lone_span) {
        return "$before<p$attributes>$content</p>$after";
    }

    # The ordinary case: open the paragraph on a line of its own and close it
    # immediately before the trailing whitespace.
    $text = "<p>\n" . $text;
    $text =~ s{ (\n\s*) \z }{\n</p>$1}xms;
    return $text;
}

# Opens the border of a continued structure.
#
# spin, unlike HTML, does not require declaring structures like lists in
# advance of adding elements to them.  You start a bullet list by simply
# having a bullet item, and a list is started if one is not already open.
# This is the method that does that: check whether the desired structure is
# already open and, if not, open it and add it to the state stack.
#
# $border - Name of the border state to open
# $start  - The opening tag
# $end    - The closing tag
#
# Returns: Output to write to start the structure
sub _border_start {

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

    # of our output was from block commands.
    if ($paragraph ne q{}) {
        $output .= $border . $self->_paragraph($paragraph);
    }
    return ($output, $block || !$nonblock);
}
## use critic

# Convenience wrapper around _parse_context for callers that only want the
# generated output and have no use for anything else it returns.
#
# $text  - Input text to parse
# $block - True if the parse is done in a block context
#
# Returns: HTML output corresponding to $text
sub _parse {
    my ($self, $text, $block) = @_;

    # _parse_context is called in list context; only its first return value
    # is passed on.
    my @results = $self->_parse_context($text, $block);
    return $results[0];
}

# Entry point for converting a complete thread document to HTML.  The current
# working directory matters when this is called: thread may refer to files by
# relative path, and the spinning process needs to be able to reach them.
#
# $thread     - Thread to spin
# $in_path    - Input file path as a Path::Tiny object, or undef
# $out_fh     - Output file handle to which to write the HTML
# $out_path   - Output file path as a Path::Tiny object, or undef
# $input_type - Optional one-word description of input type
sub _parse_document {
    my ($self, $thread, $in_path, $out_fh, $out_path, $input_type) = @_;

    # Break the thread into paragraphs.  They are stored in reverse order so
    # that popping from the end yields them in document order.
    my @input = reverse($self->_split_paragraphs($thread));

    # Reset all per-document state on the object.
    my %initial = (
        input      => [[\@input, $in_path, 1]],
        input_type => $input_type // 'thread',
        macro      => {},
        out_fh     => $out_fh,
        out_path   => $out_path,
        rss        => [],
        space      => q{},
        state      => ['BLOCK'],
        variable   => {},
    );
    for my $key (keys(%initial)) {
        $self->{$key} = $initial{$key};
    }

    # Work through the thread one paragraph at a time.  _split_paragraphs has
    # already made sure that every paragraph holds the complete value of each
    # command argument.
    #
    # $self->{input} is a stack of inputs and $self->{input}[-1] is the one
    # currently being read.  _cmd_include pushes a new entry onto this stack
    # while a paragraph is being parsed, which is why the top of the stack is
    # looked up again for every paragraph.  An entry is removed once all of
    # its paragraphs have been consumed.
    while ($self->{input}->@*) {
        while (defined(my $paragraph = pop($self->{input}[-1][0]->@*))) {
            my $html = $self->_parse(_escape($paragraph), 1);

            # Strip leading blank lines and skip output that is nothing but
            # whitespace.
            $html =~ s{ \A (?:\s*\n)+ }{}xms;
            next if $html =~ m{ \A \s* \z }xms;
            $self->_output($html);
        }
        pop($self->{input}->@*);
    }

    # Finish by closing whatever is still open and flushing the whitespace
    # that was held back.
    print_fh($out_fh, $out_path, $self->_block_end(), $self->{space});
    return;
}

##############################################################################
# Supporting functions
##############################################################################

# Turn the formatting instruction of a command into an HTML attribute.
#
# A format beginning with "#" produces an id attribute (with a warning if the
# identifier contains whitespace); any other non-empty format produces a class
# attribute.
#
# $format - Format argument to the command
#
# Returns: String suitable for interpolating into the tag, which means it
#          starts with a space if non-empty
sub _format_attr {
    my ($self, $format) = @_;
    return q{} if !$format;

    # Remove a leading "#" if present.  Without one, the format is a class.
    my $is_id = $format =~ s{ \A \# }{}xms;
    return qq{ class="$format"} if !$is_id;

    # The id is still emitted when it contains whitespace, but the problem is
    # reported.
    if ($format =~ m{ \s }xms) {
        $self->_warning(qq(space in anchor "#$format"));
    }
    return qq{ id="$format"};
}

# Split a block of text apart at paired newlines so that it can be reparsed as
# paragraphs, but combine a paragraph with the next one if it has an
# unbalanced number of open brackets.  Used to parse the top-level structure
# of a file and by containers like \block that can contain multiple
# paragraphs.
#
# $text - Text to split
#
# Returns: List of paragraphs
sub _split_paragraphs {
    my ($self, $text) = @_;
    my @paragraphs;

    # Collapse any consecutive newlines at the start to a single newline.
    $text =~ s{ \A \n (\s*\n)+ }{\n}xms;

    # Pull paragraphs off the text one by one.
    while ($text ne q{} && $text =~ s{ \A ( .*? (?: \n\n+ | \s*\z ) )}{}xms) {
        my $para = $1;
        my $open_count = ($para =~ tr{\[}{});
        my $close_count = ($para =~ tr{\]}{});
        while ($text ne q{} && $open_count > $close_count) {

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

The base URL for style sheets.  A style sheet specified in a C<\heading>
command will be considered to be relative to this URL and this URL will be
prepended to it.  If this option is not given, the name of the style sheet
will be used verbatim as its URL, except with C<.css> appended.

=item versions

An App::DocKnot::Spin::Versions object.  This will be used as the source of
data for the C<\release> and C<\version> commands.

=back

=back

=head1 INSTANCE METHODS

=over 4

=item spin_thread(THREAD[, INPUT])

Convert the given thread to HTML, returning the result.  When run via this
API, App::DocKnot::Spin::Thread will not be able to obtain sitemap information
even if a sitemap was provided and therefore will not add inter-page links.
INPUT, if given, is the full path to the original source file, used for
relative paths and modification time information.

=item spin_thread_file([INPUT[, OUTPUT]])

Convert a single thread file to HTML.  INPUT is the path of the thread file
and OUTPUT is the path of the output file.  OUTPUT, or both INPUT and OUTPUT,
may be omitted.  If INPUT is omitted, the thread is read from standard input;
if OUTPUT is omitted, the HTML is written to standard output.

If OUTPUT is omitted, App::DocKnot::Spin::Thread will not be able to obtain
sitemap information even if a sitemap was provided and therefore will not add
inter-page links.

=item spin_thread_output(THREAD, INPUT, TYPE[, OUTPUT])

Convert the given thread to HTML, writing the result to OUTPUT.  If OUTPUT is
not given, write the results to standard output.  This is like spin_thread()
but does use sitemap information and adds inter-page links.  It should be used
when the thread input is the result of an intermediate conversion step of a
known input file.  INPUT should be the full path to the original source file,
used for relative paths and modification time information.  TYPE should be set
to a one-word description of the format of the input file and is used for the
page footer.

=back

=head1 THREAD LANGUAGE

=head2 Basic Syntax

A thread file is Unicode text with a blank line between paragraphs.

There is no need to explicitly mark paragraphs; paragraph boundaries will be
inferred from the blank line between them and the appropriate C<< <p> >> tags
will be added to the HTML output.

There is no need to escape any character except C<\> (which should be written
as C<\\>) and an unbalanced C<[> or C<]> (which should be written as
C<\entity[91]> or C<\entity[93]> respectively).  Escaping C<[> or C<]> is not
necessary if the brackets are balanced within the paragraph, and therefore is
only rarely needed.

Commands begin with C<\>.  For example, the command to insert a line break
(corresponding to the C<< <br> >> tag in HTML) is C<\break>.  If the command
takes arguments, they are enclosed in square brackets after the command.  If
there are multiple arguments, they are each enclosed in square brackets and
follow each other.  Any amount of whitespace (but nothing else) is allowed
between the command and the arguments, or between the arguments.  So, for
example, all of the following are entirely equivalent:

    \link[index.html][Main page]
    \link  [index.html]  [Main page]

    \link[index.html]
    [Main page]

    \link
    [index.html]
    [Main page]

(C<\link> is a command that takes two arguments.)

Command arguments may contain paragraphs of text, other commands, and so
forth, nested arbitrarily (although this may not make sense for all arguments
of all commands, of course).

Some commands take an additional optional formatting instruction argument.
That argument is enclosed in parentheses and placed before any other
arguments.  It specifies the C<class> attribute for that HTML tag, for use
with style sheets, or the C<id> attribute, for use with style sheets or as an
anchor.  If the argument begins with C<#>, it will be taken to be an C<id>.
Otherwise, it will be taken as a C<class>.

For example, a first-level heading is normally written as:

    \h1[Heading]

(with one argument).  Either of the following will add a class attribute of
C<header> to that HTML container that can be referred to in style sheets:

    \h1(header)[Heading]
    \h1  (header)  [Heading]

and the following would add an id attribute of C<intro> to the heading so that
it could be referred to with the anchor C<#intro>:

    \h1(#intro)[Introduction]

Note that the heading commands have special handling for C<id> attributes; see
below for more details.

=head2 Basic Format

There are two commands that are required to occur in every document.

The first is C<\heading>, which must occur before any regular page text.  It
takes two arguments: the page title (the title that shows up in the window

lib/App/DocKnot/Spin/Thread.pm  view on Meta::CPAN

If a cell should have a class attribute, use a C<\class> command around the
CELL text.  The class attribute will be "lifted" up to become an attribute of
the enclosing C<< <th> >> tag.

=item \tablerow[CELL][CELL] ...

A regular row in a table.  C<\tablerow> takes any number of CELL arguments,
wraps them all in a C<< <tr> >> table row tag, and puts each cell inside C<<
<td> >>.

If a cell should have a class attribute, use a C<\class> command around the
CELL text.  The class attribute will be "lifted" up to become an attribute of
the enclosing C<< <td> >> tag.

=back

=head2 Inline Commands

Inline commands can be used in the middle of a paragraph intermixed with other
text.  Most of them are simple analogs to their HTML counterparts.  All of the
following take a single argument (the enclosed text), an optional formatting
instruction, and map to simple HTML tags:

    \bold       <b></b>                 (usually use \strong)
    \cite       <cite></cite>
    \code       <code></code>
    \emph       <em></em>
    \italic     <i></i>                 (usually use \emph)
    \strike     <strike></strike>       (should use styles)
    \strong     <strong></strong>
    \sub        <sub></sub>
    \sup        <sup></sup>
    \under      <u></u>                 (should use styles)

Here are the other inline commands:

=over 4

=item \break

A forced line break, C<< <br> >> in HTML.

=item \class[TEXT]

Does nothing except wrap TEXT in an HTML C<< <span> >> tag.  The only purpose
of this command is to use it with a formatting instruction to generate an HTML
C<class> attribute on the C<< <span> >> tag.  For example, you might write:

    \class(red)[A style sheet can make this text red.]

and then use a style sheet that changes the text color for class C<red>.

=item \entity[CODE]

An HTML entity with code CODE.  This normally becomes C<&CODE;> or C<&#CODE;>
in the generated HTML, depending on whether CODE is entirely numeric.

Use C<\entity[91]> and C<\entity[93]> for unbalanced C<[> and C<]> characters,
respectively.

Thread source is UTF-8, so this command is normally only necessary to escape
unbalanced square brackets.

=item \image[URL][TEXT]

Insert an inline image.  TEXT is the alt text for the image (which will be
displayed on non-graphical browsers).  Height and width attributes are added
automatically if the URL is a relative path name and the corresponding file
exists and is supported by the Perl module Image::Size.

=item \link[URL][TEXT]

Create a link to URL with link text TEXT.  Equivalent to C<< <a href> >>.

=item \release[PACKAGE]

If the C<versions> argument was provided, replaced with the latest release
date of PACKAGE.  The date will be in the UTC time zone, not the local time
zone.

=item \size[FILE]

Replaced with the size of FILE in B, KB, MB, GB, or TB as is most appropriate,
without decimal places.  The next largest unit is used if the value is larger
than 1024.  1024 is used as the scaling factor, not 1000.

=item \version[PACKAGE]

If the C<versions> argument was provided, replaced with the latest version of
PACKAGE.

=back

=head2 Defining Variables and Macros

One of the reasons to use thread instead of HTML is the ability to define new
macros on the fly.  If there are constructs that are used more than once in
the page, you can define a macro at the top of that page and then use it
throughout the page.

A variable can be defined with the command:

    \=[VARIABLE][VALUE]

where VARIABLE is the name that will be used (can only be alphanumerics plus
underscore) and VALUE is the value that string will expand into.  Any later
occurrence of C<\=VARIABLE> in the file will be replaced with VALUE.  For
example:

    \=[FOO][some string]

will cause any later occurrences of C<\=FOO> in the file to be replaced with
the text C<some string>.  Consider using this to collect external URLs for
links at the top of a page for easy updating.

A macro can be defined with the command:

    \==[NAME][NARGS][DEFINITION]

where NAME is the name of the macro (again consisting only of alphanumerics or
underscore), NARGS is the number of arguments that it takes, and DEFINITION is



( run in 1.017 second using v1.01-cache-2.11-cpan-5837b0d9d2c )