view release on metacpan or search on metacpan
lib/PerlPoint/Parser.pm view on Meta::CPAN
# | | | in another line than the first table line (which is the
# | | | base of normalization);
# |10.08.2003| JSTENZEL | new helper function _semerr() to report semantic errors;
# | | JSTENZEL | new option -criticalSemanticErrors;
# |14.08.2003| JSTENZEL | input filters can access the source file by a variable
# | | | $main::_ifilterFile now;
# | | JSTENZEL | fixed an "undefined value" warning;
# |17.08.2003| JSTENZEL | bugfix: docstream "main" was ignored like any other docstream
# | | | if working in the "docstream ignore" mode;
# |10.09.2003| JSTENZEL | definition list explanations ("texts" now have an own
# | | | enveloping directive (DIRECTIVE_DPOINT_TEXT);
# |11.09.2003| JSTENZEL | LOCALTOC added to the list of standalone tags (which are
# | | | stripped of of an enveloping text paragraph if they are its
# | | | only contents);
# |05.05.2004| JSTENZEL | anchors now take the number of the page they are defined in;
# | | JSTENZEL | tag hooks now take an additional parameter: the number of
# | | | the page the tag is used on;
# | | JSTENZEL | bugfix: numerical pathes were built incorrectly: when entering
# | | | a new sublevel, the counter was not reset to 1;
# | | JSTENZEL | added anchors();
# |11.07.2004| JSTENZEL | headlines now provide a path of absolute page numbers as well
# | | | and a variable snapshot;
# | | JSTENZEL | a reset variable is removed now (as a side effect, it is no
# | | | longer possible to build variables containing spaces only);
# |24.07.2004| JSTENZEL | added -skipcomments;
# |10.09.2004| JSTENZEL | bugfix: words looking like symbolic variables (but not defined
# | | | as such) were restored without their braces ("{}");
# |27.12.2004| JSTENZEL | bugfix: skipped headline levels were filled with previous
# | | | headline strings of those levels;
# |28.12.2004| JSTENZEL | text paragraphs now have their own special character, but
# | | | optional: a dot;
# |24.02.2005| JSTENZEL | acceleration: the lexer built some data very often;
# |27.02.2005| JSTENZEL | bugfix: backslashes before variables were handled incorrectly,
# | | | now variables are no longer "boosted" but handled like macros
# | | | - which has a performance drawback, unfortunately ...;
# |16.05.2005| JSTENZEL | backslashes in tag options are no longer ignored but can be
# | | | used to guard characters;
# |23.08.2005| JSTENZEL | first chapter is checked for a headline now;
# 0.39 |01.02.2003| JSTENZEL | passing directive id chain of the current chapter
# | | | headline to tag hook functions now;
# |07.03.2003| JSTENZEL | several variable patterns were used explicitly instead
# | | | if the precompiled ones from %lexerPatterns;
# | | JSTENZEL | bugfix: guarded variables were expanded;
# | | JSTENZEL | now it is documented that list indentation is reset
# | | | automatically by a subsequent non list paragraph;
# |26.04.2003| JSTENZEL | added "no utf8" to avoid errors under perl 5.8;
# |01.05.2003| JSTENZEL | adding *all* composite anchors for headlines, not only
# | | | for the full path;
# 0.38 |07.06.2002| JSTENZEL | restoring doubled backslashes in filtered paragraphs,
# | | | restoring ">" characters as if they were guarded;
# |04.07.2002| JSTENZEL | simplified several array field access codes;
# | | JSTENZEL | bugfix: empty headlines caused an infinite loop
# | | | when trailing whitespaces should be removed;
# | | JSTENZEL | bugfix: empty headlines caused a failure when headline
# | | | anchors should be stored, skipping them now;
# |20.08.2002| JSTENZEL | improved tag streaming: stream now contains a body hint;
# | | JSTENZEL | bugfix: paragraph filters restored tag bodies even if
# | | | there was no body;
# | | JSTENZEL | old caches need to be updated - adapted compatibility hint;
# |27.08.2002| JSTENZEL | started to use precompiled lexer patterns;
# |31.08.2002| JSTENZEL | \INCLUDE, \EMBED and \TABLE now support the _cnd_ option,
# | | | like tags defined externally;
# |04.12.2002| JSTENZEL | bugfix in pfilter retranslation: backslash reinsertion was
# | | | not performed multiply;
# | | JSTENZEL | pfilter retranslation: backslash reinsertion now suppressed
# | | | in verbatim blocks;
# |01.01.2003| JSTENZEL | added input filter support to \EMBED, via option "ifilter";
# |02.01.2003| JSTENZEL | added input filter support to \INCLUDE, same interface;
# 0.37 |up to | JSTENZEL | flagSet() now takes a list of flag names;
# |14.04.2002| JSTENZEL | names of included files are resolved to avoid trouble
# | | | with links (and to avoid error messages);
# | | JSTENZEL | \INCLUDE searches pathes specified in environment
# | | | variable PERLPOINTLIB (like perl, shells, linkers etc.);
# | | JSTENZEL | if tags with finish hooks are used, a paragraph will
# | | | not be cached because it becomes potentially dynamic;
# | | JSTENZEL | anchors defined by a cached paragraph are cached now
# | | | as well - and restored after a cache hit (updated cache
# | | | format);
# | | JSTENZEL | \INCLUDE additionally searches pathes specified in an
# | | | array passed to method run() via new parameter "libpath";
# | | JSTENZEL | Filtered paragraphs that need a parser lookahead into
# | | | the next paragraph to be completely detected could cause
# | | | trouble because the reinserted result was grammatically
# | | | placed before the already parsed start token of the
# | | | subsequent paragraph. Fixed by introducing a virtual,
# | | | empty "Word" token supplied by the lexer in such cases
# | | | (look for $flags{virtualParagraphStart} and
# | | | $lexerFlags{cbell}). (By the way, this outdated an
# | | | earlier solution using a virtual text paragraph startup
# | | | and a delayed token - this former solution caused trouble
# | | | when the paragraph following the filtered one was not
# | | | a pure text (so even filtered texts did not work)).
# | | JSTENZEL | Filtered paragraphs are no longer cached - the filter
# | | | makes them dynamical. Note that for combined paragraphs
# | | | like compound blocks and lists this is true for the first
# | | | part only, because subsequent parts can be cached in
# | | | their original form (the filter will be applied when the
# | | | parts will have been combined).
# | | JSTENZEL | paragraph filters: added retranslation of headlines and
# | | | verbatim blocks;
# | | JSTENZEL | passing original paragraph type to filters by new variable
# | | | $main::_pfilterType;
# | | JSTENZEL | generalized paragraph type constant to string translation;
# | | JSTENZEL | lexer delays to flag the end of the document source
# | | | when a paragraph filter still needs to be applied
# | | | (otherwise, the parser would not request more tokens
# | | | because from his point of view the source was already
# | | | parsed completely, so the filtering result (and the
# | | | original block) would disappear from the result - it would
# | | | not be reparsed);
# | | JSTENZEL | empty text paragraphs are no longer made part of the stream;
# | | JSTENZEL | blocks were streamed with a final newline, improved;
# | | JSTENZEL | added headline shortcuts;
# | | JSTENZEL | added document stream entry points;
# |15.04.2002| JSTENZEL | added chapter docstream hints to headline stream data;
# 0.36 |10.08.2001| JSTENZEL | the stream became a more complex data structure to
# | | | allow converter authors to act according to a documents
# | | | structure (getting headlines without having to process
# | | | all tokens, moving between chapters) - basically, it
# | | | *remained* a stream (with additional structure info);
# |29.09.2001| JSTENZEL | adapted stream initialization to intermediately
# | | | modified design;
# | | JSTENZEL | bugfixes in _normalizeTableRows(): standalone single "0"
# | | | in table cells was removed;
# |07.10.2001| JSTENZEL | improved error messages provide an error pointer;
# |11.10.2001| JSTENZEL | removed unused "use fields" directive;
# | | JSTENZEL | storing headline anchors now, depending on new
lib/PerlPoint/Parser.pm view on Meta::CPAN
Note: Although table paragraphs cannot be nested, tables declared by tag possibly
I<can> (and might be embedded into table paragraphs as well). To help converter authors
handling nested tables, the opening table tag provides an internal option "__nestingLevel__".
=item Conditions
start with a "?" character. If active contents is enabled, the paragraph text
is evaluated as Perl code. The (boolean) evaluation result then determines if
subsequent PerlPoint is read and parsed. If the result is false, all subsequent
paragraphs until the next condition are I<skipped>.
Note that base data is made available by a global (package) hash reference
B<$PerlPoint>. See I<run()> for details about how to set up these data.
Conditions can be used to maintain various language versions of a presentation
in one source file:
? $PerlPoint->{targetLanguage} eq 'German'
Or you could enable parts of your document by date:
? time>$dateOfTalk
or by a special setting:
? flagSet('setting')
Please note that the condition code shares its variables with embedded and included
code.
To make usage easier and to improve readability, condition code is evaluated with
disabled warnings (the language variable in the example above may not even been set).
Converter authors might want to provide predefined variables such as "$language"
in the example.
Note: If a document uses I<document streams>, be careful in intermixing docstream
entry points and conditions. A condition placed in a skipped document stream will
not e evaluated. A document stream entry point placed in a source area hidden by
a false condition will not be reconized.
=item Variable assignment paragraphs
Variables can be used in the text and will be automatically replaced by their string
values (if declared).
The next paragraph sets a variable.
$var=var
This variable is called $var.
All variables are made available to embedded and included Perl code as well as to
conditions and can be accessed there as package variables of "main::" (or whatever
package name the Safe object is set up to). Because a
variable is already replaced by the parser if possible, you have to use the fully
qualified name or to guard the variables "$" prefix character to do so:
\EMBED{lang=perl}join(' ', $main::var, \$var)\END_EMBED
Variable modifications by embedded or included Perl I<do not> affect the variables
visible to the parser. (This is true for conditions as well.) This means that
$var=10
\EMBED{lang=perl}$main::var*=2;\END_EMBED
causes I<$var> to be different on parser and code side - the parser will still use a
value of 10, while embedded code works on with a value of 20.
=item Macro or alias definitions
Sometimes certain text parts are used more than once. It would be a relieve
to have a shortcut instead of having to insert them again and again. The same
is true for tag combinations a user may prefer to use. That's what I<aliases>
(or "macros") are designed for. They allow a presentation author to declare
his own shortcuts and to use them like a tag. The parser will resolve such aliases,
replace them by the defined replacement text and work on with this replacement.
An alias declaration starts with a "+" character followed I<immediately> by the
alias I<name> (without backslash prefix), optionally followed I<immediately>
by an option default list in "{}", followed I<immediately> by a colon.
(No additional spaces here.)
I<All text after this colon up to the paragraph closing empty line is stored as the replacement text.>
So, whereever you will use the new macro, the parser will replace it by this
text and I<reparse> the result. This means that your macro text can contain
any valid constructions like tags or other macros.
The replacement text may contain strings embedded into doubled underscores like
C<__this__>. This is a special syntax to mark that the macro takes parameters
of these names (e.g. C<this>). If a macro is used and these parameters are set,
their values will replace the mentioned placeholders. The special placeholder
"__body__" is used to mark where the macro I<body> is to place.
If a macro is used and defined options are I<unset>, but there are defaults
for them in the optional default list, these defaults will be used for the
respective options.
Here are a few examples:
+RED:\FONT{color=red}<__body__>
+F:\FONT{color=__c__}<__body__>
+COLORED{c=blue}:\FONT{color=__c__}<__body__>
+IB:\B<\I<__body__>>
This \IB<text> is \RED<colored>.
Defaults: first, text in \COLORED{c=red}<Red>,
now text in \COLORED<Blue>.
+TEXT:Macros can be used to abbreviate longer
texts as well as other tags
or tag combinations.
+HTML:\EMBED{lang=html}
Tags can be \RED<\I<nested>> into macros.
And \I<\F{c=blue}<vice versa>>.
\IB<\RED<This>> is formatted by nested macros.
\HTML This is <i>embedded HTML</i>\END_EMBED.
Please note: \TEXT
I<If no parameter is defined in the macro definition, options will not be recognized.>
The same is true for the body part.
I<Unless C<__body__> is used in the macro definition, macro bodies will not be recognized.>
This means that with the definition
+OPTIONLESS:\B<__body__>
the construction
\OPTIONLESS{something=this}<more>
is evaluated as a usage of C<\OPTIONLESS> without body, followed by the I<string>
C<{something=here}>. Likewise, the definition
+BODYLESS:found __something__
causes
\BODYLESS{something=this}<more>
to be recognized as a usage of C<\BODYLESS> with option C<something>, followed
by the I<string> C<<more>>. So this will be resolved as C<found this>. Finally,
+JUSTTHENAME:Text phrase.
enforces these constructions
... \JUSTTHENAME, ...
... \JUSTTHENAME{name=Name}, ...
... \JUSTTHENAME<text>, ...
... \JUSTTHENAME{name=Name}<text> ...
to be translated into
... Text phrase. ...
... Text phrase.{name=Name} ...
... Text phrase.<text>, ...
... Text phrase.{name=Name}<text> ...
The principle behind all this is to make macro usage I<easier> and intuative:
why think of options or a body or of special characters possibly treated as
option/body part openers unless the macro makes use of an option or body?
An I<empty> macro text I<undefines> the macro (if it was already known).
// undeclare the IB alias
+IB:
An alias can be used like a tag.
Aliases named like a tag I<overwrite> the tag (as long as they are defined).
=item Document stream entry points
A document stream is a "document in document" and best explained by example.
lib/PerlPoint/Parser.pm view on Meta::CPAN
Please note that in the last example C<import=1> will not work, as the
source file has no extension that indicates its format is POD.
If C<ifilter> is used together with C<import>, C<import> is ignored.
A PerlPoint file can be included wherever a tag is allowed, but sometimes
it has to be arranged slightly: if you place the inclusion directive at
the beginning of a new paragraph I<and> your included PerlPoint starts by
a paragraph of another type than text, you should begin the included file
by an empty line to let the parser detect the correct paragraph type. Here
is an example: if the inclusion directive is placed like
// include PerlPoint
\INCLUDE{type=pp file="file.pp"}
and file.pp immediately starts with a verbatim block like
<<VERBATIM
verbatim
VERBATIM
, I<the inclusion directive already opens a new paragraph> which is detected to
be "text" (because there is no special startup character). Now in the included
file, from the parsers point of view the included PerlPoint is simply a
continuation of this text, because a paragraph ends with an empty line. This
trouble can be avoided by beginning the included file by an empty line,
so that its first paragraph can be detected correctly.
The second special case is a file type of "Perl". If active contents is enabled,
included Perl code is read into memory and evaluated like I<embedded> Perl. The
results are made part of the input stream to be parsed.
// execute a perl script and include the results
\INCLUDE{type=perl file="disk-usage.pl"}
As another option, files may be declared to be of type "example" or "parsedexample".
This makes the file placed into the source as a verbatim block (with "example"), or
a standard block (with "parsedexample"), respectively, without need to copy its contents
into the source.
// include an external script as an example
\INCLUDE{type=example file="script.csh"}
All lines of the example file are included as they are but can be indented on request.
To do so, just set the special option "indent" to a positive numerical value equal to
the number of spaces to be inserted before each line.
// external example source, indented by 3 spaces
\INCLUDE{type=example file="script.csh" indent=3}
Including external scripts this way can accelerate PerlPoint authoring significantly,
especially if the included files are still subject to changes.
It is possible to filter the file types you wish to include (with exception
of "pp" and "example"), see below for details. I<In any case>, the mentioned file
has to exist.
=item \EMBED and \END_EMBED
Target format code does not necessarily need to be imported - it can be
directly I<embedded> as well. This means that one can write target language
code within the input stream using I<\EMBED>:
\EMBED{lang=HTML}
This is <i><b>embedded</b> HTML</i>.
The parser detects <i>no</i> PerlPoint
tag here, except of <b>END_EMBED</b>.
\END_EMBED
Because this is handled by I<tags>, not by paragraphs, it can be placed
directly in a text like this:
These \EMBED{lang=HTML}<i>italics</i>\END_EMBED
are formatted by HTML code.
Please note that the EMBED tag does not accept a tag body (to avoid
ambiguities).
Both tag and embedded text are made part of the intermediate stream.
It is the backends task to deal with it. The only exception of this rule
is the embedding of I<Perl> code, which is evaluated by the parser.
The reply of this code is made part of the input stream and parsed as
usual.
PerlPoint authors can declare an I<input filter> to preprocess the
embedded text. This is done via option I<ifilter>:
\EMBED{lang=pp ifilter="pod2pp()"}
=head1 POD formatted part
This part was written in POD.
\END_EMBED
An input filter is a snippet of user defined Perl code, taking the
embedded text via C<@main::_ifilterText> and the target language via
C<$main::_ifilterType>. The original filename can be accessed via
C<$main::_ifilterType> (but please note that this is the source with
the \EMBED tag). It should supply its result as an array of
strings which will then be processed as usual.
Input filters are Active Contents. If Active Contents is disabled,
embedded parts using input filters will be ignored I<completely>.
It is possible to filter the languages you wish to embed (with exception
of "PP"), see below for details.
=item \TABLE and \END_TABLE
It was mentioned above that tables can be built by table paragraphs.
Well, there is a tag variant of this:
\TABLE{bg=blue separator="|" border=2}
\B<column 1> | \B<column 2> | \B<column 3>
aaaa | bbbb | cccc
uuuu | vvvv | wwww
\END_TABLE
This is sligthly more powerfull than the paragraph syntax: you can set
up several table features like the border width yourself, and you can
format the headlines as you like.
As in all tables, leading and trailing whitespaces of a cell are
automatically removed, so you can use as many of them as you want to
improve the readability of your source.
The default row separator (as in the example above) is a carriage return,
so that each table line can be written as a separate source line. However,
PerlPoint allows you to specify another string to separate rows by option
C<rowseparator>. This allows to specify a table I<inlined> into a paragraph.
\TABLE{bg=blue separator="|" border=2 rowseparator="+++"}
\B<column 1> | \B<column 2> | \B<column 3> +++ aaaa
| bbbb | cccc +++ uuuu | vvvv| wwww \END_TABLE
This is exactly the same table as above.
If parser option I<nestedTables> is set to a true value calling I<run()>,
it is possible to I<nest> tables. To help converter authors handling this,
the opening table tag provides an internal option "__nestingLevel__".
Tables built by tag are normalized the same way as table paragraphs are.
=back
=head2 What about special formatting?
Earlier versions of B<pp2html> supported special format hints like the HTML
expression ">" for the ">" character, or "ü" for "ü". B<PerlPoint::Parser>
does I<not> support this directly because such hints are specific to the
I<output format> - if someone wants to translate into TeX, it might be curious
for him to use HTML syntax in his ASCII text. Further more, such hints can be
handled I<completely> by a backend which finds them unchanged in the produced
output stream.
The same is true for special headers and trailers. It is a I<backend> task to
add them if necessary. The parser does handle the I<input> only.
lib/PerlPoint/Parser.pm view on Meta::CPAN
=head1 METHODS
=head2 new()
The constructor builds and prepares a new parser object.
B<Parameters:>
=over 4
=item The class name.
=back
B<Return value:>
The new object in case of success.
B<Example:>
my ($parser)=new PerlPoint::Parser;
=cut
# = CODE SECTION ========================================================================
# startup actions
BEGIN
{
# declare startup helper function
sub _startupGenerateConstants
{
# init counter
my $c=0;
# and generate constants
foreach my $constant (@_)
{eval "use constant $constant => $c"; $c++;}
}
# declare internal constants: action timeout types (used as array indices, sort alphabetically!)
_startupGenerateConstants(
'LEXER_TOKEN', # reply symbols token;
'LEXER_FATAL', # bug: unexpected symbol;
'LEXER_IGNORE', # ignore this symbol;
'LEXER_EMPTYLINE', # reply the token "Empty_line";
'LEXER_SPACE', # reply the token "Space" and a simple whitespace;
);
# state constants
_startupGenerateConstants(
'STATE_DEFAULT', # default;
'STATE_DEFAULT_TAGMODE', # default in tag mode;
'STATE_BLOCK', # block;
'STATE_COMMENT', # comment;
'STATE_CONTROL', # control paragraph (of a single character);
'STATE_DPOINT', # definition list point;
'STATE_DPOINT_ITEM', # definition list point item (defined stuff);
'STATE_EMBEDDING', # embedded things (HTML, Perl, ...);
'STATE_PFILTER', # paragraph filter installation;
'STATE_PFILTERED', # "default" state after a pfilter installation;
'STATE_CONDITION', # condition;
'STATE_HEADLINE_LEVEL', # headline level setting;
'STATE_HEADLINE', # headline;
'STATE_OPOINT', # ordered list point;
'STATE_TEXT', # text;
'STATE_UPOINT', # unordered list point;
'STATE_VERBATIM', # verbatim block;
'STATE_TABLE', # table *paragraph*;
'STATE_DEFINITION', # macro definition;
);
# declare internal constants: list shifters
_startupGenerateConstants(
'LIST_SHIFT_RIGHT', # shift right;
'LIST_SHIFT_LEFT', # shift left;
);
# release memory
undef &_startupGenerateConstants;
}
# requires modern perl
require 5.00503;
# declare module version
$PerlPoint::Parser::VERSION=0.451;
$PerlPoint::Parser::VERSION=$PerlPoint::Parser::VERSION; # to suppress a warning of exclusive usage only;
# pragmata
use strict;
# load modules
use Carp;
# use Memoize;
use IO::File;
use File::Basename;
use File::Spec::Functions;
use File::Temp qw(tempfile);
use PerlPoint::Anchors 0.03;
use PerlPoint::Backend 0.10;
use Cwd qw(:DEFAULT abs_path);
use Digest::SHA1 qw(sha1_base64);
use Storable qw(:DEFAULT dclone nfreeze);
use PerlPoint::Constants 0.19 qw(:DEFAULT :parsing :stream :tags);
# memoizations
# startup declarations
my (
%data, # the collected declaration data;
%lineNrs, # the lexers line number hash, input handle specific;
%specials, # special character control (may be active or not);
%lexerFlags, # lexer state flags;
%lexerFlagsOfPreviousState, # buffered lexer state flags of previous state;
%statistics, # statistics data;
%variables, # user managed variables;
%flags, # various flags;
%macros, # macros / aliases;
%openedSourcefiles, # a hash of all source files already opened (to enable smart inclusion);
%paragraphTypeStrings, # paragraph type to string translation table;
@nestedSourcefiles, # a list of current source file nesting (to avoid circular inclusions);
@specialStack, # special state stack for temporary activations (to restore original states);
@stateStack, # state stack (mostly intended for non paragraph states like STATE_EMBEDDED);
@tableSeparatorStack, # the first element is the column separator string within a table, empty otherwise;
@inputStack, # a stack of additional input lines and dynamically inserted parts;
@inHandles, # a stack of input handles (to manage nested sources);
@olistLevels, # a hint storing the last recent ordered list level number of a paragraph (hyrarchically);
@inLine, # current *real* input line (the unexpanded line read from a source file);
@previousStackLines, # buffer of the last lines gotten from input stack;
@libraryPath, # a collection of pathes to find files for \INCLUDE in;
@headlineIds, # the hierarchical values of $directiveCounter pointing to the current chapter headline;
$anchors, # anchor collector object;
$safeObject, # an object of class Safe to evaluate Perl code embedded into PerlPoint;
$sourceFile, # the source file currently read;
$tagsRef, # reference to a hash of valid tag openers (strings without the "<");
$resultStreamRef, # reference to a data structure to put generated stream data in;
$inHandle, # the data input stream (to parse);
$parserState, # the current parser state;
$readCompletely, # the input file is read completely;
$_semerr, # semantic error counter;
$tableColumns, # counter of completed table columns;
$checksums, # paragraph checksums (and associated stream parts);
$macroChecksum, # the current macro checksum;
$varChecksum, # the current user variables checksum;
$pendingTags, # list of tags to be finished after parsing (collected using a structure);
$directiveCounter, # directive counter (just to mark stream directive pairs uniquely);
$retranslator, # a backend object used to restore paragraph sources to be filtered;
$retranslationBuffer, # buffer used in retranslation (needs to b global to avoid closure effects with lexicals in translator routines);
);
# ----- Startup code begins here. -----
# prepare main input handle (obsolete when all people will use perl 5.6)
$inHandle=new IO::File;
# set developer data
my ($developerName, $developer)=('J. Stenzel', 'perl@jochen-stenzel.de');
# init flag
$readCompletely=0;
# prepare a common pattern
my $patternWUmlauts=qr/[\wäöüÄÖÜß]+/;
# prepare lexer patterns
my $patternNlbBackslash=qr/(?<!\\)/;
my %lexerPatterns=(
tag => qr/$patternNlbBackslash\\([A-Z_0-9]+)/,
space => qr/(\s+)/,
pfilterDelimiter => qr/$patternNlbBackslash((\|){1,2})/,
table => qr/$patternNlbBackslash\\(TABLE)/,
endTable => qr/$patternNlbBackslash\\(END_TABLE)/,
embed => qr/$patternNlbBackslash\\(EMBED)/,
endEmbed => qr/$patternNlbBackslash\\(END_EMBED)/,
include => qr/$patternNlbBackslash\\(INCLUDE)/,
nonWhitespace => qr/$patternNlbBackslash(\S)/,
colon => qr/$patternNlbBackslash(:)/,
namedVarKernel => qr/\$($patternWUmlauts)/,
symVarKernel => qr/\$({($patternWUmlauts)})/,
);
@lexerPatterns{qw(
namedVar
symVar
)
}=(
qr/$patternNlbBackslash$lexerPatterns{namedVarKernel}/,
qr/$patternNlbBackslash$lexerPatterns{symVarKernel}/,
);
# declare paragraphs which are embedded
my %embeddedParagraphs;
@embeddedParagraphs{
DIRECTIVE_UPOINT,
DIRECTIVE_OPOINT,
}=();
# declare token descriptions (to be used in error messages)
my %tokenDescriptions=(
EOL => 'a carriage return',
Embed => 'embedded code',
Embedded => 'an \END_EMBED tag',
Empty_line => 'an empty line',
Heredoc_close => 'a string closing the "here document"',
Heredoc_open => 'a "here document" opener',
Ils => 'a indentation',
Include => 'an included part',
Named_variable => 'a named variable',
Space => 'a whitespace',
StreamedPart => undef,
Symbolic_variable => 'a symbolic variable',
Table => 'a table',
Table_separator => 'a table column separator',
Tabled => 'an \END_TABLE tag',
Tag_name => 'a tag name',
Word => 'a word',
NoToken => 'an internal dummy token that is finally ignored',
);
sub new {
my($class)=shift;
ref($class)
and $class=ref($class);
my($self)=$class->SUPER::new( yyversion => '1.05',
yystates =>
[
{#State 0
ACTIONS => {
'Paragraph_cache_hit' => 11,
"||" => 4,
'Empty_line' => 3,
"+" => 5,
"?" => 2
},
DEFAULT => -3,
GOTOS => {
'non_filterable_paragraph' => 1,
'document' => 6,
'restored_paragraph' => 7,
'alias_definition' => 8,
'built_paragraph' => 9,
'optional_paragraph_filter' => 10,
'condition' => 12,
'paragraph' => 13
}
},
{#State 1
DEFAULT => -12
},
{#State 2
DEFAULT => -33,
GOTOS => {
'@4-1' => 14
}
},
{#State 3
DEFAULT => -22
},
{#State 4
lib/PerlPoint/Parser.pm view on Meta::CPAN
return $_[3] unless defined $result;
# make the result part of the input stream, if any
_stackInput($_[0], @$result) if $result;
# reset the "end of input reached" flag if necessary
$readCompletely=0 if $readCompletely;
# supply nothing here, the result must be reparsed first
['', $_[3][2]];
}
else
{
# filters cannot be run, inform user
warn "[Warn] $sourceFile, line $_[1][1]: Active Content is disabled, paragraph cannot be filtered.\n" unless $flags{display} & DISPLAY_NOWARN;
# supply the unmodified paragraph
$_[3];
}
}
else
{
# no filter: provide paragraph data
$_[3];
}
}
],
[#Rule 12
'built_paragraph', 1, undef
],
[#Rule 13
'original_paragraph', 1, undef
],
[#Rule 14
'original_paragraph', 1,
sub
#line 1932 "ppParser.yp"
{
# remove leading dummy tokens which might have been produced by "standalone macros"
splice(@{$_[1][0]}, 1, 1) while @{$_[1][0]}>1 and !ref($_[1][0][1]) and $_[1][0][1] eq DUMMY_TOKEN;
# check if this paragraph consists of exactly one table only
# or exactly one tag which is allowed to exists standalone,
# or exactly one embedded region
if (
(
# starting with a table tag or standalone tag?
@{$_[1][0]}>1
and ref($_[1][0][1]) eq 'ARRAY'
and $_[1][0][1][STREAM_DIR_TYPE]==DIRECTIVE_TAG
and (
$_[1][0][1][STREAM_DIR_DATA]=~/^(TABLE)$/
or (
$_[1][0][1][STREAM_DIR_DATA]=~/^(\w+)$/
and (
(
exists $tagsRef->{$1}
and exists $tagsRef->{$1}{standalone}
and $tagsRef->{$1}{standalone}
)
or $1 eq 'EMBED'
)
)
)
# ending with the same tag?
and ref($_[1][0][-2]) eq 'ARRAY'
and $_[1][0][-2][STREAM_DIR_TYPE]==DIRECTIVE_TAG
and $_[1][0][-2][STREAM_DIR_DATA] eq $1
# both building the same tag?
and $_[1][0][-2][STREAM_DIR_DATA+1] eq $_[1][0][1][STREAM_DIR_DATA+1]
)
)
{
# remove the enclosing paragraph stuff - just return the contents (table / tag)
shift(@{$_[1][0]}); # text paragraph opener
pop(@{$_[1][0]}); # text paragraph trailer
}
# pass (original or modified) data
$_[1];
}
],
[#Rule 15
'original_paragraph', 1, undef
],
[#Rule 16
'original_paragraph', 1, undef
],
[#Rule 17
'original_paragraph', 1, undef
],
[#Rule 18
'original_paragraph', 1, undef
],
[#Rule 19
'original_paragraph', 1, undef
],
[#Rule 20
'original_paragraph', 1, undef
],
[#Rule 21
'original_paragraph', 1, undef
],
[#Rule 22
'non_filterable_paragraph', 1, undef
],
[#Rule 23
'non_filterable_paragraph', 1, undef
],
[#Rule 24
'non_filterable_paragraph', 1, undef
],
[#Rule 25
'restored_paragraph', 1, undef
],
[#Rule 26
'@3-1', 0,
sub
#line 2003 "ppParser.yp"
lib/PerlPoint/Parser.pm view on Meta::CPAN
warn "[Trace] $sourceFile, line $_[7][1]: Table paragraph completed.\n" if $flags{trace} & TRACE_PARAGRAPHS;
# reset column separator memory, mark table completed
shift(@tableSeparatorStack);
$tableColumns=0;
# build parameter hash (contains level information, which is always 1,
# and various retranslation hints)
my %pars=(
__nestingLevel__ => 1,
__paragraph__ => 1,
separator => join('', @{$_[3][0]}),
);
# If we are here and found anything in the table, a final row was
# closed and a new one opened at the end of the last table line.
# Because the table is completed now, the final opener tags can
# be removed. This is done *here* and by pop() for acceleration.
if (@{$_[6][0]}>4)
{
# delete final opener directives made by the final carriage return
splice(@{$_[6][0]}, -2, 2);
# normalize table rows and autoformat headline fields
($pars{__titleColumns__}, $pars{__maxColumns__})=_normalizeTableRows($_[6][0], 1);
# warn user in case of potential row width conflicts
warn qq([Warn] $sourceFile, line $_[1][1]: The maximum cell number per row ($pars{__maxColumns__}) was not detected in the first row (which has $pars{__titleColumns__} columns).\n) if $pars{__titleColumns__}<$pars{__maxColumns_...
# reply data in a "tag envelope" (for backends)
my %hints=(nr=>++$directiveCounter);
[
[
# opener directives (note that first row and column are already opened by the initial carriage return stream)
[\%hints, DIRECTIVE_TAG, DIRECTIVE_START, 'TABLE', \%pars],
[{}, DIRECTIVE_TAG, DIRECTIVE_START, 'TABLE_ROW'],
[{}, DIRECTIVE_TAG, DIRECTIVE_START, 'TABLE_HL'],
# the list of enclosed literals reduced by the final two, if any
@{$_[6][0]} ? @{$_[6][0]} : (),
# final directive
[\%hints, DIRECTIVE_TAG, DIRECTIVE_COMPLETE, 'TABLE', \%pars]
],
$_[7][1]
];
}
else
{
# empty table - reply nothing real
[[()], $_[7][1]];
}
}
],
[#Rule 147
'@28-1', 0,
sub
#line 3626 "ppParser.yp"
{
# switch to embedding mode saving the former state (including *all* special settings)
push(@stateStack, $parserState);
push(@specialStack, [%specials]);
_stateManager(STATE_EMBEDDING);
# trace, if necessary
warn "[Trace] $sourceFile, line $_[1][1]: Embedding starts.\n" if $flags{trace} & TRACE_PARAGRAPHS;
# Disable storage of a checksum. (Dynamic parts may change or have changed.
# Static parts are static of course, but the filter settings may vary.)
$flags{checksummed}=0;
# temporarily activate specials "{" and "}"
push(@specialStack, @specials{('{', '}')});
@specials{('{', '}')}=(1, 1);
# deactivate boost
$flags{noboost}=1;
}
],
[#Rule 148
'@29-3', 0,
sub
#line 3647 "ppParser.yp"
{
# reactivate boost
$flags{noboost}=0;
# restore special state of "{" and "}"
@specials{('{', '}')}=splice(@specialStack, -2, 2);
}
],
[#Rule 149
'embedded', 6,
sub
#line 3655 "ppParser.yp"
{
# restore former parser state (including *all* special settings)
_stateManager(pop(@stateStack));
%specials=@{pop(@specialStack)};
# build parameter hash, if necessary
my %pars;
if (@{$_[3][0]})
{
# the list already consists of key/value pairs
%pars=@{$_[3][0]}
}
# set default language, if necessary
$pars{lang}='pp' unless exists $pars{lang};
# Tag condition set?
if (exists $pars{_cnd_})
{
# ok, if the condition was true or could not be evaluated,
# stop processing of this tag (there is no body, so return an empty stream)
unless (_evalTagCondition($pars{_cnd_}, $sourceFile, $_[6][1]))
{return([[()], $_[6][1]]);}
else
{
# strip off this special option before the tag or macro is furtherly processed
delete $pars{_cnd_};
}
lib/PerlPoint/Parser.pm view on Meta::CPAN
[[()], $_[6][1]];
}
elsif (lc($pars{lang}) eq 'pp')
{
# embedded PerlPoint - pass it back to the parser (by stack)
_stackInput($_[0], split(/(\n)/, join('', @{$_[5][0]})));
# reset the "end of input reached" flag if necessary
$readCompletely=0 if $readCompletely;
# we have to supply something, but it should be nothing
[[()], $_[6][1]];
}
elsif (lc($pars{lang}) eq 'perl')
{
# This is embedded Perl code, anything passed really?
# And does the caller want to evaluate the code?
if (@{$_[5][0]} and $safeObject)
{
# update active contents base data, if necessary
if ($flags{activeBaseData})
{
no strict 'refs';
${join('::', ref($safeObject) ? $safeObject->root : 'main', 'PerlPoint')}=dclone($flags{activeBaseData});
}
# make the code a string and evaluate it
my $perl=join('', @{$_[5][0]});
warn "[Trace] $sourceFile, line $_[6][1]: Evaluating this code:\n\n$perl\n\n\n" if $flags{trace} & TRACE_ACTIVE;
# ignore empty code
if ($perl=~/\S/)
{
# well, there is something, evaluate it
my $result=ref($safeObject) ? $safeObject->reval($perl) : eval(join(' ', '{package main; no strict;', $perl, '}'));
# check result
if ($@)
{_semerr($_[0], "$sourceFile, line $_[6][1]: embedded Perl code could not be evaluated: $@.");}
else
{
# success - make the result part of the input stream, if any
_stackInput($_[0], split(/(\n)/, $result)) if defined $result;
}
# reset the "end of input reached" flag if necessary
$readCompletely=0 if $readCompletely;
}
}
# we have to supply something, but it should be nothing
[[()], $_[6][1]];
}
else
{
# reply data in a "tag envelope" (for backends)
my %hints=(nr=>++$directiveCounter);
[
[
# opener directive
[\%hints, DIRECTIVE_TAG, DIRECTIVE_START, 'EMBED', \%pars],
# the list of enclosed literals, if any
@{$_[5][0]} ? @{$_[5][0]} : (),
# final directive
[\%hints, DIRECTIVE_TAG, DIRECTIVE_COMPLETE, 'EMBED', \%pars]
],
$_[6][1]
];
}
}
],
[#Rule 150
'@30-1', 0,
sub
#line 3830 "ppParser.yp"
{
# trace, if necessary
warn "[Trace] $sourceFile, line $_[1][1]: Inclusion starts.\n" if $flags{trace} & TRACE_PARAGRAPHS;
# Disable storage of a checksum. (Files may change or have changed. Later on,
# we could try to keep a files modification date unless it is a nested PerlPoint
# source or a dynamic Perl part. For now, it seems to be sufficient that each file
# is cached itself.)
$flags{checksummed}=0;
# temporarily activate specials "{" and "}"
push(@specialStack, @specials{('{', '}')});
@specials{('{', '}')}=(1, 1);
# deactivate boost
$flags{noboost}=1;
}
],
[#Rule 151
'included', 3,
sub
#line 3848 "ppParser.yp"
{
# scopies
my ($errors, $originalPath);
# reactivate boost
$flags{noboost}=0;
# restore special state of "{" and "}"
@specials{('{', '}')}=splice(@specialStack, -2, 2);
# check parameters: type and filename should be set at least
my %tagpars=@{$_[3][0]};
$errors++, _semerr($_[0], "$sourceFile, line $_[3][1]: You forgot to specify the name of your included file.") unless exists $tagpars{file};
# set default type, if necessary
$tagpars{type}='pp' unless exists $tagpars{type};
# Tag condition set?
if (exists $tagpars{_cnd_})
{
# ok, if the condition was true or could not be evaluated,
# stop processing of this tag (there is no body, so return an empty stream)
unless (_evalTagCondition($tagpars{_cnd_}, $sourceFile, $_[3][1]))
{return([[()], $_[3][1]]);}
else
{
# strip off this special option before the tag or macro is furtherly processed
delete $tagpars{_cnd_};
lib/PerlPoint/Parser.pm view on Meta::CPAN
_stackInput($_[0], split(/(\n)/, $result)) if defined $result;
# reset the "end of input reached" flag if necessary
$readCompletely=0 if $readCompletely;
}
}
# we have to supply something, but it should be nothing
[[()], $_[3][1]];
}
else
{
# we include anything else: provide the contents as it is,
# declared as an "embedded" part
# open(my $included, $tagpars{file});
my $included=new IO::File;
open($included, $tagpars{file});
my @included=<$included>;
close($included);
# in case the file was declared a (parsed) example, embed its contents as a (verbatim) block,
# otherwise, include it as really embedded part (to be processed by a backend)
if ($tagpars{type}=~/^(parsed)?example$/i)
{
# set paragraph type
my $ptypeDirective=defined($1) ? DIRECTIVE_BLOCK : DIRECTIVE_VERBATIM;
# indent lines, if requested
if (exists $tagpars{indent})
{
# check parameter
unless ($tagpars{indent}=~/^\d+$/)
{$errors++, _semerr($_[0], "$sourceFile, line $_[3][1]: Invalid indentation value of \"$tagpars{indent}\", please set up a number.");}
else
{
# all right, indent
my $indentation=' ' x $tagpars{indent};
@included=map {"$indentation$_"} @included;
}
}
my %hints=(nr=>++$directiveCounter);
[
[
# opener directive
[\%hints, $ptypeDirective, DIRECTIVE_START],
# the list of enclosed literals
@included,
# final directive
[\%hints, $ptypeDirective, DIRECTIVE_COMPLETE]
],
$_[3][1]
];
}
else
{
my %hints=(nr=>++$directiveCounter);
[
[
# opener directive
[\%hints, DIRECTIVE_TAG, DIRECTIVE_START, 'EMBED', {lang=>$tagpars{type}}],
# the list of enclosed "literals", if any
@included,
# final directive
[\%hints, DIRECTIVE_TAG, DIRECTIVE_COMPLETE, 'EMBED', {lang=>$tagpars{type}}]
],
$_[3][1]
];
}
}
}
else
{
# file missing, simply inform user
$errors++, _semerr($_[0], "$sourceFile, line $_[3][1]: File $tagpars{file} does not exist or cannot be read (current directory: ", cwd(), ").");
# we have to supply something, but it should be nothing
[[()], $_[3][1]];
}
}
else
{
# we have to supply something, but it should be nothing
[[()], $_[3][1]];
}
}
],
[#Rule 152
'@31-1', 0,
sub
#line 4219 "ppParser.yp"
{
# switch to definition mode
_stateManager(STATE_DEFINITION);
# trace, if necessary
warn "[Trace] $sourceFile, line $_[1][1]: Macro definition starts.\n" if $flags{trace} & TRACE_PARAGRAPHS;
}
],
[#Rule 153
'@32-3', 0,
sub
#line 4227 "ppParser.yp"
{
# deactivate boost
$flags{noboost}=1;
}
],
[#Rule 154
'@33-5', 0,
sub
#line 4232 "ppParser.yp"
{
# reactivate boost
$flags{noboost}=0;
}
],
[#Rule 155
'@34-7', 0,
sub
#line 4237 "ppParser.yp"
{
# disable all specials to get the body as a plain text
@specials{keys %specials}=(0) x scalar(keys %specials);
}
lib/PerlPoint/Parser.pm view on Meta::CPAN
}
# reply next token: EOL?
if (/^(\n)/)
{
if ($lexerFlags{eol}==LEXER_TOKEN)
{
$found=$1;
warn("[Trace] Lexer: EOL in line $lineNrs{$inHandle}.\n") if $flags{trace} & TRACE_LEXER;
s/^$1//;
return('EOL', [$found, $lineNrs{$inHandle}]);
}
elsif ($lexerFlags{eol}==LEXER_EMPTYLINE)
{
# flag "empty line" as wished
warn("[Trace] Lexer: EOL -> Empty_line in line $lineNrs{$inHandle}.\n") if $flags{trace} & TRACE_LEXER;
s/^$1//;
return('Empty_line', ['', $lineNrs{$inHandle}]);
}
elsif ($lexerFlags{eol}==LEXER_SPACE)
{
# flag "space" as wished and reply a simple whitespace
warn("[Trace] Lexer: EOL -> Space in line $lineNrs{$inHandle}.\n") if $flags{trace} & TRACE_LEXER;
s/^$1//;
return('Space', [' ', $lineNrs{$inHandle}]);
}
else
{die "[BUG] Unhandled EOL directive $lexerFlags{eol}.";}
}
# reply next token: scan for Ils if necessary
$found=$1, s/^$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Ils in line $lineNrs{$inHandle}.\n")),
return('Ils', [$found, $lineNrs{$inHandle}]) if $parserState==STATE_PFILTERED and /^$lexerPatterns{space}/;
# reply next token: scan for spaces
$found=$1, s/^$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Space in line $lineNrs{$inHandle}.\n")),
return('Space', [$found, $lineNrs{$inHandle}]) if /^$lexerPatterns{space}/;
# reply next token: scan for paragraph filter delimiters ("||" and "|")
$found=$1, s/^\Q$1//,
(($flags{trace} & TRACE_LEXER) and warn(qq([Trace] Lexer: Paragraph filter delimiter "$found" in line $lineNrs{$inHandle}.\n))),
return($found, [$found, $lineNrs{$inHandle}]) if /^$lexerPatterns{pfilterDelimiter}/ and $specials{pfilter};
# reply next token: scan for here doc openers
$found=$1, s/^<<$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Heredoc opener $found in line $lineNrs{$inHandle}.\n")),
return('Heredoc_open', [$found, $lineNrs{$inHandle}]) if /^<<(\w+)/ and $specials{heredoc} eq '1';
# reply next token: scan for SPECIAL tagnames: \TABLE
$found=$1, s/^\\$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Table starts in line $lineNrs{$inHandle}.\n")),
return('Table', [$found, $lineNrs{$inHandle}]) if $specials{tag} and /^$lexerPatterns{table}/;
# reply next token: scan for SPECIAL tagnames: \END_TABLE
$found=$1, s/^\\$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Table completed in line $lineNrs{$inHandle}.\n")),
return('Tabled', [$found, $lineNrs{$inHandle}]) if $specials{tag} and /^$lexerPatterns{endTable}/;
# reply next token: scan for SPECIAL tagnames: \EMBED
$found=$1, s/^\\$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Embedding starts in line $lineNrs{$inHandle}.\n")),
return('Embed', [$found, $lineNrs{$inHandle}]) if $specials{tag} and /^$lexerPatterns{embed}/;
# reply next token: scan for SPECIAL tagnames: \END_EMBED
$found=$1, s/^\\$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Embedding completed in line $lineNrs{$inHandle}.\n")),
return('Embedded', [$found, $lineNrs{$inHandle}]) if $specials{embedded} and /^$lexerPatterns{endEmbed}/;
# reply next token: scan for SPECIAL tagnames: \INCLUDE
$found=$1, s/^\\$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Including starts in line $lineNrs{$inHandle}.\n")),
return('Include', [$found, $lineNrs{$inHandle}]) if $specials{tag} and /^$lexerPatterns{include}/;
# reply next token: scan for tagnames
$found=$1, s/^\\$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Tag opener $found in line $lineNrs{$inHandle}.\n")),
return('Tag_name', [$found, $lineNrs{$inHandle}]) if $specials{tag} and /^$lexerPatterns{tag}/ and (exists $tagsRef->{$1} or exists $macros{$1});
# reply next token: scan for special characters
$found=$1, s/^\Q$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Special $found in line $lineNrs{$inHandle}.\n")),
return($found, [$found, $lineNrs{$inHandle}]) if /^$patternNlbBackslash(\S)/ and exists $specials{$1} and $specials{$1};
# reply next token: scan for definition list items
$found=$1, s/^$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Colon in line $lineNrs{$inHandle}.\n")),
return('Colon', [$found, $lineNrs{$inHandle}]) if $specials{colon} and /^$lexerPatterns{colon}/;
# reply next token: search for named variables (which need to be defined except at the
# beginning of a new assignment paragraph)
$found=$1, s/^\$$1//,
(($flags{trace} & TRACE_LEXER) and warn(qq([Trace] Lexer: Named variable "$found" in line $lineNrs{$inHandle}.\n))),
return('Named_variable', [$found, $lineNrs{$inHandle}])
if /^$lexerPatterns{namedVar}(=?)/
and (
($parserState==STATE_DEFAULT and defined($2))
or exists $variables{$1}
);
# reply next token: search for symbolic variables (these cannot be used in assignments,
# so handling is easier)
$found=$2, s/^\$$1//,
(($flags{trace} & TRACE_LEXER) and warn(qq([Trace] Lexer: Symbolic variable "$found" in line $lineNrs{$inHandle}.\n))),
return('Symbolic_variable', [$found, $lineNrs{$inHandle}])
if /^$lexerPatterns{symVar}/ and exists $variables{$2};
# flag that this paragraph *might* use macros someday, if there is still something being no tag and no
# macro, but looking like a tag or a macro (somebody could *later* declare it a real macro, so the cache
# needs to check macro definitions)
$flags{checksummed}[3]=1
if $specials{tag} and /^$lexerPatterns{tag}/
and not (exists $flags{checksummed} and not $flags{checksummed});
# likewise, flag that this paragraph *might* use variables someday, if there is still something being no variable,
# but looking like a variable (somebody could *later* declare it a real var, so the cache
# needs to check variable definitions)
$flags{checksummed}[4]=1
if /($lexerPatterns{namedVarKernel})|($lexerPatterns{symVarKernel})/
and not (exists $flags{checksummed} and not $flags{checksummed});
# remove guarding \\, if necessary
s/^\\// unless $specials{heredoc}
or (defined $lexerFlags{backsl} and $lexerFlags{backsl}==LEXER_TOKEN)
or $parserState==STATE_EMBEDDING
or $parserState==STATE_PFILTER
or $parserState==STATE_CONDITION
or $parserState==STATE_DEFINITION;
# reply next token: scan for numbers, if necessary
$found=$1, s/^$1//,
(($flags{trace} & TRACE_LEXER) and warn("[Trace] Lexer: Number $found in line $lineNrs{$inHandle}.\n")),
return('Number', [$found, $lineNrs{$inHandle}]) if $specials{number} and /^(\d+)/;
unless ($flags{noboost})
{
# build set of characters to be special
my $special=join('', '([', (map {exists $specials2patterns{$_} ? $specials2patterns{$_} : $_} grep(($specials{$_} and (length==1 or exists $specials2patterns{$_})), keys %specials)), '\n\\\\', '])');
$special=qr($special|(\|{1,2})) if $specials{pfilter};
$special=qr($special|($tableSeparatorStack[0][0])|($tableSeparatorStack[0][1])) if @tableSeparatorStack;
$special=qr($special|(($lexerPatterns{namedVar})|($lexerPatterns{symVar})));
# reply next token: scan for word or single character (declared as "Word" as well)
#warn("~~~~~~~~~> $special\n");
#warn("---------> $_");
$found=$1, s/^\Q$1//,
#warn("=====> $found\n\n"),
(($flags{trace} & TRACE_LEXER) and warn(qq([Trace] Lexer: (Boosted) word "$found" in line $lineNrs{$inHandle}.\n))),
return('Word', [$found, $lineNrs{$inHandle}])
if $_!~/^$special/ and /^(.+?)($special|($))/;
}
# reply next token: scan for word or single character (declared as "Word" as well)
$found=$1, s/^\Q$1//,
(($flags{trace} & TRACE_LEXER) and warn(qq([Trace] Lexer: Word "$found" in line $lineNrs{$inHandle}.\n))),
return('Word', [$found, $lineNrs{$inHandle}]) if /^($patternWUmlauts)/ or /^(\S)/;
# everything should be handled - this code should never be executed!
die qq([BUG] $sourceFile, line $lineNrs{$inHandle}: No symbol found in "$_"!\n);
}
}
# evaluate a tag condition (can possibly be generalized: this is just a piece of code)
sub _evalTagCondition
{
# get parameters
my ($code, $file, $line)=@_;
confess "[BUG] Missing code parameter.\n" unless defined $code;
confess "[BUG] Missing file parameter.\n" unless defined $file;
confess "[BUG] Missing line parameter.\n" unless defined $line;
# declare variables
my ($rc);
# Does the caller want to evaluate the code?
if ($safeObject)
{
# update active contents base data, if necessary
if ($flags{activeBaseData})
{
no strict 'refs';
${join('::', ref($safeObject) ? $safeObject->root : 'main', 'PerlPoint')}=dclone($flags{activeBaseData});
}
lib/PerlPoint/Parser.pm view on Meta::CPAN
# flag that there is no token to return
0;
}
# ----------------------------------------------------------------------------------------------
# Internal function: error message display.
# ----------------------------------------------------------------------------------------------
sub _Error
{
# get parameters
my ($parser)=@_;
# declare base indention
my $baseIndentation=' ' x length('[Error] ');
# use $_[0]->YYCurtok to display the recognized *token* if necessary
# - for users convenience, it is suppressed in the message
warn "\n\n[Error] $sourceFile, ",
${$parser->YYCurval}[1] > 0 ? "line ${$parser->YYCurval}[1]" : 'all sources read',
(exists $statistics{cache} and $statistics{cache}[1]) ? ' (or below because of cache hits)'
: (),
': found ',
defined ${$parser->YYCurval}[0] ? qq("${$parser->YYCurval}[0]") : 'nothing',
", expected:\n$baseIndentation",
' ' x length('or '),
join("\n${baseIndentation}or ",
map {
exists $tokenDescriptions{$_} ? defined $tokenDescriptions{$_} ? $tokenDescriptions{$_}
: ()
: $_
} sort grep($_!~/cache_hit$/, $parser->YYExpect)
),
".\n\n";
# visualize error position
warn(
(map {my $l=$_->[1]; chomp($l); "$baseIndentation$l\n"} reverse @inLine==1 ? @inLine : @inLine[0, -1]), "",
$baseIndentation, ' ' x ($inLine[0][0]-length($parser->{USER}->{INPUT})-1), "^\n",
$baseIndentation, '_' x ($inLine[0][0]-length($parser->{USER}->{INPUT})-1), '|', "\n\n\n"
) if @inLine;
}
# ----------------------------------------------------------------------------------------------
# Internal function: state manager.
# ----------------------------------------------------------------------------------------------
sub _stateManager
{
# get parameter
my ($newState)=@_;
# check parameter
confess "[BUG] Invalid new state $newState passed.\n" unless $newState==STATE_DEFAULT
or $newState==STATE_DEFAULT_TAGMODE
or $newState==STATE_TEXT
or $newState==STATE_UPOINT
or $newState==STATE_OPOINT
or $newState==STATE_DPOINT
or $newState==STATE_DPOINT_ITEM
or $newState==STATE_BLOCK
or $newState==STATE_VERBATIM
or $newState==STATE_EMBEDDING
or $newState==STATE_PFILTER
or $newState==STATE_PFILTERED
or $newState==STATE_CONDITION
or $newState==STATE_HEADLINE_LEVEL
or $newState==STATE_HEADLINE
or $newState==STATE_TABLE
or $newState==STATE_DEFINITION
or $newState==STATE_CONTROL
or $newState==STATE_COMMENT;
# store the new state
$parserState=$newState;
# enter new state: default
$newState==STATE_DEFAULT and do
{
# buffer last states lexer flags (take care of a clean init)
%lexerFlagsOfPreviousState=%lexerFlags ? %lexerFlags : (cbell => LEXER_IGNORE);
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_TOKEN, LEXER_EMPTYLINE, LEXER_IGNORE, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1);
# trace, if necessary
warn "[Trace] Entered default state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: paragraph filter installation
$newState==STATE_PFILTER and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_TOKEN, LEXER_EMPTYLINE, LEXER_IGNORE, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1);
# trace, if necessary
warn "[Trace] Entered pfilter installation state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: paragraph filter (similar to default except for the name)
$newState==STATE_PFILTERED and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_TOKEN, LEXER_EMPTYLINE, LEXER_IGNORE, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1);
# trace, if necessary
warn "[Trace] Entered postfilter default state.\n" if $flags{trace} & TRACE_SEMANTIC;
lib/PerlPoint/Parser.pm view on Meta::CPAN
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0);
# trace, if necessary
warn "[Trace] Entered definition item state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: list point
($newState==STATE_UPOINT or $newState==STATE_OPOINT or $newState==STATE_DPOINT) and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_IGNORE, LEXER_SPACE, LEXER_TOKEN, qr([*#:]));
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0);
# trace, if necessary
warn "[Trace] Entered point state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: block
$newState==STATE_BLOCK and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_SPACE, LEXER_TOKEN, LEXER_TOKEN, 'Ils');
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0);
# trace, if necessary
warn "[Trace] Entered block state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: verbatim block
$newState==STATE_VERBATIM and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_SPACE, LEXER_TOKEN, LEXER_TOKEN, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0);
# trace, if necessary
warn "[Trace] Entered verbatim state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: embedding
$newState==STATE_EMBEDDING and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_SPACE, LEXER_TOKEN, LEXER_TOKEN, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0);
# trace, if necessary
warn "[Trace] Entered embedding state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: condition (very similar to embedding, naturally)
$newState==STATE_CONDITION and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_SPACE, LEXER_SPACE, LEXER_TOKEN, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
# trace, if necessary
warn "[Trace] Entered condition state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# enter new state: unordered list point
$newState==STATE_CONTROL and do
{
# prepare lexer
@lexerFlags{qw(ils eol el cbell)}=(LEXER_IGNORE, LEXER_IGNORE, LEXER_TOKEN, LEXER_IGNORE);
# activate special characters as necessary
@specials{('.', '/', '*', '#', '=', '<', '>', '{', '}' , '-', '?', '@', '+', '~', 'heredoc', 'colon', 'tag', 'embedded', 'number', 'pfilter')}=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
# trace, if necessary
warn "[Trace] Entered control state.\n" if $flags{trace} & TRACE_SEMANTIC;
# well done
return;
};
# check yourself
confess "[BUG] Unhandled state $newState.\n";
}
=pod
=head2 run()
This function starts the parser to process a number of specified files.
B<Parameters:>
All parameters except of the I<object> parameter are named (pass them by hash).