LaTeXML

 view release on metacpan or  search on metacpan

lib/LaTeXML/MathGrammar  view on Meta::CPAN

# /=====================================================================\ #
# |  LaTeXML::MathGrammar                                         | #
# | LaTeXML's Math Grammar for postprocessing                           | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
# ================================================================================
# LaTeXML's MathGrammar.
# To compile :
#      perl -MParse::RecDescent - MathGrammar LaTeXML::MathGrammar
# ================================================================================
# Startup actions: import the constructors
{ BEGIN{ use LaTeXML::MathParser qw(:constructors);
#### $::RD_TRACE=1;
}}

# Rules section
# ========================================
# Naming Conventions:
#   UPPERCASE   : is for terminals, ie. classes of TeX tokens.
#   Initial Cap : for non-terminal rules that can possibly be invoked externally.
#   Initial lowercase : internal rules.
# ========================================
# For internal rules
#   moreFoos[$foo] : Looks for more Foo's w/appropriate punctuation or operators,
#     whatever is appropriate, and combines it with whatever was passed in
#     as pattern arg. Typically, the last clause would be simply
#       | { $arg[0]; }
#     to return $foo without having found any more foo's.
#     In such a case, it appears to be advantageous to have the first clause be
#       : /^\Z/ { $arg[0]; }
#     which will return immediately if there is no additional input.
#   addFoo[$bar]  : Check for a following Foo and add it, as appropriate to
#   the $bar.
# ========================================
# Note that Parse::RecDescent does NOT backtrack within a rule:
#  If a given production succeeds, the rule succeeds, but even if the ultimate
# parse fails, the parser will NOT go back and try another production within
# that same rule!!!  Of course, if a production fails, it goes on to the next,
# and if that rule fails, etc...
#
# For example ||a|-|b|| won't work (in spite of various attempts to control it)
# After seeing the initial || and attempting to parse an Expression, it gets
#   a * abs( - abs(b))
# without anything to match the initial ||; and it will NOT backtrack to try
# a shorter Expression!
#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top Level expressions; Just about anything?
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Note, in particular, that many inline formulae contain only `half' a formula,
# with the lead-in text effectively being the LHS, e.g. function $=foo$;
# similarly, you can even end up with a missing RHS, as in $x=$.

# Start : entry point of the grammar.
# Succeeds when Anything is followed by the end of input; yields Anything's value.
Start :
          Anything /^\Z/
                { my $tree = $item[1]; $tree; }

#======================================================================
# Anything : parses AnythingAny, which must extend to the end of input,
# and returns its value.
# The rulevar sets $MaxAbsDepth from $LaTeXML::MathParser::MAX_ABS_DEPTH for
# the duration of this rule; absExpression decrements and tests that variable.
Anything : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Anything : AnythingAny /^\Z/                    { $item[1]; }

#======================================================================
# AnythingAny : the alternatives accepted by Anything.
# The productions are tried in order and the first one that matches wins,
# so the more complete forms are listed before the fallbacks.
AnythingAny :
          Formulae
        | OPEN Formulae CLOSE             { Fence($item[1],$item[2],$item[3]); }
        | modifierFormulae
        # Fenced modifier formulae.  This uses modifierFormulae (rather than
        # modifierFormula) so that the fenced form accepts everything the unfenced
        # form does, in parallel with Formulae / OPEN Formulae CLOSE.
        | OPEN modifierFormulae CLOSE     { Fence($item[1],$item[2],$item[3]); }
        | MODIFIER
        # An operator with nothing before it: Absent() stands in for the missing LHS.
        | MODIFIEROP Expression           { Apply($item[1],Absent(),$item[2]);}
        | METARELOP Formula               { Apply($item[1],Absent(),$item[2]); }
        # Two or more operators, optionally separated by PUNCT;
        # InvisibleComma() is substituted wherever the PUNCT is missing.
        | AnyOp (PUNCT(?) AnyOp {[$item[1]->[0]||InvisibleComma(), $item[2]]})(s)
                                          { NewList($item[1],map(@$_,@{$item[2]})); }
        # Scripts without a base: Absent() stands in for the base.
        | FLOATSUPERSCRIPT POSTSUBSCRIPT  { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
        | FLOATSUBSCRIPT POSTSUPERSCRIPT  { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
        | FLOATSUPERSCRIPT                { NewScript(Absent(),$item[1]); }
        | FLOATSUBSCRIPT                  { NewScript(Absent(),$item[1]); }
        # Last resort: any operator applied to a following Expression, with no LHS.
        | AnyOp Expression                { Apply($item[1],Absent(),$item[2]);}

# A top-level rule for subscripts (Superscript is the analogous rule for
# superscripts) that can accept all sorts of junk.
# Matches one aSubscript, then zero or more further aSubscript's, each optionally
# preceded by PUNCT (InvisibleComma() is substituted when the PUNCT is missing),
# then an optional trailing endPunct (Absent() is substituted when it is missing).
# All of these are passed, in order, to NewList.
# The rulevar sets $MaxAbsDepth from $LaTeXML::MathParser::MAX_ABS_DEPTH
# for the duration of this rule.
Subscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Subscript :
          aSubscript   (PUNCT(?) aSubscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?) endPunct(?)
                 { NewList($item[1],map(@$_,@{$item[2]}),$item[3]->[0]||Absent()); }

# Superscript : top-level rule for the content of a superscript.
# Matches one aSuperscript, then zero or more further aSuperscript's, each
# optionally preceded by PUNCT (InvisibleComma() is substituted when the PUNCT
# is missing), then an optional trailing endPunct (Absent() is substituted when
# it is missing).  All of these are passed, in order, to NewList.
# The rulevar sets $MaxAbsDepth from $LaTeXML::MathParser::MAX_ABS_DEPTH
# for the duration of this rule.
Superscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Superscript :
          aSuperscript (PUNCT(?) aSuperscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?) endPunct(?)
                 { NewList($item[1],map(@$_,@{$item[2]}),$item[3]->[0]||Absent()); }

# aSubscript : a single item within a subscript.  Tried in order:
#   Formulae; an operator applied to an Expression (Absent() as the missing LHS);
#   a lone operator; or any of these wrapped, recursively, in OPEN ... CLOSE.
aSubscript :
          Formulae
        | AnyOp Expression               { Apply($item[1],Absent(),$item[2]);}
        | AnyOp
        | OPEN aSubscript CLOSE          { Fence($item[1],$item[2],$item[3]); }

# aSuperscript : a single item within a superscript.  Same alternatives as
# aSubscript, except that supops (a rule defined elsewhere in the grammar)
# is tried first.
aSuperscript :
          supops
        | Formulae
        | AnyOp Expression               { Apply($item[1],Absent(),$item[2]);}
        | AnyOp
        | OPEN aSuperscript CLOSE        { Fence($item[1],$item[2],$item[3]); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Formulae  (relations or grouping of expressions or relations)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# This maze attempts to recognize the various meaningful(?) alternations of
# Expression(s) separated by punctuation, relational operators or metarelational
# operators [Think of     $a=b=c$ vs $a=b, c=d$  vs. $a=b,c,d$  .. ]
# and group them into Formulae (collections of relations), including relations
# which have punctuated collections of Expression(s) on either the LHS or RHS,
# as well as `multirelation' like a = b = c, or simply punctuated collections of
# Expression(s)

# Formulae : a Formula, followed by whatever moreFormulae can attach to it.
# The parsed Formula is handed to moreFormulae, whose value is the result.
Formulae : Formula moreFormulae[$item[1]]

# moreFormulae[$formula]; Got a Formula, what can follow?
#   - end of input: returns $formula unchanged;
#   - one or more (endPunct Formula) pairs: returns NewFormulae of $formula
#     followed by the flattened pairs;
#   - one or more metarelopFormula: returns NewFormula of $formula followed
#     by the flattened (metarelop, formula) pairs;
#   - otherwise: returns $formula unchanged, consuming nothing.
moreFormulae :
          /^\Z/ { $arg[0];}   # short circuit!
        | (endPunct Formula { [$item[1],$item[2]]; })(s)
                    { NewFormulae($arg[0],map(@$_,@{$item[1]})); }
        | metarelopFormula(s)     { NewFormula($arg[0],map(@$_,@{$item[1]})); }
        | { $arg[0]; }

# endPunct : punctuation that can end a formula; either PUNCT or PERIOD.
endPunct :
          PUNCT
        | PERIOD


# Formula : an Expression, followed by whatever extendFormula can attach to it.
# The parsed Expression is handed to extendFormula, whose value is the result.
Formula : Expression extendFormula[$item[1]]

# extendFormula[$expression] ; expression might be followed by punct Expression...
#   or relop Expression... or arrow Expression or nothing.
#   - end of input: returns $expression unchanged;
#   - one or more punctExpr: passes $expression and the flattened
#     (punct, expr) pairs on to maybeRHS;
#   - relop Expression: passes ($expression, relop, expr) on to moreRHS;
#   - relop at end of input: a relation whose RHS is Absent();
#   - otherwise: returns $expression unchanged, consuming nothing.
extendFormula :
          /^\Z/ { $arg[0];}   # short circuit!
        | punctExpr(s) maybeRHS[$arg[0],map(@$_,@{$item[1]})]
        | relop Expression moreRHS[$arg[0],$item[1],$item[2]]
        | relop /^\Z/    { NewFormula($arg[0],$item[1], Absent()); }
        | { $arg[0]; }

# maybeRHS[$expr,(punct,$expr)*];
#    Could have RELOP Expression (which means the (collected LHS) relation RHS)
#    or done (just collection)
#   - end of input, or nothing recognizable: returns NewList of all the arguments;
#   - one or more relopExpr: returns NewFormula whose first element is the
#     NewList of all the arguments, followed by the flattened (relop, expr) pairs.
maybeRHS :
          /^\Z/ { NewList(@arg); }
        | relopExpr(s) { NewFormula(NewList(@arg),map(@$_,@{$item[1]})); }
        | { NewList(@arg); }
# --- either line could be followed by (>0)
# For the latter, does a,b,c (<0) mean c<0 or all of them are <0 ????

# moreRHS[$expr,$relop,$expr]; Could have more (relop Expression)
# or (punct Expression)*
#   - end of input: returns NewFormula of the three arguments;
#   - PUNCT Expression: appends them to the arguments and continues in maybeColRHS;
#   - zero or more relopExpr: returns NewFormula of the three arguments followed
#     by the flattened (relop, expr) pairs.  Since the repetition may be empty,
#     this production also serves as the fallback.
moreRHS :
          /^\Z/   { NewFormula($arg[0],$arg[1],$arg[2]); } # short circuit!
        | PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
        | relopExpr(s?) { NewFormula($arg[0],$arg[1],$arg[2],
                                     map(@$_,@{$item[1]})); }
# --- 1st line could be preceded by (>0) IF it ends up end of formula
# --- 2nd line could be followed by (>0)

# maybeColRHS[$expr,$relop,$expr,(punct, $expr)*];
#    Could be done, get punct (collection) or rel Expression (another formula)
#   - end of input, or nothing recognizable: the relation's RHS is the NewList
#     of everything from the third argument onwards;
#   - relop Expression: the last collected expression ($arg[$#arg]) turns out to
#     be the LHS of a new relation, which is passed to moreRHS.  The first
#     formula's RHS is then the list @arg[2..$#arg-2], and the punctuation that
#     preceded the last expression ($arg[$#arg-1]) separates the two formulae
#     in the resulting NewFormulae;
#   - PUNCT Expression: appends them to the arguments and recurses.
maybeColRHS :
          /^\Z/ { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
        | relop Expression moreRHS[$arg[$#arg],$item[1],$item[2]]
            { NewFormulae(NewFormula($arg[0],$arg[1],
                          NewList(@arg[2..$#arg-2])),$arg[$#arg-1],$item[3]); }
        | PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
        | { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
# --- 1st line handles it through more RHS ???
# --- 2nd line could be preceded by (>0) if it ends formula
# --- 3rd line could be followed by (>0)


# punctExpr : a PUNCT followed by an Expression;
# yields the pair as an array ref [punct, expression].
punctExpr :
          PUNCT Expression
                { my ($punct, $expr) = @item[1, 2]; [$punct, $expr]; }

# relopExpr : a relop followed by an Expression, or a relop at the end of input;
# yields an array ref [relop, expression], with Absent() as the expression
# when the relop is the last thing in the input.
relopExpr :
          relop Expression
                { my ($op, $rhs) = @item[1, 2]; [$op, $rhs]; }
        | relop /^\Z/
                { my $op = $item[1]; [$op, Absent()]; }

# metarelopFormula : a METARELOP followed by a Formula, or a METARELOP at the
# end of input; yields an array ref [metarelop, formula], with Absent() as the
# formula when the METARELOP is the last thing in the input.
metarelopFormula :
          METARELOP Formula
                { my ($op, $formula) = @item[1, 2]; [$op, $formula]; }
        | METARELOP /^\Z/
                { my $op = $item[1]; [$op, Absent()]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# `Modifier' formula, things like $<0$, that might follow another formula or text.
# Absent() is a placeholder for the missing thing... (?)
# [and also when the LHS is moved away, due to alignment rearrangement]
# modifierFormulae : a modifierFormula, followed by whatever moreFormulae
# can attach to it (further punctuated or metarelop-connected formulae).
modifierFormulae : modifierFormula moreFormulae[$item[1]]
# modifierFormula : a relation with no LHS: relop Expression is passed to
# moreRHS with Absent() in place of the missing LHS.
modifierFormula : relop Expression moreRHS[Absent(),$item[1],$item[2]]

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Expressions; sums of terms
# Abstractly, things combined by operators binding tighter than relations
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Expressions : an Expression followed by zero or more punctExpr;
# yields NewList of the first expression and the flattened (punct, expr) pairs.
Expressions :
          Expression punctExpr(s?)
                { my ($first, $rest) = @item[1, 2];
                  NewList($first, map { @$_ } @$rest); }

# Expression : a SignedTerm, extended by moreTerms (further addop/term pairs,
# starting from an empty accumulator) and then by addExpressionModifier.
# The second production accepts a lone operator, but only when the lookahead
# (...anyOpIsolator) sees end of input, PUNCT or CLOSE; the lookahead itself
# consumes nothing.
Expression  : SignedTerm moreTerms[[],$item[1]] addExpressionModifier[$item[2]]
            # # very tentatively allow an operator as a complete expression
            # # BUT, this should only succeed if at end, or followed by punctuation!!!!!!!
            # # (or CLOSE, or... ?!?!?!?)
            | AnyOp ...anyOpIsolator { $item[1]; }

# anyOpIsolator : what may follow an operator that stands alone as an
# Expression: end of input, PUNCT or CLOSE.
anyOpIsolator :
          /^\Z/
        | PUNCT
        | CLOSE

# moreTerms[ [($term,$addop)*], $term];  Check for more addop & term's
#   $arg[0] is an array ref accumulating the (term, addop) pairs seen so far;
#   $arg[1] is the most recent term.
#   - end of input, or no AddOp follows: returns LeftRec of the accumulated
#     pairs followed by the last term;
#   - AddOp: hands the accumulator, the last term and the addop to moreTerms2.
moreTerms :
          /^\Z/ { LeftRec(@{$arg[0]},$arg[1]); }   # short circuit!
        | AddOp moreTerms2[$arg[0],$arg[1],$item[1]]
        | { LeftRec(@{$arg[0]},$arg[1]); }

# moreTerms2[ [($term,$addop)*], $term, $addop]; Check if addop is followed
#  by another term, or if not, it presumably represents a limiting form
#  like "a+" (ie a from above)
#   - Term: appends ($term, $addop) to the accumulator and continues in
#     moreTerms with the new term;
#   - otherwise: the dangling ($term, $addop) is wrapped as an application of
#     New('limit-from'), which becomes the final item passed to LeftRec.
moreTerms2   : Term moreTerms[ [@{$arg[0]},$arg[1],$arg[2]],$item[1] ]
            | { LeftRec(@{$arg[0]},Apply(New('limit-from'),$arg[1],$arg[2])); }

# addExpressionModifier[$expr]
#   Checks whether $expr is followed by a modifier and, if so, attaches it;
#   otherwise returns $expr unchanged.
#   In the fenced productions, the optional leading PUNCT is matched but does
#   not appear in the result, and Absent() stands in for the missing LHS of the
#   operator inside the fence.
addExpressionModifier :
          /^\Z/ { $arg[0];}   # short circuit!
        # $expr (relop Expression) : annotate $expr with the fenced relation.
        | PUNCT(?) OPEN relop Expression balancedClose[$item[2]]
            { Apply(New('annotated'),$arg[0],
                    Fence($item[2], Apply($item[3],Absent(),$item[4]),$item[5])); }
        # An alternative form would have OPEN Expression relop...
        # but that seems less like a "modifier" and more like a relation as argument!
###        | PUNCT(?) OPEN Expression relop Expression
###                 moreRHS[$item[3],$item[4],$item[5]] balancedClose[$item[2]]
###            { Apply(New('annotated'),$arg[0],Fence($item[2],$item[6],$item[7])); }
        # $expr (MODIFIEROP Expression) : same, with a MODIFIEROP inside the fence.
        | PUNCT(?) OPEN MODIFIEROP Expression balancedClose[$item[2]]
            { Apply(New('annotated'),$arg[0],
                    Fence($item[2], Apply($item[3],Absent(),$item[4]),$item[5])); }
        # A bare MODIFIER annotates $expr directly.
        | MODIFIER
            { Apply(New('annotated'),$arg[0],$item[1]); }
        # An unfenced MODIFIEROP is applied with $expr as its first argument.
        | MODIFIEROP Expression
            { Apply($item[1],$arg[0],$item[2]); }
        | { $arg[0]; }


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terms: products of factors
# Abstractly, things combined by operators binding tighter than addition
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

SignedTerm : AddOp Term                         { Apply($item[1],$item[2]); }

lib/LaTeXML/MathGrammar  view on Meta::CPAN

        | aBarearg moreBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }

# A variation that does not allow a bare trig function
# Matches one aTrigBarearg and hands it to moreTrigBareargs, which collects
# any further factors.
trigBarearg : aTrigBarearg moreTrigBareargs[$item[1]]
# aTrigBarearg : a single factor within a trigBarearg.
# The first four productions match a (possibly pre/post-scripted) token of the
# named class via preScripted, and pass the result to the argument-handling
# rule for that class (addArgs, addOpFunArgs, maybeArgs, doubtArgs; all
# defined elsewhere in the grammar).
# A NUMBER simply gets any following scripts.
# VERTBAR absExpression VERTBAR is wrapped in a Fence, with the two bars passed
# through MorphVertbar as 'OPEN' and 'CLOSE' respectively, and then gets any
# following scripts.
aTrigBarearg :
          preScripted['FUNCTION'] addArgs[$item[1]]
        | preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
        | preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
        | preScripted['UNKNOWN'] doubtArgs[$item[1]]
        | NUMBER   addScripts[$item[1]]
        | VERTBAR absExpression VERTBAR
          addScripts[Fence(MorphVertbar($item[1],'OPEN'),$item[2],MorphVertbar($item[3],'CLOSE'))]

# moreTrigBareargs[$argpart]
#   Collects further factors following $argpart.
#   - end of input, or nothing recognizable: returns $argpart unchanged;
#   - MulOp aTrigBarearg: combines $argpart and the new factor with the
#     explicit operator via ApplyNary, and recurses;
#   - aTrigBarearg alone (juxtaposition): combines them with InvisibleTimes()
#     via ApplyNary, and recurses.
moreTrigBareargs :
          /^\Z/ { $arg[0];}   # short circuit!
        | MulOp aTrigBarearg
                 moreTrigBareargs[ApplyNary($item[1],$arg[0],$item[2])]
        | aTrigBarearg
          moreTrigBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
        | { $arg[0]; }

# maybeEvalAt[$thing,$vertbar]
#   Requires at least a subscript after the vertical bar; there is no fallback
#   production, so the rule fails when neither form matches.
#   - POSTSUBSCRIPT: continues in moreEvalAt, which looks for an optional
#     superscript;
#   - POSTSUPERSCRIPT POSTSUBSCRIPT: both scripts given, superscript first.
#   NewEvalAt is called as ($thing, $vertbar, subscript, superscript), and its
#   result is passed on to moreFactors.
maybeEvalAt :
          POSTSUBSCRIPT moreEvalAt[$arg[0],$arg[1],$item[1]]
        | POSTSUPERSCRIPT POSTSUBSCRIPT moreFactors[NewEvalAt($arg[0],$arg[1],$item[2],$item[1])]

# moreEvalAt[$thing,$vertbar,$sub]
#   Having seen the subscript, checks for an optional following superscript.
#   NewEvalAt is called as ($thing, $vertbar, $sub, superscript), with undef
#   as the superscript when none follows; the result is passed on to moreFactors.
moreEvalAt :
          POSTSUPERSCRIPT moreFactors[NewEvalAt($arg[0],$arg[1],$arg[2],$item[1])]
         | moreFactors[NewEvalAt($arg[0],$arg[1],$arg[2],undef)]

#======================================================================
# After < a | we might be done, or get <a|b> or <a|H|b>

# <$expr |   maybeBra[$langle,$expr,$bar]
#   The rulevar sets $forbidVertBar to 1 for the duration of this rule
#   (presumably consulted by rules outside this view -- confirm).
#   - ketExpression follows: this may be a braket; continue in maybeBraket,
#     passing along what has been seen so far plus the new expression;
#   - otherwise: it is just a bra.  Records the 'QM' notation via SawNotation,
#     builds the delimited 'bra' with $langle annotated as OPEN and $bar as
#     CLOSE, and adds any following scripts.
#   NOTE(review): the SawNotation action sits in the middle of the production,
#   so it must return a defined value for the production to continue -- confirm.
maybeBra : <rulevar: local $forbidVertBar = 1>
maybeBra :
          ketExpression maybeBraket[$arg[0],$arg[1],$arg[2],$item[1]]
        | { SawNotation('QM'); }
          addScripts[InterpretDelimited(New('bra'),
                     Annotate($arg[0],role=>'OPEN'),$arg[1],Annotate($arg[2],role=>'CLOSE'))]

# <$expr1|$expr2   maybeBraket[$langle,$expr1,$bar,$expr2]
#   There is no fallback production: the rule fails unless one of the two
#   closing forms below follows.  Both record the 'QM' notation via SawNotation
#   and add any following scripts to the result.
#   - RANGLE: <$expr1|$expr2> ; built as 'inner-product' with $langle annotated
#     as OPEN, $bar as MIDDLE and the RANGLE as CLOSE;
#   - MIDBAR ketExpression RANGLE: <$expr1|$expr2|expr3> ; built as
#     'quantum-operator-product', with $langle/$bar annotated as OPEN/CLOSE
#     around $expr1, then $expr2, then MIDBAR/RANGLE annotated as OPEN/CLOSE
#     around the final expression.
maybeBraket :
          RANGLE { SawNotation('QM'); }
              addScripts[InterpretDelimited(New('inner-product', undef,role=>'MIDDLE'),
                                   Annotate($arg[0],role=>'OPEN'),$arg[1],
                                   Annotate($arg[2],role=>'MIDDLE'),
                                   $arg[3],Annotate($item[1],role=>'CLOSE'))]
        | MIDBAR ketExpression RANGLE { SawNotation('QM'); }
              addScripts[InterpretDelimited(New('quantum-operator-product',undef), # Is this a good representation?
                            Annotate($arg[0],role=>'OPEN'),$arg[1],
                                     Annotate($arg[2],role=>'CLOSE'),
                            $arg[3],
                            Annotate($item[1],role=>'OPEN'),$item[2],
                                    Annotate($item[3],role=>'CLOSE'))]

# bra's and ket's (ie <foo| & |foo>) can contain a rather wide variety of things
# from simple symbols to full (but typically short) formula, and so we
# want to use the Formulae production.  However, for that to work,
# we need to keep |, < and > (which delimit the bra & ket) from being
# interpreted as usual, otherwise the parse will walk off the end, or
# fail at a level that precludes backtracking.
# ketExpression : the content of a bra or ket: Formulae, or a lone METARELOP
# or MODIFIEROP.
# The rulevars set $forbidVertBar and $forbidLRAngle to 1 for the duration of
# this rule, so that |, < and > are not interpreted as usual inside it
# (the rules that consult these variables are outside this view).
ketExpression : <rulevar: local $forbidVertBar = 1>
ketExpression : <rulevar: local $forbidLRAngle = 1>
ketExpression : Formulae
              | METARELOP |  MODIFIEROP

#======================================================================
# absExpression; need to be careful about misinterpreting the next |
# since we can't backtrack across productions.
# Disable evalAt notation ( |_{x=0} ) and explicitly control abs nesting.
#   The rulevars set $forbidEvalAt to 1 and decrement $MaxAbsDepth for the
#   duration of this rule, so each nested absExpression sees a smaller depth.
#   The leading action is a guard: it succeeds while $MaxAbsDepth >= 0;
#   otherwise it records 'AbsFail' via SawNotation and is meant to fail the
#   rule before Expression is attempted.
#   NOTE(review): the failure branch relies on SawNotation returning a true
#   value, so that `&& undef' yields undef; a false-but-defined return would
#   let the guard succeed -- confirm.
absExpression : <rulevar: local $forbidEvalAt = 1>
absExpression : <rulevar: local $MaxAbsDepth = $MaxAbsDepth-1>
absExpression : { ($MaxAbsDepth >= 0 ? 1 : (SawNotation('AbsFail')&& undef)); } Expression

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Adding pre|post sub|super scripts to various things.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# addScripts[$base] ; adds any following sub/super scripts to $base.
#   - end of input, or nothing recognizable: returns $base unchanged;
#   - POSTSUPERSCRIPT or POSTSUBSCRIPT: wraps $base with NewScript and recurses,
#     so several scripts in a row nest from the inside out;
#   - POSTFIX: applies the postfix operator to $base and recurses.
addScripts :
          /^\Z/ { $arg[0];}   # short circuit!
        | POSTSUPERSCRIPT  addScripts[NewScript($arg[0],$item[1])]
        | POSTSUBSCRIPT    addScripts[NewScript($arg[0],$item[1])]
        | POSTFIX          addScripts[Apply($item[1],$arg[0])]
        | { $arg[0]; }

# ================================================================================
# preScripted['RULE']; match a RULE possibly preceded by sub/super prescripts,
#  possibly followed by sub/superscripts.  The initial prescript can only be FLOAT
#  but the following ones can be either POST (which combine) or FLOAT (which don't)
#  $arg[0] is the NAME of the rule to match; <matchrule:$arg[0]> invokes it.
#  - FLOATSUPERSCRIPT or FLOATSUBSCRIPT first: the rest (further prescripts and
#    the RULE itself) is matched by inpreScripted, and the script is then
#    attached to that result by NewScript with 'pre';
#  - otherwise: matches the RULE directly and adds any following scripts.
preScripted :
          FLOATSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | FLOATSUBSCRIPT   inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | <matchrule:$arg[0]> addScripts[$item[1]]
# inpreScripted['RULE']; the continuation of preScripted once an initial
#  prescript has been seen.  $arg[0] is the NAME of the rule to match (it is
#  passed through unchanged on each recursion and finally given to <matchrule:...>).
#  - any of POST/FLOAT SUPER/SUBSCRIPT: another prescript; recurses, and then
#    attaches the script to the recursive result by NewScript with 'pre';
#  - otherwise: matches the RULE itself and adds any following scripts.
inpreScripted :
          POSTSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | POSTSUBSCRIPT   inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | FLOATSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | FLOATSUBSCRIPT   inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
        | <matchrule:$arg[0]> addScripts[$item[1]]

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Parenthetical: Things wrapped in OPEN .. CLOSE
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# ================================================================================
# Factors that begin with OPEN; grouped expressions and objects like sets,
# intervals, etc.
# factorOpen[$open] : Dealing with various things that start with an open.
factorOpen :
          AddOp balancedClose[$arg[0]] addScripts[Fence($arg[0],$item[1],$item[2])] # For (-)
        # Parenthesized Operator possibly w/scripts
        | preScripted['bigop'] balancedClose[$arg[0]]
                 addScripts[Fence($arg[0],$item[1],$item[2])] Factor
            { Apply($item[3],$item[4]); }
        # Parenthesized Operator including a pre-factor



( run in 1.299 second using v1.01-cache-2.11-cpan-437f7b0c052 )