LaTeXML

 view release on metacpan or  search on metacpan

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

# -*- mode: Perl -*-
# /=====================================================================\ #
# |  TeX                                                                | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
use Unicode::Normalize;
use LaTeXML::Util::Pathname;
use List::Util qw(min max);

# NOTE that these define the namespaces we'll (probably) use
# along with the prefixes to be used in "code"
# The generated XML will use the prefixes defined by RegisterDocumentNamespace(...) (if ever)
# or those prefixes defined by the Schema (typically RelaxNGSchema(..)
# Namespace prefixes (as used in "code") paired with their URIs;
# they are registered in the order listed.
foreach my $ns (
  ['ltx',   "http://dlmf.nist.gov/LaTeXML"],
  ['svg',   "http://www.w3.org/2000/svg"],
  ['xlink', "http://www.w3.org/1999/xlink"],    # Needed for SVG
      # Not directly used, but let's stake out the ground
  ['m',     "http://www.w3.org/1998/Math/MathML"],
  ['xhtml', "http://www.w3.org/1999/xhtml"],
  # Namespace for arbitrary data attributes (mapped to data-xxx in html5)
  ['data', "http://dlmf.nist.gov/LaTeXML/data"],
) {
  my ($prefix, $uri) = @$ns;
  RegisterNamespace($prefix => $uri); }

# \@empty: a parameterless macro whose expansion is the empty token list.
DefMacroI("\\\@empty", undef, Tokens());

#======================================================================
# Core ID functionality.
#======================================================================
# DOCUMENTID is the ID of the document
# AND prefixes IDs on all other elements.
{
  my $docid = LookupValue('DOCUMENTID');
  if (!$docid) {
    # No document ID given: \thedocument@ID behaves like \@empty
    Let('\thedocument@ID', '\@empty'); }
  else {
    # Wrap in T_OTHER so funny chars don't screw up (no space!)
    DefMacroI('\thedocument@ID', undef, T_OTHER($docid)); }
}
NewCounter('@XMARG', 'document', idprefix => 'XM');

#======================================================================

# Whenever an ltx:document element is opened, run ProcessPendingResources.
Tag('ltx:document', afterOpen => \&ProcessPendingResources);
# Request the LaTeXML.css resource.
RequireResource('LaTeXML.css');
#======================================================================
# The default "initial context" for XML+RDFa specifies some default
# terms and prefixes, but no default vocabulary.
# Ought to have a default for @vocab, but settable?
# can we detect use of simple "term"s in attributes so we know whether we need @vocab?
# Ought to have a default set of prefixes from RDFa Core,
# but allow prefixes to be added.
# Probably ought to scan rdf attributes for all uses of prefixes,
# and include them in @prefix
# The following prefixes are listed in http://www.w3.org/2011/rdfa-context/rdfa-1.1
{
  # prefix => vocabulary URI
  my %rdf_prefixes = (
    'cc'      => 'http://creativecommons.org/ns#',
    'ctag'    => 'http://commontag.org/ns#',
    'dc'      => 'http://purl.org/dc/terms/',
    'dcterms' => 'http://purl.org/dc/terms/',
    'ical'    => 'http://www.w3.org/2002/12/cal/icaltzd#',
    'foaf'    => 'http://xmlns.com/foaf/0.1/',
    'gr'      => 'http://purl.org/goodrelations/v1#',
    'grddl'   => 'http://www.w3.org/2003/g/data-view#',
    'ma'      => 'http://www.w3.org/ns/ma-ont#',
    'og'      => 'http://ogp.me/ns#',
    'owl'     => 'http://www.w3.org/2002/07/owl#',
    'rdf'     => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfa'    => 'http://www.w3.org/ns/rdfa#',
    'rdfs'    => 'http://www.w3.org/2000/01/rdf-schema#',
    'rev'     => 'http://purl.org/stuff/rev#',
    'rif'     => 'http://www.w3.org/2007/rif#',
    'rr'      => 'http://www.w3.org/ns/r2rml#',
    'schema'  => 'http://schema.org/',
    'sioc'    => 'http://rdfs.org/sioc/ns#',
    'skos'    => 'http://www.w3.org/2004/02/skos/core#',
    'skosxl'  => 'http://www.w3.org/2008/05/skos-xl#',
    'v'       => 'http://rdf.data-vocabulary.org/#',
    'vcard'   => 'http://www.w3.org/2006/vcard/ns#',
    'void'    => 'http://rdfs.org/ns/void#',
    'xhv'     => 'http://www.w3.org/1999/xhtml/vocab#',
    'xml'     => 'http://www.w3.org/XML/1998/namespace',
    'xsd'     => 'http://www.w3.org/2001/XMLSchema#',
    'wdr'     => 'http://www.w3.org/2007/05/powder#',
    'wdrs'    => 'http://www.w3.org/2007/05/powder-s#',
  );

  # Record every prefix/URI pair in the 'RDFa_prefixes' mapping.
  while (my ($prefix, $uri) = each %rdf_prefixes) {
    AssignMapping('RDFa_prefixes', $prefix => $uri); }
}

#**********************************************************************
# CORE TeX; Built-in commands.
#**********************************************************************

#======================================================================

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

# 'Expanded': read a braced tokens list, expanding as it goes,
# but expanding \the-like commands only once.
# Reverts to the argument wrapped in braces.
DefParameterType('Expanded', sub {
    my ($reader) = @_;
    return $reader->readBalanced(1, 0, 1); },
  reversion => sub {
    my ($tokens) = @_;
    return (T_BEGIN, Revert($tokens), T_END); });

# 'DefExpanded': read an expanded definition body, i.e.
# a braced tokens list, expanding as it goes,
# but expanding \the-like commands only once,
# and also packing # parameters.
# Reverts to the argument wrapped in braces.
DefParameterType('DefExpanded', sub {
    my ($reader) = @_;
    return $reader->readBalanced(1, 1, 1); },
  reversion => sub {
    my ($tokens) = @_;
    return (T_BEGIN, Revert($tokens), T_END); });

# Read a matching keyword, eg. Match:=
# (any extra arguments are handed on to readMatch)
DefParameterType('Match', sub {
    my ($gullet, @choices) = @_;
    return $gullet->readMatch(@choices); });

# Read a keyword; eg. Keyword:to
# (like Match, but ignores catcodes)
DefParameterType('Keyword', sub {
    my ($gullet, @choices) = @_;
    return $gullet->readKeyword(@choices); });

# Read balanced material (?)
DefParameterType('Balanced', sub {
    my ($gullet) = @_;
    return $gullet->readBalanced; });

# Read a Semiverbatim argument; ie w/ most catcodes neutralized.
DefParameterType('Semiverbatim', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  semiverbatim => 1,
  reversion    => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# Read a LaTeX-style optional argument (ie. in []), but the contents read as Semiverbatim.
# When the argument was absent, it reverts to nothing.
DefParameterType('OptionalSemiverbatim', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  semiverbatim => 1, optional => 1,
  reversion    => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });

# 'Verbatim': skip up to the opening brace, then read the balanced
# argument while semiverbatim is in effect.
# Be careful here: if % appears before the initial {, it's still a comment!
# Also, note that non-typewriter fonts will mess up some chars on digestion!
# Digestion happens inside a group with the typewriter font family merged in.
DefParameterType('Verbatim', sub {
    my ($reader) = @_;
    $reader->readUntil(T_BEGIN);
    StartSemiverbatim('%', '\\');
    my $body = $reader->readBalanced();
    EndSemiverbatim();
    return $body; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(family => 'typewriter'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# Read Verbatim, but allows expanding command sequences
# The reader skips up to the opening brace, then, while semiverbatim is in
# effect, redefines a handful of escape commands so that they yield plain
# OTHER characters, and finally reads the balanced argument.
# Digestion happens inside a group with the typewriter font family merged in;
# the argument reverts to itself wrapped in braces.
DefParameterType('HyperVerbatim', sub {
    my ($gullet) = @_;
    $gullet->readUntil(T_BEGIN);
    StartSemiverbatim('%');
    # Escaped specials become the corresponding literal (OTHER) characters.
    DefMacroI('\%',              undef, T_OTHER('%'), scope => 'local');
    DefMacroI('\#',              undef, T_OTHER('#'), scope => 'local');
    DefMacroI('\&',              undef, T_OTHER('&'), scope => 'local');
    DefMacroI('\textunderscore', undef, T_OTHER('_'), scope => 'local');
    # These Let's must follow the definitions they copy.
    Let('\_', '\textunderscore');
    DefMacroI('\hyper@tilde', undef, T_OTHER('~'), scope => 'local');
    Let('\~',              '\hyper@tilde');
    Let('\textasciitilde', '\hyper@tilde');
    Let('\\\\',            '\@backslashchar');
    # NOTE(review): the argument 1 presumably asks readBalanced to expand while reading -- confirm.
    my $arg = $gullet->readBalanced(1);
    EndSemiverbatim();
    return $arg; },
  beforeDigest => sub {
    $_[0]->bgroup;
    MergeFont(family => 'typewriter'); },
  afterDigest => sub {
    $_[0]->egroup; },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });

# Read an argument that will not be digested.
DefParameterType('Undigested', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  undigested => 1,
  reversion  => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# Read a LaTeX-style optional argument (ie. in []), but it will not be digested.
# When the argument was absent, it reverts to nothing.
DefParameterType('OptionalUndigested', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  undigested => 1, optional => 1,
  reversion  => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });

# Read a keyword value (KeyVals), that will not be digested.
DefParameterType('UndigestedKey', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  undigested => 1);
# Same, but the argument read additionally has its parameters packed.
DefParameterType('UndigestedDefKey', sub {
    my ($gullet) = @_;
    my $value = $gullet->readArg;
    return $value->packParameters; },
  undigested => 1);

# Read a token as used when defining it, ie. it may be enclosed in braces.
# As long as the token read is a begin-group token, the balanced group is read,
# its first significant token becomes the candidate, and any remaining
# tokens of the group are pushed back onto the gullet.
# Returns the token found (possibly undef, if nothing usable was read).
DefParameterType('DefToken', sub {
    my ($gullet) = @_;
    my $token = $gullet->readToken;
    while ($token && ($token->getCatcode == CC_BEGIN)) {
      my $cc;
      # Keep only tokens whose catcode (element [1]) is true (non-zero)
      # and is neither space nor comment.
      my @toks = grep { ($cc = $$_[1]) && ($cc != CC_SPACE) && ($cc != CC_COMMENT); }
        $gullet->readBalanced->unlist;
      $token = shift(@toks);
      # Whatever followed the first token inside the braces is read again later.
      $gullet->unread(@toks); }
    $token; },
  undigested => 1);

# Stub register for misdefinitions, to avoid a cascade of Errors.
# It takes no parameters and starts out holding an empty token list.
DefRegisterI('\lx@DUMMY@REGISTER', undef, Tokens());

# Read a variable, ie. a token (after expansion) that is a writable register.
DefParameterType('Variable', sub {
    my ($gullet) = @_;
    my $token    = $gullet->readXToken;
    my $defn     = $token && LookupDefinition($token);
    if ((defined $defn) && $defn->isRegister && !$defn->isReadonly) {
      [$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; }
    else {
      DefRegisterI($token, undef, Dimension(0));    # Don't really know what KIND of variable!
      if ($token && ($token->getCatcode == CC_CS)) {
        Error('expected', '<variable>', $gullet,
          "A <variable> was supposed to be here", "Got " . Stringify($token),
          "Defining it now.");
        DefRegisterI($token, undef, Dimension(0));    # Dimension, or what?
        return [LookupDefinition($token)]; }
      else {
        Error('expected', '<variable>', $gullet,

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

  afterDigest => sub {
    $_[0]->egroup; },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
# Each of the following parameter types reads a regular argument and digests it
# inside a group in which the font has been adjusted by MergeFont.
# 'TextStyle': digested with mathstyle 'text'.
DefParameterType('TextStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'text'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# 'ScriptStyle': digested with mathstyle 'script'.
DefParameterType('ScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'script'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# 'ScriptscriptStyle': digested with mathstyle 'scriptscript'.
DefParameterType('ScriptscriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'scriptscript'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# Perverse naming convention: not script style, but in the style of a script relative to current.
DefParameterType('InScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(scripted => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# NOTE: the various parameter features don't combine easily!!
# I need a ScriptStyleUntil for \root!!!
# I also need to redo fractions using these new types....
# 'OptionalInScriptStyle': like InScriptStyle, but for an optional ([]) argument.
DefParameterType('OptionalInScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(scripted => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  optional  => 1,
  reversion => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# 'InFractionStyle': digested with the font's fraction flag set.
DefParameterType('InFractionStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(fraction => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
#**********************************************************************
# LaTeX has a very particular notion of "Undefined",
# so let's get that squared away at the outset; it's useful for TeX, too!
# Naturally, it uses \csname to check, which ends up DEFINING the possibly undefined macro as \relax
# \@ifundefined{name}{if-undefined}{if-defined}
# Builds the control sequence \name from the expanded first argument and
# selects one of the two branches. As a side effect (mimicking \csname),
# a previously undefined \name ends up with the meaning of \relax.
DefMacro('\@ifundefined{}{}{}', sub {
    my ($gullet, $csname, $when_undefined, $when_defined) = @_;
    my $cs = T_CS('\\' . ToString(Expand($csname)));
    return $when_defined->unlist if IsDefined($cs);
    # Let w/o AfterAssign
    $STATE->assignMeaning($cs, $STATE->lookupMeaning(T_CS('\relax')));
    return $when_undefined->unlist; },
  locked => 1);

# Test whether $token may be (re)defined.
# Returns nothing if no token is given. Otherwise the result is true when
# the token has no meaning yet, or currently means the same as \relax,
# or 2.09 compatibility mode is active -- provided that its name
# (without leading backslash) is not 'relax' and does not start with 'end'.
sub isDefinable {
  my ($token) = @_;
  return unless $token;
  my $meaning = LookupMeaning($token);
  (my $name = $token->getString) =~ s/^\\//;
  my $available = (!defined $meaning)
    || ($meaning eq LookupMeaning(T_CS('\relax')))
    || LookupValue('2.09_COMPATIBILITY');    # Let redefinitions happen in compatibility mode.
  return ($available && (($name ne 'relax') && ($name !~ /^end/))); }

#**********************************************************************
# Expandable Primitives
# See The TeXBook, Ch. 20, Definitions (also called Macros) pp. 212--215
#**********************************************************************

#======================================================================
# Should complain if we aren't actually evaluating an \if

# The following special cases are built-in to Definition
# (so no test code is supplied here: the second argument is undef).
DefConditional('\else',          undef);
DefConditional('\or',            undef);
DefConditional('\fi',            undef);
# \ifcase reads a Number argument.
DefConditional('\ifcase Number', undef);

# Compare two values according to a relational token.
#   $u, $v : plain numbers, or references (objects) that provide valueOf,
#            in which case their valueOf is what gets compared.
#   $rel   : a token; either <, = or > (as T_OTHER), or the control
#            sequences \@@< or \@@>.
# Returns the truth value of the numeric comparison.
# For any other $rel an Error is reported and nothing is returned.
sub compare {
  my ($u, $rel, $v) = @_;
  $u = $u->valueOf if ref $u;
  $v = $v->valueOf if ref $v;
  if ($rel->equals(T_OTHER('<')) || $rel->equals(T_CS('\@@<'))) {
    return $u < $v; }
  elsif ($rel->equals(T_OTHER('='))) {
    return $u == $v; }
  elsif ($rel->equals(T_OTHER('>')) || $rel->equals(T_CS('\@@>'))) {
    return $u > $v; }
  else {
    # Not a relation we know about: complain, and yield a false (empty) result.
    Error('expected', '<relationaltoken>', $STATE->getStomach->getGullet,
      "Expected a relational token for comparison", "Got " . Stringify($rel));
    return; } }

# Numeric and dimension comparisons; both delegate to compare().
DefConditional('\ifnum Number Token Number', sub {
    my (undef, $lhs, $relation, $rhs) = @_;
    return compare($lhs, $relation, $rhs); });
DefConditional('\ifdim Dimension Token Dimension', sub {
    my (undef, $lhs, $relation, $rhs) = @_;
    return compare($lhs, $relation, $rhs); });
# True when the number's value is odd.
DefConditional('\ifodd Number', sub {
    my (undef, $number) = @_;
    return $number->valueOf % 2; });

# NOTE: We don't KNOW if we're in vertical, horizontal or inner mode!!!!!!!
# So these three tests are always false.
DefConditionalI('\ifvmode', undef, sub { return 0; });
DefConditionalI('\ifhmode', undef, sub { return 0; });
DefConditionalI('\ifinner', undef, sub { return 0; });

# Math mode is tracked through the IN_MATH value.
DefConditionalI('\ifmmode', undef, sub { return LookupValue('IN_MATH'); });

# 'ExpandedIfToken': the token argument of \if and \ifcat, read via readXToken(0, 1).
# If nothing could be read, an Error is reported and \@empty is substituted.
DefParameterType('ExpandedIfToken', sub {
    my ($gullet) = @_;
    my $token = $gullet->readXToken(0, 1);
    return $token if $token;
    Error('expected', 'ExpandedIfToken', $gullet,
      "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty");
    return T_CS('\@empty'); });

# \if compares the character codes of the two tokens.
DefConditional('\if ExpandedIfToken ExpandedIfToken', sub {
    my (undef, $first, $second) = @_;
    return $first->getCharcode == $second->getCharcode; });
# \ifcat compares the category codes of the two tokens.
DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub {
    my (undef, $first, $second) = @_;
    return $first->getCatcode == $second->getCatcode; });
# \ifx defers to XEquals on the two (unexpanded) tokens.
DefConditional('\ifx Token Token', sub {
    my (undef, $first, $second) = @_;
    return XEquals($first, $second); });

# Kinda rough: We don't really keep track of modes as carefully as TeX does.
# We'll assume that a box is horizontal if there's anything at all,
# but it's not a vbox (!?!?)
# Given a box number (an object with valueOf), returns '' when that box
# register is empty, 'vbox' when it holds a Whatsit whose definition is
# that of \vbox, and 'hbox' for anything else.
sub classify_box {
  my ($boxnum) = @_;
  my $contents = LookupValue('box' . $boxnum->valueOf);
  return '' unless $contents;
  my $is_vbox = $contents->isa('LaTeXML::Core::Whatsit')
    && ($contents->getDefinition eq LookupDefinition(T_CS('\vbox')));
  return ($is_vbox ? 'vbox' : 'hbox'); }

# Box tests, all based on classify_box.
DefConditional('\ifvoid Number', sub {
    my (undef, $boxnum) = @_;
    return !classify_box($boxnum); });
DefConditional('\ifhbox Number', sub {
    my (undef, $boxnum) = @_;
    return classify_box($boxnum) eq 'hbox'; });
DefConditional('\ifvbox Number', sub {
    my (undef, $boxnum) = @_;
    return classify_box($boxnum) eq 'vbox'; });

# The constant conditionals.
DefConditionalI('\iftrue',  undef, sub { return 1; });
DefConditionalI('\iffalse', undef, sub { return 0; });

#======================================================================
# This makes \relax disappear completely after digestion
# (which seems most TeX like).
DefPrimitive('\relax', sub { return (); });
### However, this keeps a box, so it can appear in UnTeX
### DefPrimitive('\relax',undef);
## But if you do that, you've got to watch out since it usually
### shouldn't be a box; See the isRelax code in handleScripts, below
# Internal token produced by Gullet in response to \dont_expand;
# Acts like \relax, but isn't equal to it.
DefPrimitiveI('\special_relax', undef, sub { return (); });

# \number: the value of the Number argument, exploded into tokens.
DefMacro('\number Number', sub {
    my (undef, $number) = @_;
    return Explode($number->valueOf); });
# Define \protect here (only approximately), since it's already useful.
Let('\protect', '\relax');

#======================================================================

# \romannumeral: passes the value of the Number argument to roman().
DefMacro('\romannumeral Number', sub {
    my (undef, $number) = @_;
    return roman($number->valueOf); });
# Hmm... I wonder, should getString itself be dealing with escapechar?
# Returns the character whose code is the current value of \escapechar,
# or the empty string when that value lies outside 0..255.
sub escapechar {
  my $code = LookupRegister('\escapechar')->valueOf;
  return '' if ($code < 0) || ($code > 255);
  return chr($code); }

# 1) Knuth, The TeXBook, page 40, paragraph 1, Chapter 7: How TEX Reads What You Type.
# suggests all characters except spaces are returned in category code Other, i.e. Explode()
# A leading backslash in the token's string is replaced by the current
# escape character (see escapechar) before exploding.
DefMacro('\string Token', sub {
    my (undef, $token) = @_;
    my $text = $token->toString;
    $text = escapechar() . $text if $text =~ s/^\\//;
    return Explode($text); });

# \jobname starts out with an empty expansion.
DefMacroI('\jobname', undef, Tokens());    # Set to the filename by initialization

# Placeholder only: \fontname expands to the exploded text "fontname not implemented".
DefMacroI('\fontname', undef, sub { Explode("fontname not implemented"); });

# Descriptive phrases used by \meaning for character tokens, indexed by the
# token's catcode number. An undef entry means no phrase is prefixed.
# NOTE(review): the index order presumably follows the CC_* catcode constants
# (0 = escape ... 15 = invalid, followed by LaTeXML-internal codes) -- confirm.
our @CATCODE_MEANING = (
  "the escape character",      "begin-group character",
  "end-group character",       "math shift character",
  "alignment tab character",   "end-of-line character",
  "macro parameter character", "superscript character",
  "subscript character",       "ignored character",
  "blank space",               "the letter",
  "the character",             "active character",
  "comment character",         "invalid character",
  undef,                       "latexml marker character",
  "macro parameter character");

# Not sure about this yet...
# NOTE: Lots of back-and-forth mangle with definition vs cs; don't do that!
# \meaning<token>: expands to the exploded textual description of the token's
# current meaning ('undefined' when it has none):
#   * character tokens: catcode phrase (from @CATCODE_MEANING) + the character;
#   * registers: the register's address;
#   * macros: optional prefixes, 'macro:', the parameter text, '->' and the body;
#   * \chardef / \mathchardef tokens: \char"<hex> or \mathchar"<hex>;
#   * definitions implemented in Perl (primitives, conditionals, constructors)
#     are represented by their own control sequence (or as a font selection).
DefMacro('\meaning Token', sub {
    my ($gullet, $tok) = @_;
    my $meaning = 'undefined';
    if (my $definition = ($tok->defined_as(T_ALIGN) ? $tok : LookupMeaning($tok))) {
      my $type = ref $definition;
      $type =~ s/^LaTeXML:://;
      # Pre-step: We can't extract the bodies of definitions which are defined via Perl subroutines.
      # So do the next best thing -- represent them as their tokens.
      if ($type =~ /(primitive|conditional|constructor)$/i) {
        $definition = $definition->getCSorAlias;
        $type       = ref $definition;
        $type =~ s/^LaTeXML:://;
        if (my $fontinfo = LookupValue('fontinfo_' . ToString($definition))) {
          $meaning = 'select font ' . ($$fontinfo{fontname} || 'fontname');
          $meaning .= ' at ' . $$fontinfo{at} if $$fontinfo{at};
          $type = 'font'; } }
      # The actual tests start here
      if ($type =~ /token$/i) {
        my $cc         = $definition->getCatcode;
        my $char       = $definition->toString;
        my $meaning_cc = $CATCODE_MEANING[$cc] || '';
        $meaning_cc .= ' ' if $meaning_cc;    # append space separator if defined
        $meaning = $meaning_cc . $char; }
      elsif ($type =~ /register$/i) {
        $meaning = $definition->getAddress; }
      elsif ($type =~ /expandable$/i) {
# short-circuit some troublesome discrepancies with TeX, which end up macros on LaTeXML's end, but \meaning expects as primitives in the CTAN ecosystem.
        my $cs = ToString($definition->getCSorAlias);
        # These exceptions could be extended further, as we add more .sty/.cls support
        return Explode($cs) if $cs =~ /^\\(?:(?:un)?expanded|detokenize)$/;
        my $expansion  = $definition->getExpansion;
        my $ltxps      = $definition->getParameters;
        my $arg_index  = 0;
        my @spec_parts = ();
        my @params     = $ltxps ? $ltxps->getParameters : ();
        my $p_trailer  = '';

        # Rebuild TeX's parameter text (#1#2...) from LaTeXML's parameter specs.
        for my $param (@params) {
          my $p_spec = $$param{spec};
          if ($p_spec eq 'RequireBrace') {
            # tex's \meaning prints out the required braces for "\def\a#{}" variants
            $p_trailer = '{';
            $p_spec    = '{'; }
          elsif ($p_spec eq 'UntilBrace') {    # should only ever be used in the last argument?
            $p_trailer = '{';
            $p_spec    = "#" . (++$arg_index) . '{'; }
          elsif ($p_spec =~ s/^Match://) { }          # just match, don't increment arg index
          elsif ($p_spec =~ s/^\w?Until(\w*)://) {    # implied argument at this slot
            $p_spec = "#" . (++$arg_index) . $p_spec; }
          else {                                      # regular parameter, increment
            next if $$param{novalue}; # skip the latexml-only requirement params, but only here, since Match also have "novalue" set.
            $p_spec = "#" . (++$arg_index); }
          push @spec_parts, $p_spec; }
        my $spec = join("", @spec_parts);
        $spec =~ s/\{\}//g;
        $spec =~ s/Token//g;
        my $prefixes = join('',
          ($definition->isProtected ? '\protected' : ()),
          ($definition->isLong      ? '\long'      : ()),
          ($definition->isOuter     ? '\outer'     : ()),
        );
        my $expansion_str = '';
        if (ref $expansion eq 'LaTeXML::Core::Tokens') {
          $expansion_str = writableTokens($expansion); }
        else {
          $expansion_str = ToString($expansion); }
        $meaning = ($prefixes ? $prefixes . ' ' : '') .
          "macro:$spec->$expansion_str$p_trailer"; }
      elsif ($type =~ /chardef$/i) {    # from \chardef or \mathchardef
        my $prefix = ($$definition{mathglyph} ? '\mathchar' : '\char');
        # The '"' announces a hexadecimal number, so the code must be printed
        # in (uppercase) hex, as TeX does: e.g. \char"41 for code 65.
        $meaning = $prefix . '"' . sprintf('%X', $definition->valueOf->valueOf); } }
    # One catch: make sure all \s in the meaning string are normalized to a simple space ' '
    $meaning =~ s/\s/ /g;
    return Explode($meaning); });

# 'CSName': the control sequence obtained from the gullet's readCSName.
DefParameterType('CSName', sub {
    my ($gullet) = @_;
    return $gullet->readCSName; });

# \csname...\endcsname: expands to the control sequence that was read;
# if it has no meaning yet, it is first given the meaning of \relax.
DefMacro('\csname CSName', sub {
    my (undef, $cs) = @_;
    if (!defined LookupMeaning($cs)) {
      $STATE->assignMeaning($cs, $STATE->lookupMeaning(T_CS('\relax'))); }
    return $cs; });

# An \endcsname that gets digested has no matching \csname:
# report the error (with what the gullet shows as unexpected); produce nothing.
DefPrimitive('\endcsname', sub {
    my ($stomach) = @_;
    my @context = $stomach->getGullet->showUnexpected;
    Error('unexpected', '\endcsname', $stomach, "Extra \\endcsname", @context);
    return; });

# \expandafter<token1><token2>: expand <token2> ONCE, then put <token1> back
# in front of the result.
# A chain such as \expandafter\a\expandafter\b\c is unrolled in a loop:
# while the token to be expanded is itself \expandafter, the token following
# it is set aside (in order) and the one after that becomes the new candidate.
# Finally the candidate is expanded once (if expandable) and the result is
# returned preceded by all tokens that were set aside.
# (Only this single definition is kept; an earlier, simpler definition of the
# same control sequence was overwritten by it and thus never took effect.)
use constant T_expandafter => T_CS('\expandafter');
DefMacro('\expandafter Token Token', sub {
    no warnings 'recursion';
    my ($gullet, $tok, $xtok) = @_;
    my $defn;
    my @skipped = ($tok);
    while ($xtok->defined_as(T_expandafter)) {
      push(@skipped, $gullet->readToken);
      $xtok = $gullet->readToken; }
    if (defined($defn = $STATE->lookupExpandable($xtok))) {
      my @x = ();
      {
        local $LaTeXML::CURRENT_TOKEN = $xtok;
        @x = $defn->invoke($gullet, 1);    # Expand $xtok ONCE ONLY!
      }
      (@skipped, @x); }
    elsif (!$STATE->lookupMeaning($xtok)) {
      # Undefined token is an error, as expansion is expected.
      # BUT The unknown token is NOT consumed, (see TeX B book, item 367)
      # since probably in a real TeX run it would have been defined.
      $STATE->generateErrorStub($gullet, $xtok);
      (@skipped, $xtok); }
    else {
      # Defined, but not expandable: everything is passed through unchanged.
      (@skipped, $xtok); } });

# If next token is expandable, prefix it with the internal marker \dont_expand
# That token is never defined, explicitly handled in Gullet & should never escape the Gullet
DefMacroI('\noexpand', undef, sub {
    my ($gullet) = @_;
    my $token = $gullet->readToken;
    # Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed
    return () unless $token;
    my $catcode = $$token[1];
    if ((($catcode == CC_CS) || ($catcode == CC_ACTIVE)) && $STATE->isDontExpandable($token)) {
      return (T_CS('\dont_expand'), $token); }
    return $token; });

# Safety net: digesting \dont_expand indicates a bug, so report it.
DefPrimitiveI('\dont_expand', undef, sub {
    my ($stomach) = @_;
    Error('misdefined', '\dont_expand', $stomach,
      "The token \\dont_expand should never reach Stomach!"); });

# The mark macros all expand to nothing.
foreach my $mark (qw(\topmark \firstmark \botmark \splitfirstmark \splitbotmark)) {
  DefMacroI($mark, undef, Tokens()); }

# \input<filename>: input the named file (marked as reloadable).
# A braced, LaTeX-style argument has its braces removed, and additionally
# triggers loading of the LaTeX pool unless that has already happened.
DefMacro('\input TeXFileName', sub {
    my (undef, $filename) = @_;
    my @toks = $filename->unlist;
    # If given a LaTeX-style argument, strip braces
    my $braced = @toks && $toks[0]
      && $toks[0]->getCatcode == CC_BEGIN && $toks[-1]->getCatcode == CC_END;
    if ($braced) {
      $filename = Tokens(@toks[1 .. $#toks - 1]);
      # and load LaTeX.pool if not already
      LoadPool("LaTeX") unless LookupValue('LaTeX.pool_loaded'); }
    return Input($filename, reloadable => 1); });

# Note that TeX doesn't actually close the mouth;
# it just flushes it so that it will close the next time it's read!
# Implemented by calling flushMouth on the gullet (the first argument).
DefMacroI('\endinput', undef, sub { $_[0]->flushMouth; });

# \the<internal quantity>
# Expands to the tokens representing the current value of a register.
# A missing register, or a non-register (other than \font), is reported
# as an Error and yields nothing.
DefMacro('\the Register', sub {
    my ($gullet, $variable) = @_;
    return () unless $variable;
    my ($defn, @args) = @$variable;
    if (!$defn || $defn eq 'missing') {
      Error('expected', "<register>", $gullet, "a register was expected to be here");
      return (); }
    my $type = $defn->isRegister;
    if (!$type) {
      my $cs = ToString($defn->getCS);
      # what to do here?
      return T_CS('\tenrm') if $cs eq '\font';
      Error('unexpected', "\\the$cs", $gullet, "You can't use $cs after \\the");
      return (); }
    my $value = $defn->valueOf(@args);
    ## In all cases, these should be OTHER, except for space. (!?)
    my @tokens;
    if ($type eq 'Tokens') {
      # Token registers contribute their content as is (nothing if empty).
      @tokens = ($value ? $value->unlist : ()); }
    else {
      @tokens = Explode(ToString($value)); }
    return @tokens; });

#**********************************************************************
# Primitives
# See The TeXBook, Chapter 24, Summary of Vertical Mode
#  and Chapter 25, Summary of Horizontal Mode.
# Parsing of basic types (pp.268--271) is (mostly) handled in Gullet.pm
#**********************************************************************

#======================================================================
# Registers & Parameters
# See Chapter 24, Summary of Vertical Mode
# Define a whole mess of useless registers here ...
# Values are from Appendix B, pp. 348-349 (for whatever it's worth)
#======================================================================

#======================================================================
# Integer registers; TeXBook p. 272-273

# \tracingmacros and \tracingcommands are views onto single bits of the
# TRACING value: reading yields the masked bit, assigning a non-zero
# value sets the bit, and assigning zero clears it.
DefRegister('\tracingmacros', Number(0),
  getter => sub {
    my $flags = LookupValue('TRACING') || 0;
    return Number($flags & TRACE_MACROS); },
  setter => sub {
    my ($value) = @_;
    my $flags   = LookupValue('TRACING') || 0;
    my $updated = ($value->valueOf ? ($flags | TRACE_MACROS) : ($flags & ~TRACE_MACROS));
    AssignValue(TRACING => $updated); });
DefRegister('\tracingcommands', Number(0),
  getter => sub {
    my $flags = LookupValue('TRACING') || 0;
    return Number($flags & TRACE_COMMANDS); },
  setter => sub {
    my ($value) = @_;
    my $flags   = LookupValue('TRACING') || 0;
    my $updated = ($value->valueOf ? ($flags | TRACE_COMMANDS) : ($flags & ~TRACE_COMMANDS));
    AssignValue(TRACING => $updated); });
{
  # Integer parameters: name (without backslash) => initial value
  my %iparms = (
    pretolerance         => 100,   tolerance           => 200, hbadness => 1000, vbadness => 1000,
    linepenalty          => 10,    hyphenpenalty       => 50,  exhyphenpenalty => 50,
    binoppenalty         => 700,   relpenalty          => 500,
    clubpenalty          => 150,   widowpenalty        => 150, displaywidowpenalty => 50,
    brokenpenalty        => 100,   predisplaypenalty   => 10000,
    postdisplaypenalty   => 0,     interlinepenalty    => 0,
    floatingpenalty      => 0,     outputpenalty       => 0,
    doublehyphendemerits => 10000, finalhyphendemerits => 5000, adjdemerits => 10000,
    looseness            => 0,     pausing             => 0,
    holdinginserts       => 0,     tracingonline       => 0, tracingstats  => 0,
    tracingparagraphs    => 0,     tracingpages        => 0, tracingoutput => 0,
    tracinglostchars     => 1,
    tracingrestores      => 0, language   => 0, uchyph            => 1,        lefthyphenmin   => 0,
    righthyphenmin       => 0, globaldefs => 0, defaulthyphenchar => ord('-'), defaultskewchar => -1,
    escapechar => ord('\\'), endlinechar => ord("\r"), newlinechar => -1, maxdeadcycles => 0, hangafter => 0,
    fam        => -1,        mag         => 1000,      magnification     => 1000, delimiterfactor => 0,
    time       => 0,         day         => 0,         month             => 0,    year            => 0,
    showboxbreadth => 5, showboxdepth => 3,            errorcontextlines => 5);

  # Define each one as a Number register.
  while (my ($name, $initial) = each %iparms) {
    DefRegister("\\$name", Number($initial)); }
}

# Most of these are ignored, but...
# \tracingall expands to assignments to the various tracing and showbox
# registers, followed by \errorstopmode.
DefMacro('\tracingall',
  '\tracingonline=1 \tracingcommands=2 \tracingstats=2'
    . ' \tracingpages=1 \tracingoutput=1 \tracinglostchars=1'
    . ' \tracingmacros=2 \tracingparagraphs=1 \tracingrestores=1'
    . ' \showboxbreadth=\maxdimen \showboxdepth=\maxdimen \errorstopmode');
# These two expand to nothing.
DefMacroI('\tracingnone', undef, Tokens());
DefMacroI('\hideoutput',  undef, Tokens());
# This may mess up Daemon state?
# Initialize \day, \month, \year and \time (minutes since midnight), globally.
# If SOURCE_DATE_EPOCH is defined in the environment, that instant (as UTC)
# is used; otherwise the current local time.
{ my $epoch = $ENV{SOURCE_DATE_EPOCH};
  my @clock = defined $epoch ? gmtime($epoch) : localtime();
  my ($minute, $hour, $mday, $month, $year) = @clock[1 .. 5];
  AssignValue('\day'   => Number($mday),                'global');
  AssignValue('\month' => Number($month + 1),           'global');
  AssignValue('\year'  => Number(1900 + $year),         'global');
  AssignValue('\time'  => Number(60 * $hour + $minute), 'global'); }

# English month names; index 0 is January.
our @MonthNames = (
  'January', 'February', 'March',     'April',   'May',      'June',
  'July',    'August',   'September', 'October', 'November', 'December');

# Return a string for today's date, formatted like "January 1, 2000",
# built from the current values of \month, \day and \year.
sub today {
  my ($month, $day, $year) = map { LookupValue($_)->valueOf } ('\month', '\day', '\year');
  return $MonthNames[$month - 1] . " " . $day . ', ' . $year; }

# Read-only Integer registers
{
  my %ro_iparms = (lastpenalty => 0, badness => 0);
  while (my ($name, $initial) = each %ro_iparms) {
    DefRegister("\\$name", Number($initial), readonly => 1); }
}

# Special integer registers (?)
# <special integer> = \spacefactor | \prevgraf | \deadcycles | \insertpenalties
{
  my %sp_iparms = (spacefactor => 0, prevgraf => 0, deadcycles => 0, insertpenalties => 0);
  while (my ($name, $initial) = each %sp_iparms) {
    DefRegister("\\$name", Number($initial)); }
}

#======================================================================
# Dimen registers; TeXBook p. 274
{
  # Initial values of the dimen parameters, keyed by register name (without the backslash).
  # A bare 0 is handed to Dimension() as-is.
  my %dimen_defaults = (
    hfuzz              => '0.1pt',
    vfuzz              => '0.1pt',
    overfullrule       => '5pt',
    emergencystretch   => 0,
    hsize              => '6.5in',
    vsize              => '8.9in',
    maxdepth           => '4pt',
    splitmaxdepth      => '16383.99999pt',
    boxmaxdepth        => '16383.99999pt',
    lineskiplimit      => 0,
    delimitershortfall => '5pt',
    nulldelimiterspace => '1.2pt',
    scriptspace        => '0.5pt',
    mathsurround       => 0,
    predisplaysize     => 0,
    displaywidth       => 0,
    displayindent      => 0,
    parindent          => '20pt',
    hangindent         => 0,
    hoffset            => 0,
    voffset            => 0,
  );

  # One register per entry.
  foreach my $name (sort keys %dimen_defaults) {
    DefRegister("\\$name", Dimension($dimen_defaults{$name})); }
}

# Special dimension registers (?)
# <special dimen> = \prevdepth | \pagegoal | \pagetotal | \pagestretch | \pagefilstretch
#    | \pagefillstretch | \pagefilllstretch | \pageshrink | \pagedepth
{
  # All of the special dimension registers start out as a zero dimension.
  my @special_dimens = qw(
    prevdepth pagegoal pagetotal pagestretch pagefilstretch
    pagefillstretch pagefilllstretch pageshrink pagedepth);
  foreach my $name (@special_dimens) {
    DefRegister("\\$name", Dimension(0)); }
}
#======================================================================
# Glue registers; TeXBook p.274
{
  # [register name, initial glue specification] pairs; a bare 0 is handed to Glue() as-is.
  my @glue_defaults = (
    [baselineskip          => '12pt'],
    [lineskip              => '1pt'],
    [parskip               => '0pt plus 1pt'],
    [abovedisplayskip      => '12pt plus 3pt minus 9pt'],
    [abovedisplayshortskip => '0pt plus 3pt'],
    [belowdisplayskip      => '12pt plus 3pt minus 9pt'],
    [belowdisplayshortskip => '0pt plus 3pt'],
    [leftskip              => 0],
    [rightskip             => 0],
    [topskip               => '10pt'],
    [splittopskip          => '10pt'],
    [tabskip               => 0],
    [spaceskip             => 0],
    [xspaceskip            => 0],
    [parfillskip           => '0pt plus 1fil'],
  );

  foreach my $entry (@glue_defaults) {
    my ($name, $spec) = @$entry;
    DefRegister("\\$name", Glue($spec)); }
}
#======================================================================
# MuGlue registers; TeXBook p.274
{
  # Initial math-glue values, keyed by register name (without the backslash).
  my %muskip_defaults = (
    thinmuskip  => "3mu",
    medmuskip   => "4mu plus 2mu minus 4mu",
    thickmuskip => "5mu plus 5mu");
  foreach my $name (qw(thinmuskip medmuskip thickmuskip)) {
    DefRegister("\\$name" => MuGlue($muskip_defaults{$name})); }
}
#======================================================================
# Token registers; TeXBook p.275
{
  # Every token-list parameter starts out empty.
  my @token_registers = map { "\\" . $_ } qw(
    output everypar everymath everydisplay everyhbox everyvbox
    everyjob everycr everyhelp);
  foreach my $cs (@token_registers) {
    DefRegister($cs, Tokens()); }
}
#======================================================================
# Assignment, TeXBook Ch.24, p.275
#======================================================================
# <assignment> = <non-macro assignment> | <macro assignment>

#======================================================================
# Macros
# See Chapter 24, p.275-276
# <macro assignment> = <definition> | <prefix><macro assignment>
# <definition> = <def><control sequence><definition text>
# <def> = \def | \gdef | \edef | \xdef
# <definition text> = <parameter text><left brace><balanced text><right brace>

# Parse the parameter text of a \def-style definition into a Parameters object.
#   $cs     : the control sequence being defined; only used in the error message.
#   $params : the raw parameter tokens; packParameters and unlist are called on it.
# Returns a LaTeXML::Core::Parameters built, in order of appearance, from
#   'Match'        : literal text appearing before the next parameter (carries no value),
#   'Plain'        : an undelimited #n,
#   'Until'        : a #n followed by delimiting text,
#   'UntilBrace'   : a #n followed by a single trailing #,
#   'RequireBrace' : a lone final # that is not consumed as #n,
# or undef when no parameters at all were found.
# Signals Fatal when a parameter is not numbered as the next one in sequence.
sub parseDefParameters {
  my ($cs, $params) = @_;
  my @tokens = $params->packParameters->unlist;
  # Now, recognize parameters and delimiters.
  my @params = ();
  my $n      = 0;    # number of parameters seen so far
  while (@tokens) {
    my $t  = shift(@tokens);
    my $cc = $$t[1];    # compared against catcode constants below; presumably same as getCatcode — TODO confirm
    if ($cc == CC_PARAM || $cc == CC_ARG) {
      if ($cc == CC_PARAM) {
        if (!@tokens) {    # Special case: lone # NOT following a numbered parameter
                           # Note that we require a { to appear next, but do NOT read it!
          push(@params, LaTeXML::Core::Parameter->new('RequireBrace', 'RequireBrace'));
          last; }
        else {
          # A # followed by something: step past the # to the token that should carry the number.
          $n++; $t = shift(@tokens); } }
      else {    # CC_ARG case, keep looking at this token
        $n++; }
      # $$t[0] is compared numerically with the expected index
      # (presumably it holds the token's text, i.e. the digit — TODO confirm).
      Fatal('expected', "#$n", $STATE->getStomach,
        "Parameters for '" . ToString($cs) . "' not in order in " . ToString($params))
        unless (defined $t) && ($n == int($$t[0]));
      # Check for delimiting text following the parameter #n
      my @delim = ();
      my $pc    = -1;    # catcode of the previous delimiter token (-1: none yet)
    INNER_DELIM: while (@tokens) {
        my $inner_cc = $tokens[0]->getCatcode;
        # The delimiter ends where the next parameter begins.
        last INNER_DELIM if $inner_cc == CC_PARAM || $inner_cc == CC_ARG;
        my $d = shift(@tokens);
        push(@delim, $d) unless $pc == CC_SPACE && $inner_cc == CC_SPACE;    # BUT collapse whitespace!
        $pc = $inner_cc; }
      # Found text that marks the end of the parameter
      if (@delim) {
        my $expected = Tokens(@delim);
        push(@params, LaTeXML::Core::Parameter->new('Until',
            'Until:' . ToString($expected),
            extra => [$expected])); }
      # Special case: trailing sole # => delimited by next opening brace.
      elsif ((scalar(@tokens) == 1) && ($tokens[0]->getCatcode == CC_PARAM)) {
        shift(@tokens);
        push(@params, LaTeXML::Core::Parameter->new('UntilBrace', 'UntilBrace')); }
      # Nothing? Just a plain parameter.
      else {
        push(@params, LaTeXML::Core::Parameter->new('Plain', '{}')); } }
    else {
      # Initial delimiting text is required.
      # Collect this token and everything up to (not including) the next parameter marker.
      my @lit = ($t);
      my $lit_cc;
      # NOTE(review): the loop also stops at a token whose catcode is numerically 0.
      while (@tokens && ($lit_cc = $tokens[0]->getCatcode) &&
        ($lit_cc != CC_PARAM && $lit_cc != CC_ARG)) {
        push(@lit, shift(@tokens)); }
      my $expected = Tokens(@lit);
      push(@params, LaTeXML::Core::Parameter->new('Match',
          'Match:' . ToString($expected),
          extra   => [$expected],
          novalue => 1)); }
  }
  return (@params ? LaTeXML::Core::Parameters->new(@params) : undef); }

# Shared implementation of \def, \gdef, \edef and \xdef.
#   $globally : true to install the definition globally, false for the current scope.
#   $gullet   : passed as the context object to Error.
#   $cs       : the control sequence token being defined.
#   $params   : the raw parameter text, parsed here by parseDefParameters.
#   $body     : the replacement text, as already read by the body parameter type.
# Reports an Error and does nothing when $cs or $params is missing.
sub do_def {
  my ($globally, $gullet, $cs, $params, $body) = @_;
  unless ($cs) {
    Error('expected', 'Token', $gullet, "Expected definition token");
    return; }
  unless ($params) {
    Error('misdefined', $cs, $gullet, "Expected definition parameter list");
    return; }
  my $parsed = parseDefParameters($cs, $params);
  # nopackParameters => 1 : leave preparing the ##, #1-#9 tokens to the Def parameter types
  # to avoid carrying the masks around and keep core code simple
  my $definition = LaTeXML::Core::Definition::Expandable->new($cs, $parsed, $body,
    nopackParameters => 1);
  my $scope = ($globally ? 'global' : undef);
  $STATE->installDefinition($definition, $scope);
  AfterAssignment();
  return; }

{
  # [prototype, install globally?]; the last word of each prototype is the parameter type
  # used to read the body (DefPlain or DefExpanded).
  my @def_variants = (
    ['\def  SkipSpaces Token UntilBrace DefPlain',    0],
    ['\gdef SkipSpaces Token UntilBrace DefPlain',    1],
    ['\edef SkipSpaces Token UntilBrace DefExpanded', 0],
    ['\xdef SkipSpaces Token UntilBrace DefExpanded', 1],
  );
  foreach my $variant (@def_variants) {
    my ($prototype, $globally) = @$variant;
    DefPrimitive($prototype, sub { do_def($globally, @_); }, locked => 1); }
}

# <prefix> = \global | \long | \outer
# See Stomach.pm & Stomach.pm
# Each prefix primitive only records its own name via $STATE->setPrefix.
foreach my $prefix (qw(global long outer)) {
  DefPrimitiveI("\\$prefix", undef, sub { $STATE->setPrefix($prefix); return; }, isPrefix => 1); }

#======================================================================
# Non-Macro assignments; TeXBook Ch.24, pp 276--277
# <non-macro assignment> = <simple assignment> | \global <non-macro assignment>

# <filler> = <optional spaces> | <filler>\relax<optional spaces>
# <general text> = <filler>{<balanced text><right brace>

# <simple assignment> = <variable assignment> | <arithmetic>
#    | <code assignment> | <let assignment> | <shorthand definition>
#    | <fontdef token> | <family assignment> | <shape assignment>
#    | \read <number> to <optional spaces><control sequence>
#    | \setbox<8bit><equals><filler><box>
#    | \font <control sequence><equals><file name><at clause>
#    | <global assignment>
# <variable assignment> = <integer variable><equals><number>
#    | <dimen variable><equals><dimen>
#    | <glue variable><equals><glue>
#    | <muglue variable><equals><muglue>
#    | <token variable><equals><general text>
#    | <token variable><equals><token variable>
# <at clause> = at <dimen> | scaled <number> | <optional spaces>
# <code assignment> = <codename><8bit><equals><number>

# Fetch the font info stored under the key 'fontinfo_<name>' for a font token.
# <name> is the CS name of the token's current definition when it has one,
# otherwise the string form of the token itself.
sub lookupFontinfo {
  my ($token) = @_;
  my $defn = LookupDefinition($token);
  my $name = ($defn ? $defn->getCSName : ToString($token));
  return LookupValue('fontinfo_' . $name); }

# This should eventually actually load the font metrics,
# and tie-in to the FontMetrics data used by Font.
# \font <cs> = <file name> [at <dimen>] [scaled <number>]
# Decodes the font name into font properties, stores them (together with a small list
# of scaled dimensions) under 'fontinfo_<cs>', and defines <cs> as a primitive
# carrying those properties as its font.
DefPrimitive('\font SkipSpaces Token SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $cs, $name) = @_;
    my $gullet = $stomach->getGullet;
    $name = ToString($name);
    my ($at, $scaled);
    # Optional size clauses, read directly from the gullet after the file name.
    if ($gullet->readKeyword('at'))     { $at     = $gullet->readDimension; }
    if ($gullet->readKeyword('scaled')) { $scaled = $gullet->readNumber; }
    # 'scaled' values are given in thousandths, hence the division by 1000.
    my %props = LaTeXML::Common::Font::decodeFontname($name,
      $at && $at->ptValue, $scaled && $scaled->valueOf / 1000);
    if (!keys %props) {    # Failed?
      Info('unexpected', $name, $stomach, "Unrecognized font name '$name'",
        "Font switch macro " . ToString($cs) . " will have no effect"); }
    else {
      $props{fontname} = $name; }
    # Scale factor applied to the default dimensions below; 1 when no size clause was given.
    my $f = ($at ? $at->divide(Dimension('1em'))->valueOf
      : ($scaled ? $scaled->valueOf / 1000
        : 1));
    my $fontinfo = \%props;
    # Six default dimensions, each multiplied by the scale factor.
    $$fontinfo{data} = [map { $_->multiply($f); }
        Dimension(0), Dimension('0.5em'), Dimension(0),
      Dimension(0), Dimension('1ex'), Dimension('1em')];
    $gullet->skipSpaces;
    # Store the font info & metrics
    AssignValue('fontinfo_' . ToString($cs) => $fontinfo);
    # The font $cs should select the font
    DefPrimitiveI($cs, undef, undef, font => $fontinfo);
    return; });

# Not sure what this should be...
DefPrimitiveI('\nullfont', undef, undef, font => { family => 'nullfont' });

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

DefRegister('\lastpenalty', Number(0), readonly => 1);

# \parshape = <n> followed by n pairs of dimensions.
# The pairs are read from the gullet and discarded.
DefPrimitive('\parshape SkipSpaces SkipMatch:= Number', sub {
    my ($stomach, $n) = @_;
    my $gullet = $stomach->getGullet;
    my $pairs  = $n->valueOf;
    foreach my $line (1 .. $pairs) {
      $gullet->readDimension;
      $gullet->readDimension; }
    # we _could_ conceivably store this somewhere for some attempt at stylistic purpose...
    return; });

# \inputlineno: read-only; reports the 'fromLine' field of the gullet's current locator,
# or 0 when the gullet has no locator.
DefRegister('\inputlineno', Number(0),
  getter => sub {
    my $locator = $STATE->getStomach->getGullet->getLocator();
    return Number($$locator{fromLine}) if $locator;
    return Number(0); },
  readonly => 1);

DefRegister('\badness', Number(0), readonly => 1);

# <codename> = \catcode | \mathcode | \lccode | \uccode | \sfcode | \delcode

# The code-table registers all take a character code as their argument.
# Getters receive that code; setters receive (new value, scope, character code).
# Tables are keyed by the character itself, hence the chr() conversions.

# \catcode: an unassigned character reads as CC_OTHER.
DefRegister('\catcode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $code = LookupCatcode(chr($char->valueOf));
    return Number(defined $code ? $code : CC_OTHER); },
  setter => sub {
    my ($value, $scope, $char) = @_;
    AssignCatcode(chr($char->valueOf) => $value->valueOf, $scope); });
# # Only used for active math characters, so far
DefRegister('\mathcode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $codepoint = $char->valueOf;
    my $code      = $STATE->lookupMathcode(chr($codepoint));
    # defaults to the char's code itself(?)
    return Number(defined $code ? $code : $codepoint); },
  setter => sub {
    my ($value, $scope, $char) = @_;
    $STATE->assignMathcode(chr($char->valueOf) => $value->valueOf, $scope); });
# Not used anywhere (yet)
DefRegister('\sfcode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $code = $STATE->lookupSFcode(chr($char->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $char) = @_;
    $STATE->assignSFcode(chr($char->valueOf) => $value->valueOf, $scope); });
DefRegister('\lccode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $code = $STATE->lookupLCcode(chr($char->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $char) = @_;
    $STATE->assignLCcode(chr($char->valueOf) => $value->valueOf, $scope); });
DefRegister('\uccode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $code = $STATE->lookupUCcode(chr($char->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $char) = @_;
    $STATE->assignUCcode(chr($char->valueOf) => $value->valueOf, $scope); });
# Not used anywhere (yet)
DefRegister('\delcode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $code = $STATE->lookupDelcode(chr($char->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $char) = @_;
    $STATE->assignDelcode(chr($char->valueOf) => $value->valueOf, $scope); });

# Default case mappings for the ASCII letters.
# Remember, we're assigning a NUMBER (codepoint) to a CHARACTER!
foreach my $upper ('A' .. 'Z') {
  my $lower = lc($upper);
  my ($upper_code, $lower_code) = (ord($upper), ord($lower));
  # Both cases of a letter share the same lowercase and uppercase codes.
  $STATE->assignLCcode($upper, $lower_code, 'global');
  $STATE->assignUCcode($upper, $upper_code, 'global');
  $STATE->assignLCcode($lower, $lower_code, 'global');
  $STATE->assignUCcode($lower, $upper_code, 'global'); }

# Stub definitions ???
DefMacro('\hyphenation GeneralText', Tokens());
DefMacro('\patterns{}',              Tokens());

# <font> = <fontdef token> | \font | <family member>
# <family member> = <font range><4bit>
# <font range> = \textfont | \scriptfont | \scriptscriptfont

# Doubtful that we can do anything useful with these.
# These look essentially like Registers, although Knuth doesn't call them that.
# NOTE: These should just point to a CS token, right????
# (although it SHOULD be one defined to be a font switch??)
# NOTE: These should NOT be global(?)
# [register name, default font control sequence] for the three font ranges.
# Each register takes a family number and keeps its value under '<register name>_<family>'.
foreach my $family_font (
  ['textfont',         '\tenrm'],
  ['scriptfont',       '\sevenrm'],
  ['scriptscriptfont', '\fiverm']) {
  my ($register, $default_cs) = @$family_font;
  my $key_prefix = $register . '_';
  DefRegister("\\$register Number" => T_CS($default_cs),
    getter => sub {
      my ($fam) = @_;
      LookupValue($key_prefix . $fam->valueOf); },
    setter => sub {
      my ($font, $scope, $fam) = @_;
      AssignValue($key_prefix . $fam->valueOf => $font, $scope); }); }

# <internal dimen> = <dimen parameter> | <special dimen> | \lastkern
#    | <dimendef token> | \dimen<8bit> | <box dimension><8bit> | \fontdimen<number><font>
DefRegister('\lastkern' => Dimension(0), readonly => 1);

# <box dimension> = \ht | \wd | \dp
# \ht<n> and \wd<n>: height and width of whatever is stored under 'box<n>'.
# Reading an empty box gives a zero dimension; assigning to an empty box does nothing.
DefRegister('\ht Number', Dimension(0),
  getter => sub {
    my ($n) = @_;
    my $box = $n && LookupValue('box' . $n->valueOf);
    return Dimension(0) unless $box;
    return $box->getHeight; },
  setter => sub {
    my ($value, $scope, $n) = @_;
    my $box = $n && LookupValue('box' . $n->valueOf);
    if ($box) {
      $box->setHeight($value); }
    return; });
DefRegister('\wd Number', Dimension(0),
  getter => sub {
    my ($n) = @_;
    my $box = $n && LookupValue('box' . $n->valueOf);
    return Dimension(0) unless $box;
    return $box->getWidth; },
  setter => sub {
    my ($value, $scope, $n) = @_;
    my $box = $n && LookupValue('box' . $n->valueOf);
    if ($box) {
      $box->setWidth($value); }
    return; });

DefRegister('\dp Number', Dimension(0),
  getter => sub {

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

    return ($data && $$data[$p - 1]) || Dimension(0); },
  setter => sub {
    my ($value, $scope, $p, $font) = @_;
    my $info = lookupFontinfo($font);
    $p = ToString($p);
    if (my $data = $info && $$info{data}) {
      $$data[$p - 1] = $value; } }
);

# \hyphenchar<font> and \skewchar<font> are kept inside the font's info record
# (see lookupFontinfo). A font without a record, or without a true stored value,
# reads as the default; assigning to a font without a record is ignored.
DefRegister('\hyphenchar FontToken' => Number(ord('-')),
  getter => sub {
    my ($font) = @_;
    my $info   = lookupFontinfo($font);
    my $stored = $info && $$info{hyphenchar};
    return $stored || Number(ord('-')); },
  setter => sub {
    my ($value, $scope, $font) = @_;
    my $info = lookupFontinfo($font);
    $$info{hyphenchar} = $value if $info; }
);
DefRegister('\skewchar FontToken' => Number(0),
  getter => sub {
    my ($font) = @_;
    my $info   = lookupFontinfo($font);
    my $stored = $info && $$info{skewchar};
    return $stored || Number(0); },
  setter => sub {
    my ($value, $scope, $font) = @_;
    my $info = lookupFontinfo($font);
    $$info{skewchar} = $value if $info; }
);

#   Could be handled by setting dimensions whenever the box itself is set?

# <internal glue> = <glue parameter> | \lastskip | <skipdef token> | \skip<8bit>

DefRegister('\lastskip' => Glue(0), readonly => 1);

# <internal muglue> = <muglue parameter> | \lastskip | <muskipdef token> | \muskip<8bit>

# <family assignment> = <family member><equals><font>
# <shape assignment> = \parshape<equals><number><shape dimensions>
#  <shape dimensions> is 2n <dimen>

# <global assignment> = <font assignment> | <hyphenation assignment>
#   | <box size assignment> | <interaction mode assignment>
#   | <intimate assignment>
# <font assignment> = \fontdimen <number><font><equals><dimen>
#   | \hyphenchar<font><equals><number> | \skewchar<font><equals><number>
# <hyphenation assignment> = \hyphenation<general text>
#   | \patterns<general text>
# <box size assignment> = <box dimension><8bit><equals><dimen>
# <interaction mode assignment> = \errorstopmode | \scrollmode | \nonstopmode | \batchmode
# These are no-ops; Basically, LaTeXML runs in scrollmode
# Defined with no replacement code: all four interaction modes are no-ops.
foreach my $mode (qw(errorstopmode scrollmode nonstopmode batchmode)) {
  DefPrimitiveI("\\$mode", undef, undef); }

# <intimate assignment> = <special integer><equals><number>
#   | <special dimension><equals><dimen>

DefMacro('\fontencoding{}', '\@@@fontencoding{#1}');

# Switch the current font to the named encoding;
# Default to OT1 encoding if no map found for it.
DefPrimitive('\@@@fontencoding{}', sub {
    my ($stomach, $encoding) = @_;
    my $name = ToString(Expand($encoding));
    MergeFont(encoding => (LoadFontMap($name) ? $name : 'OT1'));
    return; });

# Both macros expand to the encoding of the current font, exploded into character tokens.
foreach my $cs ('\f@encoding', '\cf@encoding') {
  DefMacroI($cs, undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); }

# Used for SemiVerbatim text
# 128-entry map: slots 0x00-0x1F and 0x7F are undefined;
# every slot from 0x20 (space) through 0x7E (~) maps to the character with that code.
DeclareFontMap('ASCII',
  [(undef) x 32,
    (map { chr($_) } 0x20 .. 0x7E),
    undef]);

# Note that several entries are used for accents, and in practice will actually
# be used in something like an m:mover; thus they needn't (shouldn't?) be "small"
# There are also some questions about which choices are best
# grave & acute accents (entry 0x12 & 0x13) (often typed using 0x60 & 0x27)
#   are probably best using U+60(grave accent) & U+B4(acute accent)
#   but could be U+2035 (reversed prime) & U+2032 (prime).  (particularly for math?)
#   [we do use these for \prime, however!]
#   or U+02CB (modifier letter grave accent) & U+02CA (modifier letter acute accent)
# Similarly, hat & tilde (entries 0x5E & 0x7E)
#   (typed using ^ (0x5E, circumflex accent) & ~ (0x7E, tilde))
#   are probably best just sticking with U+5E & U+7E
#   but could be U+02C6 (modifier letter circumflex accent) U+02DC (small tilde)
# [Note that generally we're using codepoints characterized as "modifier letter"
# only when no other spacing point is available.]
# Font map for the OT1 encoding: a 128-entry table (16 rows of 8), indexed by slot number,
# giving the Unicode string for each slot.
#   0x00-0x0A : uppercase Greek letters;   0x0B-0x0F : the ff, fi, fl, ffi, ffl ligatures
#   0x10-0x1F : dotless i & j, accents and assorted non-ASCII letters
#   0x20-0x7F : mostly ASCII, with quotes, dashes and accents replacing some punctuation
DeclareFontMap('OT1',
  ["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
    "\x{03A6}", "\x{03A8}",      "\x{03A9}", "\x{FB00}", "\x{FB01}", "\x{FB02}", "\x{FB03}", "\x{FB04}",
    "\x{0131}", "\x{0237}",      UTF(0x60),  UTF(0xB4),  "\x{02C7}", "\x{02D8}", UTF(0xAF),  "\x{02DA}",
    UTF(0xB8),  UTF(0xDF),       UTF(0xE6),  "\x{0153}", UTF(0xF8),  UTF(0xC6),  "\x{152}",  UTF(0xD8),
    UTF(0xA0) . "\x{0335}", '!', "\x{201D}", '#',        '$',        '%',        '&',       "\x{2019}",
    '(',                    ')', '*',        '+',        ',',        '-',        '.',       '/',
    '0',                    '1', '2',        '3',        '4',        '5',        '6',       '7',
    '8',                    '9', ':',        ';',        UTF(0xA1),  '=',        UTF(0xBF), '?',
    '@',                    'A', 'B',        'C',        'D',        'E',        'F',       'G',
    'H',                    'I', 'J',        'K',        'L',        'M',        'N',       'O',
    'P',                    'Q', 'R',        'S',        'T',        'U',        'V',       'W',
    'X',                    'Y', 'Z',        '[',        "\x{201C}", ']',        "^",       "\x{02D9}",
    "\x{2018}",             'a', 'b',        'c',        'd',        'e',        'f',       'g',
    'h',                    'i', 'j',        'k',        'l',        'm',        'n',       'o',
    'p',                    'q', 'r',        's',        't',        'u',        'v',       'w',
    'x',                    'y', 'z',        "\x{2013}", "\x{2014}", "\x{02DD}", UTF(0x7E), UTF(0xA8)]);

DeclareFontMap('OT1',
  ["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
    "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{2191}", "\x{2193}", "'",        UTF(0xA1), UTF(0xBF),
    "\x{0131}", "\x{0237}", UTF(0x60),  UTF(0xB4),  "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}",
    UTF(0xB8),  UTF(0xDF),  UTF(0xE6),  "\x{0153}", UTF(0xF8),  UTF(0xC6),  "\x{152}", UTF(0xD8),
    "\x{2423}", '!',        "\"",       '#',        '$',        '%',        '&',       "\x{2019}",
    '(',        ')',        '*',        '+',        ',',        '-',        '.',       '/',
    '0',        '1',        '2',        '3',        '4',        '5',        '6',       '7',

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

    elsif ($cc == CC_SPACE) { (T_SPACE); }
    elsif ($cc == CC_PARAM) { ($_, $_); }
    elsif ($cc == CC_ARG)   { (T_PARAM, T_OTHER($$_[0])); }
    else                  { $_; }
  } @tokens;
  return UnTeX(Tokens(@tokens), 1); }

# \message{...}: expand the argument and send its writable form to the log.
DefPrimitive('\message{}', sub {
    my ($stomach, $stuff) = @_;
    my @text = writableTokens(Expand($stuff));
    NoteLog(@text);
    return; });

DefRegister('\errhelp' => Tokens());
# \errmessage{...}: emit a Note of the form "<expanded message>: <expansion of \the\errhelp>".
DefPrimitive('\errmessage{}', sub {
    my ($stomach, $stuff) = @_;
    my $message = ToString(Expand($stuff));
    my $help    = ToString(Expand(Tokens(T_CS('\the'), T_CS('\errhelp'))));
    Note($message . ": " . $help);
    return; });

# TeX I/O primitives
# \openin<port> = <file name>
# When the file can be found, a Mouth reading it is stored globally under 'input_file:<port>';
# otherwise nothing is stored.
DefPrimitive('\openin Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $port, $filename) = @_;
    # possibly should close $port if it's already been opened?
    my $port_id = ToString($port);
    my $name    = ToString($filename);
    # Rely on FindFile to enforce any access restrictions
    # It's tempting to put noltxml=>1 here, since who would want to read in an .ltxml file's perl?
    # However, \openin is often used by low-level code to check for existence of a file
    # when we SHOULD find an .ltxml version!
    # Hopefully, if they get one, they won't actually try to read its content...
    my $path = FindFile($name);
    return unless $path;
    # Any content cached under '<path>_contents' is handed to the Mouth.
    my $mouth = LaTeXML::Core::Mouth->create($path,
      content => LookupValue($path . '_contents'));
    AssignValue('input_file:' . $port_id => $mouth, 'global');
    return; });

# \closein<port>: finish the Mouth stored under 'input_file:<port>' (if any)
# and clear that variable globally. Closing a port that is not open does nothing.
DefPrimitive('\closein Number', sub {
    # The prototype supplies a single argument, the port number.
    my ($stomach, $port) = @_;
    $port = ToString($port);
    if (my $mouth = LookupValue('input_file:' . $port)) {
      $mouth->finish;
      AssignValue('input_file:' . $port => undef, 'global'); }
    return; });

# \read<port> to <cs>
# Reads tokens from the Mouth stored under 'input_file:<port>' and defines <cs>
# as a parameterless macro expanding to them. Does nothing when the port is not open.
DefPrimitive('\read Number SkipKeyword:to SkipSpaces Token', sub {
    my ($stomach, $port, $token) = @_;
    $port = ToString($port);
    if (my $mouth = LookupValue('input_file:' . $port)) {
      # The two settings below are made inside a group, which is closed before <cs> is defined.
      $stomach->bgroup;
      AssignValue(PRESERVE_NEWLINES => 2);    # Special EOL/EOF treatment for \read
      AssignValue(INCLUDE_COMMENTS  => 0);
      my @tokens = ();
      # $level counts the currently open braces.
      my ($t, $level) = (undef, 0);
      while ($t = $mouth->readToken) {
        my $cc = $t->getCatcode;
        push(@tokens, $t) unless $cc == CC_MARKER;    # End of line marker
        $level++ if $cc == CC_BEGIN;
        $level-- if $cc == CC_END;
        # Stop at the end of a line, but only once all braces are balanced.
        last     if !$level && $mouth->isEOL; }
      $stomach->egroup;
      DefMacroI($token, undef, Tokens(@tokens), nopackParameters => 1); }
    return; });

# \ifeof<port>: true when the port is not open;
# otherwise the 'at_eof' field of the Mouth stored for that port.
DefConditional('\ifeof Number', sub {
    my ($gullet, $port) = @_;
    my $mouth = LookupValue('input_file:' . ToString($port));
    return ($mouth ? $$mouth{at_eof} : 1); });

# For output files, we'll write the data to a cached internal copy
# rather than to the actual file system.
# \openout<port> = <file name>
# Records the file name under 'output_file:<port>' and starts '<file name>_contents' as an empty string.
DefPrimitive('\openout Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $port, $filename) = @_;
    my ($port_id, $name) = (ToString($port), ToString($filename));
    AssignValue('output_file:' . $port_id => $name, 'global');
    AssignValue($name . '_contents'       => "",    'global');
    return; });

# \closeout<port>: clear the file name recorded under 'output_file:<port>'.
DefPrimitive('\closeout Number', sub {
    my ($stomach, $port) = @_;
    my $key = 'output_file:' . ToString($port);
    AssignValue($key => undef, 'global');
    return; });

# \write<port>{...}
# For an open port, append the expanded text plus a newline to '<file name>_contents';
# for any other port, emit the expanded text as a Note.
DefPrimitive('\write Number {}', sub {
    my ($stomach, $port, $tokens) = @_;
    my $filename = LookupValue('output_file:' . ToString($port));
    if (!$filename) {
      Note(UnTeX(Expand($tokens)));
      return; }
    my $key = $filename . '_contents';
    AssignValue($key => LookupValue($key) . UnTeX(Expand($tokens), 1) . "\n", 'global');
    return; });

# Since we don't paginate, we're effectively always "shipping out",
# so all operations are \immediate
DefPrimitive('\immediate', undef);

#======================================================================
# Remaining semi- Vertical Mode primitives in Ch.24, pp.280--281

DefPrimitive('\special {}', sub {
    my ($stomach, $arg) = @_;
    my $special_str = ToString($arg);
    # recognize one special graphics inclusion case
    if ($special_str =~ /\bpsfile=(.+?)(?:\s|\})/) {
      my $graphic = $1;
      RequirePackage('graphicx', searchpaths_only => 1);
      my @kv;
      for my $prop (qw(voffset hoffset hscale vscale hsize vsize angle)) {
        if ($special_str =~ /\b$prop=(.+?)(?:\s|\})/) {
          push(@kv, T_OTHER(',')) if @kv;
          push(@kv, T_OTHER($prop), T_OTHER("="), T_OTHER($1)); } }
      @kv = (T_OTHER("["), @kv, T_OTHER("]")) if @kv;
      $stomach->getGullet->unread(
        T_CS('\ltx@special@graphics'), @kv, T_BEGIN, T_OTHER($graphic), T_END); }
    else {

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

#======================================================================

Tag('ltx:td', afterClose => \&trimNodeWhitespace);

#----------------------------------------------------------------------
# Primitive column types;
# This is really LaTeX, but the mechanisms are used behind-the-scenes here, too.
# '|' puts a \vrule between columns.
DefColumnType('|', sub {
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->addBetweenColumn(T_CS('\vrule'), T_CS('\relax'));
    return; });

{
  # The sides of the column on which each alignment letter places an \hfil.
  my %fill_sides = (
    l => ['after'],
    c => ['before', 'after'],
    r => ['before']);
  foreach my $letter (qw(l c r)) {
    my @sides = @{ $fill_sides{$letter} };
    DefColumnType($letter, sub {
        $LaTeXML::BUILD_TEMPLATE->addColumn(map { $_ => Tokens(T_CS('\hfil')) } @sides);
        return; }); }
}

# p{width}: the cell is wrapped as \vtop{\hbox to <width>\relax{ ... }},
# attached at the top and justified.
DefColumnType('p{Dimension}', sub {
    my ($gullet, $width) = @_;
    my @opening = (T_CS('\vtop'), T_BEGIN, T_CS('\hbox'),
      T_LETTER('t'), T_LETTER('o'), $width->revert, T_CS('\relax'),
      T_BEGIN);
    my @closing = (T_END, T_END);
    $LaTeXML::BUILD_TEMPLATE->addColumn(
      before  => Tokens(@opening),
      after   => Tokens(@closing),
      vattach => 'top',
      align   => 'justify',
    );
    return; });

# *{n}{pattern}: returns the tokens of pattern repeated n times.
DefColumnType('*{Number}{}', sub {
    my ($gullet, $count, $pattern) = @_;
    my @repeated = ();
    foreach my $i (1 .. $count->valueOf) {
      push(@repeated, $pattern->unlist); }
    return @repeated; });

# @{filler}: places the filler tokens between columns,
# calling disableIntercolumn both before and after adding them.
DefColumnType('@{}', sub {
    my ($gullet, $filler) = @_;
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->disableIntercolumn;
    $template->addBetweenColumn($filler->unlist);
    $template->disableIntercolumn;
    return; });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Alignment code
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#----------------------------------------------------------------------
# This is where ALL alignments start & finish
# This creates the object representing the entire alignment!
# The whatsit built here carries the alignment object as its 'alignment' property;
# construction, reversion and sizing all defer to that object.
DefConstructor('\@start@alignment',
  "#alignment",
  reversion => sub {
    my ($whatsit) = @_;
    Revert($whatsit->getProperty('alignment')); },
  sizer => '#alignment',
  #  beforeDigest => sub { $_[0]->bgroup; },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # The alignment body is digested inside its own group.
    $stomach->bgroup;
    my $alignment = LookupValue('Alignment');
    if ($alignment) {
      $whatsit->setProperty(alignment => $alignment);
      $alignment->setBody($whatsit);
      digestAlignmentBody($stomach, $whatsit); }
    $stomach->egroup;
    return; });

# Seems odd to need both end markers here...
DefMacroI('\@finish@alignment', undef,
  '\hidden@crcr\@close@alignment');
DefPrimitive('\@close@alignment', sub { });

#======================================================================
# Low-level bits that appear within alignments or \halign

# Row terminators: each constructs a newline.
foreach my $cs ('\cr', '\crcr') {
  DefConstructorI($cs, undef, "\n"); }
# These are useful for reversion of higher-level macros that use alignment
# internally, but don't use explicit &,\cr in the user markup;
# they are given an empty alias.
foreach my $hidden (['\hidden@cr', "\n"], ['\hidden@crcr', "\n"], ['\hidden@align', ""]) {
  my ($cs, $replacement) = @$hidden;
  DefConstructorI($cs, undef, $replacement, alias => ''); }

# Handled directly in alignments, but must be defined as non-macros
# Handled directly in alignments, but must be defined as non-macros.
# If one of these is executed as a primitive it has been used where it is not allowed:
# open a group, report the error, then neutralize the alignment primitives
# (and the alignment character) by letting them to \relax, so that the same
# misuse is not reported again and again.
# NOTE(review): the group opened here is not closed by this code;
# presumably a following brace or the enclosing construct closes it — confirm.
foreach my $cs ('\noalign', '\omit', '\span') {
  DefPrimitiveI($cs, undef, sub {
      my ($stomach) = @_;
      $stomach->bgroup;
      Error('unexpected', $cs, $stomach, "$cs cannot be used here");
      Let(T_ALIGN,          T_CS('\relax'));
      Let(T_CS('\noalign'), T_CS('\relax'));
      Let(T_CS('\omit'),    T_CS('\relax'));
      Let(T_CS('\span'),    T_CS('\relax'));
      return; }); }

#########
# Support for \\[dim] .... TO BE WORKED OUT!
# NOTE that this does NOT skip spaces before * or []!!!!!
#  As if: \@alignment@newline OptionalMatch:* [Dimension]
# Read arguments for \\, namely * and/or [Dimension]
# BUT optionally do it while skipping spaces (latex style) or not (ams style)
# Read the optional arguments following \\ : a star and/or a bracketed argument.
#   $gullet     : the gullet to read from.
#   $skipspaces : when true, skip spaces before looking for * and before looking for [.
# Returns ($star, $optional): $star is 1 if a * was found (undef otherwise);
# $optional holds the tokens read up to the closing ] if a [ was found (undef otherwise).
# A token that was peeked at but is neither * nor [ is put back.
sub readNewlineArgs {
  my ($gullet, $skipspaces) = @_;
  # ALIGN_STATE is raised for the duration of the lookahead
  # (presumably so that alignment markers met while peeking do not end the column — confirm).
  local $LaTeXML::ALIGN_STATE = 1000000;
  $gullet->skipSpaces if $skipspaces;
  my $next = $gullet->readToken;
  my ($star, $optional);
  if ($next && $next->equals(T_OTHER('*'))) {
    $star = 1;
    $gullet->skipSpaces if $skipspaces;
    $next = $gullet->readToken; }
  if ($next && $next->equals(T_OTHER('['))) {
    $optional = $gullet->readUntil(T_OTHER(']'));
    $next     = undef; }    # the [ was consumed; nothing to put back
  $gullet->unread($next) if $next;
  return ($star, $optional); }

# VERY tricky (and mostly Wrong).
# The issue is for \\ to look ahead for * and [],
# Eventually we'll expand into \cr (which should be preceded by the RHS of the template)
# BUT it should NOT trigger the template if it bumps into a &
# which happens when the 1st column of an alignment is empty.
# In proper LaTeX this is inhibited by a curious construct
#   {\ifnum0='}
# and possibly by proper tracking of a Master Counter !?!?!?
# But we're not there (yet)

# This is the internal macro for \\[dim] used by LaTeX for various arrays, tabular, etc
DefMacroI('\@alignment@newline', undef, sub {
    my ($gullet) = @_;
    # Look for * and [..], skipping spaces; the * itself plays no role in the expansion.
    my (undef, $spacing) = readNewlineArgs($gullet, 1);
    my @marker;
    if ($spacing) {    # \\[dim] : use the marker that carries the dimension
      @marker = (T_CS('\@alignment@newline@markertall'), T_BEGIN, $spacing, T_END); }
    else {
      @marker = (T_CS('\@alignment@newline@marker')); }
    # Expand into \hidden@cr{<marker>}
    return (T_CS('\hidden@cr'), T_BEGIN, @marker, T_END); });
# However, the above will skip spaces --AND a newline! -- looking for [],
# which is kinda weird in math, since there may be a reasonable math [ in the 1st column!
# AMS kindly avoids that, by using a special version of \\
DefMacroI('\@alignment@newline@noskip', undef, sub {
    my ($gullet) = @_;
    # Look for * and [..] WITHOUT skipping spaces; the * itself plays no role in the expansion.
    my (undef, $spacing) = readNewlineArgs($gullet);
    my @marker;
    if ($spacing) {    # \\[dim] : use the marker that carries the dimension
      @marker = (T_CS('\@alignment@newline@markertall'), T_BEGIN, $spacing, T_END); }
    else {
      @marker = (T_CS('\@alignment@newline@marker')); }
    # Expand into \hidden@cr{<marker>}
    return (T_CS('\hidden@cr'), T_BEGIN, @marker, T_END); });

# These are the markers that produce \\ in the reversion,
# and (eventually will) add vertical space to the row!
# Plain marker: constructs nothing, reverts to \\ followed by a newline token.
DefConstructor('\@alignment@newline@marker', '',
  reversion => Tokens(T_CS("\\\\"), T_CR));
# Marker with a dimension: additionally records the dimension as the padding
# of the current row of the alignment (when there is one).
DefConstructor('\@alignment@newline@markertall {Dimension}', '',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $alignment = LookupValue('Alignment');
    $alignment->currentRow->{padding} = $whatsit->getArg(1) if $alignment;
    return; },
  reversion => sub {
    my ($whatsit, $padding) = @_;
    # Reverts to \\[<dimension>] followed by a newline token.
    return Tokens(T_CS("\\\\"), T_OTHER('['), Revert($padding), T_OTHER(']'), T_CR); });

# \tabularnewline simply expands to \cr here; the trailing ??? marks this as unsettled.
DefMacroI('\tabularnewline', undef, '\cr');    # ???

# \lx@intercol is our replacement for LaTeX's \@acol which places intercolumn space in tabular
# (but NOT used by TeX's \halign!)
# The default binding expands to nothing.
DefMacro('\lx@intercol', '');
# Candidates for binding \lx@intercol for LaTeX tabular or math arrays
# These provide "padding" of half tabcolsep, since added before & after columns
# [these could be \hskip\tabcolsep, but the expansion confounds trimColumnSpec]
DefConstructor('\lx@text@intercol', sub {
    my ($document, %properties) = @_;
    # Insert the recorded width into the document, converted to spaces.
    $document->absorb(DimensionToSpaces($properties{width})); },
  reversion  => '\lx@intercol',
  properties => sub {
    # The width is the value of \tabcolsep when that is defined as a register, else 0.
    my $definition = $STATE->lookupDefinition(T_CS('\tabcolsep'));
    my $width;
    if ($definition && $definition->isRegister) {
      $width = $definition->valueOf; }
    else {
      $width = Dimension(0); }
    return (width => $width, isSpace => 1); });
DefConstructor('\lx@math@intercol', "",    # mspace ???
  reversion  => '\lx@intercol',
  properties => sub {
    # The width is the value of \arraycolsep when that is defined as a register, else 0.
    my $definition = $STATE->lookupDefinition(T_CS('\arraycolsep'));
    my $width;
    if ($definition && $definition->isRegister) {
      $width = $definition->valueOf; }
    else {
      $width = Dimension(0); }
    return (width => $width, isSpace => 1); });

#======================================================================
# Various decorations within alignments, rules, headers, etc

# Like \noalign, takes an arg; handled within alignment processing.
# But doesn't create a pseudo-row (??? Or does it?; is it still needed?)
DefConstructor('\hidden@noalign{}', '#1',
  reversion  => '',
  properties => sub {
    my (undef, $content) = @_;
    # Sometimes the argument smuggles material that must be carried into the XML:
    # count the boxes in it that carry the alignmentPreserve property.
    my $npreserved = scalar(grep { $_->getProperty('alignmentPreserve') } $content->unlist);
    return (alignmentSkippable => 1, alignmentPreserve => $npreserved); });

# \hline wraps its internal form in \noalign.
DefMacro('\hline', '\noalign{\@@alignment@hline}');
# The internal form constructs nothing itself; once digested it adds
# a 't' line to the current alignment (when there is one).
DefConstructorI('\@@alignment@hline', undef, '',
  afterDigest => sub {
    my $alignment = LookupValue('Alignment');
    $alignment->addLine('t') if $alignment;
    return; },
  properties => { isHorizontalRule => 1 },
  sizer      => 0,
  alias      => '\hline');

# Expands to nothing; as a side effect, sets the in_tabular_head flag on the current alignment.
DefMacroI('\@tabular@begin@heading', undef, sub {
    my $alignment = LookupValue('Alignment');
    $alignment->{in_tabular_head} = 1;
    return; });
# Expands to nothing; as a side effect, clears the in_tabular_head flag on the current alignment.
DefMacroI('\@tabular@end@heading', undef, sub {
    my $alignment = LookupValue('Alignment');
    $alignment->{in_tabular_head} = 0;
    return; });

#======================================================================
# Math mode in alignment
# Special forms for $ appearing within alignments.
# Note that $ within a math alignment (eg array environment),
# switches to text mode! There's no $$ for display math.

# This is the "normal" case: $ appearing with an alignment that is in text mode.
# It's just like regular $, except it doesn't look for $$ (no display math).
DefPrimitiveI('\@dollar@in@textmode', undef, sub {
    my ($stomach) = @_;
    no warnings 'recursion';
    # Already in math: this $ ends inline math; otherwise it begins it.
    my $name = (LookupValue('IN_MATH') ? '\@@ENDINLINEMATH' : '\@@BEGININLINEMATH');
    return $stomach->invokeToken(T_CS($name)); });

# This one is for $ appearing within an alignment that's already math.
# This should switch to text mode (because it's balancing the hidden $
# wrapping each alignment cell!!!!!!)
# However, it should be like a normal $ if it's inside something like \mbox
# that itself makes a text box!!!!!!
# Thus, we need to know at what boxing level we started the last math or text.
# This is all complicated by the need to know _how_ we got into or out of math mode!
# Gawd, this is awful!
# NOTE: Probably the most "Right" thing to do would be to process
# alignments in text mode only (like TeX), sneaking $'s in where needed,
# but then afterwards, morph them into math arrays?
# This would be complicated by the need to hide these $ from untex.
DefPrimitiveI('\@dollar@in@mathmode', undef, sub {
    my ($stomach) = @_;
    # MATH_ALIGN_$_BEGUN holds (boxing level + 1) as recorded when a $ opened something (see below);
    # when it equals the current boxing level, this $ closes what was opened.
    my $level = $stomach->getBoxingLevel;
    if ((LookupValue('MATH_ALIGN_$_BEGUN') || 0) == $level) { # If we've begun making _something_ with $.
      my @l = ();
      if (LookupValue('IN_MATH')) {                           # But we're somehow in math?
        @l = $stomach->invokeToken(T_CS('\@@ENDINLINEMATH')); }
      else {
        @l = $stomach->invokeToken(T_CS('\@@ENDINLINETEXT')); }
      AssignValue('MATH_ALIGN_$_BEGUN' => 0);                 # Reset this AFTER finishing the something
      @l; }
    else {
      # This $ opens something: text if we are in math, math otherwise.
      AssignValue('MATH_ALIGN_$_BEGUN' => $level + 1);        # Note that we've begun something
      if (LookupValue('IN_MATH')) {                           # If we're "still" in math
        $stomach->invokeToken(T_CS('\@@BEGININLINETEXT')); }
      else {
        $stomach->invokeToken(T_CS('\@@BEGININLINEMATH')); } } });

# Begin text inside math: switches to text mode and captures the body into an ltx:XMText.
DefConstructorI('\@@BEGININLINETEXT', undef,
  "<ltx:XMText>#body</ltx:XMText>",
  alias        => T_MATH,
  captureBody  => 1,
  beforeDigest => sub {
    my ($stomach) = @_;
    return $stomach->beginMode('text'); });
# End of that text: constructs nothing, just leaves text mode.
DefConstructorI('\@@ENDINLINETEXT', undef, "",
  alias        => T_MATH,
  beforeDigest => sub {
    my ($stomach) = @_;
    return $stomach->endMode('text'); });

# When digested, globally sets EQUATIONROW_NUMBER to 0.
DefPrimitiveI('\@LTX@nonumber', undef, sub { AssignValue(EQUATIONROW_NUMBER => 0, 'global'); });

# \hidewidth expands to nothing.
DefMacroI('\hidewidth', undef, Tokens());

#======================================================================
# Multicolumn support
# \multispan{n} expands to \omit followed by (n-1) repetitions of \span\omit.
DefMacro('\multispan{Number}', sub {
    my ($gullet, $span) = @_;
    my $ncolumns  = $span->valueOf;
    my @expansion = (T_CS('\omit'));
    foreach my $extra (2 .. $ncolumns) {    # one \span\omit for each column beyond the first
      push(@expansion, T_CS('\span'), T_CS('\omit')); }
    return @expansion; });

# Reads as the column count of the current alignment's template; 0 when there is no alignment.
DefRegisterI('\@alignment@ncolumns', undef, Dimension(0),
  getter => sub {
    my $alignment = LookupValue('Alignment');
    return Number($alignment ? scalar($alignment->getTemplate->columns) : 0); });
# Reads as the current column number of the current alignment; 0 when there is no alignment.
DefRegisterI('\@alignment@column', undef, Dimension(0),
  getter => sub {
    my $alignment = LookupValue('Alignment');
    return Number($alignment ? $alignment->currentColumnNumber : 0); });

DefMacro('\@multicolumn {Number}  AlignmentTemplate {}', sub {
    my ($gullet, $span, $template, $tokens) = @_;
    my $column   = $template->column(1);
    my $ncolumns = $span->valueOf;
    # First part, like \multispan: \omit, then \span\omit for each further column.
    my @spanning = (T_CS('\omit'));
    push(@spanning, T_CS('\span'), T_CS('\omit')) foreach 2 .. $ncolumns;
    # Next part: the template's first column is put in-line around the content,
    # since it is only used this once.
    my @before  = ($column ? beforeCellUnlist($$column{before}) : ());
    my @content = $tokens->unlist;
    my @after   = ($column ? afterCellUnlist($$column{after}) : ());
    return (@spanning, @before, @content, @after); });

# True exactly when an 'Alignment' value is currently bound.
DefConditionalI('\if@in@alignment', undef, sub { LookupValue('Alignment'); });

# TeX-level access to alignmentBindings: takes a template and an optional mode.
DefPrimitive('\@alignment@bindings AlignmentTemplate []', sub {
    my (undef, $template, $mode) = @_;
    return alignmentBindings($template, $mode); });

# Utility, not really TeX, but used by LaTeX, AmSTeX...
# Convert a vertical positioning, optional argument.
#  t = "top", b = "bottom"; default is "middle".
# Note that the default for vattach attribute is "baseline".
sub translateAttachment {
  my ($pos) = @_;
  # A missing (or false) position is treated as the empty string.
  my $key = ($pos ? ToString($pos) : '');
  my %attachment = (t => 'top', b => 'bottom');
  # Anything other than t or b means 'middle'.
  return (exists $attachment{$key} ? $attachment{$key} : 'middle'); }

# This trims trailing whitespace from the current digested list,
# for use within latex tabular-style columns.
# But note that \halign does NOT remove this trailing space!
# Works backwards from the end of the current digested list (@LaTeXML::LIST):
#  * boxes that are skippable, fill, or empty are set aside and put back afterwards;
#  * a List at the end is unwrapped so that its contents are examined too;
#  * a plain Box whose string is only whitespace is dropped;
#  * anything else stops the scan.
# Returns nothing.
DefPrimitiveI('\lx@column@trimright', undef, sub {
    my @save = ();    # set-aside boxes, kept in their original order
    while (my $box = $LaTeXML::LIST[-1]) {
      if ($box->getProperty('alignmentSkippable')
        || $box->getProperty('isFill')
        || IsEmpty($box)) {
        # We are walking right-to-left, so prepend: appending here would
        # put the boxes back in reversed order.
        unshift(@save, pop(@LaTeXML::LIST)); }
      elsif (ref $box eq 'LaTeXML::Core::List') {    # Unwrap and continue
        pop(@LaTeXML::LIST);
        push(@LaTeXML::LIST, $box->unlist); }
      elsif (ref $box eq 'LaTeXML::Core::Box') {
        my $string = $box->getString;
        last unless defined($string) && ($string =~ /^\s*$/);
        pop(@LaTeXML::LIST); }                       # remove any box containing only spaces
      else {
        last; } }
    push(@LaTeXML::LIST, @save);
    return; });

use constant T_hfil => T_CS('\hfil');
# Yet more special case hacking. Sometimes the order of tokens works for
# TeX, but confuses us... In particular the order of $ and \hfil!
sub beforeCellUnlist {
  my ($tokens) = @_;
  return () unless $tokens;
  my @toks = $tokens->unlist;
  my @new  = ();
  while (my $t = shift(@toks)) {
    if ($t->defined_as(T_MATH) && @toks && $toks[0]->defined_as(T_hfil)) {
      push(@new, shift(@toks)); unshift(@toks, $t); }
    else {

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

    elsif ($cc == CC_PARAM) {              # Found the template's column slot
      $before = 0;
      push(@tokens, $t); }
    elsif (($cc == CC_ALIGN)
      || $t->equals(T_CS('\cr')) || $t->equals(T_CS('\crcr'))) {    # End the column
      if ($before) {                                                # Leading & ?
        $repeated = 1;
        @nonreps  = @cols; @cols = (); }    # A & while we're before a column means Repeated columns
      else {                                # Finished column spec; add it
        ## How should we be handling tabskip? An attribute on the cell or spacing?
        push(@cols, {
            tabskip => $tabskip,
            before  => Tokens(beforeCellUnlist(Tokens(@pre))),
            after   => Tokens(afterCellUnlist(Tokens(@post))) });
        $tabskip = $nexttabskip;
        @pre     = @post = (); $before = 1; }
      last unless $cc == CC_ALIGN;
      push(@tokens, $t); }
    elsif ($before) {    # Other random tokens go into the column's pre-template
      push(@pre,    $t) if @pre || ($cc != CC_SPACE);
      push(@tokens, $t); }
    else {               # Or the post-template
      push(@post,   $t) if @post || ($cc != CC_SPACE);
      push(@tokens, $t); } }
  # Now create & return the template object
  my $template = LaTeXML::Core::Alignment::Template->new(
    ($repeated
      ? (columns => [@nonreps], repeated => [@cols])
      : (columns => [@cols])),
    tokens => [@tokens]);
  $whatsit->setProperty(template => $template);
  return $template; }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# And the general alignment processing.
# If the Template is appropriately constructed, either by \halign or various \begin{tabular}
# the body of the alignment is processed the same way.

# Create a new Alignment for $template and bind it as the current 'Alignment' value.
#   $template   : the alignment template (passed on to the Alignment as given).
#   $mode       : optional; defaults to the current MODE. A mode ending in "math" selects
#                 the math elements (ltx:XMArray, ltx:XMRow, ltx:XMCell),
#                 anything else the text ones (ltx:tabular, ltx:tr, ltx:td).
#   %properties : passed on to the Alignment as its properties.
# Also rebinds $ (T_MATH) to the alignment-specific math or text variant.
# Returns nothing.
sub alignmentBindings {
  my ($template, $mode, %properties) = @_;
  $mode = LookupValue('MODE') unless $mode;
  my $ismath    = $mode =~ /math$/;
  my $container = ($ismath ? 'ltx:XMArray' : 'ltx:tabular');
  my $rowtype   = ($ismath ? 'ltx:XMRow'   : 'ltx:tr');
  my $coltype   = ($ismath ? 'ltx:XMCell'  : 'ltx:td');
  # Each open* callback receives the document followed by the attributes for the new element.
  my $alignment = LaTeXML::Core::Alignment->new(
    template       => $template,
    openContainer  => sub { $_[0]->openElement($container, @_[1 .. $#_]); },
    closeContainer => sub { $_[0]->closeElement($container); },
    openRow        => sub { $_[0]->openElement($rowtype, @_[1 .. $#_]); },
    closeRow       => sub { $_[0]->closeElement($rowtype); },
    openColumn     => sub { $_[0]->openElement($coltype, @_[1 .. $#_]); },
    closeColumn    => sub { $_[0]->closeElement($coltype); },
    isMath         => $ismath,
    properties     => {%properties});
  AssignValue(Alignment => $alignment);
  if ($LaTeXML::DEBUG{halign}) {
    # $template is not always an object (\@@oalign and \@@ooalign bind with the plain string 'l'),
    # so only call ->show on a reference.
    my $shown = (ref($template) ? $template->show : ToString($template));
    Debug("Halign $alignment: New " . $shown); }
  Let(T_MATH, ($ismath ? '\@dollar@in@mathmode' : '\@dollar@in@textmode'));
  return; }

# Row and column hooks, initially defined with an undef expansion.
# NOTE(review): presumably rebound by particular alignment environments -- confirm against callers.
DefMacroI('\@row@before',    undef, undef);
DefMacroI('\@row@after',     undef, undef);
DefMacroI('\@column@before', undef, undef);
DefMacroI('\@column@after',  undef, undef);

# Revert the argument with the dual branch set to 'presentation' for the duration.
sub pRevert {
  my $thing = shift;
  local $LaTeXML::DUAL_BRANCH = 'presentation';
  return Revert($thing); }

# Revert the argument with the dual branch set to 'content' for the duration.
sub cRevert {
  my $thing = shift;
  local $LaTeXML::DUAL_BRANCH = 'content';
  return Revert($thing); }

use constant T_close_alignment => T_CS('\@close@alignment');

sub digestAlignmentBody {
  my ($stomach, $whatsit) = @_;
  my $gullet = $stomach->getGullet;
  local $LaTeXML::ALIGN_STATE = 0;
  # Now read & digest the body.
  # Note that the body MUST end with a \cr, and that we've made Special Arrangments
  # with \alignment@cr to recognize the end of the \halign
  my $alignment = LookupValue('Alignment');
  local $LaTeXML::READING_ALIGNMENT = $alignment;
  if (!$alignment) {
    Error('missing', 'alignment', $stomach, "There is no open alignment structure here");
    return; }
  $whatsit->setProperty(alignment => $alignment);
  $alignment->setBody($whatsit);
  Debug("Halign $alignment: BODY Processing...") if $LaTeXML::DEBUG{halign};
  my $lastwascr  = undef;
  my @reversion  = ();
  my @creversion = ();
  while (1) {
    my ($cell, $next, $type, $hidden) = digestAlignmentColumn($stomach, $alignment, $lastwascr);
    Debug("Halign $alignment: BODY got CELL"
        . "[" . $alignment->currentRowNumber . "," . $alignment->currentColumnNumber . "]"
        . ToString($cell) . " ended at " . Stringify($next)) if $LaTeXML::DEBUG{halign};
    if (!$cell) {
      Debug("Halign $alignment: BODY DONE!") if $LaTeXML::DEBUG{halign};
      last; }
    if ($cell) {
      push(@reversion,  trimColumnTemplate($alignment, pRevert($cell)));
      push(@creversion, trimColumnTemplate($alignment, cRevert($cell))); }
    extractAlignmentColumn($alignment, $cell);
    $lastwascr = undef;
    if (!$type && (!$next
        || $next->defined_as(T_END)                    # End of alignment
        || $next->defined_as(T_close_alignment))) {    # End of alignment
      $alignment->endRow();
      last; }
    elsif ($type eq 'align') {
      $alignment->endColumn();
      if (!$hidden) {
        push(@reversion,  $next);                      # and record the &
        push(@creversion, $next); } }                  # and record the &
    elsif ($type eq 'insert') {
      $alignment->endColumn(); }
    elsif (($type eq 'cr') || ($type eq 'crcr')) {
      $alignment->endRow();
      if (!$hidden) {
        push(@reversion,  $next);

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

      || (ref $boxes[0] eq 'LaTeXML::Core::Comment')) {
      push(@saveleft, shift(@boxes)); }
    else {
      last; } }
  while (@boxes) {
    if (ref $boxes[-1] eq 'LaTeXML::Core::List') {
      push(@boxes, pop(@boxes)->unlist); }
    elsif ($boxes[-1]->getProperty('isFill')) {
      if ($align eq 'right') { $align = 'center'; }
      pop(@boxes);
      last; }
    elsif ($boxes[-1]->getProperty('isVerticalRule')) {
      $border .= 'r';
      @rspaces = ();    # discard spacing after rule!!! (should save for next column?)
      pop(@boxes); }
    elsif ($boxes[-1]->getProperty('isSpace')) {
      unshift(@rspaces, pop(@boxes)); }
    elsif ($boxes[-1]->getProperty('isHorizontalRule')
      || $boxes[-1]->getProperty('alignmentSkippable')
      || (ref $boxes[-1] eq 'LaTeXML::Core::Comment')) {
      unshift(@saveright, pop(@boxes)); }
    else {
      last; } }
  delete $$colspec{width} unless $align eq 'justify';
  # Replacing boxes with the fil padding & vertical rules stripped off
  @boxes = (@saveleft, @boxes, @saveright);
  $boxes = List(@boxes, mode => ($boxes->isMath ? 'math' : 'text'));
  # record relevant info in the Alignment.
  $$colspec{align}   = $align;
  $$colspec{border}  = $border = ($$colspec{border} || '') . $border;
  $$colspec{boxes}   = $boxes;
  $$colspec{lspaces} = List(@lspaces) if @lspaces;
  $$colspec{rspaces} = List(@rspaces) if @rspaces;
  $$colspec{colspan} = $n1 - $n0 + 1;

  if ($$alignment{in_tabular_head} || $$alignment{in_tabular_foot}) {
    $$colspec{thead}{column} = 1; }
  for (my $i = $n0 + 1 ; $i <= $n1 ; $i++) {
    my $c = $alignment->getColumn($i);
    $$c{skipped} = 1 if $c; }
  Debug("Halign $alignment: INSTALL column " . join(',', map { $_ . "=" . ToString($$colspec{$_}); } sort keys %$colspec)) if $LaTeXML::DEBUG{halign};
  return $boxes; }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Cleanup the pre & post tokens for halign columns in math mode.
# If a pair of $..$ enclose stuff that is "OK" in math mode, we don't need the $.
# Note that the 1st $ is switching OUT of math mode!
sub stripDupMath {
  my (@tokens) = @_;
  # Positions of all the $ tokens.
  my @dollars = grep { $tokens[$_]->defined_as(T_MATH) } 0 .. $#tokens;
  shift(@dollars) if @dollars % 2;    # Odd count: ignore the first, so the rest pair up
  # Visit the pairs from right to left, so that removing a pair
  # leaves the positions of the pairs still to be visited unchanged.
  for (my $i = $#dollars ; $i > 0 ; $i -= 2) {
    my ($first, $second) = @dollars[$i - 1, $i];
    # Only a directly adjacent pair ($$ with nothing in between) is removed.
    splice(@tokens, $first, 2) if $second == $first + 1; }
  return @tokens; }

# "Initialized" alignment; in TeX it presets spacing, but since we're ignoring that anyway,
# it is simply made equivalent to \halign.
Let('\ialign', '\halign');

# Overlapping alignments ???
# \oalign wraps its argument in the alignment start/finish markers
# and hands the result to the constructor below.
DefMacro('\oalign{}', '\@@oalign{\@start@alignment#1\@finish@alignment}');
# Bounded, in text mode, with an alignment bound (using 'l') before digestion.
DefConstructor('\@@oalign{}', '#1',
  mode         => 'text',
  bounded      => 1,
  reversion    => '\oalign{#1}',
  beforeDigest => sub {
    return alignmentBindings('l'); });

# This is actually different; the lines should lie on top of each other.
# How should this be represented?
# \ooalign wraps its argument in the alignment start/finish markers
# and hands the result to the constructor below.
DefMacro('\ooalign{}', '\@@ooalign{\@start@alignment#1\@finish@alignment}');
# Bounded, in text mode, with an alignment bound (using 'l') before digestion.
DefConstructor('\@@ooalign{}', '#1',
  mode         => 'text',
  bounded      => 1,
  reversion    => '\ooalign{#1}',
  beforeDigest => sub {
    return alignmentBindings('l'); });

#----------------------------------------------------------------------
# These determine whether the _next_ paragraph gets indented!
# thus it needs \par to check whether such indentation has been set.
DefConstructorI('\indent', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    return unless $current;
    if ($document->getNodeQName($current) eq 'ltx:para') {
      # Already at a logical paragraph: just mark it.
      $current->setAttribute(class => "ltx_indent"); }
    elsif ($document->canContainSomehow($current, "ltx:para")) {
      # A paragraph can be started at this position, so start one.
      # However, perversely ignore indent on 1st para after sectioning titles
      my $previous   = $current->lastChild;
      my $aftertitle = $previous && ($document->getNodeQName($previous) =~ /^ltx:(?:toc)?title$/);
      my @attributes = ($aftertitle ? () : (class => "ltx_indent"));
      $document->openElement("ltx:para", @attributes); }
    # Anywhere else, \indent is ignored.
    return; });
DefConstructorI('\noindent', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    return unless $current;
    if ($document->getNodeQName($current) eq 'ltx:para') {
      # Already at a logical paragraph: just mark it.
      $current->setAttribute(class => "ltx_noindent"); }
    elsif ($document->canContainSomehow($current, "ltx:para")) {
      # A paragraph can be started at this position, so start one.
      $document->openElement("ltx:para", class => "ltx_noindent"); }
    # Anywhere else, \noindent is ignored.
    return; });

# <ltx:para> represents a Logical Paragraph, whereas <ltx:p> is a `physical paragraph'.
# A para can contain both p and displayed equations and such.

# Remember; \par _closes_, not opens, paragraphs!
# Here, we want to close both an open p and para (if either are open).
# NOTE Also that the whole inPreamble bit is, I think, overused.
# For example, \par should be a NOOP in vertical mode, and that would generally make it
# ignored in the preamble.
DefConstructorI('\normal@par', undef, sub {
    my ($document, %props) = @_;
    # Construction: nothing is done for a \par that was digested in the preamble.
    if ($props{inPreamble}) { }
    else {
      $document->maybeCloseElement('ltx:p');
      my $node  = $document->getElement;
      my $qname = ($node && $document->getNodeQName($node)) || '';
      if ($qname eq 'ltx:para' && !$node->getAttribute("class")) { # Only set on the para about to close, if unknown!
        if (my $c = $props{class}) {
          $document->setAttribute($node, class => $c); } }
      $document->maybeCloseElement('ltx:para'); } },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # Digestion: record whether we are in the preamble; otherwise pick up the class
    # left by the previous \par and prepare the one for the next.
    if (LookupValue('inPreamble')) {
      $whatsit->setProperty(inPreamble => 1); }
    else {
      # Check if flags were set by prior \par:
      if (my $c = LookupValue("next_para_class")) {
        $whatsit->setProperty(class => $c);
        AssignValue(next_para_class => undef); }
      # Fish out flags for next ltx:para, to be used when the next \par closes:
      if (!LookupRegister('\parindent')->valueOf) {
        # respect \parindent if no overrides are given
        AssignValue(next_para_class => "ltx_noindent"); }
      # Vertical adjustments
      # Any tokens saved under vAdjust are digested now, and the saved list is reset globally.
      if (my $vadj = LookupValue('vAdjust')) {
        AssignValue(vAdjust => [], 'global');
        Digest(Tokens(@$vadj)); }
      else {
        return; } } },
  properties => { alignmentSkippable => 1 },
  alias      => '\par');

# \par is, by default, the normal paragraph ender defined above.
Let('\par', '\normal@par');
# Reads (and discards) an optional * and an optional [Glue], then acts as \normal@par.
DefMacro('\inner@par OptionalMatch:* [Glue]', '\normal@par');  # Obsolete, but in case still used...

# ltx:para opens and closes automatically; pruneEmpty is run after each one closes.
Tag('ltx:para', autoClose => 1, autoOpen => 1, afterClose => \&pruneEmpty);

# Remove $node from the document when it has no element children.
sub pruneEmpty {
  my ($document, $node) = @_;
  # In some cases we could have e.g. a \noindent followed by a {table},
  # in which case we end up with an empty ltx:para which we can prune.
  return if scalar(element_nodes($node));
  my $previous = element_prev($node);
  # No preceding element, or one that is not an ltx:para: note that on the parent.
  my $wasfirst = !$previous || ($document->getNodeQName($previous) ne 'ltx:para');
  $document->addClass($node->parentNode, 'ltx_pruned_first') if $wasfirst;
  $node->unlinkNode;
  return; }

# Trim whitespace at both ends of $node: first the left end, then the right.
sub trimNodeWhitespace {
  my ($document, $node) = @_;
  foreach my $trimmer (\&trimNodeLeftWhitespace, \&trimNodeRightWhitespace) {
    $trimmer->($document, $node); }
  return; }

# Strip leading spaces from the leftmost text of $node, following
# first children downwards for as long as they are elements.
sub trimNodeLeftWhitespace {
  my ($document, $node) = @_;
  my $current = $node;
  while (my @children = $current->childNodes) {
    my $first = $children[0];
    my $type  = $first->nodeType;
    if ($type == XML_ELEMENT_NODE) {    # descend into the first child
      $current = $first;
      next; }
    if ($type == XML_TEXT_NODE) {
      my $text = $first->data;
      # Only plain spaces are removed (not \s): with some trepidation,
      # I don't think we want to trim nbsp!
      $first->setData($text) if $text =~ s/^ +//; }
    last; }    # a text node, or any other kind of node, ends the descent
  return; }

# Strip trailing whitespace from the rightmost text of $node, following
# last children downwards for as long as they are elements.
sub trimNodeRightWhitespace {
  my ($document, $node) = @_;
  my $current = $node;
  while (my @children = $current->childNodes) {
    my $final = $children[-1];
    my $type  = $final->nodeType;
    if ($type == XML_ELEMENT_NODE) {    # descend into the last child
      $current = $final;
      next; }
    if ($type == XML_TEXT_NODE) {
      my $text = $final->data;
      $final->setData($text) if $text =~ s/\s+$//; }
    last; }    # a text node, or any other kind of node, ends the descent
  return; }

# ltx:p opens and closes automatically; its whitespace is trimmed after each one closes.
Tag('ltx:p', autoClose => 1, autoOpen => 1, afterClose => \&trimNodeWhitespace);

# \dump ???

# \end flushes the gullet and produces nothing.
DefPrimitiveI('\end', undef, sub { $_[0]->getGullet->flush; return; });

#======================================================================
# Horizontal Mode primitives in Ch.25, pp.285--287

# The following cause tex to start a new paragraph -- they switch to horizontal mode.
# <horizontal command> = <letter> | <other> | \char | <chardef token>
#    | \noboundary | \unhbox | \unhcopy | \valign | \vrule

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

    my ($stomach, $length) = @_;
    my $s = DimensionToSpaces($length);
    Box($s, undef, undef, Invocation(T_CS('\mkern'), $length),
      width => $length, isSpace => 1); });

# \hss and \hfilneg are defined with no replacement code (undef).
DefPrimitiveI('\hss',     undef, undef);
DefPrimitiveI('\hfilneg', undef, undef);

# \hfil and \hfill each digest to a single-space Box flagged as both space and fill.
DefPrimitiveI('\hfil', undef, sub {
    my @flags = (isSpace => 1, isFill => 1);
    return Box(' ', undef, undef, T_CS('\hfil'), @flags); });
DefPrimitiveI('\hfill', undef, sub {
    my @flags = (isSpace => 1, isFill => 1);
    return Box(' ', undef, undef, T_CS('\hfill'), @flags); });

# \lower <dimen> <box>
# \raise <dimen> <box>
# But <box> apparently must really explicitly be an \hbox, \vbox or \vtop (?)
# OR something that expands into one!!
# Size of $box after shifting it by $y: the width is unchanged, $y is added to
# the height and subtracted from the depth, and neither is allowed below zero.
# Returns (width, height, depth).
sub raisedSizer {
  my ($box, $y) = @_;
  my ($width, $height, $depth) = $box->getSize;
  my $zero = Dimension(0);
  my $newheight = $height->add($y)->larger($zero);
  my $newdepth  = $depth->subtract($y)->larger($zero);
  return ($width, $newheight, $newdepth); }

DefConstructor('\lower Dimension MoveableBox',
  "?&inSVG()(<svg:g transform='#transform' _noautoclose='1'>#2</svg:g>)"
    . "(<ltx:text yoffset='#y'  _noautoclose='1'>#2</ltx:text>)",
  sizer => sub {
    my ($whatsit) = @_;
    # Lowering is raising by the negated dimension.
    return raisedSizer($whatsit->getArg(2), $whatsit->getArg(1)->negate); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $offset = $whatsit->getArg(1)->multiply(-1);
    my $pixels = $offset->pxValue;
    # No transform at all when the offset comes to 0 pixels.
    return $whatsit->setProperties(
      y         => $offset,
      transform => ($pixels ? "translate(0,$pixels)" : undef)); });

DefConstructor('\raise Dimension MoveableBox',
  "?&inSVG()(<svg:g transform='#transform' _noautoclose='1'>#2</svg:g>)"
    . "(<ltx:text yoffset='#y'  _noautoclose='1'>#2</ltx:text>)",
  sizer => sub {
    my ($whatsit) = @_;
    return raisedSizer($whatsit->getArg(2), $whatsit->getArg(1)); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $offset = $whatsit->getArg(1);
    my $pixels = $offset->pxValue;
    # No transform at all when the offset comes to 0 pixels.
    return $whatsit->setProperties(
      y         => $offset,
      transform => ($pixels ? "translate(0,$pixels)" : undef)); });

# \unhbox<8bit>, \unhcopy<8bit>
# \unhbox<n>: yields the unwrapped contents of box n, and clears the box.
DefPrimitive('\unhbox Number', sub {
    my (undef, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    adjustBoxColor($contents);
    AssignValue($register, undef);    # the box is emptied by being used
    return (defined $contents ? $contents->unlist : List()); });

# \unhcopy<n>: yields the unwrapped contents of box n, leaving the box as it was.
DefPrimitive('\unhcopy Number', sub {
    my (undef, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    adjustBoxColor($contents);
    return (defined $contents ? $contents->unlist : List()); });

# Implement ???
# DefMacro('\vrule','\relax');
# \valign is not implemented: it expands to nothing.
DefMacro('\valign', '');

# \vspace{dim} becomes \vskip dim, with \relax terminating the glue.
DefMacro('\vspace{}', '\vskip#1\relax');
# \indent, \noindent, \par; see above.

# Only the third argument of \discretionary is kept.
DefMacro('\discretionary{}{}{}', '#3');    # No hyphenation here!
# \- and \setlanguage are defined with no replacement code (undef);
# \setlanguage still reads its Number argument.
DefPrimitiveI('\-', undef, undef);
DefPrimitive('\setlanguage Number', undef);

#======================================================================
# Math mode stuff
# See TeXBook Ch.26
#======================================================================
# Decide whether we're going into or out of math, inline or display.
Tag('ltx:XMText', autoOpen => 1, autoClose => 1);
# This really should be T_MATH
# and it should (or not) check for a second $ only if not in restricted horizontal mode!
# (and then all the \@dollar@in@(text|math|normal)mode defns would not be needed).
#
# Chooses what a $ means from the current MODE:
#   display_math : expects a second $ and ends display math (an Error is reported if it is missing);
#   inline_math  : ends inline math;
#   otherwise    : a following $ begins display math, else inline math begins.
DefPrimitiveI('\@dollar@in@normalmode', undef, sub {
    my ($stomach) = @_;
    my $gullet    = $stomach->getGullet;
    my $mode      = LookupValue('MODE');
    my $op        = '\@@BEGININLINEMATH';
    if ($mode eq 'display_math') {
      if ($gullet->ifNext(T_MATH)) {
        $gullet->readToken;
        $op = '\@@ENDDISPLAYMATH'; }
      else {
        # Avoid a Fatal, but we're likely in trouble.
        # Should we switch to text mode? (LaTeX normally wouldn't)
        # Did we miss something and should have already been in text mode? Possibly...
        # OR, were we in a lenient package that allowed inline math mixed in with display?
        Error('expected', '$', $stomach,
          "Missing \$ closing display math.",
          "Ignoring; expect to be in wrong math/text mode.");
        $op = undef; } }
    elsif ($mode eq 'inline_math') {
      $op = '\@@ENDINLINEMATH'; }
    #  elsif(!LookupValue('Alignment') && $gullet->ifNext(T_MATH)){
    elsif ($gullet->ifNext(T_MATH)) {
      $gullet->readToken;
      $op = '\@@BEGINDISPLAYMATH'; }
    # Nothing is invoked when $op was cleared by the error case above.
    $stomach->invokeToken(T_CS($op)) if $op; });
# Let this be the default, conventional $
Let(T_MATH, T_CS('\@dollar@in@normalmode'));

# Effectively these are the math hooks, redefine these to do what you want with math?
# Begin display math: the captured body becomes the XMath of a display-mode Math in an equation.
DefConstructorI('\@@BEGINDISPLAYMATH', undef,
  "<ltx:equation>"
    . "<ltx:Math mode='display'>"
    . "<ltx:XMath>"
    . "#body"
    . "</ltx:XMath>"
    . "</ltx:Math>"
    . "</ltx:equation>",
  reversion    => Tokens(T_MATH, T_MATH),
  captureBody  => 1,
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->beginMode('display_math');
    # Put back the tokens of \everymath, and then those of \everydisplay, to be read next.
    foreach my $register ('\everymath', '\everydisplay') {
      my @tokens = $STATE->lookupDefinition(T_CS($register))->valueOf->unlist();
      $stomach->getGullet->unread(@tokens) if @tokens; }
    return; });
# End display math: constructs nothing, just leaves display_math mode.
DefConstructorI('\@@ENDDISPLAYMATH', undef, "",
  reversion    => Tokens(T_MATH, T_MATH),
  beforeDigest => sub {
    my ($stomach) = @_;
    return $stomach->endMode('display_math'); });

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

  my ($labels, $id, $idctr, $idctrm, $tags);
  foreach my $eq (@equations) {
    if (my $l = $eq->getAttribute('labels')) {
      $labels = ($labels ? "$labels $l" : $l); }
    $id = $eq->getAttribute('xml:id') if $eq->hasAttribute('xml:id');
    $eq->removeAttribute('xml:id')    if $id;
    $tags = $document->findnode('ltx:tags', $eq);
    # Annoying bookkeeping (should be more built in?)
    $idctr  = $eq->getAttribute('_ID_counter_')   if $eq->hasAttribute('_ID_counter_');
    $idctrm = $eq->getAttribute('_ID_counter_m_') if $eq->hasAttribute('_ID_counter_m_'); }
  $document->unRecordID($id)                                      if $id;
  $document->setAttribute($equation, labels => $labels)           if $labels;
  $document->setAttribute($equation, 'xml:id' => $id)             if $id;
  $document->setAttribute($equation, '_ID_counter_' => $idctr)    if $idctr;
  $document->setAttribute($equation, '_ID_counter_m_' => $idctrm) if $idctrm;
  $equation->appendChild($tags)                                   if $tags;

  # Scan equations to see which ones likely are continuations of previous
  my ($mainfork, $branch) = openMathFork($document, $equation);
  foreach my $eq (@equations) {
    # remove equation; parts will be added in by adding to mathfork (hopefully taking care of ids)
    $eq->unbindNode;
    my $tr    = $document->openElementAt($branch, 'ltx:tr');
    my @cells = $document->findnodes('ltx:_Capture_', $eq);
    $document->setAttribute($tr, class => 'ltx_eqn_lefteqn')
      if ($cells[0]->getAttribute('class') || '') =~ /\blefteqn\b/;
    foreach my $cell (@cells) {
      addColumnToMathFork($document, $mainfork, $tr, $cell); }
    $document->closeElementAt($tr); }
  closeMathFork($document, $equation, $mainfork, $branch);
  $document->closeElementAt($equation);
  return; }

# Given an equation generated in an equationgroup,
# collect each $ncols columns into a MathFork structure,
# with the formatted portion being the columns.
# This is typically useful for AMS's align structures,
# which contain several columns, each pair of which represent a semantic equation.
sub equationgroupJoinCols {
  my ($document, $ncols, $equation) = @_;
  my ($mainfork, $branch);
  my $ncells = 0;    # number of captured cells handled so far
  foreach my $cell ($document->findnodes('ltx:_Capture_', $equation)) {
    next unless $document->getNodeQName($cell) =~ /(.*?:)?_Capture_$/;
    # Every $ncols cells, finish the MathFork in progress (if any) and open a new one.
    if ($ncells % $ncols == 0) {
      closeMathFork($document, $equation, $mainfork, $branch) if $mainfork;
      ($mainfork, $branch) = openMathFork($document, $equation); }
    $ncells++;
    addColumnToMathFork($document, $mainfork, $branch, $cell); }
  # Finish the last MathFork, if one was opened at all.
  closeMathFork($document, $equation, $mainfork, $branch) if $mainfork;
  return; }

#**********************************************************************

# \vcenter is simply made equivalent to \vbox.
Let('\vcenter', '\vbox');

# \eqno & \leqno are really bizarre.
# They should seemingly digest until $ (or while still in math mode),
# and use that stuff as the reference number.
# However, since people abuse this, and we're really not quite TeX,
# we really can't do it Right.
# Even a \begin{array} ends up expanding into a $ !!!
DefMacroI('\eqno', undef, sub {
    my ($gullet) = @_;
    my $locator  = $gullet->getLocator;
    my @stuff    = ();
    # This is risky!!!
    while (my $t = $gullet->readXToken(0)) {
      if ($t->defined_as(T_BEGIN)) {
        push(@stuff, $t, $gullet->readBalanced, T_END); }
      # What do I need to explicitly list here!?!?!? UGGH!
      elsif ($t->defined_as(T_MATH)
        || $t->defined_as(T_CS('\]'))
        # UGH from 2022: also don't jump over rows
        || $t->defined_as(T_CS('\cr'))
        # see arXiv:math/0001062, for one example
        || $t->defined_as(T_CS('\hidden@cr'))
        || $t->defined_as(T_CS('\@@ENDDISPLAYMATH'))
        || $t->defined_as(T_CS('\begingroup'))       # Totally wrong, but to catch expanded environments
        || (ToString($t) =~ /^\\(?:begin|end)\{/)    # any sort of environ begin or end???
                                                     # This seems needed within AmSTeX environs
      ) {
        return (Invocation(T_CS('\@@eqno'), Tokens(@stuff)), $t); }
      else {
        push(@stuff, $t); } }
    Error('unexpected', '\eqno', $gullet, "Fell of the end reading tag for \\eqno!",
      "started " . ToString($locator));
    return Tokens(@stuff); });

# \leqno is given the same meaning as \eqno (no left/right distinction is made here).
Let('\leqno', '\eqno');
# \@@eqno wraps its argument as math inside <ltx:tags><ltx:tag>.
# NOTE(review): the leading "^" in the pattern presumably lets the element float up
# to an ancestor that accepts it — confirm against the constructor pattern documentation.
# Revert to nothing, since it really doesn't belong in the TeX string(?)
DefConstructor('\@@eqno{}',
  "^ <ltx:tags><ltx:tag><ltx:Math><ltx:XMath>#1</ltx:XMath></ltx:Math></ltx:tag></ltx:tags>",
  reversion => '');

#======================================================================
# Scripts are a bit of a strange beast, with respect to when the arguments
# are processed, and what kind of object should be created.
#
# While scripts look like they take a normal TeX argument, they really
# take the next BOX (AFTER expansion & digestion)!  Thus, while
#   a^\frac{b}{c} and a^\mathcal{B}
# DO work in TeX, other things like
#   a^\sqrt{3} or a^\acute{b}
# DO NOT! (Hint: consider the expansions)
# Note that with
#  \def\xyz{xyz}
#   a^\xyz   =>  a^{x}yz
# So, we try to mimic, but note that our boxes don't correspond 100% to TeX's
#
# Normally, sub/super scripts should be turned into a sort of postfix operator:
# The parser will attach the script to the appropriate preceding object.
# However, there are a few special cases involving empty boxes {}.
# If the argument is an empty box $x^{}$, the whole script should just disappear.
# If the PRECEDING box is {} (in ${}^{p}$, a sort of `floating' script should be created.
# This may combine, in the parser, with the following object to generate
# a prescript.

# Remember a "safe" way to test a script Whatsit.
# Returns [ (FLOATING|POST) , (SUBSCRIPT|SUPERSCRIPT) ] or nothing
sub IsScript {
  my ($object) = @_;
  if (ref $object eq 'LaTeXML::Core::List') {

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

# Estimate the size (width, height, depth) to be attributed to a sub- or superscript.
# Arguments:
#   $script : the script's content; its size is scaled by 0.8 (a hack)
#   $base   : the object the script is attached to, if known;
#             otherwise the nominal size of the current font is used
#   $prev   : a previous script, if any; its width is subtracted in the 'post' case
#   $op     : 'SUPERSCRIPT' for a superscript; any other value is treated as a subscript
#   $pos    : 'mid' or 'post'; when undefined, the base's 'scriptpos' property is used
#             (if there is a base), and failing that 'post'
# Returns a list of three Dimensions ($w, $h, $d).
sub scriptSizer {
  my ($script, $base, $prev, $op, $pos) = @_;

  # NOTE: Currently, the mathstyle is NOT reflected in the font of the script!!!!
  # Or is it now ?????
  # [unless it's different from the 'expected' style!!!]
  my ($ws, $hs, $ds) = map { $_->valueOf } $script->getSize;
  $ws *= 0.8; $hs *= 0.8; $ds *= 0.8;    # HACK!@!!
  my ($wb, $hb, $db) = map { $_->valueOf } ($base ? $base->getSize
    : LookupValue('font')->getNominalSize);
  my ($w, $h, $d) = (0, 0, 0);
  # Fishing for the scriptpos on the base (if any)
  $pos = $base->getProperty('scriptpos') if !defined $pos && defined $base;
  $pos = 'post'                          if !defined $pos;
  if ($pos eq 'mid') {
    # Script is stacked with the base: only the excess width counts.
    $w = max(0, $ws - $wb);    # as if max width of base & script
    if ($op eq 'SUPERSCRIPT') {
      $h = $hb + $ds + $hs; }
    else {
      $d = $db + $hs + $ds; } }
  else {
    # Script follows the base: only the width beyond a previous script counts.
    my $wp = ($prev && $prev->getWidth) || 0;    # as if max of width & prev script's width
    $w = max(0, $ws - $wp);
    if ($op eq 'SUPERSCRIPT') {
      $h = $hb + $hs / 2; }
    else {
      $d = $hs / 2 + $ds; } }
  $w = Dimension($w); $h = Dimension($h); $d = Dimension($d);
  return ($w, $h, $d); }

# NOTE(review): the original note here was cut off mid-sentence ("When reverting these, the ...");
# it presumably concerned the reversion closures below — confirm the intended wording.
#
# The four script constructors share one XML shape: an ltx:XMApp whose role names the kind
# of script, containing a single ltx:XMArg (rule Superscript or Subscript) holding the script.
# The scriptpos attribute is the scriptpos property when that is set, else the scriptlevel property.
# The POST variants are sized against the whatsit's 'base' and 'prevscript' properties;
# the FLOATING variants pass no base or previous script, and revert with a leading empty group.
DefConstructor('\@@POSTSUPERSCRIPT InScriptStyle',
  "<ltx:XMApp role='POSTSUPERSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
    . "<ltx:XMArg rule='Superscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub { (T_SUPER, revertScript($_[1])); },
  sizer     => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'),
      $_[0]->getProperty('prevscript'), 'SUPERSCRIPT', 'post'); });
DefConstructor('\@@POSTSUBSCRIPT InScriptStyle',
  "<ltx:XMApp role='POSTSUBSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
    . "<ltx:XMArg rule='Subscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub { (T_SUB, revertScript($_[1])); },
  sizer     => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'),
      $_[0]->getProperty('prevscript'),
      'SUBSCRIPT', 'post'); });
DefConstructor('\@@FLOATINGSUPERSCRIPT InScriptStyle',
  "<ltx:XMApp role='FLOATSUPERSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
    . "<ltx:XMArg rule='Superscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub { (T_BEGIN, T_END, T_SUPER, revertScript($_[1])); },
  sizer     => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUPERSCRIPT', 'post'); });
DefConstructor('\@@FLOATINGSUBSCRIPT InScriptStyle',
  "<ltx:XMApp role='FLOATSUBSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
    . "<ltx:XMArg rule='Subscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub { (T_BEGIN, T_END, T_SUB, revertScript($_[1])); },
  sizer     => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUBSCRIPT', 'post'); });

# Expansion of an active ' in math: a run of primes becomes a single superscript group
# of \prime tokens, merged with an explicit superscript that directly follows, if any.
DefMacroI('\active@math@prime', undef, sub {
    my ($gullet) = @_;
    # The ' that got us here counts as the first prime.
    my @primes = (T_CS('\prime'));
    # Swallow each immediately following ', adding one \prime for each.
    while ($gullet->ifNext(T_OTHER('\''))) {
      $gullet->readToken;
      push(@primes, T_CS('\prime')); }
    # A superscript right after the primes is merged into the same group.
    # However, this is semantically screwed up!
    # We really need to set up separate superscripts, but at same level!
    if ($gullet->ifNext(T_SUPER)) {
      $gullet->readToken;
      my $following = $gullet->readArg;
      push(@primes, $following->unlist); }
    return (T_SUPER, T_BEGIN, @primes, T_END); },
  locked => 1);    # Only in math!
AssignMathcode("'" => 0x8000);
Let("'", '\active@math@prime');

# Experiment: When we detect a math element containing solely a floating superscript in the
#             *Frontmatter* of a document, assume it is a note mark, and normalize it down to
#             plain text.
# The xpath selects an ltx:Math whose ltx:XMath child has an ltx:XMApp child that
#   - has role FLOATSUPERSCRIPT or FLOATSUBSCRIPT,
#   - has no preceding or following sibling elements, and
#   - has no grandchildren other than ltx:XMTok.
# NOTE(review): nothing in the xpath itself restricts the match to frontmatter — confirm
# where that restriction is expected to come from.
DefRewrite(xpath => 'descendant::ltx:Math[child::ltx:XMath[child::ltx:XMApp[' .
    '(@role="FLOATSUPERSCRIPT" or @role="FLOATSUBSCRIPT") and ' .
    'not(preceding-sibling::*) and not(following-sibling::*) ' .
    'and not(./*/*[not(self::ltx:XMTok)]) ]]]',
  replace => sub {
    my ($document, $math) = @_;
    # We can assume the grandchild of the XMath node is the XMArg,
    # which we need to normalize to scripted Unicode.
    if (my @xmath = element_nodes($math)) {
      if (my @xmapp = element_nodes($xmath[0])) {
        if (my @xmarg = element_nodes($xmapp[0])) {
          if (my $role = $xmapp[0]->getAttribute('role')) {
            my $text = $xmarg[0]->textContent;
            # Make the XMArg's box the current box while the replacement is inserted;
            # the local binding ends with this block.
            local $LaTeXML::BOX = $document->getNodeBox($xmarg[0]);
            if ($role eq 'FLOATSUPERSCRIPT') {
              $document->insertElement('ltx:sup', $text);
              return; }
            elsif ($role eq 'FLOATSUBSCRIPT') {
              $document->insertElement('ltx:sub', $text);
              return; }
    } } } }
    # should never happen, but just in case:
    # report it, and put the original math node back at the current insertion point.
    Info("rewrite", "footnotemark", "Failed to find floating node in: " . $math->toString(1));
    $document->getNode->appendChild($math);
    return; });

#======================================================================
# \choose & friends, also need VERY special argument handling

# After digesting the \choose (or whatever), grab the previous and following material
# and store as args in the whatsit.

# Increment the mathstyle stored in any boxes & whatsits.
# The tricky part is to know when NOT to increment!
# \displaystyle, constructors that set their own specific style,...
# And, any collateral adjustments that had been done in digestion depending on mathstyle
# WONT be adjusted!
# We don't have a clear API to find the displayable Boxes within;
# and we don't have a good handle on grouping...

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

    . "<ltx:XMRef _xmkey='#xmkey1'/>"
    . "<ltx:XMRef _xmkey='#xmkey2'/>"
    . "</ltx:XMApp>"
    . "<ltx:XMWrap>"
    . "#left)()"
    . "<ltx:XMApp>"
    . "<ltx:XMTok _xmkey='#xmkey0' role='#role' meaning='#meaning' mathstyle='#mathstyle' thickness='#thickness'/>"
    . "<ltx:XMArg _xmkey='#xmkey1'>#top</ltx:XMArg>"
    . "<ltx:XMArg _xmkey='#xmkey2'>#bottom</ltx:XMArg>"
    . "</ltx:XMApp>"
    . "?#needXMDual(#right"
    . "</ltx:XMWrap>"
    . "</ltx:XMDual>)()",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $kv = $whatsit->getArg(2);
    # Really, we want the mathstyle that was in effect BEFORE the group starting the numerator!
    # (there could be a \displaystyle INSIDE the numerator, but that's not the one we want)
    # Of course the group that started the numerator may be the start of the Math, itself!
    # AND, the numerator, which was already digested, needs it's mathstyle ADJUSTED
    my $font = ($STATE->isValueBound('MODE', 0)    # Last stack frame was a mode switch!?!?!
      ? $STATE->lookupValue('font')                # then just use whatever font we've got
      : ($STATE->isValueBound('font', 0)           # else if font was set in numerator
          && $STATE->valueInFrame('font', 1))
        || $STATE->lookupValue('font')             # then just use whatever font we've got
    );
    my $style     = $font->getMathstyle;
    my $role      = ToString($kv->getValue('role'));
    my $meaning   = ToString($kv->getValue('meaning'));
    my $thickness = ToString($kv->getValue('thickness'));
    $role    = 'FRACOP' unless $role;
    $meaning = 'divide' if (!$meaning) && ($thickness ne '0pt');
    # Unfortunately, the numerator's already digested! We have to adjust it's mathstyle
    my @top = $stomach->regurgitate;
    # really have to pass +/-1, +/-2 etc..!
    adjustMathstyle($style, {}, @top);
    MergeFont(fraction => 1);
    my @bot     = $stomach->digestNextBody();
    my $closing = pop(@bot);    # We'll leave whatever closed the list (endmath, endgroup...)
    $whatsit->setProperties(
      top       => List(@top, mode => 'math'),
      bottom    => List(@bot, mode => 'math'),
      role      => $role,
      meaning   => $meaning,
      thickness => $thickness,
      mathstyle => $style);
    if ($kv->getValue('left') || $kv->getValue('right')) {
      $whatsit->setProperties(needXMDual => 1,
        xmkey0 => LaTeXML::Package::getXMArgID(),
        xmkey1 => LaTeXML::Package::getXMArgID(),
        xmkey2 => LaTeXML::Package::getXMArgID()); }
    return $closing; },    # and leave the closing bit, whatever it is.
  properties => sub { %{ $_[2]->getKeyVals }; },
  sizer      => sub { fracSizer($_[0]->getProperty('top'), $_[0]->getProperty('bottom')); },
  reversion  => sub {
    my ($whatsit) = @_;
    (Revert($whatsit->getProperty('top')),
      $whatsit->getArg(1)->unlist,
      Revert($whatsit->getProperty('bottom'))); });

# The plain TeX infix "generalized fraction" operators.
# Each expands to \lx@generalized@over with two arguments:
#   1. the tokens of the original invocation (used by the constructor's reversion), and
#   2. a key=value list (meaning, thickness, left, right) describing the variant.
# The variants with thickness=0pt draw no rule; those without a meaning key leave
# the meaning to \lx@generalized@over's own defaulting.
DefMacro('\choose',
  '\lx@generalized@over{\choose}{meaning=binomial,thickness=0pt,left=\@left(,right=\@right)}');
DefMacro('\brace',
  '\lx@generalized@over{\brace}{thickness=0pt,left=\@left\{,right=\@right\}}');
DefMacro('\brack',
  '\lx@generalized@over{\brack}{thickness=0pt,left=\@left[,right=\@right]}');
DefMacro('\atop',
  '\lx@generalized@over{\atop}{thickness=0pt}');
# The ...withdelims forms read the two delimiter tokens (and, for \abovewithdelims,
# the rule thickness) and pass them on as the left/right/thickness keys.
DefMacro('\atopwithdelims Token Token',
  '\lx@generalized@over{\atopwithdelims #1 #2}{thickness=0pt,left={\@left#1},right={\@right#2}}');
DefMacro('\over',
  '\lx@generalized@over{\over}{meaning=divide}');
DefMacro('\overwithdelims Token Token',
  '\lx@generalized@over{\overwithdelims #1 #2}{left={\@left#1},right={\@right#2},meaning=divide}');
# My thinking was that this is a "fraction" providing the dimension is > 0!
DefMacro('\above Dimension',
  '\lx@generalized@over{\above #1}{meaning=divide,thickness=#1}');
DefMacro('\abovewithdelims Token Token Dimension',
'\lx@generalized@over{\abovewithdelims #1 #2 #3}{left={\@left#1},right={\@right#2},meaning=divide,thickness=#3}');

#======================================================================
# \cal switches to the calligraphic family, resetting series and shape as well.
DefPrimitiveI('\cal', undef, undef,
  font => { family => 'caligraphic', series => 'medium', shape => 'upright' });

# In principle, <ltx:emph> is a nice markup for emphasized.
# Unfortunately, TeX really just treats it as a font switch.
# Something like:  \em et.al. \rm more stuff
# works in TeX, but in our case, since there is no explicit {},
# the <ltx:emph> stays open!  Ugh!
# This could still be made to work, but merge font would
# need to look at any open <ltx:emph>, and then somehow close it!
#
# So \em simply toggles the shape of the current font (locally):
# italic becomes 'normal', anything else becomes italic.
# NOTE(review): the other font switches in this file use 'upright' for the non-italic
# shape; confirm that 'normal' is the intended value here.
DefPrimitiveI('\em', undef, undef,
  beforeDigest => sub {
    my $font  = LookupValue('font');
    my $shape = $font->getShape;
    AssignValue(font => $font->merge(shape => ($shape eq 'italic' ? 'normal' : 'italic')),
      'local'); });

# Change math font while still in text!
# Both set (or clear) the forcebold flag on the stored 'mathfont', locally;
# they are declared with forbidMath => 1.
DefPrimitiveI('\boldmath', undef, undef,
  beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 1), 'local'); },
  forbidMath => 1);
DefPrimitiveI('\unboldmath', undef, undef,
  beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 0), 'local'); },
  forbidMath => 1);

#======================================================================
# Alignments

# & gives an error except within the right context
# (which should redefine it!)
# The default definition only reports the error; the constructor's first argument is
# passed to Error as the object the message refers to.
DefConstructorI('&', undef, sub { Error('unexpected', '&', $_[0], "Stray alignment \"&\""); });

#**********************************************************************
# Plain;  Extracted from Appendix B.
#**********************************************************************

#======================================================================
# TeX Book, Appendix B, p. 344
#======================================================================
# Form feed (^^L) is defined, as an \outer macro, to expand to \par.
RawTeX('\outer\def^^L{\par}');
# \dospecials lists the special characters, each as a control symbol preceded by \do.
# NOTE: within a Perl single-quoted string, \\ denotes ONE backslash; the entry for the
# backslash character itself (\do\\ in TeX) followed by the next \do must therefore be
# written with five backslashes, so that TeX sees  \do\\\do\{  rather than  \do\\do\{ .
DefMacro('\dospecials', '\do\ \do\\\\\do\{\do\}\do\$\do\&\do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~');

# Normally, the content branch contains the pure structure and meaning of a construct,
# and the presentation is generated from lower level TeX macros that only concern
# themselves with how to display the object.
# Nevertheless, it is sometimes useful to know where the tokens in the presentation branch
# came from;  particularly what their presumed "meaning" is.
# For example, when search-indexing pmml, or providing links to definitions from the pmml.
#
# The following constructor (see how it's used in DefMath), adds meaning attributes
# wherever it seems sensible on the presentation branch, after it has been generated.
# Argument 1 is the meaning; argument 2 is the material, which is all that is
# constructed and all that appears in the reversion.
DefConstructor('\@ASSERT@MEANING{}{}', '#2',
  reversion      => '#2',
  afterConstruct => sub {
    my ($document, $whatsit) = @_;
    my $node    = $document->getNode;              # This should be the wrapper just added.
    my $meaning = ToString($whatsit->getArg(1));
    addMeaningRec($document, $node, $meaning);     # annotate the tokens found beneath that node
    $node; });

# Walk the tree below $node, setting meaning => $meaning on each ltx:XMTok
# that has no meaning yet and whose role is absent or 'UNKNOWN'.
# The walk never descends into ltx:XMArg elements, and ignores non-element nodes.
sub addMeaningRec {
  my ($document, $node, $meaning) = @_;
  return unless $node->nodeType == XML_ELEMENT_NODE;
  my $qname = $document->getModel->getNodeQName($node);
  return if $qname eq 'ltx:XMArg';    # DONT cross through into arguments!
  if ($qname ne 'ltx:XMTok') {
    # Any other element: just look through its children.
    foreach my $child ($node->childNodes) {
      addMeaningRec($document, $child, $meaning); }
    return; }
  # A token: annotate it only when nothing more specific is known about it.
  my $role = $node->getAttribute('role') || 'UNKNOWN';
  if (($role eq 'UNKNOWN') && !$node->getAttribute('meaning')) {
    $document->setAttribute($node, meaning => $meaning); }
  return; }

#======================================================================
# Properties for plain characters.
# These are allowed in plain text, but need to act a bit special in math.
# Each entry gives the character, no parameters (undef), the string used to present it,
# and then the attributes (role, meaning, name, stretchy) attached to it in math.
DefMathI('=', undef, '=', role => 'RELOP', meaning => 'equals');
DefMathI('+', undef, '+', role => 'ADDOP', meaning => 'plus');
DefMathI('-', undef, '-', role => 'ADDOP', meaning => 'minus');
## Redefine, if we want Unicode minus
##DefMathI('-', undef, "\x{2212}", role => 'ADDOP',   meaning  => 'minus');
# Note that * is presented as U+2217 rather than as the ASCII asterisk.
DefMathI('*', undef, "\x{2217}", role => 'MULOP',   meaning => 'times');
DefMathI('/', undef, '/',        role => 'MULOP',   meaning => 'divide');
DefMathI('!', undef, '!',        role => 'POSTFIX', meaning => 'factorial');
DefMathI(',', undef, ',',        role => 'PUNCT');
DefMathI('.', undef, '.',        role => 'PERIOD');
DefMathI(';', undef, ';',        role => 'PUNCT');
# Fences and bars are explicitly marked as not stretchy.
DefMathI('(', undef, '(', role => 'OPEN',      stretchy => 'false');
DefMathI(')', undef, ')', role => 'CLOSE',     stretchy => 'false');
DefMathI('[', undef, '[', role => 'OPEN',      stretchy => 'false');
DefMathI(']', undef, ']', role => 'CLOSE',     stretchy => 'false');
DefMathI('|', undef, '|', role => 'VERTBAR',   stretchy => 'false');
DefMathI(':', undef, ':', role => 'METARELOP', name     => 'colon');  # Seems like good default role
DefMathI('<', undef, '<', role => 'RELOP',     meaning  => 'less-than');
DefMathI('>', undef, '>', role => 'RELOP',     meaning  => 'greater-than');

# NOTE: Need to evolve Ligatures to be easier to write.
# rough draft of tool to make ligatures more sane to write...
# It is tempting to handle these with macros,
# But that tends to run afoul of tricky packages like babel that make : active as well!

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN


#======================================================================
# TeX Book, Appendix B, p. 346

# Plain TeX's scratch registers and allocation bookkeeping, passed verbatim to RawTeX:
# named scratch registers (\count@, \toks@, \skip@, \dimen@...), the \count10..\count20
# registers that record the last allocated number of each kind, and the \insc@unt,
# \allocationnumber and \m@ne count registers.
RawTeX(<<'EoTeX');
  \countdef\count@=255
  \toksdef\toks@=0
  \skipdef\skip@=0
  \dimendef\dimen@=0
  \dimendef\dimen@i=1
  \dimendef\dimen@ii=2
\count10=22 % allocates \count registers 23, 24, ...
\count11=9 % allocates \dimen registers 10, 11, ...
\count12=9 % allocates \skip registers 10, 11, ...
\count13=9 % allocates \muskip registers 10, 11, ...
\count14=9 % allocates \box registers 10, 11, ...
\count15=9 % allocates \toks registers 10, 11, ...
\count16=-1 % allocates input streams 0, 1, ...
\count17=-1 % allocates output streams 0, 1, ...
\count18=3 % allocates math families 4, 5, ...
\count19=0 % allocates \language codes 1, 2, ...
\count20=255 % allocates insertions 254, 253, ...
\countdef\insc@unt=20
\countdef\allocationnumber=21
\countdef\m@ne=22 \m@ne=-1
EoTeX
# Various \count's are set; should we?

#======================================================================
# TeX Book, Appendix B, p. 347
# \wlog writes its (expanded) argument to the log as a note; the definition is locked.
DefPrimitive('\wlog{}', sub {
    NoteLog(ToString(Expand($_[1])));
    return; },
  locked => 1);
# From plain.tex
# Each of these defines the given token as a register with a zero initial value,
# asking DefRegisterI to allocate it from the named register family.
DefPrimitive('\newcount  DefToken', sub {
    DefRegisterI($_[1], undef, Number(0), allocate => '\count'); });
DefPrimitive('\newdimen  DefToken', sub {
    DefRegisterI($_[1], undef, Dimension(0), allocate => '\dimen'); });
DefPrimitive('\newskip   DefToken', sub {
    DefRegisterI($_[1], undef, Glue(0), allocate => '\skip'); });
DefPrimitive('\newmuskip DefToken', sub {
    DefRegisterI($_[1], undef, MuGlue(0), allocate => '\muskip'); });
# Boxes are numbered by a separate counter kept in the 'allocated_boxes' value:
# \newbox takes the next number $n, stores an empty List under "box$n",
# and defines the token as a read-only register holding $n.
AssignValue(allocated_boxes => 0);
DefPrimitive('\newbox DefToken', sub {
    my $n = LookupValue('allocated_boxes');
    AssignValue(allocated_boxes => $n + 1, 'global');
    AssignValue("box$n", List());
    DefRegisterI($_[1], undef, Number($n), readonly => 1); });
# \newhelp just stores the help text as a value named by the token's string form.
DefPrimitive('\newhelp DefToken {}', sub { AssignValue(ToString($_[1]) => $_[2]); });
# \newtoks defines a register holding empty Tokens (no allocate option is passed).
DefPrimitive('\newtoks DefToken',    sub { DefRegisterI($_[1], undef, Tokens()); });
# The following four (\newread, \newwrite, \newfam, \newlanguage) are implemented
# with \chardef rather than \countdef & co., and so behave quite differently.
# \alloc@@{type} keeps one running number per type in the value 'allocation @<type>':
# it puts the current number into \allocationnumber (globally) and stores the next one.
DefPrimitive('\alloc@@ {}', sub {
    my ($stomach, $type) = @_;
    my $key     = 'allocation @' . ToString($type);
    my $current = LookupValue($key) || '0';
    if (ref $current) {    # unwrap a stored number object
      $current = $current->valueOf; }
    AssignValue($key => $current + 1, 'global');
    AssignRegister('\allocationnumber' => Number($current), 'global'); });
DefMacro('\newread DefToken',     '\alloc@@{read}\global\chardef#1=\allocationnumber');
DefMacro('\newwrite DefToken',    '\alloc@@{write}\global\chardef#1=\allocationnumber');
DefMacro('\newfam DefToken',      '\alloc@@{fam}\global\chardef#1=\allocationnumber');
DefMacro('\newlanguage DefToken', '\alloc@@{language}\global\chardef#1=\allocationnumber');

# \e@alloc takes six arguments.  As written here it globally advances #3 by one,
# copies it into \allocationnumber, and then globally applies #2 to #6 with that number.
# The range check (\e@ch@ck) and the \wlog line sit behind % within the macro text.
DefMacro('\e@alloc{}{}{}{}{}{}',
  '\global\advance#3\@ne
%  \e@ch@ck{#3}{#4}{#5}#1%
  \allocationnumber#3\relax
  \global#2#6\allocationnumber
%  \wlog{\string#6=\string#1\the\allocationnumber}
');
# \alloc@ re-expresses its four arguments as a call of \e@alloc.
DefMacro('\alloc@{}{}{}{}', '\e@alloc#2#3{\count1#1}#4\float@count');
# NOTE(review): \newread and \newwrite were already defined through \alloc@@ earlier in
# this file; these later definitions presumably supersede them — confirm which is intended.
DefMacro('\newread',        '\e@alloc\read \chardef{\count16}\m@ne\sixt@@n');
DefMacro('\newwrite', '\e@alloc\write
                   {\ifnum\allocationnumber=18
                     \advance\count17\@ne
                     \allocationnumber\count17 %
                    \fi
                    \global\chardef}%
                   {\count17}%
                   \m@ne
                   {128}');

# This implementation is quite wrong
# (it merely defines the token as a register holding 0).
DefPrimitive('\newinsert Token', sub { DefRegisterI($_[1], undef, Number(0)); });
# \alloc@, \ch@ck

# TeX plain uses \newdimen, etc. for these.
# Is there any advantage to that?
# Here they are defined directly as registers with their initial values;
# dimensions given as plain numbers are expressed in multiples of $UNITY.
DefRegister('\maxdimen',  Dimension(16383.99999 * $UNITY));
DefRegister('\hideskip',  Glue('-1000pt plus 1fill'));
DefRegister('\centering', Glue('0pt plus 1000pt minus 1000pt'));
DefRegister('\p@',        Dimension($UNITY));
DefRegister('\z@',        Dimension(0));
DefRegister('\z@skip',    Glue(0, 0, 0));

# First approximation. till I figure out \newbox
RawTeX('\newbox\voidb@x');
#======================================================================
# TeX Book, Appendix B, p. 348

# \newif\iffoo : declare the given token as a new conditional (no test is supplied).
DefPrimitive('\newif DefToken', sub {
    my (undef, $conditional) = @_;    # the first argument is not needed
    DefConditionalI($conditional, undef);
    return; });

# See the section Registers & Parameters, above for setting default values.
#======================================================================
# TeX Book, Appendix B, p. 349
# See the section Registers & Parameters, above for setting default values.

# These are originally defined with \newskip, etc
# Here they are defined directly as registers with their initial values.
DefRegister('\smallskipamount'          => Glue('3pt plus 1pt minus 1pt'));
DefRegister('\medskipamount'            => Glue('6pt plus 2pt minus 2pt'));
DefRegister('\bigskipamount'            => Glue('12pt plus 4pt minus 4pt'));
DefRegister('\normalbaselineskip'       => Glue('12pt'));
DefRegister('\normallineskip'           => Glue('1pt'));
DefRegister('\normallineskiplimit'      => Dimension('0pt'));
DefRegister('\jot'                      => Dimension('3pt'));
# \lx@default@jot starts out with the value \jot has at this point.
DefRegister('\lx@default@jot'           => LookupRegister('\jot'));
DefRegister('\interdisplaylinepenalty'  => Number(100));
DefRegister('\interfootnotelinepenalty' => Number(100));

# Magnification steps: \magstephalf is 1095; \magstep{n} expands to the digits of the
# n-th entry of @mags.  Anything other than a single digit below 6 is treated as step 0.
DefMacroI('\magstephalf', undef, '1095');
our @mags = (1000, 1200, 1440, 1728, 2074, 2488);
DefMacro('\magstep{}', sub {
    my $step = ToString($_[1]);
    if ($step =~ /^\d$/) {
      $step = int($step); }
    else {
      $step = 0; }
    # Fall back to step 0 for anything outside the table.
    if (($step < 0) || ($step >= 6)) {
      $step = 0; }
    Explode($mags[$step]); });

#======================================================================
# TeX Book, Appendix B, p. 350

# Font stuff ...
# The plain TeX font declarations, the extra font families, and the assignment of fonts
# to math families 0-3; the text is passed verbatim to RawTeX.
# \fivei is loaded from cmmi5, as in plain.tex: it is the 5pt member of the math italic
# series, and is the one assigned to \scriptscriptfont1 below.
RawTeX(<<'EoTeX');
 \font\tenrm=cmr10
 \font\sevenrm=cmr7
 \font\fiverm=cmr5
 \font\teni=cmmi10
 \font\seveni=cmmi7
 \font\fivei=cmmi5
 \font\tensy=cmsy10
 \font\sevensy=cmsy7
 \font\fivesy=cmsy5
 \font\tenex=cmex10
 \font\tenbf=cmbx10
 \font\sevenbf=cmbx7
 \font\fivebf=cmbx5
 \font\tensl=cmsl10
 \font\tentt=cmtt10
 \font\tenit=cmti10
 \newfam\itfam
 \newfam\slfam
 \newfam\bffam
 \newfam\ttfam
\textfont0=\tenrm\scriptfont0=\sevenrm\scriptscriptfont0=\fiverm
\textfont1=\teni\scriptfont1=\seveni\scriptscriptfont1=\fivei
\textfont2=\tensy\scriptfont2=\sevensy\scriptscriptfont2=\fivesy
\textfont3=\tenex
EoTeX
# Note: \newfam in math should be font switching(?)

#======================================================================
# TeX Book, Appendix B, p. 351

# Old style font styles.
# The trick is to create an empty Whatsit preserved till assimilation (for reversion'ing)
# but to change the current font used in boxes.
# (some of these were defined on different pages? or even latex...)
# ltx:text is declared to open and close automatically.
Tag('ltx:text', autoOpen => 1, autoClose => 1);

# Note that these, unlike \rmfamily, should set the other attributes to the defaults!
# Hence each switch below spells out family, series and shape together.
DefPrimitiveI('\rm', undef, undef,
  font => { family => 'serif', series => 'medium', shape => 'upright' });
DefPrimitiveI('\sf', undef, undef,
  font => { family => 'sansserif', series => 'medium', shape => 'upright' });
DefPrimitiveI('\bf', undef, undef,
  font => { series => 'bold', family => 'serif', shape => 'upright' });
DefPrimitiveI('\it', undef, undef,
  font => { shape => 'italic', family => 'serif', series => 'medium' });
DefPrimitiveI('\tt', undef, undef,
  font => { family => 'typewriter', series => 'medium', shape => 'upright' });
# No effect in math for the following 2 ?
DefPrimitiveI('\sl', undef, undef,
  font => { shape => 'slanted', family => 'serif', series => 'medium' });
DefPrimitiveI('\sc', undef, undef,
  font => { shape => 'smallcaps', family => 'serif', series => 'medium' });

# Ideally, we should set these sizes from class files
# The size switches below only set the font size, against a nominal size of 10.
AssignValue(NOMINAL_FONT_SIZE => 10);
DefPrimitiveI('\tiny',         undef, undef, font => { size => 5 });
DefPrimitiveI('\scriptsize',   undef, undef, font => { size => 7 });
DefPrimitiveI('\footnotesize', undef, undef, font => { size => 8 });
DefPrimitiveI('\small',        undef, undef, font => { size => 9 });
DefPrimitiveI('\normalsize',   undef, undef, font => { size => 10 });
DefPrimitiveI('\large',        undef, undef, font => { size => 12 });
DefPrimitiveI('\Large',        undef, undef, font => { size => 14.4 });
DefPrimitiveI('\LARGE',        undef, undef, font => { size => 17.28 });
DefPrimitiveI('\huge',         undef, undef, font => { size => 20.74 });
DefPrimitiveI('\Huge',         undef, undef, font => { size => 29.8 });

# \mit is declared with requireMath => 1, and sets only the family.
DefPrimitiveI('\mit', undef, undef, requireMath => 1, font => { family => 'italic' });

# \frenchspacing and \nonfrenchspacing are defined as primitives with no replacement.
DefPrimitiveI('\frenchspacing',    undef, undef);
DefPrimitiveI('\nonfrenchspacing', undef, undef);
# Restore the three line-spacing parameters from their \normal... counterparts.
DefMacroI('\normalbaselines', undef,
  '\lineskip=\normallineskip\baselineskip=\normalbaselineskip\lineskiplimit=\normallineskiplimit');
DefMacroI('\space', undef, Tokens(T_SPACE));
DefMacroI('\lq',    undef, "`");
DefMacroI('\rq',    undef, "'");
Let('\empty', '\@empty');
DefMacroI('\null', undef, '\hbox{}');
Let('\bgroup',  T_BEGIN);
Let('\egroup',  T_END);
Let('\endgraf', '\par');
Let('\endline', '\cr');

# NOTE(review): this immediately redefines the \endline that was just Let to \cr above,
# as a primitive with no replacement — confirm which of the two is intended.
DefPrimitiveI('\endline', undef, undef);

# Use \r for the newline from TeX!!!
DefMacroI("\\\r", undef, '\ ');    # \<cr> == \<space> Interesting (see latex.ltx)
Let(T_ACTIVE("\r"), '\par');       # (or is this just LaTeX?)

Let("\\\t", "\\\r");               # \<tab> == \<space>, also

#======================================================================
# TeX Book, Appendix B, p. 352

# \obeyspaces makes the space character active (catcode 13) and lets it be \space.
DefPrimitiveI('\obeyspaces', undef, sub {
    AssignCatcode(" " => 13);
    Let(T_ACTIVE(" "), '\space');
    return });
# Curiously enough, " " (a space) is ALREADY defined to be the same as "\space"
# EVEN before it is made active. (see p.380)
Let(T_ACTIVE(" "), '\space');

# \obeylines makes the end-of-line character (\r) active and lets it be \@break.
DefPrimitiveI('\obeylines', undef, sub {
    AssignCatcode("\r" => 13);
    Let(T_ACTIVE("\r"), '\@break');    # More appropriate than \par, I think?
    return });

# \@break produces an empty <ltx:break/> and is flagged with the isBreak property.
DefConstructor('\@break', "<ltx:break/>", properties => { isBreak => 1 });

# The \loop ... \repeat construct, given as raw TeX definitions:
# \loop stores everything up to \repeat in \body and starts \iterate, which runs \body
# and then either calls itself again or stops, depending on the conditional within \body.
RawTeX(<<'EoTeX');
\def\loop#1\repeat{\def\body{#1}\iterate}
\def\iterate{\body \let\next=\iterate \else\let\next=\relax\fi \next}
\let\repeat=\fi
EoTeX

# Fixed horizontal spaces.  Each produces a Box containing a Unicode space character
# (or nothing, for the negative space), reverting to the original control sequence,
# with a name, a nominal width and the isSpace property.
DefPrimitiveI('\enskip', undef, sub {
    Box("\x{2002}", undef, undef, T_CS('\enskip'),
      name => 'enskip', width => Dimension('0.5em'), isSpace => 1); });

# NOTE(review): \enspace is given name => 'enskip', the same as \enskip — confirm
# whether that is deliberate.
DefPrimitiveI('\enspace', undef, sub {
    Box("\x{2002}", undef, undef, T_CS('\enspace'),
      name => 'enskip', width => Dimension('0.5em'), isSpace => 1); });

DefPrimitiveI('\quad', undef, sub {
    Box("\x{2003}", undef, undef, T_CS('\quad'),
      name => 'quad', width => Dimension('1em'), isSpace => 1); });

# Conceivably should be treated as punctuation! (but maybe even \quad should !?!)
# \qquad is the only one here that also sets asHint.
DefPrimitiveI('\qquad', undef, sub {
    Box("\x{2003}\x{2003}", undef, undef, T_CS('\qquad'),
      name => 'qquad', width => Dimension('2em'), isSpace => 1, asHint => 1); });

DefPrimitiveI('\thinspace', undef, sub {
    Box("\x{2009}", undef, undef, T_CS('\thinspace'),
      name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); });

# The negative thin space has empty content and a negative width.
DefPrimitiveI('\negthinspace', undef, sub {
    Box("", undef, undef, T_CS('\negthinspace'),
      name => 'negthinspace', width => Dimension('-0.16667em'), isSpace => 1); });

# (Earlier constructor-based version, kept for reference.)
# DefConstructor('\hglue Glue', "?#isMath(<ltx:XMHint name='hglue' width='#width'/>)(\x{2003})",
#   properties => sub { (isSpace => 1, width => $_[1]); });

# \hglue converts the glue into a string via DimensionToSpaces and boxes that string;
# nothing is produced when the conversion yields undef.
DefPrimitive('\hglue Glue', sub {
    my ($stomach, $length) = @_;
    my $s = DimensionToSpaces($length);
    return unless defined $s;
    Box($s, undef, undef, Invocation(T_CS('\hglue'), $length),
      name => 'hglue', width => $length, isSpace => 1); });

# The vertical glue and interline-skip commands are defined with no replacement;
# \vglue still reads its Glue argument.
DefPrimitive('\vglue Glue', undef);
DefPrimitiveI('\topglue',          undef, undef);
DefPrimitiveI('\nointerlineskip',  undef, undef);
DefPrimitiveI('\offinterlineskip', undef, undef);

# The named vertical skips expand to \vskip by the corresponding ...skipamount register.
DefMacroI('\smallskip', undef, '\vskip\smallskipamount');
DefMacroI('\medskip',   undef, '\vskip\medskipamount');
DefMacroI('\bigskip',   undef, '\vskip\bigskipamount');

#======================================================================
# TeX Book, Appendix B, p. 353

# The line-breaking hints are defined with no replacement.
DefPrimitiveI('\break',      undef, undef);
DefPrimitiveI('\nobreak',    undef, undef);
DefPrimitiveI('\allowbreak', undef, undef);

# A non-breaking space: a Box holding U+00A0 that reverts to the active ~.
DefPrimitiveI('\nobreakspace', undef, sub {
    Box(UTF(0xA0), undef, undef, T_ACTIVE("~"),
      width => Dimension('0.333em'), isSpace => 1); });

DefMacro("~", '\nobreakspace{}');

DefMacroI('\slash', undef, '/');
DefPrimitiveI('\filbreak', undef, undef);
# The page-breaking commands reduce to \par, followed for \eject (and so \newpage)
# and \supereject by a pagination marker.
DefMacroI('\goodbreak', undef, '\par');
DefMacroI('\eject',     undef, '\par\LTX@newpage');
Let('\newpage', '\eject');
DefConstructorI('\LTX@newpage', undef, "^<ltx:pagination role='newpage'/>");

DefMacroI('\supereject', undef, '\par\LTX@newpage');
DefPrimitiveI('\removelastskip', undef, undef);
# The vertical "break" commands just end the paragraph.
DefMacroI('\smallbreak', undef, '\par');
DefMacroI('\medbreak',   undef, '\par');
DefMacroI('\bigbreak',   undef, '\par');

DefMacroI('\line', undef, '\hbox to \hsize');
# \leftline, \rightline and \centerline put their argument into an \hbox and hand it to
# an internal constructor, which places it through alignLine with the matching alignment.
# Each internal constructor is bounded and is aliased to the user-level name.
DefMacro('\leftline Undigested',   '\ltx@leftline{\hbox{#1}}');
DefMacro('\rightline Undigested',  '\ltx@rightline{\hbox{#1}}');
DefMacro('\centerline Undigested', '\ltx@centerline{\hbox{#1}}');
DefConstructor('\ltx@leftline{}', sub {
    alignLine($_[0], $_[1], 'left'); },
  alias   => '\leftline',
  bounded => 1);
DefConstructor('\ltx@rightline{}', sub {
    alignLine($_[0], $_[1], 'right'); },
  alias   => '\rightline',
  bounded => 1);
DefConstructor('\ltx@centerline{}', sub {
    alignLine($_[0], $_[1], 'center'); },
  alias   => '\centerline',
  bounded => 1);

# Insert $line into the document with the given alignment ('left', 'right' or 'center').
# An ltx:p is used where one can be opened here; failing that an ltx:text followed by
# an ltx:break; the element carries the class ltx_align_<alignment>.
# When neither can be opened, the line is simply absorbed as it is.
sub alignLine {
  my ($document, $line, $alignment) = @_;
  my $container = ($document->isOpenable('ltx:p') ? 'ltx:p'
    : ($document->isOpenable('ltx:text') ? 'ltx:text' : undef));
  if (!defined $container) {
    $document->absorb($line);
    return; }
  $document->insertElement($container, $line, class => 'ltx_align_' . $alignment);
  # Inline text does not end the line by itself, so add an explicit break.
  $document->insertElement('ltx:break') if $container eq 'ltx:text';
  return; }

# These should be 0 width, but perhaps also shifted?
# Both put the argument into a zero-width \hbox, with \hss before (\llap) or after (\rlap).
DefMacro('\llap{}', '\hbox to 0pt{\hss#1}');
DefMacro('\rlap{}', '\hbox to 0pt{#1\hss}');

DefMacroI('\m@th', undef, '\mathsurround=0pt ');

# \strutbox
# \strut expands to nothing here; \strutbox is still allocated as a box register.
DefMacroI('\strut', undef, Tokens());
RawTeX('\newbox\strutbox');

#======================================================================
# TeX Book, Appendix B. p. 354

# TODO: Not yet done!!
# tabbing stuff!!!
# \settabs is only registered as a placeholder: no parameters and no expansion body.

DefMacroI('\settabs', undef, undef);

#======================================================================
# TeX Book, Appendix B. p. 355

# TODO: \item, \itemitem are only defined as plain macros (paragraph + hanging
# indentation + \textindent); they do not produce list markup.
# This could probably be adopted from LaTeX, if the <itemize> could auto-open
# and close!
DefMacro('\hang',         '\hangindent\parindent');
DefMacro('\item',         '\par\hang\textindent');
DefMacro('\itemitem',     '\par\indent \hangindent2\parindent \textindent');
# \textindent{label}: indents, then sets the label (plus \enspace) in an \llap.
DefMacro('\textindent{}', '\indent\llap{#1\enspace}\ignorespaces');
# \narrower advances both \leftskip and \rightskip by \parindent.
DefMacro('\narrower', '\advance\leftskip by\parindent'
    . '\advance\rightskip by\parindent');

#----------------------------------------------------------------------
# General support for Front Matter.
# Not (yet) used by TeX (finish plain?)
# But provides support for LaTeX (and other formats?) for handling frontmatter.
#
# The idea is to accumulate any frontmatter material (title, author,...)
# rather than directly drop it into the digested stream.
# When we begin constructing the document, all accumulated material is output.
# See LaTeX.ltxml for usage.
# Note: could be circumstances where you'd want modular frontmatter?
# (ie. frontmatter for each sectional unit)
# 'frontmatter' is a global hash; \@add@frontmatter fills it with a list of entries per tag.
AssignValue(frontmatter => {}, 'global');

# \if@in@preamble tests the current value of 'inPreamble'.
DefConditionalI('\if@in@preamble', undef, sub { LookupValue('inPreamble'); });

# Add a new frontmatter item that will be enclosed in <$tag %attr>...</$tag>
# The content is the result of digesting $tokens.
# \@add@frontmatter[keys]{tag}[attributes]{content}
# keys can have
#   replace (to replace the current entry, if any)
#   ifnew   (only add if no previous entry)
# Each stored entry is an array [$tag, $attributes (hash ref or undef), $digested_content],
# appended to the list kept under $tag in the 'frontmatter' hash.
# The whole primitive runs inside a group (bgroup in beforeDigest, egroup in afterDigest).
DefPrimitive('\@add@frontmatter OptionalKeyVals {} OptionalKeyVals {}', sub {
    my ($stomach, $keys, $tag, $attr, $tokens) = @_;
    # Digest this as if we're already in the document body!
    my $frontmatter = LookupValue('frontmatter');
    my $inpreamble  = LookupValue('inPreamble');
    AssignValue(inPreamble => 0);
    # Be careful since the contents may also want to add frontmatter
    # (which should be inside or after this one!)
    # So, we append this entry before digesting
    $tag = ToString($tag);
    if ($keys && $keys->hasKey('replace') && $$frontmatter{$tag}) {    # if replace and previous entries
      $$frontmatter{$tag} = []; }                                      # Remove previous entries
    # NOTE(review): this early return skips the explicit restore of inPreamble below;
    # presumably the egroup in afterDigest undoes the assignment -- confirm.
    if ($keys && $keys->hasKey('ifnew') && $$frontmatter{$tag}) {      # if ifnew and previous entries
      return; }                                                        # Skip this one.
    # Register a placeholder entry first; its attributes and content are filled in below.
    my $entry = [$tag, undef, 'to-be-filled-in'];
    push(@{ $$frontmatter{$tag} }, $entry);
    if ($attr) {
      $$entry[1] = { $attr->beDigested($stomach)->getHash }; }
    # The content is digested wrapped in its own { } group.
    $$entry[2] = Digest(Tokens(T_BEGIN, $tokens, T_END));
    AssignValue(inPreamble => $inpreamble);
    return; },
  beforeDigest => sub {
    $_[0]->bgroup; },
  afterDigest => sub {
    $_[0]->egroup; });

# Append a piece of data to an existing frontmatter item that is contained in <$tag>
# If $label is given, look for an item which has label=>$label,
# otherwise, just append to the last item in $tag.

# \@add@to@frontmatter{tag}[label]{content}
DefPrimitive('\@add@to@frontmatter {} [] {}', sub {
    my ($stomach, $tag, $label, $tokens) = @_;
    $tag   = ToString($tag);
    $label = ToString($label) if $label;
    my $frontmatter = LookupValue('frontmatter');

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN


# The known frontmatter elements, in the order insertFrontMatter emits them;
# %frontmatter_elements is the same set as a lookup table.
my @frontmatter_elements = (qw(ltx:title ltx:toctitle ltx:subtitle
    ltx:creator ltx:date
    ltx:abstract ltx:keywords ltx:classification ltx:acknowledgements));
my %frontmatter_elements = map { ($_ => 1) } @frontmatter_elements;

# Insert FrontMatter into document, if not already added.
# The known elements (@frontmatter_elements) are emitted first, in that order,
# followed by any other keys that were accumulated in the 'frontmatter' hash.
# If the ONLY frontmatter is an abstract, the first call just records
# 'frontmatter_deferred' and returns, leaving insertion to a later call.
sub insertFrontMatter {
  my ($document) = @_;
  return if LookupValue('frontmatter_done');
  my $frontmatter = LookupValue('frontmatter');
  my @set_keys    = $frontmatter ? (keys %$frontmatter) : ();
  # if doc ONLY has abstract as frontmatter, defer until abstract's document location
  my $abstract_only = (scalar(@set_keys) == 1) && ($set_keys[0] eq 'ltx:abstract');
  if ($abstract_only && !LookupValue('frontmatter_deferred')) {
    AssignValue(frontmatter_deferred => 1, 'global');
    return; }
  AssignValue(frontmatter_done => 1, 'global');    # OK, we're placing FrontMatter here, now.
  my @other_keys = grep { !$frontmatter_elements{$_} } @set_keys;
  foreach my $key (@frontmatter_elements, @other_keys) {
    my $list = $$frontmatter{$key};
    next unless $list;
    # Dubious, but assures that frontmatter appears in text mode...
    local $LaTeXML::BOX = Box('', $STATE->lookupValue('font'), '', T_SPACE);
    foreach my $item (@$list) {
      my ($tag, $attr, @stuff) = @$item;
      # add a dedicated class for frontmatter notes,
      # in the case we want to style those uniformly.
      if ($tag eq 'ltx:note') {
        $attr ||= {};
        my $old_class = $$attr{class};
        $$attr{class} = ($old_class ? $old_class . ' ' : '') . 'ltx_note_frontmatter'; }
      # Carry the font of the first content item onto the element, when it accepts one.
      my @font_attr = ();
      if (scalar(@stuff) && $document->canHaveAttribute($tag, 'font')) {
        @font_attr = (font => $stuff[0]->getFont, _force_font => 'true'); }
      $document->openElement($tag, ($attr ? %$attr : ()), @font_attr);
      foreach my $piece (@stuff) {
        $document->absorb($piece); }
      my $completed_node = $document->closeElement($tag);
      # At this time, the frontmatter element should really carry the actual literal values intended.
      # Thus, if we see an empty element, something went wrong -- including our bindings are too verbose,
      # as e.g. \preprint{} always generates a ltx:note element.
      #
      # To solve this in a single location: prune here!
      if (($tag ne "ltx:rdf") && !scalar($completed_node->childNodes)) {
        $document->removeNode($completed_node); } } }
  return; }

# Add FrontMatter at document begin, unless deferred to a better position.
# (runs late in the afterOpen hooks of ltx:document)
Tag('ltx:document', 'afterOpen:late' => sub {
    insertFrontMatter($_[0]) unless LookupValue('frontmatter_deferred'); });
# Request Frontmatter to appear HERE (if not already done),
# deferring it from document begin.
# The deferral flag is set globally at digestion time (afterDigest);
# the insertion itself happens when the constructor is run.
DefConstructor('\lx@frontmatterhere', sub { insertFrontMatter($_[0]); },
  afterDigest => sub { AssignValue(frontmatter_deferred => 1, 'global'); });

# Maintain a list of classes that apply to the document root.
# This might involve global style options, like leqno.
# The keys of the DOCUMENT_CLASSES mapping are joined with spaces and added
# as classes to the root element; nothing is added when there are no keys.
Tag('ltx:document', 'afterOpen:late' => sub {
    my ($document, $root) = @_;
    if (my $classes = join(' ', LookupMappingKeys('DOCUMENT_CLASSES'))) {
      $document->addClass($root, $classes); } });

# If folks start using plain TeX macros, and never load LaTeX.pool,
# they might benefit from a ltx-plain.css?
# \beginsection reads everything up to \par as the title and sets it in bold.
DefMacro('\beginsection Until:\par', '\@beginsection{{\bf #1}}');
# Opens an ltx:section with that title; the section element itself is left open.
DefConstructor('\@beginsection {}',
  "<ltx:section><ltx:title>#1</ltx:title>");

# POSSIBLY #1 is a name or reference number and #2 is the theorem TITLE
#  If so, how do we know when the theorem ends?
# \proclaim reads "#1. #2\par": #1 (bold) becomes the title, #2 (slanted) the body.
DefMacroI('\proclaim', parseDefParameters('\proclaim', Tokenize('#1. #2\par')),
  '\@proclaim{{\bf #1}}{{\sl #2}}');
# The title and its font are passed to the template as properties;
# the ltx:theorem is closed (if possible) once construction is done.
DefConstructor('\@proclaim{}{}',
  "<ltx:theorem>"
    . "<ltx:title font='#titlefont' _force_font='true' >#title</ltx:title>"
    . "#2",
  afterConstruct => sub { $_[0]->maybeCloseElement('ltx:theorem'); },
  properties     => sub {
    my $title = $_[1];
    (title => $title, titlefont => $title->getFont); });

#======================================================================
# Tags & Titles
# The reference numbers, titles, captions etc, for various objects have
# different styling conventions, and the styling varies depending on context.
# We'll use ltx:tags as a container for the various forms of ltx:tag with different @role's.
# The role=refnum form is simply formatted by \the<counter> and used by \ref;
# An ltx:tag w/o @role are for the numbers, often formatted differently, which
# appear alongside the object; Such a tag also may be embedded within the title or caption.
# Cross-references automatically generated by LaTeXML benefit from a bit more context:
# these are the role=typerefnum forms.
# Additional forms are needed for bibliographies, hyperref's autoref, etc.
# An additional complication is that while the "type" determines the formatting
# of the various forms, some types (eg. theorems) share the same counter.
# LaTeX defines this handling on an adhoc basis; defines \fnum@table, \fnum@figure for some types
# but \labelenumi, etc for others.

# This section synthesizes a more uniform support for reference numbers,
# references to reference numbers, title formatting etc.
# It allows you to customize each of the forms for each type encountered.
# The design reflects LaTeX needs, more than TeX, but support starts here!

# This collects up the various declared ltx:tag's into an ltx:tags.
# For every role registered in 'type_tag_formatter' (taken in sorted order),
# an \lx@tag@intags invocation is built that applies the role's formatter to $type;
# all of them are then wrapped in a single \lx@tags{...}.
DefMacro('\lx@make@tags {}', sub {
    my ($gullet, $type) = @_;
    my $formatters = LookupValue('type_tag_formatter');
    my @tags = map {
      Invocation(T_CS('\lx@tag@intags'), T_OTHER($_),
        Invocation($$formatters{$_}, $type))
    } sort keys %{$formatters};
    return (T_CS('\lx@tags'), T_BEGIN, @tags, T_END); });

# Remove the last closed node, if it's empty.
# Intended as an afterConstruct hook: it looks at the last child of the document's
# current node (expected to be the wrapper element that was just added) and removes
# it when it has no child nodes.
#   $document : the document under construction
#   $whatsit  : the whatsit being constructed (unused)
# Returns nothing.
sub removeEmptyElement {
  my ($document, $whatsit) = @_;
  my $node = $document->getNode->lastChild;    # This should be the wrapper just added.
  # If nothing at all was added under the current node, there is nothing to prune
  # (and calling a method on the missing node would be fatal).
  return unless defined $node;
  if (!$node->childNodes) {
    $document->removeNode($node); }
  return; }

# \lx@tag[open][close][unused]{stuff}
# The parameter spec declares three optional arguments; the template only uses
# #1 (open), #2 (close) and the required #4 (stuff).
# NOTE(review): the third optional argument is not referenced anywhere in the template -- confirm it is intentional.
# The resulting ltx:tag is removed again if it ends up empty.
DefConstructor('\lx@tag[][][]{}',
  "<ltx:tag open='#1' close='#2'>#4</ltx:tag>",
  bounded        => 1, mode => 'text',
  afterConstruct => \&removeEmptyElement);

# \lx@tag@intags[role]{stuff}
# The role is declared as an optional argument; the content is digested in text mode
# with a neutralized font, and the ltx:tag is removed again if it ends up empty.
DefConstructor('\lx@tag@intags[]{}',
  "<ltx:tag role='#1'>#2</ltx:tag>",
  bounded        => 1, mode => 'text',
  beforeDigest   => sub { reenterTextMode(); neutralizeFont() },
  afterConstruct => \&removeEmptyElement);

# \lx@tags{tags}: the container for the individual ltx:tag's; removed if empty.
DefConstructor('\lx@tags{}',
  "<ltx:tags>#1</ltx:tags>",
  afterConstruct => \&removeEmptyElement);

#----------------------------------------------------------------------
# "refnum" is the lowest level reference number for an object; it is typically \the<counter>,
# but be sure to use the right counter!  This is how \ref will show the number.
# You'll typically customize this by defining \the<counter> (and \p@<counter>) as in LaTeX.
# \lx@counterfor{type}: expands to the counter registered for type in the
# 'counter_for_type' mapping, or to type itself when none is registered.
DefMacro('\lx@counterfor{}', sub {
    my ($gullet, $type) = @_;
    my $counter = LookupMapping('counter_for_type', ToString($type));
    return T_OTHER($counter) if $counter;
    return $type; });
# \lx@the@@{type}: expands \lx@counterfor{type} first, then yields \the<counter>.
DefMacro('\lx@the@@{}',  '\expandafter\lx@@the@@\expandafter{\lx@counterfor{#1}}');
DefMacro('\lx@@the@@{}', '\csname the#1\endcsname');

# \lx@therefnum@@{type}: likewise resolves the counter, then yields
# \p@<counter>\the<counter> in \normalfont, inside a group.
DefMacro('\lx@therefnum@@{}', '\expandafter\lx@@therefnum@@\expandafter{\lx@counterfor{#1}}');
DefMacro('\lx@@therefnum@@{}',
  '{\normalfont\csname p@#1\endcsname\csname the#1\endcsname}');

# Register it as the formatter for the role=refnum tag.
AssignMapping('type_tag_formatter', 'refnum' => '\lx@therefnum@@');

#----------------------------------------------------------------------
# \lx@fnum@@{type}  Gets the formatted form of the refnum, as part of the object, (no @role).
# Customize by defining \fnum@<type> or \<type>name and \fnum@font@<type>
# Default uses \fnum@font@<type> \<type>name prefix + space (if any) and \the<counter>.
# When using the "name", \lx@name@<type> (if defined) is used in preference to \<type>name.
# \lx@refnum@compose{name}{number}: expands the first token of number once, then yields
# just the name when that is empty, otherwise "name<space>number".
DefMacro('\lx@refnum@compose{}{}',  '\expandafter\lx@refnum@compose@\expandafter{#2}{#1}');
# Note the swapped arguments here: #1 is the number, #2 the name.
DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2\space#1\fi');
####DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2~#1\fi');

# In \normalfont (plus \fnum@font@<type>, if defined):
# \fnum@<type> if defined, else the default composition \lx@@fnum@@{type}.
DefMacro('\lx@fnum@@{}',
  '{\normalfont\@ifundefined{fnum@font@#1}{}{\csname fnum@font@#1\endcsname}'
    . '\@ifundefined{fnum@#1}{\lx@@fnum@@{#1}}{\csname fnum@#1\endcsname}}');

# Really seems like <type>name should take precedence over \lx@name@<type>,
# since users might define it.
# BUT amsthm defines \thmname{}!
# So: \lx@name@<type> is tried first, then \<type>name, else just \the<counter>.
DefMacro('\lx@@fnum@@ {}',
  '\@ifundefined{lx@name@#1}{'
    . '\@ifundefined{#1name}{'
    . '\lx@the@@{#1}'
    . '}{'
    . '\lx@refnum@compose{\csname #1name\endcsname}{\lx@the@@{#1}}'
    . '}}{'
    . '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\lx@the@@{#1}}'
    . '}');

AssignMapping('type_tag_formatter', '' => '\lx@fnum@@');    # Default!

#----------------------------------------------------------------------
# \lx@fnum@toc@@{type} is similar, but formats the number for use within \toctitle
# Customize by defining \fnum@toc@<type> or \fnum@tocfont@<type>
# Default (when \fnum@toc@<type> is undefined) uses just \the<counter>, via \lx@the@@{type}.
DefMacro('\lx@fnum@toc@@{}',
  '{\normalfont\@ifundefined{fnum@tocfont@#1}{}{\csname fnum@tocfont@#1\endcsname}'
    . '\@ifundefined{fnum@toc@#1}{\lx@the@@{#1}}{\csname fnum@toc@#1\endcsname}}');

#----------------------------------------------------------------------
# "typerefnum" form is used by automatic cross-references, typically "type number" or similar.
# Customize by defining \typerefnum@<type> or \typerefnum@font@<type>
# Default uses \<type>typerefname, else \lx@name@<type>, else \<type>name,
# followed by a space, then \p@<type>\the<counter>.
# If none of those names is defined, the default produces nothing at all.
DefMacro('\lx@typerefnum@@{}',
  '{\normalfont\@ifundefined{typerefnum@font@#1}{}{\csname typerefnum@font@#1\endcsname}'
    . '\@ifundefined{typerefnum@#1}{\lx@@typerefnum@@{#1}}{\csname typerefnum@#1\endcsname}}');

DefMacro('\lx@@typerefnum@@{}',
  '\@ifundefined{#1typerefname}{'
    . '\@ifundefined{lx@name@#1}{'
    . '\@ifundefined{#1name}{'
    . '}{'
    . '\lx@refnum@compose{\csname #1name\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}'
    . '}}{'
    . '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}'
    . '}}{'
    . '\lx@refnum@compose{\csname #1typerefname\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}'
    . '}');

AssignMapping('type_tag_formatter', 'typerefnum' => '\lx@typerefnum@@');

#----------------------------------------------------------------------
# The following macros provide similar customization for titles & toctitles
# in particular for supporting localization for different languages.
# Redefine these if you want to assemble the name (eg. \chaptername), refnum and titles differently
#----------------------------------------------------------------------
# \lx@format@title@@{type}{title}
# Format a title (or caption) appropriately for type.
# Customize by defining \format@title@type{title}
# Default composes \lx@fnum@@{type} space title.
# The title is first wrapped in a group together with \format@title@font@<type> (if defined).
DefMacro('\lx@format@title@@{}{}',
  '\lx@@format@title@@{#1}'
    . '{{\lx@format@title@font@@{#1}#2}}');
DefMacro('\lx@@format@title@@{}{}',
  '{\@ifundefined{format@title@#1}'
    . '{\lx@@compose@title{\lx@fnum@@{#1}}{#2}}'
    . '{\csname format@title@#1\endcsname{#2}}}');

# \lx@format@toctitle@@{type}{toctitle}
# Similar for toctitle, typically briefer
# Customize by defining \format@toctitle@type{title}
# Default composes \lx@fnum@toc@@{type} space title.
DefMacro('\lx@format@toctitle@@{}{}',
  '\lx@@format@toctitle@@{#1}'
    . '{{\lx@format@toctitle@font@@{#1}#2}}');

DefMacro('\lx@@format@toctitle@@{}{}',
  '{\@ifundefined{format@toctitle@#1}'
    . '{\lx@@compose@title{\lx@fnum@toc@@{#1}}{#2}}'
    . '{\csname format@toctitle@#1\endcsname{#2}}}');

# \lx@@compose@title{number}{title}: the number goes into an \lx@tag
# (empty open, a single space as close), followed by the title.
DefMacro('\lx@@compose@title{}{}', '\lx@tag[][ ]{#1}#2');

# Font hooks: expand to \format@title@font@<type> / \format@toctitle@font@<type>
# when defined, else to nothing.
DefMacro('\lx@format@title@font@@{}',
  '\@ifundefined{format@title@font@#1}{}{\csname format@title@font@#1\endcsname}');
DefMacro('\lx@format@toctitle@font@@{}',
  '\@ifundefined{format@toctitle@font@#1}{}{\csname format@toctitle@font@#1\endcsname}');

## NOTE that a 3rd form seems desirable: a concise form that cannot rely on context for the type.
## This would be useful for the titles in links; thus can be plain (unicode) text.

#======================================================================
# TeX Book, Appendix B. p. 356

# Paragraph-shape and mode commands, registered with no replacement code.
# (\raggedleft is actually LaTeX.)
foreach my $cs ('\raggedright', '\raggedleft', '\ttraggedright', '\leavevmode') {
  DefPrimitiveI($cs, undef, undef); }
# \mathhexbox{family}{hi}{lo}: an \hbox containing the math character "<family><hi><lo>.
DefMacro('\mathhexbox{}{}{}', '\leavevmode\hbox{$\m@th \mathchar"#1#2#3$}');

#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.2. Non-English Symbols, p.39

# The following shouldn't appear in math.
# Each command is a primitive producing a single Unicode character.
# (Code points below U+0100 are written with UTF(...), the others as "\x{...}" escapes.)
DefPrimitiveI('\OE', undef, "\x{0152}");    # LATIN CAPITAL LIGATURE OE
DefPrimitiveI('\oe', undef, "\x{0153}");    # LATIN SMALL LIGATURE OE
DefPrimitiveI('\AE', undef, UTF(0xC6));     # LATIN CAPITAL LETTER AE
DefPrimitiveI('\ae', undef, UTF(0xE6));     # LATIN SMALL LETTER AE
DefPrimitiveI('\AA', undef, UTF(0xC5));     # LATIN CAPITAL LETTER A WITH RING ABOVE
DefPrimitiveI('\aa', undef, UTF(0xE5));     # LATIN SMALL LETTER A WITH RING ABOVE
DefPrimitiveI('\O',  undef, UTF(0xD8));     # LATIN CAPITAL LETTER O WITH STROKE
DefPrimitiveI('\o',  undef, UTF(0xF8));     # LATIN SMALL LETTER O WITH STROKE
DefPrimitiveI('\L',  undef, "\x{0141}");    # LATIN CAPITAL LETTER L WITH STROKE
DefPrimitiveI('\l',  undef, "\x{0142}");    # LATIN SMALL LETTER L WITH STROKE
DefPrimitiveI('\ss', undef, UTF(0xDF));     # LATIN SMALL LETTER SHARP S

# apparently the rest can appear in math.
# \S and \P are macros expanding to internal primitives, which carry the
# user-level name as their alias.
DefPrimitiveI('\lx@sectionsign',   undef, UTF(0xa7), alias => '\S');    # SECTION SIGN
DefPrimitiveI('\lx@paragraphsign', undef, UTF(0xB6), alias => '\P');    # PILCROW SIGN
DefMacroI('\S', undef, '\lx@sectionsign');
DefMacroI('\P', undef, '\lx@paragraphsign');
DefPrimitiveI('\dag',       undef, "\x{2020}");                         # DAGGER
DefPrimitiveI('\ddag',      undef, "\x{2021}");                         # DOUBLE DAGGER
DefPrimitiveI('\copyright', undef, UTF(0xA9));                          # COPYRIGHT SIGN
DefPrimitiveI('\pounds',    undef, UTF(0xA3));                          # POUND SIGN

#----------------------------------------------------------------------
# Accents.  LaTeX Table 3.1, p.38
#----------------------------------------------------------------------
# All of TeX's accents can (sorta) be handled by Unicode's combining accents
# (which follow the character to be accented).
# We'll let unicode normalization do the combination, if needed.
# Also, note that \t is intended to combine multiple chars, but it appears to
# work (via mozilla !?) best when the combining char is after the 1st char.
# Further, the accents \d and \b seem to center the under dot or bar under multiple
# chars --- how should this be handled in Unicode?

# Since people sometimes try to get fancy by using an empty argument,
# for each, I'm providing the combining code and an equivalent(?) spacing one.
# (doesn't look quite the same to use a combining char after a space)

# Create a box applying an accent to a letter.
# $letter is digested and its string form taken; dotless i and j (U+0131, U+0237)
# are mapped back to plain i and j, and every whitespace character becomes a space.
# If nothing but whitespace remains, the box holds $standalonechar; otherwise
# $combiningchar is placed right after the FIRST character and the result is
# NFC-normalized.  The box keeps the font and locator of the digested letter
# and uses $reversion as its reversion.
sub applyAccent {
  my ($stomach, $letter, $combiningchar, $standalonechar, $reversion) = @_;
  my $digested = $stomach->digest($letter);
  my $locator  = $digested->getLocator;
  my $font     = $digested->getFont;
  my $text     = $digested->toString;
  $text =~ tr/\x{0131}\x{0237}/ij/;
  $text =~ s/\s/ /g;
  my $accented;
  if ($text =~ /^\s*$/) {
    $accented = $standalonechar; }
  else {
    $accented = NFC(substr($text, 0, 1) . $combiningchar . substr($text, 1)); }
  return Box($accented, $font, $locator, $reversion); }

# Defines an accent command using a combining char that follows the
# 1st char of the argument.  In cases where there is no argument, $standalonechar is used.
# Options: above => 1 or below => 1; the accent counts as "above" unless told otherwise.
# The standalone char is also recorded in the 'accent_combiner_above' or
# 'accent_combiner_below' mapping, for converting a char used as an accent
# into its combining char (See \accent).
sub DefAccent {
  my ($accent, $combiningchar, $standalonechar, %options) = @_;
  if (!defined $options{above} && !$options{below}) {
    $options{above} = 1; }
  my $mapping = ($options{above} ? 'accent_combiner_above' : 'accent_combiner_below');
  AssignMapping($mapping, $standalonechar => $combiningchar);
  # The accent itself is a protected macro that passes its own name, both chars
  # and the argument on to \lx@applyaccent.
  my $expansion = Tokens(T_CS('\lx@applyaccent'), T_OTHER($accent),
    T_OTHER($combiningchar), T_OTHER($standalonechar),
    T_BEGIN, T_ARG(1), T_END);
  DefMacroI($accent, "{}", $expansion, protected => 1);
  return; }

# \lx@applyaccent <accent> <combiningchar> <standalonechar> {letter}
# Text-mode primitive behind the accents made by DefAccent: applies the accent
# to the letter, reverting to <accent>{letter}.
DefPrimitiveI('\lx@applyaccent', "DefToken Token Token {}", sub {
    my ($stomach, $accent, $combiningchar, $standalonechar, $letter) = @_;
    my $reversion = Tokens(T_CS($accent->getString), T_BEGIN, $letter, T_END);
    return applyAccent($stomach, $letter,
      $combiningchar->getString, $standalonechar->getString, $reversion); },
  mode => 'text');

# Each entry: accent command, combining character, standalone (spacing) character.
# Accents are "above" accents unless below => 1 is given.
DefAccent('\`',           "\x{0300}", UTF(0x60));  # COMBINING GRAVE ACCENT & GRAVE ACCENT
DefAccent("\\'",          "\x{0301}", UTF(0xB4));  # COMBINING ACUTE ACCENT & ACUTE ACCENT
DefAccent('\^',           "\x{0302}", UTF(0x5E));  # COMBINING CIRCUMFLEX ACCENT & CIRCUMFLEX ACCENT
DefAccent('\"',           "\x{0308}", UTF(0xA8));  # COMBINING DIAERESIS & DIAERESIS
DefAccent('\~',           "\x{0303}", "~");        # COMBINING TILDE
DefAccent('\=',           "\x{0304}", UTF(0xAF));  # COMBINING MACRON & MACRON
DefAccent('\.',           "\x{0307}", "\x{02D9}"); # COMBINING DOT ABOVE & DOT ABOVE
DefAccent('\u',           "\x{0306}", "\x{02D8}"); # COMBINING BREVE & BREVE
DefAccent('\v',           "\x{030C}", "\x{02C7}"); # COMBINING CARON & CARON
DefAccent('\@ringaccent', "\x{030A}", "o");        # COMBINING RING ABOVE & non-combining
DefAccent('\r',           "\x{030A}", "o");        # COMBINING RING ABOVE & non-combining
DefAccent('\H',           "\x{030B}", "\x{02DD}"); # COMBINING DOUBLE ACUTE ACCENT & non-combining
DefAccent('\c',           "\x{0327}", UTF(0xB8), below => 1);    # COMBINING CEDILLA & CEDILLA
    # NOTE: The next two get defined for math, as well (as \d and \b).
DefAccent('\@text@daccent', "\x{0323}", '.',       below => 1);   # COMBINING DOT BELOW & DOT (?)
DefAccent('\@text@baccent', "\x{0331}", UTF(0xAF), below => 1);   # COMBINING MACRON BELOW  & MACRON
DefAccent('\t',             "\x{0361}", "-");    # COMBINING DOUBLE INVERTED BREVE & ???? What????
    # this one's actually defined in mathscinet.sty, but just stick it here!
DefAccent('\lfhook', "\x{0326}", ",", below => 1);   # COMBINING COMMA BELOW
                                                     # I doubt that latter covers multiple chars...?
    #DefAccent('\bar',"\x{0304}", ?);  # COMBINING MACRON or is this the longer overbar?

# This will fail if there really are "assignments" after the number!
# We're given a number pointing into the font, from which we can derive the standalone char.
# From that, we want to figure out the combining character, but there could be one for
# both the above & below cases!  We'll prefer the above case.
# If no combining character is known for the accent, a warning is issued and
# the letter is boxed without any accent.
DefPrimitive('\accent Number {}', sub {
    my ($stomach, $num, $letter) = @_;
    my $n        = $num->valueOf;
    my $fontinfo = lookupFontinfo(LookupValue('textfont_0'));
    # Decode the accent through the font's encoding, when known; else take the code as is.
    my $acc;
    if ($fontinfo && $$fontinfo{encoding}) {
      $acc = FontDecode($n, $$fontinfo{encoding}); }
    else {
      $acc = chr($n); }
    my $reversion = Invocation(T_CS('\accent'), $num, $letter);
    # NOTE: REVERSE LOOKUP in above accent list for the non-spacing accent char
    # BUT, \accent always (?) makes an above type accent... doesn't it?
    my $combiner = LookupMapping('accent_combiner_above', $acc)
      || LookupMapping('accent_combiner_below', $acc);
    if ($combiner) {
      return applyAccent($stomach, $letter, $combiner, $acc, $reversion); }
    Warn('unexpected', "accent$n", $stomach, "Accent '$n' not recognized");
    return Box(ToString($letter), undef, undef, $reversion); });

# Note that these two apparently work in Math? BUT the argument is treated as text!!!
# So \d and \b dispatch on the current mode to a math or a text variant.
DefMacro('\d{}', '\ifmmode\@math@daccent{#1}\else\@text@daccent{#1}\fi');
DefMacro('\b{}', '\ifmmode\@math@baccent{#1}\else\@text@baccent{#1}\fi');

# Math variant of \d: an UNDERACCENT application of U+22C5 to the argument.
# The argument is digested in text mode; afterwards it is stored either as
# 'matharg' (the body, if the argument turned out to be math) or as 'textarg',
# and the template picks ltx:XMArg or ltx:XMText accordingly.
DefConstructor('\@math@daccent {}',
  "<ltx:XMApp><ltx:XMTok role='UNDERACCENT'>\x{22c5}</ltx:XMTok>"
    . "?#textarg(<ltx:XMText>#textarg</ltx:XMText>)(<ltx:XMArg>#matharg</ltx:XMArg>)"
    . "</ltx:XMApp>",
  mode        => 'text', alias => '\d',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $accented = $whatsit->getArg(1);
    $whatsit->setProperty($accented->isMath
      ? (matharg => $accented->getBody)
      : (textarg => $accented));
    return; });

# Math variant of \b: an UNDERACCENT application of U+00AF (macron) to the argument.
# The argument is digested in text mode; afterwards it is stored either as
# 'matharg' (the body, if the argument turned out to be math) or as 'textarg',
# and the template picks ltx:XMArg or ltx:XMText accordingly.
DefConstructor('\@math@baccent {}',
  "<ltx:XMApp><ltx:XMTok role='UNDERACCENT'>" . UTF(0xAF) . "</ltx:XMTok>"
    . "?#textarg(<ltx:XMText>#textarg</ltx:XMText>)(<ltx:XMArg>#matharg</ltx:XMArg>)"
    . "</ltx:XMApp>",
  mode        => 'text', alias => '\b',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $accented = $whatsit->getArg(1);
    $whatsit->setProperty($accented->isMath
      ? (matharg => $accented->getBody)
      : (textarg => $accented));
    return; });

#======================================================================
# TeX Book, Appendix B. p. 357

# The various "fill" commands are registered with no replacement code.
foreach my $op ('\hrulefill', '\dotfill', '\rightarrowfill', '\leftarrowfill',
  '\upbracefill', '\downbracefill') {
  DefPrimitiveI($op, undef, undef); }

# \bye is the same as \end.
Let('\bye', '\end');

# \sp and \sb are alternatives for the superscript and subscript characters.
Let('\sp', T_SUPER);
Let('\sb', T_SUB);

# Explicit spacing commands.  Each produces a box holding a Unicode space
# character, flagged with isSpace and given a nominal width.
# \, is a thin space (U+2009) in both modes: in math its width is \thinmuskip,
# in text a fixed 0.16667em.  Both variants revert to \,
DefPrimitiveI('\lx@thinmuskip', undef, sub {
    Box("\x{2009}", undef, undef, T_CS('\,'),
      name  => 'thinspace', isSpace => 1,
      width => LookupRegister('\thinmuskip')); });
DefPrimitiveI('\lx@thinspace', undef, sub {
    Box("\x{2009}", undef, undef, T_CS('\,'),
      name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); });
DefMacroI('\,', undef, '\ifmmode\lx@thinmuskip\else\lx@thinspace\fi', protected => 1);

# \! : negative thin space; its width is the negated \thinmuskip.
DefPrimitiveI('\!', undef, sub {
    Box("\x{200B}", undef, undef, T_CS('\!'),    # zero width space
      name  => 'negthinspace', isSpace => 1,
      width => LookupRegister('\thinmuskip')->negate); });
# \> : medium space (U+2005), width \medmuskip.
DefPrimitiveI('\>', undef, sub {
    Box("\x{2005}", undef, undef, T_CS('\>'),
      name  => 'medspace', isSpace => 1,
      width => LookupRegister('\medmuskip')); });

# \; : thick space (U+2004), width \thickmuskip.
DefPrimitiveI('\;', undef, sub {
    Box("\x{2004}", undef, undef, T_CS('\;'),
      name  => 'thickspace', isSpace => 1,
      width => LookupRegister('\thickmuskip')); });

# \: is the same as \>
Let('\:', '\>');

# Control space: a no-break space (U+00A0) with a nominal width of 0.5em.
DefPrimitiveI('\ ', undef, sub {
    Box(UTF(0xA0), undef, undef, T_CS('\ '),
      name => 'space', isSpace => 1, width => Dimension('0.5em')); });

# Backslash followed by a TAB character: a no-break space with a nominal width of 1em.
DefPrimitiveI("\\\t", undef, sub {
    Box(UTF(0xA0), undef, undef, T_CS("\\\t"),
      isSpace => 1, width => Dimension('1em')); });

# Italic correction: an empty box of zero width.
DefPrimitiveI('\/', undef, sub {
    Box("", undef, undef, T_CS('\/'),
      isSpace => 1, name => 'italiccorr', width => Dimension('0em')); });

#======================================================================
# TeX Book, Appendix B. p. 358

#----------------------------------------------------------------------
#  Actually from LaTeX; Table 3.3, Greek, p.41
#----------------------------------------------------------------------
# Lowercase Greek letters.  Note that some plain names map to the Unicode
# "symbol" variants, while the \var... forms map to the regular letters (or vice versa).
DefMathI('\alpha',      undef, "\x{03B1}");
DefMathI('\beta',       undef, "\x{03B2}");
DefMathI('\gamma',      undef, "\x{03B3}");
DefMathI('\delta',      undef, "\x{03B4}");
DefMathI('\epsilon',    undef, "\x{03F5}");    # GREEK LUNATE EPSILON SYMBOL
DefMathI('\varepsilon', undef, "\x{03B5}");    # GREEK SMALL LETTER EPSILON
DefMathI('\zeta',       undef, "\x{03B6}");
DefMathI('\eta',        undef, "\x{03B7}");
DefMathI('\theta',      undef, "\x{03B8}");
DefMathI('\vartheta',   undef, "\x{03D1}");    # GREEK THETA SYMBOL
DefMathI('\iota',       undef, "\x{03B9}");
DefMathI('\kappa',      undef, "\x{03BA}");
DefMathI('\lambda',     undef, "\x{03BB}");
DefMathI('\mu',         undef, "\x{03BC}");
DefMathI('\nu',         undef, "\x{03BD}");
DefMathI('\xi',         undef, "\x{03BE}");
DefMathI('\pi',         undef, "\x{03C0}");
DefMathI('\varpi',      undef, "\x{03D6}");    # GREEK PI SYMBOL
DefMathI('\rho',        undef, "\x{03C1}");
DefMathI('\varrho',     undef, "\x{03F1}");    # GREEK RHO SYMBOL
DefMathI('\sigma',      undef, "\x{03C3}");
DefMathI('\varsigma',   undef, "\x{03C2}");    # GREEK SMALL LETTER FINAL SIGMA
DefMathI('\tau',        undef, "\x{03C4}");
DefMathI('\upsilon',    undef, "\x{03C5}");
DefMathI('\phi',        undef, "\x{03D5}");    # GREEK PHI SYMBOL

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

        my %roles = ();
        map { $roles{ $_->getAttribute('role') } = 1 } @rels;
        my $role = (scalar(keys %roles) == 1 ? [keys %roles]->[0] : ($roles{ARROW} ? 'ARROW' : 'RELOP'));
        map { $node->removeChild($_) } @rels;
        $document->insertElement('ltx:XMTok', [map { $_->textContent } @rels], role => $role);
  } } },
  reversion => '#1\joinrel #2');

#----------------------------------------------------------------------
# LaTeX; Table 3.6. Arrow Symbols, p.43
#----------------------------------------------------------------------
# Arrows get treated somewhat like relations (or meta-relations),
# but it's hard to associate any particular "meaning" to them.

# One-directional arrows get role ARROW; the two-directional ones
# (and \rightleftharpoons) get role METARELOP.
DefMathI('\leftarrow',      undef, "\x{2190}", role => 'ARROW');         # LEFTWARDS ARROW
DefMathI('\Leftarrow',      undef, "\x{21D0}", role => 'ARROW');         # LEFTWARDS DOUBLE ARROW
DefMathI('\rightarrow',     undef, "\x{2192}", role => 'ARROW');         # RIGHTWARDS ARROW
DefMathI('\Rightarrow',     undef, "\x{21D2}", role => 'ARROW');         # RIGHTWARDS DOUBLE ARROW
DefMathI('\leftrightarrow', undef, "\x{2194}", role => 'METARELOP');     # LEFT RIGHT ARROW
DefMathI('\Leftrightarrow', undef, "\x{21D4}", role => 'METARELOP');     # LEFT RIGHT DOUBLE ARROW
DefMathI('\iff', undef, "\x{21D4}", role => 'METARELOP', meaning => 'iff'); # LEFT RIGHT DOUBLE ARROW
DefMathI('\mapsto',        undef, "\x{21A6}", role => 'ARROW', meaning => 'maps-to');    # RIGHTWARDS ARROW FROM BAR
DefMathI('\hookleftarrow', undef, "\x{21A9}", role => 'ARROW');    # LEFTWARDS ARROW WITH HOOK
DefMathI('\leftharpoonup', undef, "\x{21BC}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB UPWARDS
DefMathI('\leftharpoondown', undef, "\x{21BD}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB DOWNWARDS
DefMathI('\rightleftharpoons', undef, "\x{21CC}", role => 'METARELOP'); # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
DefMathI('\longleftarrow',      undef, "\x{27F5}", role => 'ARROW');  # LONG LEFTWARDS ARROW
DefMathI('\Longleftarrow',      undef, "\x{27F8}", role => 'ARROW');  # LONG LEFTWARDS DOUBLE ARROW
DefMathI('\longrightarrow',     undef, "\x{27F6}", role => 'ARROW');  # LONG RIGHTWARDS ARROW
DefMathI('\Longrightarrow',     undef, "\x{27F9}", role => 'ARROW');  # LONG RIGHTWARDS DOUBLE ARROW
DefMathI('\longleftrightarrow', undef, "\x{27F7}", role => 'METARELOP');    # LONG LEFT RIGHT ARROW
DefMathI('\Longleftrightarrow', undef, "\x{27FA}", role => 'METARELOP'); # LONG LEFT RIGHT DOUBLE ARROW
DefMathI('\longmapsto',     undef, "\x{27FC}", role => 'ARROW');    # LONG RIGHTWARDS ARROW FROM BAR
DefMathI('\hookrightarrow', undef, "\x{21AA}", role => 'ARROW');    # RIGHTWARDS ARROW WITH HOOK
DefMathI('\rightharpoonup', undef, "\x{21C0}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB UPWARDS
DefMathI('\rightharpoondown', undef, "\x{21C1}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
DefMathI('\leadsto',          undef, "\x{219D}", role => 'ARROW', meaning => 'leads-to');    # RIGHTWARDS WAVE ARROW

# Vertical and diagonal arrows.
DefMathI('\uparrow',     undef, "\x{2191}", role => 'ARROW');      # UPWARDS ARROW
DefMathI('\Uparrow',     undef, "\x{21D1}", role => 'ARROW');      # UPWARDS DOUBLE ARROW
DefMathI('\downarrow',   undef, "\x{2193}", role => 'ARROW');      # DOWNWARDS ARROW
DefMathI('\Downarrow',   undef, "\x{21D3}", role => 'ARROW');      # DOWNWARDS DOUBLE ARROW
DefMathI('\updownarrow', undef, "\x{2195}", role => 'ARROW');      # UP DOWN ARROW
DefMathI('\Updownarrow', undef, "\x{21D5}", role => 'ARROW');      # UP DOWN DOUBLE ARROW
DefMathI('\nearrow',     undef, "\x{2197}", role => 'ARROW');      # NORTH EAST ARROW
DefMathI('\searrow',     undef, "\x{2198}", role => 'ARROW');      # SOUTH EAST ARROW
DefMathI('\swarrow',     undef, "\x{2199}", role => 'ARROW');      # SOUTH WEST ARROW
DefMathI('\nwarrow',     undef, "\x{2196}", role => 'ARROW');      # NORTH WEST ARROW

# \mapstochar (3237), \lhook(312C), \rhook(312D)
# These are really wrong; I can't find the right Unicode Glyphs.
# These are only fragments intended to be assembled into meaningful(?) symbols.
# The stand-ins used: U+2E20 (LEFT VERTICAL BAR WITH QUILL),
# U+2E26 (LEFT SIDEWAYS U BRACKET) and U+2E27 (RIGHT SIDEWAYS U BRACKET).
DefMathI('\mapstochar', undef, "\x{2E20}");    # TeX 3237
DefMathI('\lhook',      undef, "\x{2E26}");    # TeX 312C
DefMathI('\rhook',      undef, "\x{2E27}");    # TeX 312D

#======================================================================
# TeX Book, Appendix B. p. 359

# Ah, since \ldots can appear in text and math....
# \ldots, \vdots and \dots are constructors rather than DefMath: in math they
# produce an ltx:XMTok (role ID) with an explicitly computed upright, medium,
# serif font; in text they produce just the character.
DefMacroI('\ldots', undef, '\lx@ldots');
DefConstructorI('\lx@ldots', undef,
  "?#isMath(<ltx:XMTok name='ldots' font='#font' role='ID'>\x{2026}</ltx:XMTok>)(\x{2026})",
  sizer      => "\x{2026}",
  reversion  => '\ldots',
  properties => sub {
    (LookupValue('IN_MATH')
      ? (font => LookupValue('font')->merge(family => 'serif',
          series => 'medium', shape => 'upright')->specialize("\x{2026}"))
      : ()); });    # Since not DefMath!
                    # And so can \vdots
DefConstructorI('\vdots', undef,
  "?#isMath(<ltx:XMTok name='vdots' font='#font' role='ID'>\x{22EE}</ltx:XMTok>)(\x{22EE})",
  sizer      => "\x{22EE}",
  properties => sub {
    (LookupValue('IN_MATH')
      ? (font => LookupValue('font')->merge(family => 'serif',
          series => 'medium', shape => 'upright')->specialize("\x{22EE}"))
      : ()); });    # Since not DefMath!
                    # But not these!
DefMathI('\cdots', undef, "\x{22EF}", role => 'ID');    # MIDLINE HORIZONTAL ELLIPSIS

DefMathI('\ddots', undef, "\x{22F1}", role => 'ID');           # DOWN RIGHT DIAGONAL ELLIPSIS
DefMathI('\colon', undef, ':',        role => 'METARELOP');    # Seems like good default role
    # Note that amsmath redefines \dots to be `smart'.
    # Aha, also can be in text...
DefConstructorI('\dots', undef,
  "?#isMath(<ltx:XMTok name='dots' font='#font' role='ID'>\x{2026}</ltx:XMTok>)(\x{2026})",
  sizer      => "\x{2026}",
  properties => sub {
    (LookupValue('IN_MATH')
      ? (font => LookupValue('font')->merge(family => 'serif',
          series => 'medium', shape => 'upright')->specialize("\x{2026}"))
      : ()); });    # Since not DefMath!

# And while we're at it...
# Ligatures turning runs of three dots into a single ellipsis character.

# Pretest for XMath to keep from interpreting math that the DOM may not allow!!
##DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'\cdot\cdot\cdot',replace=>'\cdots');

# Math: three U+22C5 (dot operators) become U+22EF, named cdots.
DefMathLigature("\x{22C5}\x{22C5}\x{22C5}" => "\x{22EF}", role => 'ID', name => 'cdots');

# Text: three periods become U+2026, except in the typewriter font family.
DefLigature(qr{\.\.\.}, "\x{2026}", fontTest => sub { $_[0]->getFamily ne 'typewriter'; });  # ldots

#DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'...',replace=>'\ldots');
# Math: three periods become U+2026, named ldots.
DefMathLigature("..." => "\x{2026}", role => 'ID', name => 'ldots');

#----------------------------------------------------------------------
# Math Accents.
#----------------------------------------------------------------------
# LaTeX; Table 3.11. Math Mode Accents, p.50.
# Are these all TeX (or LaTeX)?
# Note that most of these should NOT be stretchy, by default!
# Each accent below is a DefMath taking a single argument; the accent character
# itself is given the role OVERACCENT (or UNDERACCENT for \underbrace).
# Only \hat, \check, \tilde, \bar and \vec explicitly set operator_stretchy to
# 'false'; the remaining accents leave stretchiness unspecified.
DefMath('\hat Digested', UTF(0x5E),
  operator_role => 'OVERACCENT', operator_stretchy => 'false');
DefMath('\check Digested', "\x{02C7}",
  operator_role => 'OVERACCENT', operator_stretchy => 'false');    # CARON
DefMath('\breve Digested', "\x{02D8}", operator_role => 'OVERACCENT');    # BREVE
DefMath('\acute Digested', UTF(0xB4),  operator_role => 'OVERACCENT');    # ACUTE ACCENT
DefMath('\grave Digested', UTF(0x60),  operator_role => 'OVERACCENT');    # GRAVE ACCENT
DefMath('\tilde Digested', UTF(0x7E),
  operator_role => 'OVERACCENT', operator_stretchy => 'false');           # TILDE
DefMath('\bar Digested', UTF(0xAF),
  operator_role => 'OVERACCENT', operator_stretchy => 'false');           # MACRON
DefMath('\vec Digested', "\x{2192}",
  operator_role => 'OVERACCENT', operator_stretchy => 'false');           # RIGHTWARDS ARROW
DefMath('\dot Digested',      "\x{02D9}", operator_role => 'OVERACCENT');    # DOT ABOVE
DefMath('\ddot Digested',     UTF(0xA8),  operator_role => 'OVERACCENT');    # DIAERESIS
DefMath('\overline Digested', UTF(0xAF),  operator_role => 'OVERACCENT');    # MACRON
# The wide variants use the same characters as \hat and \tilde, but do not disable stretching.
DefMath('\widehat Digested', UTF(0x5E), operator_role => 'OVERACCENT'); # CIRCUMFLEX ACCENT [plain? also amsfonts]
DefMath('\widetilde Digested', UTF(0x7E), operator_role => 'OVERACCENT'); # TILDE [plain? also amsfonts]
# These aren't handled as simple accents by TeX, so no Digested
# Both braces request scripts positioned at 'mid' and are marked robust.
DefMath('\overbrace {}', "\x{23DE}", operator_role => 'OVERACCENT',       # TOP CURLY BRACKET
  scriptpos => 'mid', robust => 1);
DefMath('\underbrace {}', "\x{23DF}", operator_role => 'UNDERACCENT',     # BOTTOM CURLY BRACKET
  scriptpos => 'mid', robust => 1);

# NOTE that all the above accents REQUIRE math mode
# EXCEPT underline, overrightarrow and overleftarrow!

# \underline, \overrightarrow and \overleftarrow are usable outside of math,
# so each user-level macro dispatches (via \ifmmode) to an internal math form;
# the internal forms carry an alias so they appear as the user-level command.

# Math form of \underline: MACRON used as an UNDERACCENT.
DefMath('\math@underline{}', UTF(0xAF), operator_role => 'UNDERACCENT',
  name => 'underline', alias => '\underline');
# Text form of \underline: an ltx:text element with framed='underline'.
DefConstructor('\text@underline{}', "<ltx:text framed='underline' _noautoclose='1'>#1</ltx:text>");
# Math forms of the over-arrows.
DefMath('\math@overrightarrow{}', "\x{2192}", operator_role => 'OVERACCENT',
  name => 'overrightarrow', alias => '\overrightarrow');
DefMath('\math@overleftarrow{}', "\x{2190}", operator_role => 'OVERACCENT',
  name => 'overleftarrow', alias => '\overleftarrow');

# Careful: Use \protect so that it doesn't expand too early in alignments, etc.
DefMacro('\underline{}', '\protect\ifmmode\math@underline{#1}\else\text@underline{#1}\fi');
Let('\underbar', '\underline');    # Will anyone notice?

# Outside of math, the arrows have no text form: the math form is wrapped in $...$.
DefMacro('\overrightarrow{}', '\protect\ifmmode\math@overrightarrow{#1}\else$\math@overrightarrow{#1}$\fi');
DefMacro('\overleftarrow{}', '\protect\ifmmode\math@overleftarrow{#1}\else$\math@overleftarrow{#1}$\fi');

# \skew{kern}{accent}{base}: the accent is applied to the base followed by a kern
# of #1 mu, and the kern is undone afterwards; the result is followed by an empty group.
DefMacro('\skew{}{}{}', '{#2{#3\mkern#1mu}\mkern-#1mu}{}');    # ignore the subtle spacing for now?
    #----------------------------------------------------------------------
    # LaTeX; Table 3.10. Delimiters, p.47
    #----------------------------------------------------------------------
    # The meaning of OPEN/CLOSE tends to depend upon the pairing,
    # rather than the individual tokens.
    # This meaning is handled in MathParser (for now)

# \{ and \} are protected macros that select a math or a text version by \ifmmode.
# The math versions are OPEN/CLOSE tokens (not stretchy); the text versions are
# primitives producing the literal brace in an upright font.
# All four internal commands are aliased back to \{ or \}.
DefMacroI('\{', undef, '\ifmmode\lx@math@lbrace\else\lx@text@lbrace\fi', protected => 1);
DefMacroI('\}', undef, '\ifmmode\lx@math@rbrace\else\lx@text@rbrace\fi', protected => 1);
DefMathI('\lx@math@lbrace', undef, '{', role => 'OPEN',  stretchy => 'false', alias => '\{');
DefMathI('\lx@math@rbrace', undef, '}', role => 'CLOSE', stretchy => 'false', alias => '\}');
DefPrimitiveI('\lx@text@lbrace', undef, '{', alias => '\{',
  #  font => { specialize => "{" });
  font => { shape => 'upright' }, bounded => 1);    # Since not DefMath!
DefPrimitiveI('\lx@text@rbrace', undef, '}', alias => '\}',
  #  font => { specialize => "}" });    # Since not DefMath!
  font => { shape => 'upright' }, bounded => 1);    # Since not DefMath!
# Synonyms: \lbrace,\rbrace for the brace macros; \lbrack,\rbrack for the literal brackets.
Let('\lbrace', '\{');
Let('\lbrack', T_OTHER('['));
Let('\rbrace', '\}');
Let('\rbrack', T_OTHER(']'));
DefMathI('\lceil',  undef, "\x{2308}", role => 'OPEN',  stretchy => 'false');    # LEFT CEILING
DefMathI('\rceil',  undef, "\x{2309}", role => 'CLOSE', stretchy => 'false');    # RIGHT CEILING
DefMathI('\lfloor', undef, "\x{230A}", role => 'OPEN',  stretchy => 'false');    # LEFT FLOOR
DefMathI('\rfloor', undef, "\x{230B}", role => 'CLOSE', stretchy => 'false');    # RIGHT FLOOR
    # Note: We should be using 27E8,27E9, which are "mathematical", not 2329,232A
    # (the definitions below do use U+27E8 and U+27E9).
DefMathI('\langle', undef, "\x{27E8}", role => 'OPEN', stretchy => 'false'); # LEFT-POINTING ANGLE BRACKET
DefMathI('\rangle', undef, "\x{27E9}", role => 'CLOSE', stretchy => 'false'); # RIGHT-POINTING ANGLE BRACKET

# Not sure these should be defined here, or latex, or even latex compat mode.
# They are approximated by bold versions of the ordinary parentheses and vertical bar.
DefMathI('\lgroup', undef, "(", font => { series => 'bold' }, role => 'OPEN',  stretchy => 'false');
DefMathI('\rgroup', undef, ")", font => { series => 'bold' }, role => 'CLOSE', stretchy => 'false');
DefMathI('\bracevert', undef, "|", font => { series => 'bold' }, role => 'VERTBAR');

## DefMath('\lmoustache',"???", font=>{series=>'bold'}, role=>'OPEN');
## DefMath('\rmoustache',"???", font=>{series=>'bold'}, role=>'OPEN');

# TeX marks some symbols as delimiters which can be used with \left,\right,
# but many of which have different grammatical roles otherwise, eg. arrows, <, >.
# Short of setting up TeX's complicated encoding machinery, I need an explicit
# mapping.  Unfortunately, this doesn't (yet) support people declaring their own delimiters!

# This duplicates in slightly different way what DefMath has put together.
# Maps the string form of a delimiter (as it follows \left or \right) to a hash with:
#   char  : the character used to represent the delimiter;
#   lrole : the role it is given when used with \left;
#   rrole : the role it is given when used with \right;
#   name  : (optional) the name recorded on the resulting token.
# Note that '<' and '>' map to the same angle brackets as \langle and \rangle.
our %DELIMITER_MAP =
  ('(' => { char => "(", lrole => 'OPEN', rrole => 'CLOSE' },
  ')'          => { char => ")",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '['          => { char => "[",        lrole => 'OPEN',    rrole => 'CLOSE' },
  ']'          => { char => "]",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '\{'         => { char => "{",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '\}'         => { char => "}",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '\lfloor'    => { char => "\x{230A}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'lfloor' },
  '\rfloor'    => { char => "\x{230B}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rfloor' },
  '\lceil'     => { char => "\x{2308}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'lceil' },
  '\rceil'     => { char => "\x{2309}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rceil' },
  '\langle'    => { char => "\x{27E8}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'langle' },
  '\rangle'    => { char => "\x{27E9}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rangle' },
  '<'          => { char => "\x{27E8}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'langle' },
  '>'          => { char => "\x{27E9}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rangle' },
  '/'          => { char => "/",        lrole => 'MULOP',   rrole => 'MULOP' },
  '\backslash' => { char => UTF(0x5C),  lrole => 'MULOP',   rrole => 'MULOP', name => 'backslash' },
  '|'          => { char => "|",        lrole => 'VERTBAR', rrole => 'VERTBAR' },
  '\|'         => { char => "\x{2225}", lrole => 'VERTBAR', rrole => 'VERTBAR' },
  '\uparrow'   => { char => "\x{2191}", lrole => 'OPEN', rrole => 'CLOSE', name => 'uparrow' },   # ??
  '\Uparrow'   => { char => "\x{21D1}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Uparrow' },   # ??
  '\downarrow' => { char => "\x{2193}", lrole => 'OPEN', rrole => 'CLOSE', name => 'downarrow' }, # ??
  '\Downarrow' => { char => "\x{21D3}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Downarrow' }, # ??
  '\updownarrow' => { char => "\x{2195}", lrole => 'OPEN', rrole => 'CLOSE', name => 'updownarrow' }, # ??
  '\Updownarrow' => { char => "\x{21D5}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Updownarrow' }, # ??
  );

# With new treatment of Simple Symbols as just Box's with assigned attributes,
# we're not getting whatsits, and so we're not looking them up the same way!!!
# TEMPORARILY (?) hack the Delimiter map
# Every entry is additionally registered under its own character, so that a lookup
# by the character succeeds as well. Where two entries share a character
# (eg. '<' and '\langle') either may end up stored; such entries carry equal data.
foreach my $entry (values %DELIMITER_MAP) {
  $DELIMITER_MAP{ $$entry{char} } = $entry; }

# Return the %DELIMITER_MAP entry (a hash reference) registered for the given
# delimiter string, or undef when the delimiter is unknown.
sub lookup_delimiter {
  my $key = shift;
  return $DELIMITER_MAP{$key}; }

# This is a little messier than you'd think.
# These effectively create a group between the \left,\right.
# And this also gives us a single list of things to parse separately.
# Since \left,\right are TeX, primitives and must be paired up,
# we use a bit of macro trickery to simulate.
# [The \@hidden@bgroup/egroup keep from putting a {} into the UnTeX]
# HOWEVER, an additional complication is that it is a common mistake to omit the balancing \right!
# Using an \egroup (or hidden) makes it hard to recover, so use a special egroup
# \left<delim> expands to the \@left constructor for the delimiter,
# followed by a hidden group opener for the material up to the matching \right.
DefMacro('\left XToken', '\@left #1\@hidden@bgroup');
# Like \@hidden@egroup, but softer about missing \left
# It produces no output; after digestion it either closes the current group, or,
# when the current frame evidently was not opened by \left (MODE is bound in it,
# or 'groupNonBoxing' is set), reports an Error and leaves the grouping alone.
DefConstructor('\right@hidden@egroup', '',
  afterDigest => sub {
    my ($stomach) = @_;
    if ($STATE->isValueBound('MODE', 0)    # Last stack frame was a mode switch!?!?!
      || $STATE->lookupValue('groupNonBoxing')) {    # or group was opened with \begingroup
      Error('unexpected', '\right', undef, "Unbalanced \\right, no balancing \\left."); }
    else {
      $stomach->egroup; } },
  reversion => '');

# \right<delim> first closes the group opened by \left, then constructs the delimiter.
DefMacro('\right XToken', '\right@hidden@egroup\@right #1');

# Shared afterDigest worker for \@left and \@right.
#   $stomach      : the stomach, only used as the locator for the warning;
#   $whatsit      : the \@left or \@right whatsit, whose 1st argument is the delimiter;
#   $rolekey      : which role of the %DELIMITER_MAP entry applies ('lrole' or 'rrole');
#   $natural_role : the role ('OPEN' or 'CLOSE') that an unmapped, already digested
#                   delimiter must have to be accepted as it is.
# The outcome is recorded as properties, which the constructor patterns then test:
#  * '.' (the null delimiter) sets 'hint', giving an ltx:XMHint;
#  * a delimiter found in %DELIMITER_MAP sets role, char, name and stretchy='true',
#    and the whatsit takes over the delimiter's font, giving an ltx:XMTok;
#  * otherwise, a delimiter already having the natural role is made stretchy
#    and is output as it is (#1);
#  * anything else is output as it is too, but with a warning.
# Always returns nothing, so that the hook contributes no boxes.
sub digestFenceDelimiter {
  my ($stomach, $whatsit, $rolekey, $natural_role) = @_;
  my $arg   = $whatsit->getArg(1);
  my $delim = ToString($arg);
  if ($delim eq '.') {
    $whatsit->setProperty(hint => 1); }
  elsif (my $entry = $DELIMITER_MAP{$delim}) {
    $whatsit->setProperties(role => $$entry{$rolekey},
      char     => $$entry{char},
      name     => $$entry{name},
      stretchy => 'true');
    $whatsit->setFont($arg->getFont()); }
  elsif (($arg->getProperty('role') || '') eq $natural_role) {
    $arg->setProperty(stretchy => 'true'); }
  else {
    Warn('unexpected', $delim, $stomach,
      "Missing delimiter; '.' inserted"); }
  return; }

# The delimiter following \left; uses the 'lrole' of mapped delimiters.
DefConstructor('\@left Token',
  "?#char(<ltx:XMTok role='#role' name='#name' stretchy='#stretchy'>#char</ltx:XMTok>)"
    . "(?#hint(<ltx:XMHint/>)(#1))",
  afterDigest => sub { digestFenceDelimiter($_[0], $_[1], 'lrole', 'OPEN'); },
  alias       => '\left');
# The delimiter following \right; uses the 'rrole' of mapped delimiters.
DefConstructor('\@right Token',
  "?#char(<ltx:XMTok role='#role' name='#name' stretchy='#stretchy'>#char</ltx:XMTok>)"
    . "(?#hint(<ltx:XMHint/>)(#1))",
  afterDigest => sub { digestFenceDelimiter($_[0], $_[1], 'rrole', 'CLOSE'); },
  alias       => '\right');

# These originally had Token as parameter, rather than {}..... Why?
# Note that in TeX, \big{((} will only enlarge the 1st paren!!!
# \big, \Big, \bigg and \Bigg simply pass their argument through,
# within a bounded scope whose font size is set to the command's own name.
foreach my $size (qw(big Big bigg Bigg)) {
  DefConstructor('\\' . $size . ' {}', '#1', bounded => 1, font => { size => $size }); }

# Assign $role to the delimiter that was just constructed in $document.
# The delimiter is taken to be the last child element of the current node,
# or the current node itself when it has no child elements.
# The role is only changed when the delimiter has no role yet, or has one of
# the delimiter-like roles (OPEN, MIDDLE, CLOSE, VERTBAR); any other role is kept.
# Returns nothing.
sub addDelimiterRole {
  my ($document, $role) = @_;
  my $parent = $document->getNode;
  my $target = $document->getLastChildElement($parent) || $parent;
  return unless $target;
  my $existing = '<none>';
  if ($target->nodeType == XML_ELEMENT_NODE) {
    $existing = $target->getAttribute('role') || '<none>'; }
  ## Maybe we shouldn't switch VERTBAR ?
  ## The catch is that occasionally people use a single \Bigl (or whatever)
  ## where they should have used a \Big
  if ($existing =~ /^(OPEN|MIDDLE|CLOSE|VERTBAR|<none>)$/) {
    $document->setAttribute($target, role => $role); }
  return; }

# The "m" versions are defined in e-Tex and other places.
# For each size (big, Big, bigg, Bigg) define the three sided variants:
# the suffix l, m or r selects the role OPEN, MIDDLE or CLOSE, respectively,
# which addDelimiterRole applies to the delimiter once it has been constructed.
# Apart from that, they behave like the unsided commands of the same size.
foreach my $size (qw(big Big bigg Bigg)) {
  foreach my $side (['l', 'OPEN'], ['m', 'MIDDLE'], ['r', 'CLOSE']) {
    my ($suffix, $role) = @$side;
    DefConstructor('\\' . $size . $suffix . ' {}', '#1', bounded => 1, font => { size => $size },
      afterConstruct => sub { addDelimiterRole($_[0], $role); }); } }

# \vert is a synonym for the literal character |, and \Vert for \|.
Let('\vert', T_OTHER('|'));
Let('\Vert', '\|');

#======================================================================
# TeX Book, Appendix B. p. 360

# \choose, et al, are already handled above.
# Note that in TeX, all 4 args get digested(!)
# and the choice is made when absorbing!
# All four alternatives arrive already digested; the mathstyle in effect is
# recorded as a property at digestion time, and only the alternative matching
# it is absorbed into the document. Any style other than display, text or
# script selects the last (scriptscript) alternative.
DefConstructor('\mathchoice Digested Digested Digested Digested', sub {
    my ($document, $display, $text, $script, $scriptscript, %props) = @_;
    my $style  = $props{mathstyle};
    my $chosen = $scriptscript;
    if    ($style eq 'display') { $chosen = $display; }
    elsif ($style eq 'text')    { $chosen = $text; }
    elsif ($style eq 'script')  { $chosen = $script; }
    $document->absorb($chosen); },
  properties => { mathstyle => sub { LookupValue('font')->getMathstyle; } });

# \mathpalette{cmd}{arg}: a \mathchoice whose four alternatives each apply
# cmd to the corresponding style command, followed by arg.
DefMacro('\mathpalette{}{}',
  join('',
    '\mathchoice',
    '{#1\displaystyle{#2}}',
    '{#1\textstyle{#2}}',
    '{#1\scriptstyle{#2}}',
    '{#1\scriptscriptstyle{#2}}'));

# afterDigest hook shared by the phantom constructors: stores the size of the
# (digested) argument as the whatsit's width, height and depth properties.
# Returns nothing.
sub recordPhantomSize {
  my ($stomach, $whatsit) = @_;
  my ($width, $height, $depth) = $whatsit->getArg(1)->getSize;
  $whatsit->setProperties(width => $width, height => $height, depth => $depth);
  return; }

# In math, a phantom becomes an ltx:XMHint carrying the relevant dimensions;
# in text, the argument is kept, wrapped in an ltx:text of class ltx_phantom.

# \phantom occupies the width, height and depth of its argument.
DefConstructor('\phantom{}',
  "?#isMath(<ltx:XMHint width='#width' height='#height' depth='#depth' name='phantom'/>)"
    . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",    # !?!?!?!
  properties  => { isSpace => 1 },
  afterDigest => \&recordPhantomSize);

# \hphantom only reports the width of its argument.
DefConstructor('\hphantom{}',
  "?#isMath(<ltx:XMHint width='#width' name='hphantom'/>)"
    . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",    # !?!?!?!
  properties  => { isSpace => 1 },
  afterDigest => \&recordPhantomSize);

# \vphantom only reports the height and depth of its argument.
DefConstructor('\vphantom{}',
  "?#isMath(<ltx:XMHint height='#height' depth='#depth' name='vphantom'/>)"
    . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",    # !?!?!?!
  properties  => { isSpace => 1 },
  afterDigest => \&recordPhantomSize);

# \mathstrut leaves just a hint in math, and nothing at all in text.
DefConstructor('\mathstrut', "?#isMath(<ltx:XMHint name='mathstrut'/>)()",
  properties => { isSpace => 1 });
# \smash simply passes its argument through.
DefConstructor('\smash{}', "#1");    # well, what?

#======================================================================
# TeX Book, Appendix B. p. 361

# This is actually LaTeX's definition, but let's just do it this way.
# \sqrt[n]{x}: with the optional index, an application of 'nth-root' to (n, x);
# without it, an application of 'square-root' to x.
DefConstructor('\sqrt OptionalInScriptStyle Digested',
  "?#1(<ltx:XMApp><ltx:XMTok meaning='nth-root'/>"
    . "<ltx:XMArg>#1</ltx:XMArg><ltx:XMArg>#2</ltx:XMArg>"
    . "</ltx:XMApp>)"
    . "(<ltx:XMApp><ltx:XMTok meaning='square-root'/>"
    . "<ltx:XMArg>#2</ltx:XMArg></ltx:XMApp>)");

# Parameter type that reads tokens up to a given delimiter (the "until" tokens
# named after the colon in a prototype) and digests them inside a group
# whose font has been switched to script mathstyle.
# It reverts to the argument wrapped in braces.
DefParameterType('ScriptStyleUntil', sub {
    my ($gullet, $until) = @_;
    $gullet->readUntil($until); },
  beforeDigest => sub {
    $_[0]->bgroup;
    MergeFont(mathstyle => 'script'); },
  afterDigest => sub {
    $_[0]->egroup; },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });

DefConstructor('\root ScriptStyleUntil:\of {}',
  "<ltx:XMApp><ltx:XMTok meaning='nth-root'/>"

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

  scriptpos => \&doScriptpos);
# Trigonometric functions, typeset as their own names.
DefMathI('\tan',  undef, "tan",  role => 'TRIGFUNCTION', meaning => 'tangent');
DefMathI('\tanh', undef, "tanh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-tangent');

#----------------------------------------------------------------------
# Modulo

# \pmod{m} gives a parenthesized "mod m" acting as a MODIFIER (no meaning assigned, for now);
# \bmod is the bare operator, with meaning 'modulo'.
DefMath('\pmod{}', '\;\;(\mathop{{\rm mod}} #1)', role => 'MODIFIER');    #  , meaning=>'modulo');
DefMath('\bmod', 'mod', role => 'MODIFIEROP', meaning => 'modulo');

#======================================================================
# TeX Book, Appendix B. p. 362

#----------------------------------------------------------------------
# Matrices;  Generalized

# The delimiters around a matrix may simply be notational, or for readability,
# and don't affect the "meaning" of the array structure as a matrix.
# In that case, we'll use an XMDual to indicate the content is simply the matrix,
# but the presentation includes the delimiters.
# HOWEVER, the delimiters may also signify an OPERATION on the matrix
# in which case the application & meaning of that operator must be supplied.

# keys are
#  name  : the name of the environment (for reversion)
#  datameaning: the (presumed) meaning of the array construct (typically 'matrix')
#  delimitermeaning  : the operator meaning due to delimiters (eg. norm)(as applied to the array)
#  style : typically \displaystyle, \textstyle...
#  left  : TeX code for left of matrix
#  right  : TeX code for right
#  ncolumns : the number of columns (default is not limited)
# The 'style' key is declared as UndigestedKey, so presumably its value
# (eg. \displaystyle) is kept as tokens until it is placed into the column templates.
DefKeyVal('lx@GEN', 'style', 'UndigestedKey');

# Opens a group and sets up the alignment bindings for a generalized matrix,
# according to the keys described above; \lx@end@gen@matrix closes the group again.
# Keys read here:
#   style     : tokens inserted before each cell (default \textstyle);
#   alignment : column alignment; a value starting with c, l or r (default 'c');
#   ncolumns  : if given (and non-zero), the template has exactly that many
#               columns, otherwise a single repeating column;
#   rowsep    : passed along as an attribute of the alignment.
DefPrimitive('\lx@gen@matrix@bindings RequiredKeyVals:lx@GEN', sub {
    my ($stomach, $kv) = @_;
    $stomach->bgroup;
    my $style = $kv->getValue('style')               || T_CS('\textstyle');
    my $align = ToString($kv->getValue('alignment')) || 'c';
    # We really should be using ReadAlignmentTemplate (LaTeXML::Core::Alignment)
    # but we'd have to convert it to a repeating spec somehow.
    # \hfil goes before the cell for c and r, and after the cell for c and l.
    my @colspec = (before => Tokens(($align =~ /^(?:c|r)/ ? (T_CS('\hfil')) : ()), $style),
      after => Tokens(($align =~ /^(?:c|l)/ ? (T_CS('\hfil')) : ())));
    my $ncols      = ToString($kv->getValue('ncolumns'));
    my %attributes = ();
    foreach my $key (qw(rowsep)) {    # Probably more?
      if (my $value = $kv->getValue($key)) {
        $attributes{$key} = $value; } }
    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
        ($ncols ? (columns => [map { { @colspec } } 1 .. $ncols])
          : (repeated => [{@colspec}]))),
      'math',
      (keys %attributes ? (attributes => {%attributes}) : ()));    # });
    # Within the matrix, \\ ends a row, and the math version of the intercolumn is used.
    Let("\\\\",         '\@alignment@newline');
    Let('\lx@intercol', '\lx@math@intercol');
    Let('\@row@before', '\@empty');    # Disable special row treatment (eg. numbering) unless requested
    Let('\@row@after',  '\@empty');
});

# Closes the group opened by \lx@gen@matrix@bindings.
DefPrimitive('\lx@end@gen@matrix', sub { $_[0]->egroup; });

# \lx@gen@plain@matrix{keyvals}{body}: the generalized matrix, in the plain TeX
# calling style (body given as an argument). It establishes the bindings, wraps the
# body as an alignment handed to the constructor below, and then closes the group.
DefMacro('\lx@gen@plain@matrix{}{}',
  '\lx@gen@matrix@bindings{#1}'
    . '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\@finish@alignment}'
    #    . '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\cr\@finish@alignment}'
    . '\lx@end@gen@matrix');

# The delimiters on a matrix are presumably just for notation or readability (not an operator);
# the array data itself is the matrix.
# When a datameaning or delimitermeaning key was given, an ltx:XMDual is built:
# its content branch applies those meanings (delimitermeaning outermost) to a
# reference to the array; its presentation branch wraps the array with the
# left and right delimiters. Otherwise only the array itself (#2) is output.
# All keyvals are copied into the whatsit's properties, so they can be used in the pattern.
DefConstructor('\lx@gen@plain@matrix@ RequiredKeyVals:lx@GEN {}',
  "?#needXMDual("
    . "<ltx:XMDual>"
    . "?#delimitermeaning(<ltx:XMApp><ltx:XMTok meaning='#delimitermeaning'/>)()"
    . "?#datameaning(<ltx:XMApp><ltx:XMTok meaning='#datameaning'/>)()"
    . "<ltx:XMRef _xmkey='#xmkey'/>"
    . "?#delimitermeaning(</ltx:XMApp>)()"
    . "?#datameaning(</ltx:XMApp>)()"
    . "<ltx:XMWrap>#left<ltx:XMArg _xmkey='#xmkey'>#2</ltx:XMArg>#right</ltx:XMWrap>"
    . "</ltx:XMDual>"
    . ")("
    . "#2"
    . ")",
  properties => sub { %{ $_[1]->getKeyVals }; },
  # Reverts to \<name>{...}, the body being reverted from the recorded alignment.
  reversion  => sub {
    my ($whatsit, $kv, $body) = @_;
    my $name      = ToString($kv->getValue('name'));
    my $alignment = $whatsit->getProperty('alignment');
##    (T_CS('\\' . $name), T_BEGIN, Revert($body), T_END); },
##    (T_CS('\\' . $name), T_BEGIN, Revert($alignment), T_END); },
    (T_CS('\\' . $name), T_BEGIN, $alignment->revert, T_END); },

  # Decide whether the XMDual is needed (allocating the key shared by the XMRef
  # and XMArg), and record the current 'Alignment' for use in the reversion.
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $kv = $whatsit->getArg(1);
    if ($kv->getValue('datameaning') || $kv->getValue('delimitermeaning')) {
      $whatsit->setProperties(
        needXMDual => 1,
        xmkey      => LaTeXML::Package::getXMArgID()); }
    $whatsit->setProperties(alignment => LookupValue('Alignment'));
    return; });

# Plain TeX's \matrix: no delimiters; the data is taken to mean 'matrix'.
DefMacro('\matrix{}',
  '\lx@gen@plain@matrix{name=matrix,datameaning=matrix}{#1}');

# \bordermatrix is built as a generic matrix (without any meaning assigned),
# which is then modified by the hack below.
DefMacro('\bordermatrix{}',    # Semantics?
  '\lx@hack@bordermatrix{\lx@gen@plain@matrix{name=bordermatrix}{#1}}');
# HACK the newly created border matrix to add columns for the (spanned) parentheses!!!
# Assume (for now) that there's no XMDual structure here.
# What is the semantics, anyway?
# Outline:
#  1. absorb the matrix, and find the rows of the node just created;
#  2. estimate a height and depth for the parentheses from the recorded alignment
#     (falling back to 10pt height and no depth if no alignment is found);
#  3. give every row two extra cells, one moved to 2nd position, one left at the end;
#  4. in the 2nd row, make those two cells span the remaining rows, and fill
#     them with the parentheses, each accompanied by a strut-like blank token.
# NOTE(review): step 4 indexes $rows[1], and so seems to assume at least 2 rows -- confirm.
DefConstructor('\lx@hack@bordermatrix{}', sub {
    my ($document, $matrix) = @_;
    $document->absorb($matrix);
    my $marray = $document->getNode->lastChild;
    my @rows   = $document->findnodes('ltx:XMRow', $marray);
    my ($h, $d) = (10.0 * $UNITY, 0);    # 10pts.
                                         # Contrived, since $matrix may be a List or...
    my ($alignment) = grep { $_ } map { $_->getProperty('alignment') } $matrix->unlist;
    if ($alignment) {
      my $arrayh = $alignment->getHeight->ptValue;
      my ($row0, $row1) = $alignment->rows;    # What's row 0 ?
      $h = $$row1{y}->valueOf;
      $d = $h - $arrayh; }
    my $md = Dimension(-$d);
    $h = Dimension($h); $d = Dimension($d);

    foreach my $row (@rows) {                  # Add empty cells for 2nd & last column
      $document->openElementAt($row, 'ltx:XMCell');
      $document->openElementAt($row, 'ltx:XMCell');
      $row->insertAfter($row->lastChild, $row->firstChild);    # Move to 2nd pos!
    }
    # The new cells of the 2nd row (the first row below the border) hold the parentheses.
    my @cols = element_nodes($rows[1]);
    my $col1 = $cols[1];
    my $coln = $cols[-1];
    my $n    = scalar(@rows) - 1;
    $col1->setAttribute(rowspan => $n);
    $coln->setAttribute(rowspan => $n);
    my $pfont = $STATE->lookupValue('font')->specialize('(');
    $document->appendTree($col1,
      ['ltx:XMWrap', { depth => $d },
        ['ltx:XMTok', { role   => 'OPEN', height  => 0, depth => $d, yoffset => $md, font => $pfont }, '('],
        ['ltx:XMTok', { height => $h,     yoffset => $md, font => $pfont }, ' ']]);   # Effectively, a strut
    $document->appendTree($coln,
      ['ltx:XMWrap', {},
        ['ltx:XMTok', { role   => 'CLOSE', height => 0, depth => $d, yoffset => $md, font => $pfont }, ')'],
        ['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]);
    return; },
  reversion => '#1');

# Plain TeX's \pmatrix: a matrix fenced by stretchy parentheses.
DefMacro('\pmatrix{}',
  '\lx@gen@plain@matrix{name=pmatrix,datameaning=matrix,left=\@left(,right=\@right)}{#1}');

#----------------------------------------------------------------------
# Cases: Generalized
# keys are
#  name  : the name of the command (for reversion)
#  meaning: the (presumed) meaning of the construct
#  style : \textstyle or \displaystyle
#  conditionmode : mode of 2nd column, text or math
#  left  : TeX code for left of cases
#  right  : TeX code for right

# This pair brackets a condition that is to be treated as text:
# \lx@cases@condition begins text mode, and captures what follows as its body,
# which is output within an ltx:XMText; \lx@cases@end@condition ends the text mode.
# Both have an empty alias, so they don't show up in the reversion.
DefConstructorI('\lx@cases@condition', undef,
  "<ltx:XMText>#body</ltx:XMText>",
  alias => '', beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1);
DefConstructorI('\lx@cases@end@condition', undef, "", alias => '',
  beforeDigest => sub { $_[0]->endMode('text'); });

# Opens a group and sets up the alignment bindings for a generalized cases
# construct, according to the keys described above; \lx@end@gen@cases closes
# the group again.
# Keys read here:
#   style         : inserted before each cell (default \textstyle); a plain
#                   string is converted to the control sequence token;
#   conditionmode : when 'text', the 2nd column is wrapped in
#                   \lx@cases@condition ... \lx@cases@end@condition,
#                   so that the conditions are processed as text.
# The template has exactly 2 columns (value, condition), each followed by \hfil.
DefPrimitive('\lx@gen@cases@bindings RequiredKeyVals:lx@GEN', sub {
    my ($stomach, $kv) = @_;
    $stomach->bgroup;
    my $style = $kv->getValue('style') || T_CS('\textstyle');
    $style = T_CS($style) unless ref $style;
    my $condtext = ToString($kv->getValue('conditionmode')) eq 'text';
    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
        columns => [
          { before => Tokens($style), after => Tokens(T_CS('\hfil')) },
          { before => Tokens($style,
              ($condtext ? (T_CS('\lx@cases@condition')) : ())),
            after => Tokens(T_CS('\lx@column@trimright'),
              ($condtext ? (T_CS('\lx@cases@end@condition')) : ()),
              T_CS('\hfil')) }]),
      'math');
    # Within the cases, \\ ends a row, and the math version of the intercolumn is used.
    Let("\\\\",         '\@alignment@newline');
    Let('\lx@intercol', '\lx@math@intercol');
    DefMacro('\@row@before', '');    # Don't inherit counter stepping from containing environments
    DefMacro('\@row@after',  '');
});

# \lx@gen@plain@cases{keyvals}{body}: the generalized cases, in the plain TeX
# calling style (body given as an argument). It establishes the bindings, wraps the
# body as an alignment handed to \lx@gen@plain@cases@, and then closes the group.
DefMacro('\lx@gen@plain@cases{}{}',
  '\lx@gen@cases@bindings{#1}'
    . '\lx@gen@plain@cases@{#1}{\@start@alignment#2\@finish@alignment}'
    . '\lx@end@gen@cases');
# Closes the group opened by \lx@gen@cases@bindings.
DefPrimitive('\lx@end@gen@cases', sub { $_[0]->egroup; });

# The logical structure for cases extracts the columns of the alignment
# to give alternating value,condition (an empty condition is replaced by "otherwise" !?!?!)
# Initially, the alignment is constructed within an ltx:XMWrap, between the
# left and right delimiters (if any were given as keys).
# Afterwards, that node is replaced by an ltx:XMDual whose content branch applies
# 'cases' to the cells of the array, taken in document order: non-empty cells are
# represented by references to their contents, empty ones by the text 'otherwise'.
# The presentation branch is the originally constructed node.
# All keyvals are copied into the whatsit's properties, so they can be used in the pattern.
DefConstructor('\lx@gen@plain@cases@ RequiredKeyVals:lx@GEN {}',
  '<ltx:XMWrap>#left#2#right</ltx:XMWrap>',
  properties     => sub { %{ $_[1]->getKeyVals }; },
  afterConstruct => sub {
    my ($document) = @_;
    if (my $point = $document->getElement->lastChild) {
      # Get the sequence of alternating (case, condition).
      # Expecting ltx:XMArray/ltx:XMRow/ltx:XMCell [should have /ltx:XMArg, but could be empty!!!]
      my @cells = $document->findnodes('ltx:XMArray/ltx:XMRow/ltx:XMCell', $point);
      my @stuff = map { ($_->hasChildNodes ? createXMRefs($document, element_nodes($_))
          : ['ltx:XMText', {}, 'otherwise']) } @cells;
      $document->replaceTree(['ltx:XMDual', {},
          ['ltx:XMApp', {}, ['ltx:XMTok', { meaning => 'cases' }], @stuff],
          $point],
        $point); } },
  # NOTE: the reversion is always \cases{...}; a 'name' key, if any, is not consulted.
  reversion => sub {
    my ($whatsit, $kv, $body) = @_;
    (T_CS('\cases'), T_BEGIN, Revert($body), T_END); });

# Note that 2nd column in \cases is in text mode!
DefMacro('\cases{}',
  '\lx@gen@plain@cases{meaning=cases,left=\@left\{,conditionmode=text,style=\textstyle}{#1}');

#----------------------------------------------------------------------
# \openup reads its Dimension, but otherwise does nothing.
DefPrimitive('\openup Dimension', undef);

# What should this do? (needs to work with alignments..)
# see https://www.tug.org/TUGboat/tb07-1/tb14beet.pdf
# use in arXiv:hep-th/0001208
# For now: an \halign with a single column, each entry being centered, display style math
# in a box of the display's width.
DefMacro('\displaylines{}', '\halign{\hbox to\displaywidth{$\hfil\displaystyle##\hfil$}\crcr#1\crcr}');

# The following three all follow the same scheme: the user-level macro wraps
# its body as an alignment, and hands it to an internal constructor.
# The constructor establishes the math alignment bindings (with rows attached
# at the baseline) before digesting its argument, is bounded,
# and reverts to the user-level form.

# \eqalign: 2 columns, 'rl'.
DefMacro('\eqalign{}',
  '\@@eqalign{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@eqalign{}',
  '#1',
  reversion    => '\eqalign{#1}', bounded => 1,
  beforeDigest => sub { alignmentBindings('rl', 'math',
      attributes => { vattach => 'baseline' }); });

# \eqalignno: 3 columns, 'rll' (the last one presumably for the equation number).
DefMacro('\eqalignno{}',
  '\@@eqalignno{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@eqalignno{}',
  '#1',
  reversion    => '\eqalignno{#1}', bounded => 1,
  beforeDigest => sub { alignmentBindings('rll', 'math',
      attributes => { vattach => 'baseline' }); });

# \leqalignno: set up exactly like \eqalignno; only the reversion differs.
DefMacro('\leqalignno{}',
  '\@@leqalignno{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@leqalignno{}',
  '#1',
  reversion    => '\leqalignno{#1}', bounded => 1,
  beforeDigest => sub { alignmentBindings('rll', 'math',
      attributes => { vattach => 'baseline' }); });

# Page related registers are defined so that documents may assign to them.
DefRegister('\pageno'   => Number(0));
DefRegister('\headline' => Tokens());
DefRegister('\footline' => Tokens());
# \folio always expands to "1".
DefMacroI('\folio', undef, "1");    # What else?

# \nopagenumbers does nothing; \advancepageno increments the \pageno register by 1.
DefPrimitiveI('\nopagenumbers', undef, undef);
DefMacroI('\advancepageno', undef, '\advance\pageno1\relax');

#======================================================================
# TeX Book, Appendix B. p. 363

# These are defined to do nothing.
DefPrimitive('\raggedbottom', undef);
DefPrimitive('\normalbottom', undef);

# if the mark is not simple, we add it to the content of the note
# otherwise, to the attribute.
# \footnote{mark}{text} (plain TeX style) creates a floated ltx:note with role 'footnote'.
# A mark is "simple" when it reverts to nothing but letters, spaces and other
# characters; it is then recorded in the 'mark' property (and thus the attribute).
# Otherwise it is recorded as 'prenote', and is output in front of the note's text.
DefConstructor('\footnote{}{}',
  "^<ltx:note role='footnote' ?#mark(mark='#mark')()>?#prenote(#prenote )()#2</ltx:note>",
  mode         => 'text', bounded => 1,
  beforeDigest => sub { reenterTextMode(1); neutralizeFont(); },
  afterDigest  => sub {
    my ($stomach, $whatsit) = @_;
    my $mark = $whatsit->getArg(1);
    # Count the tokens that are neither letter, space nor other.
    my $complex = grep {
      my $catcode = $_->getCatcode;
      ($catcode != CC_LETTER) && ($catcode != CC_SPACE) && ($catcode != CC_OTHER);
    } Revert($mark);
    my $property = ($complex ? 'prenote' : 'mark');
    $whatsit->setProperty($property => $mark);
    return; });
# Until we can do the "v" properly:
# \vfootnote is treated exactly like \footnote.
DefMacro('\vfootnote', '\footnote');
# Macros apparently following plain TeX's scheme for reading the footnote text:
# \fo@t looks at \next and continues with \f@@t if it is a begin-group character,
# else with \f@t; \f@@t opens a group (arranging for \@foot to follow its end);
# \f@t takes the text as an argument; \@foot adds a \strut and closes the group.
DefMacro('\fo@t',      '\ifcat\bgroup\noexpand\next \let\next\f@@t  \else\let\next\f@t\fi \next');
DefMacro('\f@@t',      '\bgroup\aftergroup\@foot\let\next');
DefMacro('\f@t{}',     '#1\@foot');
DefMacro('\@foot',     '\strut\egroup');

# \footstrut does nothing; \footins is defined as a number register with value 0.
DefPrimitiveI('\footstrut', undef, undef);
DefRegister('\footins' => Number(0));

# The insertion commands are accepted, but do nothing:
# material between them and \endinsert is simply processed in place.
DefPrimitiveI('\topinsert',  undef, undef);
DefPrimitiveI('\midinsert',  undef, undef);
DefPrimitiveI('\pageinsert', undef, undef);
DefPrimitiveI('\endinsert',  undef, undef);
# \topins ?

#======================================================================
# TeX Book, Appendix B. p. 364

# Let's hope nobody is messing with the output routine...

# \footnoterule does nothing.
DefPrimitiveI('\footnoterule', undef, undef);

#======================================================================
# End of TeX Book definitions.
#======================================================================

#**********************************************************************
# Stray stuff .... where to ?
#**********************************************************************

# Mostly ignorable, although it could add an attribute to an ancestor
# to record the desired justification.
# Spacing stuff
# \@ (a spacing adjustment) is accepted, but contributes nothing to the document.
DefConstructor('\@', '');
# Math spacing.

# Math style.
# Also record that this explicitly sets the mathstyle (support for \over, etal)
# Define \displaystyle, \textstyle, \scriptstyle and \scriptscriptstyle.
# Each merges the corresponding mathstyle into the current font, and returns
# an otherwise empty Box which reverts to the command itself and is marked
# as having set the mathstyle explicitly.
foreach my $mathstyle (qw(display text script scriptscript)) {
  my $style  = $mathstyle;                 # private copy for the closure
  my $csname = '\\' . $style . 'style';    # eg. \displaystyle
  DefPrimitiveI($csname, undef, sub {
      MergeFont(mathstyle => $style);
      Box(undef, undef, undef, T_CS($csname), explicit_mathstyle => 1); }); }

#======================================================================

# Special Characters.
# Try to give them some sense in math...
DefMacroI('\#',   undef, '\ifmmode\lx@math@hash\else\lx@text@hash\fi',             protected => 1);
DefMacroI('\&',   undef, '\ifmmode\lx@math@amp\else\lx@text@amp\fi',               protected => 1);
DefMacroI('\%',   undef, '\ifmmode\lx@math@percent\else\lx@text@percent\fi',       protected => 1);
DefMacroI("\\\$", undef, '\ifmmode\lx@math@dollar\else\lx@text@dollar\fi',         protected => 1);
DefMacroI('\_',   undef, '\ifmmode\lx@math@underscore\else\lx@text@underscore\fi', protected => 1);
DefPrimitiveI('\lx@text@hash',       undef, '#',  alias => '\#');
DefPrimitiveI('\lx@text@amp',        undef, '&',  alias => '\&');
DefPrimitiveI('\lx@text@percent',    undef, '%',  alias => '\%');
DefPrimitiveI('\lx@text@dollar',     undef, "\$", alias => "\\\$");
DefPrimitiveI('\lx@text@underscore', undef, '_',  alias => '\_');
DefMathI('\lx@math@hash',    undef, '#', alias => '\#');
DefMathI('\lx@math@amp',     undef, '&', role  => 'ADDOP',   meaning => 'and',     alias => '\&');
DefMathI('\lx@math@percent', undef, '%', role  => 'POSTFIX', meaning => 'percent', alias => '\%');
DefMathI('\lx@math@dollar', undef, "\$", role => 'OPERATOR', meaning => 'currency-dollar',
  alias => "\\\$");
DefMathI('\lx@math@underscore', undef, '_', alias => '\_');

# Discretionary times; just treat as invisible ?
DefMathI('\*', undef, "\x{2062}", role => 'MULOP', name => '', meaning => 'times'); # INVISIBLE TIMES (or MULTIPLICATION SIGN = 00D7)

# These 3 should have some `name' assigned ... but what???

# Is XMWrap the right thing to wrap with (instead of XMArg)?
# We can't really assume that the stuff inside is sensible math.
# NOTE that \mathord and \mathbin aren't really right here.
# We need a finer granularity than TeX does: an ORD could be several things,
# a BIN could be a MULOP or ADDOP.
# AND, rarely, they're empty.... Is it wrong to drop them?
DefConstructor('\mathord{}', "?#1(<ltx:XMWrap role='ID'   >#1</ltx:XMWrap>)()", bounded => 1);
DefConstructor('\mathop{}', "?#1(<ltx:XMWrap role='BIGOP' scriptpos='#scriptpos'>#1</ltx:XMWrap>)()",
  bounded => 1, properties => { scriptpos => \&doScriptpos });
DefConstructor('\mathbin{}',   "?#1(<ltx:XMWrap role='BINOP'>#1</ltx:XMWrap>)()", bounded => 1);
DefConstructor('\mathrel{}',   "?#1(<ltx:XMWrap role='RELOP'>#1</ltx:XMWrap>)()", bounded => 1);
DefConstructor('\mathopen{}',  "?#1(<ltx:XMWrap role='OPEN' >#1</ltx:XMWrap>)()", bounded => 1);
DefConstructor('\mathclose{}', "?#1(<ltx:XMWrap role='CLOSE'>#1</ltx:XMWrap>)()", bounded => 1);
DefConstructor('\mathpunct{}', "?#1(<ltx:XMWrap role='PUNCT'>#1</ltx:XMWrap>)()", bounded => 1);
DefConstructor('\mathinner{}', "?#1(<ltx:XMWrap role='ATOM'>#1</ltx:XMWrap>)()",  bounded => 1);

# If an XMWrap (presumably from \mathop, \mathbin, etc)
# has multiple children, ALL are XMTok, within a restricted set of roles,
# we want to concatenate the text content into a single XMTok.
DefMathRewrite(xpath => 'descendant-or-self::ltx:XMWrap['
    # Only XMWrap's from the above class of operators
    . '(@role="OP" or @role="BIGOP" or @role="RELOP" '
    . 'or @role="ADDOP" or @role="MULOP" or @role="BINOP" '
    . 'or @role="OPEN" or @role="CLOSE")'
    . ' and count(child::*) > 1 '
    # with only XMTok as children with the roles in (roughly) the same set
    . ' and not(child::*[local-name() != "XMTok"])'
    . ' and not(ltx:XMTok['
    . '@role !="OP" and @role!="BIGOP" and @role!="RELOP" and @role!="METARELOP" '
    . 'and @role!="ADDOP" and @role!="MULOP" and @role!="BINOP" '
    . 'and @role!="OPEN" and @role!="CLOSE"'
    . '])]',
  replace => sub {
    my ($document, $node) = @_;
    my $replacement = $node->cloneNode(0);
    my $content     = $node->textContent;
    $replacement->appendText($content);
    $replacement->setName('ltx:XMTok');
    $document->getNode->appendChild($replacement);
  });

DefMacro('\hiderel{}', "#1");    # Just ignore, for now...

DefMathI('\to', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW??? a bit more explicitly relation-like?

# TeX's ligatures handled by rewrite regexps.
# Note: applied in reverse order of definition (latest defined applied first!)
# Note also, these are only applied in text content, not in attributes!
DefPrimitive('\@@endash', sub { Box("\x{2013}", undef, undef, T_CS('\@@endash')); });
DefPrimitive('\@@emdash', sub { Box("\x{2014}", undef, undef, T_CS('\@@emdash')); });

# Font test for ligatures: true unless the font's family is 'typewriter'.
#   $font : an object answering getFamily.
sub nonTypewriter {
  my ($font) = @_;
  my $family = $font->getFamily;
  return $family ne 'typewriter'; }

# Font test for ligatures: true when the font's family is not 'typewriter'
# AND its encoding is OT1 or T1 (a missing/false encoding counts as OT1).
#   $font : an object answering getFamily and getEncoding.
# The encoding is only consulted when the family test has succeeded.
sub nonTypewriterT1 {
  my ($font) = @_;
  my $not_typewriter = ($font->getFamily ne 'typewriter');
  return $not_typewriter unless $not_typewriter;
  return (($font->getEncoding || 'OT1') =~ /^(OT1|T1)$/); }

# EN DASH (NOTE: With digits before & aft => \N{FIGURE DASH})
DefLigature(qr{--},  "\x{2013}", fontTest => \&nonTypewriter);    # EN dash
DefLigature(qr{---}, "\x{2014}", fontTest => \&nonTypewriter);    # EM dash

# Ligatures for doubled single left & right quotes to convert to double quotes
# [should ligatures be part of a font, in the first place? (it is in TeX!)]
DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}", fontTest => \&nonTypewriterT1);   # double left quote
DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}", fontTest => \&nonTypewriterT1);   # double right quote
DefLigature(qr{\?\x{2018}},       UTF(0xBF),  fontTest => \&nonTypewriterT1);   # ? backquote
DefLigature(qr{!\x{2018}},        UTF(0xA1),  fontTest => \&nonTypewriterT1);   # ! backquote
# These ligatures are also handled by TeX.
# However, it appears that decent modern fonts in modern browsers handle these at that level.
# So it's likely not worth doing it at the conversion level, possibly adversely affecting search.
# DefLigature(qr{ff},               "\x{FB00}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fi},               "\x{FB01}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fl},               "\x{FB02}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffi},              "\x{FB03}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffl},              "\x{FB04}", fontTest => \&nonTypewriterT1);

DefConstructor('\TeX',
  "<ltx:text class='ltx_TeX_logo' cssstyle='letter-spacing:-0.2em; margin-right:0.2em'>"
    . "T"
    . "<ltx:text cssstyle='font-variant:small-caps;font-size:120%;' yoffset='-0.2ex'>e</ltx:text>"
    . "X"
    . "</ltx:text>",
  sizer => sub { (Dimension('1.9em'), Dimension('1.6ex'), Dimension('0.5ex')); });
DefPrimitiveI('\i', undef, "\x{0131}");    # LATIN SMALL LETTER DOTLESS I
DefPrimitiveI('\j', undef, "\x{0237}");

DefConstructor('\buildrel Until:\over {}',
  "<ltx:XMApp role='RELOP'>"
    . "<ltx:XMTok role='SUPERSCRIPTOP' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#2</ltx:XMArg>"
    . "<ltx:XMArg>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  properties => { scriptpos => sub { "mid" . $_[0]->getScriptLevel; } });

#**********************************************************************
# LaTeX Hook
#**********************************************************************
# This is used for plain TeX, but needs to be undone for LaTeX (or...)!
RelaxNGSchema("LaTeXML");
Tag('ltx:section', autoClose => 1);
Tag('ltx:document', autoClose => 1, autoOpen => 1);
Tag('ltx:document', afterOpen => sub {
    my ($document, $root) = @_;
    if (my $font = $document->getNodeFont($root)) {
      if (my $bg = $font->getBackground) {
        if ($bg ne 'white') {
          $document->setAttribute($root, backgroundcolor => $bg); } } } });

# No, \documentclass isn't really a primitive -- It's not even TeX!
# But we define a number of stubs here that will automatically load
# the LaTeX pool (or AmSTeX.pool) (which will presumably redefine them), and then
# stuff the token back to be reexecuted.
foreach my $ltxtrigger (qw(documentclass
  newcommand renewcommand newenvironment renewenvironment
  NeedsTeXFormat ProvidesFile
  ProvidesPackage RequirePackage PassOptionsToPackage
  makeatletter makeatother
  typeout begin listfiles nofiles)) {
  DefAutoload($ltxtrigger, 'LaTeX.pool.ltxml'); }

foreach my $ltx3trigger (qw(ExplSyntaxOn
  ProvidesExplClass ProvidesExplPackage)) {
  # DG: note that these auto-loads are not perfect --
  #     if they are triggered with a raw .sty file for example,
  #     the expl3 support will "expire" at the end of the current scope,
  #     and e.g. \ExplSyntaxOn will once again be undefined.
  DefAutoload($ltx3trigger, 'expl3.pool.ltxml'); }

# Seemingly good candidates to trigger AmSTeX ??
foreach my $amstrigger (qw(BlackBoxes NoBlackBoxes
  TagsAsMath TagsAsText TagsOnLeft TagsOnRight CenteredTagsOnSplits TopOrBottomTagsOnSplits
  LimitsOnInts NoLimitsOnInts LimitsOnNames NoLimitsOnNames LimitsOnSums NoLimitsOnSums
  loadbold loadeufb loadeufm loadeurb loadeurm loadeusb
  loadeusm loadmathfont loadmsam loadmsbm)) {
  DefAutoload($amstrigger, 'AmSTeX.pool.ltxml'); }

# Darn; we need to be even more clever, since we need to simulate an amstex command, as well.
# For example \documentstyle[...]{amsppt} must switch to AMSTeX mode, _NOT_ LaTeX mode!!!!
# Load the pool implied by the class (AmSTeX for exactly "amsppt", LaTeX for
# anything else), then push the whole \documentstyle[options]{class} invocation
# back so that it is expanded again after the pool has been loaded.
DefMacro('\documentstyle OptionalSemiverbatim SkipSpaces Semiverbatim', sub {
    my ($gullet, $options, $class) = @_;
    my $pool = (ToString($class) =~ /^amsppt$/ ? "AmSTeX" : "LaTeX");
    LoadPool($pool);
    # The bracketed option list is only re-emitted when options were given.
    my @bracketed = ($options ? (T_OTHER('['), $options->unlist, T_OTHER(']')) : ());
    (T_CS('\\documentstyle'),
      @bracketed,
      T_BEGIN, $class->unlist, T_END); });

# Technically should be in LaTeX.pool, but we try to maintain the bookkeeping from the very start,
# in order to avoid partially defined behavior when --preload directives are mixed with \usepackage{} loads
DefMacro('\@pushfilename', '\xdef\@currnamestack{{\@currname}{\@currext}{\the\catcode`\@}\@currnamestack}');
DefMacro('\@popfilename', '\expandafter\@p@pfilename\@currnamestack\@nil');
DefMacro('\@p@pfilename {}{}{} Until:\@nil',
  '\gdef\@currname{#1}%
    \gdef\@currext{#2}%
    \catcode`\@#3\relax
    \gdef\@currnamestack{#4}');
DefMacroI(T_CS('\@currnamestack'), undef, Tokens());
Let('\@currname', '\@empty');
Let('\@currext',  '\@empty');

#**********************************************************************
# LaTeXML Specific.
# Support for Declarations & Presentation/Semantic Duality
#**********************************************************************

#======================================================================
# Normally definitions disappear; the macros are expanded or have their expected effect.
# But in a few cases (eg tabular column definitions, or LaTeX \Declarexxxx)
# they will need declarations in the (La)TeX preamble to allow (La)TeX to process snippets
# (eg. math) in order to create images.
# Returning a call to this utility from Primitives will add a preamble Processing Instruction
# Digest a \lx@add@Preamble@PI wrapped around an invocation of $cs on @args.
#   $cs   : a control sequence, either as a token (any reference is used as is)
#           or as a string, which is converted with T_CS.
#   @args : the arguments passed on to Invocation along with $cs.
# Returns whatever Digest returns for the wrapped invocation.
sub AddToPreamble {
  my ($cs, @args) = @_;
  my $token   = (ref $cs ? $cs : T_CS($cs));
  my @payload = Invocation($token, @args);
  return Digest(Invocation(T_CS('\lx@add@Preamble@PI'), @payload)); }

DefConstructor('\lx@add@Preamble@PI Undigested',
  "<?latexml preamble='#1'?>");

#======================================================================
# Support for constructing mathematical expressions

# Common XMath pattern for assigning attributes from Whatsit properties.
our $XMath_attributes =
  " role='#role' name='#name' meaning='#meaning' omcd='#omcd'"
  . " width='#width' height='#height' xoffset='#xoffset' yoffset='#yoffset'"
  . " lpadding='#lpadding' rpadding='#rpadding'";

# afterDigest helper: copy the key/value pairs found in the whatsit's first
# argument onto the whatsit as properties.  Does nothing when that argument
# is absent (false).  Always returns nothing.
#   $stomach : unused here; present because of the hook calling convention.
#   $whatsit : object answering getArg and setProperties.
sub XMath_copy_keyvals {
  my ($stomach, $whatsit) = @_;
  if (my $keyvals = $whatsit->getArg(1)) {
    $whatsit->setProperties($keyvals->getPairs); }
  return; }

# Build an ltx:XMApp, application of function/operator to arguments
# first piece of (TeX) argument is expected to be the operator
# Usually used on content side, but at least the arguments should be properly encapsulated:
# They should build individual subtrees; use ltx:XMArg, ltx:XMWrap ... if needed
DefConstructor('\lx@apply OptionalKeyVals:XMath {}{}',
  "<ltx:XMApp $XMath_attributes>#2#3</ltx:XMApp>",
  reversion   => '#2#3',
  afterDigest => sub { XMath_copy_keyvals(@_); });

# Build an ltx:XMTok, a mathematical symbol, with given attributes
# the argument should create text to be the content of the token.
DefConstructor('\lx@symbol OptionalKeyVals:XMath {}',
  "<ltx:XMTok $XMath_attributes>#2</ltx:XMTok>",
  reversion   => '#2',
  afterDigest => sub {
    $_[1]->setFont($_[1]->getArg(2)->getFont);
    XMath_copy_keyvals(@_); });

# Wrap the contents in an ltx:XMWrap, to stand as a single subtree & providing attributes
# The ltx:XMWrap may be collapsed, later, by parsing
DefConstructor('\lx@wrap OptionalKeyVals:XMath {}',
  "<ltx:XMWrap $XMath_attributes>#2</ltx:XMWrap>",
  reversion   => '#2',
  afterDigest => sub { XMath_copy_keyvals(@_); });

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

# These two accept key operator_meaning, operator_omcd to give a meaning to the sub/superscript
# NOTE (BUG): We SHOULD nest paired sub/superscripts, but avoid conflicting double scripts
# To do that we need to sniff at the base, whether it already contains scripts.
# However, IsScript isn't quite sufficient if the scripts are hidden within Whatsits, duals, etc.
# Currently, LaTeXML manages to deal with the double scripts anyway;
# The reversion ALWAYS wraps the base (which will render non-optimally in images but avoid Errors)
# \lx@superscript[keyvals]{base}{script}
# Builds an ltx:XMApp whose operator is an XMTok with role SUPERSCRIPTOP,
# followed by the base (#2) and the script (#3, digested InScriptStyle).
#  * afterDigest copies the keyvals onto the whatsit as properties, so keys
#    such as operator_meaning / operator_omcd reach the template.
#  * properties computes `bump' (1 when IsScript reports the base as a
#    SUPERSCRIPT, else 0) and scriptpos ("post" . (script level + bump)).
#  * reversion gives just the reverted base when the script is empty,
#    otherwise {base}^script.
#  * sizer delegates to scriptSizer, passing the script (arg 3) and base (arg 2).
DefConstructor('\lx@superscript OptionalKeyVals:XMath {} InScriptStyle',
  "<ltx:XMApp $XMath_attributes>"
    . "<ltx:XMTok role='SUPERSCRIPTOP' meaning='#operator_meaning' omcd='#operator_omcd' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#2</ltx:XMArg>"
    . "<ltx:XMArg rule='Superscript'>#3</ltx:XMArg>"
    . "</ltx:XMApp>",
  afterDigest => sub { XMath_copy_keyvals(@_); },
  reversion   => sub {
    my ($whatsit, $kv, $base, $sup) = @_;
    # The stored `bump' property is fetched but immediately overridden below,
    # so the braces around the base are currently unconditional.
    my $bump = $whatsit->getProperty('bump');
    $bump = 1;    # For now: ALWAYS {} wrap base in the reversion!
    (IsEmpty($sup)
      ? Revert($base)
      : (($bump ? (T_BEGIN, Revert($base), T_END) : Revert($base)), T_SUPER, revertScript($sup))); },
  properties => sub {
    my ($stomach, $kv, $base, $script) = @_;
    # $basetype is false unless IsScript recognizes the base as a script;
    # its element [1] is compared against the script kind.
    my $basetype = IsScript($base);
    my $bump     = ($basetype && ($$basetype[1] eq 'SUPERSCRIPT') ? 1 : 0);
    # $_[0] is the same object as $stomach.
    (scriptpos => "post" . ($_[0]->getScriptLevel + $bump),
      bump => $bump); },
  sizer => sub { scriptSizer($_[0]->getArg(3), $_[0]->getArg(2), undef, 'SUPERSCRIPT', 'post'); });

# \lx@subscript[keyvals]{base}{script}
# Builds an ltx:XMApp whose operator is an XMTok with role SUBSCRIPTOP,
# followed by the base (#2) and the script (#3, digested InScriptStyle).
#  * afterDigest copies the keyvals onto the whatsit as properties, so keys
#    such as operator_meaning / operator_omcd reach the template.
#  * properties computes `bump' (1 when IsScript reports the base as a
#    SUBSCRIPT, else 0) and scriptpos ("post" . (script level + bump)).
#  * reversion gives just the reverted base when the script is empty,
#    otherwise {base}_script.
#  * sizer delegates to scriptSizer, passing the script (arg 3) and base (arg 2).
DefConstructor('\lx@subscript OptionalKeyVals:XMath {} InScriptStyle',
  "<ltx:XMApp $XMath_attributes>"
    . "<ltx:XMTok role='SUBSCRIPTOP' meaning='#operator_meaning' omcd='#operator_omcd' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#2</ltx:XMArg>"
    . "<ltx:XMArg rule='Subscript'>#3</ltx:XMArg>"
    . "</ltx:XMApp>",
  afterDigest => sub { XMath_copy_keyvals(@_); },
  reversion   => sub {
    my ($whatsit, $kv, $base, $sub) = @_;
    # The stored `bump' property is fetched but immediately overridden below,
    # so the braces around the base are currently unconditional.
    my $bump = $whatsit->getProperty('bump');
    $bump = 1;    # For now: ALWAYS {} wrap base in the reversion!
    (IsEmpty($sub)
      ? Revert($base)
      : (($bump ? (T_BEGIN, Revert($base), T_END) : Revert($base)), T_SUB, revertScript($sub))); },
  properties => sub {
    my ($stomach, $kv, $base, $script) = @_;
    # $basetype is false unless IsScript recognizes the base as a script;
    # its element [1] is compared against the script kind.
    my $basetype = IsScript($base);
    my $bump     = ($basetype && ($$basetype[1] eq 'SUBSCRIPT') ? 1 : 0);
    # $_[0] is the same object as $stomach.
    (scriptpos => "post" . ($_[0]->getScriptLevel + $bump),
      bump => $bump); },
  sizer => sub { scriptSizer($_[0]->getArg(3), $_[0]->getArg(2), undef, 'SUBSCRIPT', 'post'); });

# Ignore $kv for the moment?????
# Build the token list  \lx@subscript <keyvals> {base} {script}.
#   $kv     : key/values, rendered through I_keyvals.
#   $base   : tokens for the base.
#   $script : tokens for the subscript.
sub I_subscript {
  my ($kv, $base, $script) = @_;
  my @invocation = (T_CS('\lx@subscript'), I_keyvals($kv),
    T_BEGIN, $base,   T_END,
    T_BEGIN, $script, T_END);
  return Tokens(@invocation); }

# Build the token list  \lx@superscript <keyvals> {base} {script}.
#   $kv     : key/values, rendered through I_keyvals.
#   $base   : tokens for the base.
#   $script : tokens for the superscript.
sub I_superscript {
  my ($kv, $base, $script) = @_;
  my @invocation = (T_CS('\lx@superscript'), I_keyvals($kv),
    T_BEGIN, $base,   T_END,
    T_BEGIN, $script, T_END);
  return Tokens(@invocation); }

# Superscript meaning power
DefMacro('\lx@power{}{}', '\lx@superscript[operator_meaning=power]{#1}{#2}');
# Superscript meaning functional (or applicative) power; iterated function/operator application
DefMacro('\lx@functionalpower{}{}', '\lx@superscript[operator_meaning=functional-power]{#1}{#2}');

# These to be used in presentation side
DefMathI('\lx@ApplyFunction', undef, "\x{2061}", reversion => '', name => '', role => 'APPLYOP');
DefMathI('\lx@InvisibleTimes', undef, "\x{2062}", reversion => '', name => '', meaning => 'times', role => 'MULOP');
DefMathI('\lx@InvisibleComma', undef, "\x{2063}", reversion => '', name => '', role => 'PUNCT');
DefMathI('\lx@InvisiblePlus', undef, "\x{2064}", reversion => '', name => '', meaning => 'plus', role => 'ADDOP');

DefConstructor('\lx@kludged{}',
  "?#isMath(<ltx:XMWrap rule='kludge'>#1</ltx:XMWrap>)(#1)",
  reversion => '#1');
# \lx@padded[lpadding]{rpadding}{body}
# Constructs just the body (#3); afterwards the last child element of the
# current node receives lpadding (#1) and/or rpadding (#2) attributes, each
# only when the corresponding argument is true.
DefConstructor('\lx@padded[MuDimension]{MuDimension}{}',
  '#3',
  afterConstruct => sub {
    my ($document, $whatsit) = @_;
    my $target = $document->getLastChildElement($document->getNode);
    # For an XMDual, the attributes go on its second child node instead.
    $target = ($target->childNodes)[1]
      if $document->getNodeQName($target) eq 'ltx:XMDual';
    if (my $left = $whatsit->getArg(1)) {
      $document->setAttribute($target, lpadding => $left); }
    if (my $right = $whatsit->getArg(2)) {
      $document->setAttribute($target, rpadding => $right); } },
  reversion => '#3');

#======================================================================
# Building XMDuals for Mathematical Parallel markup
# Used when the content and presentation forms have different structure.

DefKeyVal('XMath', 'reversion',              'UndigestedDefKey');
DefKeyVal('XMath', 'content_reversion',      'UndigestedDefKey');
DefKeyVal('XMath', 'presentation_reversion', 'UndigestedDefKey');
DefConstructor('\lx@dual OptionalKeyVals:XMath {}{}',
  "<ltx:XMDual $XMath_attributes>#2<ltx:XMWrap>#3</ltx:XMWrap></ltx:XMDual>",
  beforeDigest => sub {
    PushValue(PENDING_DUAL_XMARGS => {});
    return; },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $kv     = $whatsit->getArg(1);
    my $xmargs = PopValue('PENDING_DUAL_XMARGS');            # Really SHOULD be a hash
    $whatsit->setProperties(%$xmargs)      if $xmargs;       # Hopefully no name class with XM<digits>
    $whatsit->setProperties($kv->getPairs) if $kv;
    my %props = $whatsit->getProperties;
    my $cr    = $props{content_reversion};
    my $pr    = $props{presentation_reversion};
    my $r     = ToString($props{revert_as}) || 'content';    # ?????

    if (!defined $props{reversion}) {
      $whatsit->setProperty(reversion => sub {
          my ($self, $kvs, $c, $p) = @_;
          ($r eq 'content' ? $cr || Revert($c)
            : ($r eq 'presentation' ? $pr || Revert($p)
              : ($r eq 'dual'
                ? Tokens(T_CS('\lx@dual'), I_keyvals($kvs),
                  T_BEGIN, ($cr || Revert($c)), T_END,
                  T_BEGIN, ($pr || Revert($p)), T_END)
                : (($LaTeXML::DUAL_BRANCH || '') eq 'presentation'    # Context dependent reversion
                  ? $pr || Revert($p)
                  : $cr || Revert($c))))); }); }
    return; },

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

        my $key = $n->getAttribute('_xmkey');
        if (!$ids{$key}) {
          GenerateID($document, $n, undef, '');    # Generate id if none already.
          $ids{$key} = $n->getAttribute('xml:id'); } } }
    foreach my $r (@refs) {                        # Now fill in the references
      $document->setAttribute($r, idref => $ids{ $r->getAttribute('_xmkey') });
      $r->removeAttribute('_xmkey'); }
});

# Construction aids
# Build an XMDual (via \lx@dual) given the content & presentation forms.
# These forms are provided as Tokens, invoking the appropriate constructor macros,
# and referring to any arguments using #1, #2.... (see T_XMArg for syntactic sugar)
# The arguments (if any) are given separately; within the content & presentation
# they are replaced by \lx@xmref and \lx@xmarg, appropriately,
# so that they will be linked/shared in the XML tree.
# The keyvals argument is a hash containing any properties of the construct,
# along with reversion, content_reversion  & presentation_reversion, which are
# substituted for arguments as well.
# Build an invocation of \lx@dual from content and presentation forms.
#   $keyvals      : hash ref of properties (or false for none); values that are
#                   plain strings are tokenized, and the values of reversion,
#                   content_reversion and presentation_reversion additionally
#                   get their parameters substituted.
#   $content      : content form, as tokens or as a string to be tokenized.
#   $presentation : presentation form, likewise; it is wrapped via I_wrap.
#   @args         : the arguments referred to by #1, #2, ... in the forms.
# Each argument gets a fresh id from getXMArgID; the presentation form sees it
# as \lx@xmarg{id}{arg}, the content form as \lx@xmref{id}, and the reversion
# keys as a parameter token carrying the id.
sub I_dual {
  my ($keyvals, $content, $presentation, @args) = @_;
  if ($content && !ref $content) {
    $content = TokenizeInternal($content); }
  if ($presentation && !ref $presentation) {
    $presentation = TokenizeInternal($presentation); }
  my (@reversion_args, @presentation_args, @content_args);
  for my $arg (@args) {
    my $xmid = LaTeXML::Package::getXMArgID();
    push(@reversion_args,    Tokens(I_arg(ToString($xmid))));
    push(@presentation_args, Invocation(T_CS('\lx@xmarg'), $xmid, $arg));
    push(@content_args,      Invocation(T_CS('\lx@xmref'), $xmid)); }
  my $optional = undef;
  if ($keyvals) {
    # Assemble  key={value},key={value},...  in the hash's iteration order.
    my @pieces = ();
    while (my ($name, $setting) = each %$keyvals) {
      if ($setting && !ref $setting) {
        $setting = TokenizeInternal($setting); }
      if ($name =~ /^(?:presentation_|content_|)reversion$/) {
        $setting = $setting->substituteParameters(@reversion_args); }
      if (@pieces) {
        push(@pieces, T_OTHER(',')); }
      push(@pieces, T_OTHER($name), T_OTHER('='), T_BEGIN, $setting, T_END); }
    $optional = Tokens(@pieces); }
  return
    Invocation(T_CS('\lx@dual'), $optional,
    $content->substituteParameters(@content_args),
    I_wrap({}, $presentation->substituteParameters(@presentation_args))); }

# A little helper to shorten things up a bit; simply generates #1 (or whatever)
sub I_arg {    # unconditionally create an arg token
  # Blesses a two element array [stringified argument, CC_ARG] directly
  # into LaTeXML::Core::Token.
  my ($number) = @_;
  my $token = ["$number", CC_ARG];
  return bless $token, 'LaTeXML::Core::Token'; }

# Build the token list  \lx@xmarg{id}{arg}.
#   $id  : used as is when it is a reference, otherwise turned into a
#          single T_OTHER token.
#   $arg : the tokens of the argument itself.
sub I_xmarg {
  my ($id, $arg) = @_;
  my @idtokens = (ref $id ? $id : T_OTHER($id));
  return Tokens(T_CS('\lx@xmarg'),
    T_BEGIN, @idtokens, T_END,
    T_BEGIN, $arg,      T_END); }

# Build the token list  \lx@xmref{id}.
#   $id : used as is when it is a reference, otherwise turned into a
#         single T_OTHER token.
sub I_xmref {
  my ($id) = @_;
  my @idtokens = (ref $id ? $id : T_OTHER($id));
  return Tokens(T_CS('\lx@xmref'), T_BEGIN, @idtokens, T_END); }

#----------------------------------------------------------------------
# This group should be renamed to \lx@somethings and deprecated
# NOTE: work through this systematically!
DefMacro('\FCN{}',            '\lx@wrap[role=FUNCTION]{#1}');
DefMacro('\ROLE{}{}',         '\lx@wrap[role={#1}]{#2}');
DefMacro('\@SYMBOL{}',        '\lx@wrap[role=ID]{#1}');
DefMacro('\@CSYMBOL{}',       '\lx@symbol[meaning={#1}]{}');
DefMacro('\@APPLY{}',         '\lx@apply[]{#1}{}');                             # Sorta broken?
DefMacro('\@MAYBEAPPLY{}{}',  '\ifx.#2.#1\else\lx@apply{#1}{#2}\fi');
DefMacro('\@WRAP{}',          '\lx@wrap[]{#1}');
DefMacro('\@TOKEN{}',         '\lx@symbol[name={#1}]{}');
DefMacro('\@SUPERSCRIPT{}{}', '\ifx.#2.#1\else\lx@superscript[]{#1}{#2}\fi');
DefMacro('\@SUBSCRIPT{}{}',   '\ifx.#2.#1\else\lx@subscript[]{#1}{#2}\fi');
Let('\@PADDED',         '\lx@padded');
Let('\DUAL',            '\lx@dual');
Let('\@XMArg',          '\lx@xmarg');
Let('\@XMRef',          '\lx@xmref');
Let('\@APPLYFUNCTION',  '\lx@ApplyFunction');
Let('\@INVISIBLETIMES', '\lx@InvisibleTimes');
Let('\@INVISIBLECOMMA', '\lx@InvisibleComma');
Let('\@INVISIBLEPLUS',  '\lx@InvisiblePlus');

# End of stuff to be deprecated.
#----------------------------------------------------------------------

#======================================================================

# We OUGHT to be able to do this using \llap,\rlap,\hss...
DefMacro('\lx@tweaked{}{}', '\ifmmode\lx@math@tweaked{#1}{#2}\else\lx@text@tweaked{#1}{#2}\fi');
DefConstructor('\lx@math@tweaked RequiredKeyVals {}',
  "<ltx:XMWrap $XMath_attributes>#2</ltx:XMWrap>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my ($kv,      $body)    = $whatsit->getArgs;
    XMath_copy_keyvals($stomach, $whatsit);
    $whatsit->setFont($body->getFont);
    return; },
  reversion => '#2');

# \lx@text@tweaked{keyvals}{body}
# Text-mode branch of \lx@tweaked: wraps the body (#2) in an ltx:text carrying
# the given key/values as attributes, and marked _noautoclose.
DefConstructor('\lx@text@tweaked RequiredKeyVals {}',
  "<ltx:text _noautoclose='1' %&GetKeyVals(#1)>#2</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my ($kv) = $whatsit->getArgs;
    # Also record the key/values as properties of the whatsit.
    $whatsit->setProperties($kv->getPairs);
    # Return nothing explicitly, as the math-mode sibling does, so that the
    # value of setProperties can never be mistaken for a result of the hook.
    return; });

DefMacro('\lx@nounicode {}', '\ifmmode\lx@math@nounicode#1\else\lx@text@nounicode#1\fi');

DefConstructor('\lx@framed[]{}',
  "<ltx:text framed='#frame' _noautoclose='1'>#2</ltx:text>",
  properties => { frame => sub { ToString($_[1] || 'rectangle'); } });
DefConstructor('\lx@hflipped{}',
  "<ltx:text class='ltx_hflipped' _noautoclose='1'>#1</ltx:text>");

# Warn that the symbol $cs has no Unicode equivalent, but only the first time
# it is seen: the 'missing_unicode' mapping remembers the names already
# reported.  Always returns nothing.
#   $cs : the symbol, as a token or string; it is stringified with ToString.
sub reportNoUnicode {
  my ($cs) = @_;
  $cs = ToString($cs);
  return if LookupMapping('missing_unicode' => $cs);
  Warn('expected', 'unicode', $cs,
    "There's no Unicode equivalent for the symbol '$cs'");
  AssignMapping('missing_unicode' => $cs => 1);
  return; }
# Slightly contrived so that this can be used within a DefMath
# and still declare & get the semantic properties.
DefPrimitive('\lx@math@nounicode DefToken', sub {
    my ($stomach, $cs) = @_;
    reportNoUnicode($cs);
    Box(ToString($cs), undef, undef, $cs, class => 'ltx_nounicode'); });

# \lx@text@nounicode<token>
# Text-mode fallback for a symbol that has no Unicode equivalent: the token
# itself becomes the content of an ltx:text with class ltx_nounicode, and the
# missing symbol is reported (once per symbol) through reportNoUnicode.
# The attribute is spelled _noautoclose, as on the other ltx:text
# constructors defined in this file.
DefConstructor('\lx@text@nounicode DefToken',
  "<ltx:text _noautoclose='1' class='ltx_nounicode'>#1</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # Whatsit arguments are numbered from 1, so the token is argument 1.
    reportNoUnicode(ToString($whatsit->getArg(1)));
    return; });

DefConstructor('\@ERROR{}{}', "<ltx:ERROR class='ltx_#1'>#2</ltx:ERROR>");

#**********************************************************************
DefConstructor('\WildCard[]', "<_WildCard_>#1</_WildCard_>");
DefConstructorI('\WildCardA', undef, "<_WildCard_/>");
DefConstructorI('\WildCardB', undef, "<_WildCard_/>");
DefConstructorI('\WildCardC', undef, "<_WildCard_/>");
#**********************************************************************
# After all other rewrites have acted, a little cleanup

DefRewrite(xpath => 'descendant-or-self::ltx:XMWrap[count(child::*)=1]',
  replace => sub { my ($document, $wrap) = @_;
    if (my $node = $document->getFirstChildElement($wrap)) {
      # Copy attributes but NOT internal ones,
      # NOR xml:id, else we get clashes
      foreach my $attribute ($wrap->attributes) {
        if ($attribute->nodeType == XML_ATTRIBUTE_NODE) {
          my $attr = $document->getNodeQName($attribute);
          $document->setAttribute($node, $attr => $attribute->getValue)
            unless ($attr eq 'xml:id') || $attr =~ /^_/;
          if    ($attr =~ /^_/) { }
          elsif ($attr eq 'xml:id') {
            my $id = $attribute->getValue;
            if (my $previd = $node->getAttribute('xml:id')) {    # Keep original id
                  # but swap any references to the one on the wrapper!
              foreach my $ref ($document->findnodes("//*[\@idref='$id']")) {
                $ref->setAttribute(idref => $previd); }
              $wrap->removeAttribute('xml"id');
              $document->unRecordID($id); }
            else {
              $wrap->removeAttribute('xml:id');
              $document->unRecordID($id);
              $document->setAttribute($node, 'xml:id' => $id); } }

lib/LaTeXML/Package/TeX.pool.ltxml  view on Meta::CPAN

  my ($document, $node, $align, $class) = @_;
  my $model = $document->getModel;
  my $qname = $model->getNodeQName($node);
  if    ($qname eq 'ltx:tag') { }                                    # HACK
  elsif ($align && $document->canHaveAttribute($qname, 'align')) {
    $node->setAttribute(align => $align); }
  elsif ($class && $document->canHaveAttribute($qname, 'class')) {
    $document->addClass($node, $class); }
  return; }

#======================================================================
# A random collection of Tokens utility functions.
# [probably should be exported from Tokens.pm ?]
# [maybe need to do some reorganization?]
# Since this is used for textual tokens, typically to split author lists,
# we don't split within braces or math
# Split $tokens into groups at every token that Equals one of @delims.
#   $tokens : an object answering unlist (or a false value for "nothing").
#   @delims : the delimiter tokens; they are dropped from the result.
# Returns a list of array references, one per group; there is always at least
# one (possibly empty) group.  Delimiters inside a brace group or between two
# math-shift tokens do not split: such material is copied through verbatim.
sub SplitTokens {
  my ($tokens, @delims) = @_;
  my @groups  = ();
  my @current = ();
  return ([@current]) unless $tokens;
  my @queue = $tokens->unlist;
  while (my $token = shift(@queue)) {
    if (grep { Equals($token, $_) } @delims) {
      # Delimiter: close off the current group and start a fresh one.
      push(@groups, [@current]);
      @current = ();
      next; }
    push(@current, $token);
    if ($token->defined_as(T_BEGIN)) {
      # Copy through to the matching close brace (or to the end of input).
      my $depth = 1;
      while ($depth && defined(my $inner = shift(@queue))) {
        my $cc = $inner->getCatcode;
        if    ($cc == CC_BEGIN) { $depth++; }
        elsif ($cc == CC_END)   { $depth--; }
        push(@current, $inner); } }
    elsif ($token->defined_as(T_MATH)) {
      # Copy through to the next math shift, inclusive (or to the end of input).
      while (defined(my $inner = shift(@queue))) {
        push(@current, $inner);
        last if $inner->getCatcode == CC_MATH; } } }
  return (@groups, [@current]); }

# Split $tokens at each \and (see SplitTokens) and wrap every resulting group
# as  $cs { group }.  Returns the flat list of all these tokens.
sub andSplit {
  my ($cs, $tokens) = @_;
  my @wrapped = ();
  foreach my $group (SplitTokens($tokens, T_CS('\and'))) {
    push(@wrapped, $cs, T_BEGIN, @$group, T_END); }
  return @wrapped; }

# Return the arguments unchanged when at least one of them is defined;
# otherwise return a single undef.
sub orNull {
  foreach my $value (@_) {
    return @_ if defined $value; }
  # Deliberately `return undef' rather than a bare return: callers in list
  # context get a one element list (undef), exactly as before.
  return undef; }

# Inverse operation
# Interleave @things with $conjunction and return the result as Tokens.
#   $conjunction : what to put between consecutive things.
#   @things      : the things to join.
# Returns an empty list when there is nothing to join.  Joining stops at the
# first false thing after the first one.
sub JoinTokens {
  my ($conjunction, @things) = @_;
  return () unless @things;
  my ($first, @rest) = @things;
  my @joined = ($first);
  foreach my $thing (@rest) {
    last unless $thing;
    push(@joined, $conjunction, $thing); }
  return Tokens(@joined); }

DefMacro('\dump', sub {
    Warn('unexpected', 'dump', $_[0], "Do not know how to \\dump yet, sorry"); });

#**********************************************************************
LoadPool('eTeX');      # unless.... ?
LoadPool('pdfTeX');    # unless.... ?
#**********************************************************************

1;



( run in 1.123 second using v1.01-cache-2.11-cpan-d8267643d1d )