view release on metacpan or search on metacpan
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX | #
# | Core TeX Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
use Unicode::Normalize;
use LaTeXML::Util::Pathname;
use List::Util qw(min max);
# NOTE that these define the namespaces we'll (probably) use
# along with the prefixes to be used in "code"
# The generated XML will use the prefixes defined by RegisterDocumentNamespace(...) (if ever)
# or those prefixes defined by the Schema (typically RelaxNGSchema(..)
# Register every code prefix together with its namespace URI, in a fixed order.
{
  my @code_namespaces = (
    [ltx   => "http://dlmf.nist.gov/LaTeXML"],
    [svg   => "http://www.w3.org/2000/svg"],
    [xlink => "http://www.w3.org/1999/xlink"],    # Needed for SVG
         # Not directly used, but let's stake out the ground
    [m     => "http://www.w3.org/1998/Math/MathML"],
    [xhtml => "http://www.w3.org/1999/xhtml"],
    # Namespace for arbitrary data attributes (mapped to data-xxx in html5)
    [data => "http://dlmf.nist.gov/LaTeXML/data"],
  );
  foreach my $entry (@code_namespaces) {
    my ($prefix, $uri) = @$entry;
    RegisterNamespace($prefix, $uri); }
}
DefMacroI("\\\@empty", undef, Tokens());
#======================================================================
# Core ID functionality.
#======================================================================
# DOCUMENTID is the ID of the document
# AND prefixes IDs on all other elements.
# Define \thedocument@ID from the DOCUMENTID value when one is set;
# otherwise make it an alias of \@empty.
{
  my $document_id = LookupValue('DOCUMENTID');
  if (!$document_id) {
    Let('\thedocument@ID', '\@empty'); }
  else {
    # Wrap in T_OTHER so funny chars don't screw up (no space!)
    DefMacroI('\thedocument@ID', undef, T_OTHER($document_id)); }
}
# Counter for XMArg IDs, scoped to the document, with ID prefix 'XM'.
NewCounter('@XMARG', 'document', idprefix => 'XM');
#======================================================================
# Pending resources are processed as soon as the ltx:document element opens.
Tag('ltx:document', afterOpen => \&ProcessPendingResources);
RequireResource('LaTeXML.css');
#======================================================================
# The default "initial context" for XML+RDFa specifies some default
# terms and prefixes, but no default vocabulary.
# Ought to have a default for @vocab, but settable?
# can we detect use of simple "term"s in attributes so we know whether we need @vocab?
# Ought to have a default set of prefixes from RDFa Core,
# but allow prefixes to be added.
# Probably ought to scan rdf attributes for all uses of prefixes,
# and include them in @prefix
# The following prefixes are listed in http://www.w3.org/2011/rdfa-context/rdfa-1.1
{
  # prefix => vocabulary URI, for the RDFa 1.1 initial context.
  my %context_uri_for = (
    "cc"      => "http://creativecommons.org/ns#",
    "ctag"    => "http://commontag.org/ns#",
    "dc"      => "http://purl.org/dc/terms/",
    "dcterms" => "http://purl.org/dc/terms/",
    "ical"    => "http://www.w3.org/2002/12/cal/icaltzd#",
    "foaf"    => "http://xmlns.com/foaf/0.1/",
    "gr"      => "http://purl.org/goodrelations/v1#",
    "grddl"   => "http://www.w3.org/2003/g/data-view#",
    "ma"      => "http://www.w3.org/ns/ma-ont#",
    "og"      => "http://ogp.me/ns#",
    "owl"     => "http://www.w3.org/2002/07/owl#",
    "rdf"     => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfa"    => "http://www.w3.org/ns/rdfa#",
    "rdfs"    => "http://www.w3.org/2000/01/rdf-schema#",
    "rev"     => "http://purl.org/stuff/rev#",
    "rif"     => "http://www.w3.org/2007/rif#",
    "rr"      => "http://www.w3.org/ns/r2rml#",
    "schema"  => "http://schema.org/",
    "sioc"    => "http://rdfs.org/sioc/ns#",
    "skos"    => "http://www.w3.org/2004/02/skos/core#",
    "skosxl"  => "http://www.w3.org/2008/05/skos-xl#",
    "v"       => "http://rdf.data-vocabulary.org/#",
    "vcard"   => "http://www.w3.org/2006/vcard/ns#",
    "void"    => "http://rdfs.org/ns/void#",
    "xhv"     => "http://www.w3.org/1999/xhtml/vocab#",
    "xml"     => "http://www.w3.org/XML/1998/namespace",
    "xsd"     => "http://www.w3.org/2001/XMLSchema#",
    "wdr"     => "http://www.w3.org/2007/05/powder#",
    "wdrs"    => "http://www.w3.org/2007/05/powder-s#",
  );
  # Record each pair in the 'RDFa_prefixes' mapping.
  while (my ($prefix, $uri) = each %context_uri_for) {
    AssignMapping('RDFa_prefixes', $prefix => $uri); }
}
#**********************************************************************
# CORE TeX; Built-in commands.
#**********************************************************************
#======================================================================
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
# This reads a braced tokens list, expanding as it goes,
# but expanding \the-like commands only once.
# 'Expanded' parameter: a balanced token list read via readBalanced(1, 0, 1);
# it reverts to the argument wrapped in braces.
DefParameterType('Expanded',
  sub {
    my ($gullet) = @_;
    return $gullet->readBalanced(1, 0, 1); },
  reversion => sub {
    my ($tokens) = @_;
    return (T_BEGIN, Revert($tokens), T_END); });
# This reads an expanded definition body,
# a braced tokens list, expanding as it goes,
# but expanding \the-like commands only once,
# and also packing # parameters
# 'DefExpanded' parameter: like 'Expanded', but read via readBalanced(1, 1, 1);
# it reverts to the argument wrapped in braces.
DefParameterType('DefExpanded',
  sub {
    my ($gullet) = @_;
    my $body = $gullet->readBalanced(1, 1, 1);
    return $body; },
  reversion => sub {
    my ($tokens) = @_;
    return (T_BEGIN, Revert($tokens), T_END); });
# Read a matching keyword, eg. Match:=
# Any extra values given to the reader are handed on to readMatch.
DefParameterType('Match', sub {
    my ($gullet, @expected) = @_;
    return $gullet->readMatch(@expected); });
# Read a keyword; eg. Keyword:to
# (like Match, but ignores catcodes)
DefParameterType('Keyword', sub {
    my ($gullet, @keywords) = @_;
    return $gullet->readKeyword(@keywords); });
# Read balanced material (?)
DefParameterType('Balanced', sub {
    my ($gullet) = @_;
    return $gullet->readBalanced; });
# Read a Semiverbatim argument; ie w/ most catcodes neutralized.
DefParameterType('Semiverbatim',
  sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  semiverbatim => 1,
  reversion    => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# Read a LaTeX-style optional argument (ie. in []), but the contents read as Semiverbatim.
DefParameterType('OptionalSemiverbatim',
  sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  semiverbatim => 1,
  optional     => 1,
  reversion    => sub {
    my ($arg) = @_;
    # A missing optional argument reverts to nothing at all.
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# 'Verbatim' parameter type: a braced argument read while semiverbatim mode is active.
# Be careful here: if % appears before the initial {, it's still a comment!
# Also, note that non-typewriter fonts will mess up some chars on digestion!
DefParameterType('Verbatim', sub {
my ($gullet) = @_;
# Read up to the opening brace BEFORE switching to semiverbatim.
$gullet->readUntil(T_BEGIN);
# NOTE(review): '%' and '\\' are passed to StartSemiverbatim, presumably as
# extra characters to neutralize -- confirm against StartSemiverbatim.
StartSemiverbatim('%', '\\');
my $arg = $gullet->readBalanced();
EndSemiverbatim();
return $arg; },
# The argument is digested inside its own group, with the typewriter family merged in.
beforeDigest => sub {
$_[0]->bgroup;
MergeFont(family => 'typewriter'); },
afterDigest => sub {
$_[0]->egroup; },
# Reverts to the argument wrapped in braces.
reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
# Read Verbatim, but allows expanding command sequences
# While the argument is read, a handful of escape commands are redefined
# so that they yield the plain character as a T_OTHER token.
DefParameterType('HyperVerbatim', sub {
my ($gullet) = @_;
# Read up to the opening brace BEFORE switching to semiverbatim.
$gullet->readUntil(T_BEGIN);
StartSemiverbatim('%');
# \%, \# and \& become the bare characters %, # and &.
DefMacroI('\%', undef, T_OTHER('%'), scope => 'local');
DefMacroI('\#', undef, T_OTHER('#'), scope => 'local');
DefMacroI('\&', undef, T_OTHER('&'), scope => 'local');
# \textunderscore and \_ both become a bare underscore.
DefMacroI('\textunderscore', undef, T_OTHER('_'), scope => 'local');
Let('\_', '\textunderscore');
# \~ and \textasciitilde both become a bare tilde, via \hyper@tilde.
DefMacroI('\hyper@tilde', undef, T_OTHER('~'), scope => 'local');
Let('\~', '\hyper@tilde');
Let('\textasciitilde', '\hyper@tilde');
# \\ is aliased to \@backslashchar.
Let('\\\\', '\@backslashchar');
# NOTE(review): the argument 1 presumably asks readBalanced to expand while
# reading (see the header comment) -- confirm against Gullet::readBalanced.
my $arg = $gullet->readBalanced(1);
EndSemiverbatim();
return $arg; },
# The argument is digested inside its own group, with the typewriter family merged in.
beforeDigest => sub {
$_[0]->bgroup;
MergeFont(family => 'typewriter'); },
afterDigest => sub {
$_[0]->egroup; },
# Reverts to the argument wrapped in braces.
reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
# Read an argument that will not be digested.
DefParameterType('Undigested',
  sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  undigested => 1,
  reversion  => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# Read a LaTeX-style optional argument (ie. in []), but it will not be digested.
DefParameterType('OptionalUndigested',
  sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  undigested => 1,
  optional   => 1,
  reversion  => sub {
    my ($arg) = @_;
    # A missing optional argument reverts to nothing at all.
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# Read a keyword value (KeyVals), that will not be digested.
DefParameterType('UndigestedKey',
  sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  undigested => 1);
# Same, but the argument that was read is additionally passed through packParameters.
DefParameterType('UndigestedDefKey',
  sub {
    my ($gullet) = @_;
    my $value = $gullet->readArg;
    return $value->packParameters; },
  undigested => 1);
# Read a token as used when defining it, ie. it may be enclosed in braces.
# Returns the token found (possibly undef); the parameter is never digested.
DefParameterType('DefToken', sub {
my ($gullet) = @_;
my $token = $gullet->readToken;
# As long as we are looking at an opening brace, unwrap one level of braces.
while ($token && ($token->getCatcode == CC_BEGIN)) {
my $cc;
# Read the rest of the braced group, dropping tokens whose catcode is
# false (0), CC_SPACE or CC_COMMENT.
my @toks = grep { ($cc = $$_[1]) && ($cc != CC_SPACE) && ($cc != CC_COMMENT); }
$gullet->readBalanced->unlist;
# The first remaining token is the candidate; the others are pushed back
# onto the input to be read again.
$token = shift(@toks);
$gullet->unread(@toks); }
$token; },
undigested => 1);
# Stub register for misdefinitions, to avoid a cascade of Errors.
# It takes no parameters and its initial value is an empty token list.
DefRegisterI('\lx@DUMMY@REGISTER', undef, Tokens());
# Read a variable, ie. a token (after expansion) that is a writable register.
DefParameterType('Variable', sub {
my ($gullet) = @_;
my $token = $gullet->readXToken;
my $defn = $token && LookupDefinition($token);
if ((defined $defn) && $defn->isRegister && !$defn->isReadonly) {
[$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; }
else {
DefRegisterI($token, undef, Dimension(0)); # Don't really know what KIND of variable!
if ($token && ($token->getCatcode == CC_CS)) {
Error('expected', '<variable>', $gullet,
"A <variable> was supposed to be here", "Got " . Stringify($token),
"Defining it now.");
DefRegisterI($token, undef, Dimension(0)); # Dimension, or what?
return [LookupDefinition($token)]; }
else {
Error('expected', '<variable>', $gullet,
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
afterDigest => sub {
$_[0]->egroup; },
reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
# Parameter types whose argument is digested inside its own group, with one
# extra setting merged into the current font by MergeFont().
# The six types differ only in
#   * the gullet method used to read the argument (readArg or readOptional),
#   * the font setting that is merged in, and
#   * whether the argument is optional, in which case it reverts to [...]
#     (or to nothing when absent) instead of {...}.
# They are therefore generated from a single table rather than being spelled
# out six times.
# NOTE: the various parameter features don't combine easily!!
# I need a ScriptStyleUntil for \root!!!
# I also need to redo fractions using these new types....
{
  my @styled_parameter_types = (
    # [ type name, reader method, [ font setting ], optional? ]
    ['TextStyle',         'readArg', [mathstyle => 'text'],         0],
    ['ScriptStyle',       'readArg', [mathstyle => 'script'],       0],
    ['ScriptscriptStyle', 'readArg', [mathstyle => 'scriptscript'], 0],
    # Perverse naming convention: not script style, but in the style of a script relative to current.
    ['InScriptStyle',         'readArg',      [scripted => 1], 0],
    ['OptionalInScriptStyle', 'readOptional', [scripted => 1], 1],
    ['InFractionStyle',       'readArg',      [fraction => 1], 0],
  );
  foreach my $entry (@styled_parameter_types) {
    # Fresh lexicals on every iteration, so each closure below captures its own values.
    my ($name, $reader, $font_setting, $is_optional) = @$entry;
    DefParameterType($name,
      sub { $_[0]->$reader; },
      beforeDigest => sub {
        $_[0]->bgroup;
        MergeFont(@$font_setting); },
      afterDigest => sub {
        $_[0]->egroup; },
      ($is_optional
        ? (optional => 1,
          reversion => sub { ($_[0] ? (T_OTHER('['), Revert($_[0]), T_OTHER(']')) : ()); })
        : (reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }))); }
}
#**********************************************************************
# LaTeX has a very particular notion of "Undefined",
# so let's get that squared away at the outset; it's useful for TeX, too!
# Naturally, it uses \csname to check, which ends up DEFINING the possibly undefined macro as \relax
# \@ifundefined{name}{if-undefined}{if-defined}
# The name is expanded and turned into a control sequence.  When that control
# sequence is not defined, it is given the meaning of \relax as a side effect.
DefMacro('\@ifundefined{}{}{}', sub {
    my ($gullet, $name, $if, $else) = @_;
    my $cs = T_CS('\\' . ToString(Expand($name)));
    return $else->unlist if IsDefined($cs);
    my $relax_meaning = $STATE->lookupMeaning(T_CS('\relax'));
    $STATE->assignMeaning($cs, $relax_meaning);    # Let w/o AfterAssign
    return $if->unlist; },
  locked => 1);
# Returns true when $token may be (re)defined: it has no meaning yet, or means
# the same as \relax, or 2.09 compatibility mode is on -- and, in every case,
# it is not named 'relax' and its name does not start with 'end'.
# Returns nothing when no token is given.
sub isDefinable {
  my ($token) = @_;
  return unless $token;
  my $meaning = LookupMeaning($token);
  (my $name = $token->getString) =~ s/^\\//;
  my $is_free = (!defined $meaning)
    || ($meaning eq LookupMeaning(T_CS('\relax')))
    || LookupValue('2.09_COMPATIBILITY');    # Let redefinitions happen in compatibility mode.
  return ($is_free && (($name ne 'relax') && ($name !~ /^end/))); }
#**********************************************************************
# Expandable Primitives
# See The TeXBook, Ch. 20, Definitions (also called Macros) pp. 212--215
#**********************************************************************
#======================================================================
# Should complain if we aren't actually evaluating an \if
# The following special cases are built-in to Definition
# These are declared without a test: they are special cases built in to Definition.
foreach my $builtin_conditional ('\else', '\or', '\fi', '\ifcase Number') {
  DefConditional($builtin_conditional, undef); }
# Compare $u with $v numerically, according to the relation token $rel.
# Operands that are references are replaced by their valueOf.
# '<' (or \@@<), '=' and '>' (or \@@>) are recognized; any other relation
# token signals an Error and returns nothing.
sub compare {
  my ($u, $rel, $v) = @_;
  my $lhs = (ref $u ? $u->valueOf : $u);
  my $rhs = (ref $v ? $v->valueOf : $v);
  return $lhs < $rhs  if $rel->equals(T_OTHER('<')) || $rel->equals(T_CS('\@@<'));
  return $lhs == $rhs if $rel->equals(T_OTHER('='));
  return $lhs > $rhs  if $rel->equals(T_OTHER('>')) || $rel->equals(T_CS('\@@>'));
  Error('expected', '<relationaltoken>', $STATE->getStomach->getGullet,
    "Expected a relational token for comparision", "Got " . Stringify($rel));
  return; }
# Numeric and dimension comparisons both delegate to compare().
DefConditional('\ifnum Number Token Number', sub {
    my ($gullet, $left, $relation, $right) = @_;
    return compare($left, $relation, $right); });
DefConditional('\ifdim Dimension Token Dimension', sub {
    my ($gullet, $left, $relation, $right) = @_;
    return compare($left, $relation, $right); });
DefConditional('\ifodd Number', sub {
    my ($gullet, $number) = @_;
    return $number->valueOf % 2; });
# NOTE: We don't KNOW if we're in vertical, horizontal or inner mode!!!!!!!
# so the three mode tests below are always false.
DefConditionalI('\ifvmode', undef, sub { return 0; });
DefConditionalI('\ifhmode', undef, sub { return 0; });
DefConditionalI('\ifinner', undef, sub { return 0; });
DefConditionalI('\ifmmode', undef, sub { return LookupValue('IN_MATH'); });
# Token argument for \if and \ifcat, read with readXToken(0, 1).
# If nothing could be read, an Error is signalled and \@empty is used instead.
DefParameterType('ExpandedIfToken', sub {
    my ($gullet) = @_;
    my $token = $gullet->readXToken(0, 1);
    return $token if $token;
    Error('expected', 'ExpandedIfToken', $gullet,
      "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty");
    return T_CS('\@empty'); });
# \if compares character codes, \ifcat compares category codes, \ifx uses XEquals.
DefConditional('\if ExpandedIfToken ExpandedIfToken', sub {
    my ($gullet, $first, $second) = @_;
    return $first->getCharcode == $second->getCharcode; });
DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub {
    my ($gullet, $first, $second) = @_;
    return $first->getCatcode == $second->getCatcode; });
DefConditional('\ifx Token Token', sub {
    my ($gullet, $first, $second) = @_;
    return XEquals($first, $second); });
# Kinda rough: We don't really keep track of modes as carefully as TeX does.
# We'll assume that a box is horizontal if there's anything at all,
# but it's not a vbox (!?!?)
# Classify the contents stored for box number $boxnum:
#   ''     when nothing is stored,
#   'vbox' when a Whatsit whose definition is that of \vbox is stored,
#   'hbox' for anything else.
sub classify_box {
  my ($boxnum) = @_;
  my $contents = LookupValue('box' . $boxnum->valueOf);
  return '' unless $contents;
  my $is_vbox = $contents->isa('LaTeXML::Core::Whatsit')
    && ($contents->getDefinition eq LookupDefinition(T_CS('\vbox')));
  return ($is_vbox ? 'vbox' : 'hbox'); }
# Box tests, all based on classify_box().
DefConditional('\ifvoid Number', sub {
    my ($gullet, $boxnum) = @_;
    return !classify_box($boxnum); });
DefConditional('\ifhbox Number', sub {
    my ($gullet, $boxnum) = @_;
    return classify_box($boxnum) eq 'hbox'; });
DefConditional('\ifvbox Number', sub {
    my ($gullet, $boxnum) = @_;
    return classify_box($boxnum) eq 'vbox'; });
# The constant conditionals.
DefConditionalI('\iftrue',  undef, sub { return 1; });
DefConditionalI('\iffalse', undef, sub { return 0; });
#======================================================================
# \relax leaves nothing at all behind after digestion
# (which seems most TeX like).
DefPrimitive('\relax', sub { return; });
### However, this keeps a box, so it can appear in UnTeX
### DefPrimitive('\relax',undef);
## But if you do that, you've got to watch out since it usually
### shouldn't be a box; See the isRelax code in handleScripts, below
# Internal token produced by Gullet in response to \dont_expand;
# Acts like \relax, but isn't equal to it.
DefPrimitiveI('\special_relax', undef, sub { return; });
# \number expands to the exploded value of the number that was read.
DefMacro('\number Number', sub {
    my ($gullet, $number) = @_;
    return Explode($number->valueOf); });
# Define it here (only approximately), since it's already useful.
Let('\protect', '\relax');
#======================================================================
# \romannumeral passes the value of the number read to roman().
DefMacro('\romannumeral Number', sub {
    my ($gullet, $number) = @_;
    return roman($number->valueOf); });
# Hmm... I wonder, should getString itself be dealing with escapechar?
# Returns the character whose code is held in \escapechar,
# or the empty string when that code lies outside 0..255.
sub escapechar {
  my $code = LookupRegister('\escapechar')->valueOf;
  return '' if ($code < 0) || ($code > 255);
  return chr($code); }
# 1) Knuth, The TeXBook, page 40, paragraph 1, Chapter 7: How TEX Reads What You Type.
# suggests all characters except spaces are returned in category code Other, i.e. Explode()
# A leading backslash in the token's string is replaced by the current escape character.
DefMacro('\string Token', sub {
    my ($gullet, $token) = @_;
    my $text = $token->toString;
    # The substitution (in the condition) runs first; the prefix is added only if it matched.
    $text = escapechar() . $text if $text =~ s/^\\//;
    return Explode($text); });
DefMacroI('\jobname', undef, Tokens());    # Set to the filename by initialization
DefMacroI('\fontname', undef, sub { return Explode("fontname not implemented"); });
# Descriptive phrases indexed by category code number.
# \meaning looks a token's catcode up in this table and prefixes the token's
# string with the phrase found (if any).
# Index 16 deliberately has no phrase (undef); indices 17 and 18 follow it.
our @CATCODE_MEANING = (
"the escape character", "begin-group character",
"end-group character", "math shift character",
"alignment tab character", "end-of-line character",
"macro parameter character", "superscript character",
"subscript character", "ignored character",
"blank space", "the letter",
"the character", "active character",
"comment character", "invalid character",
undef, "latexml marker character",
"macro parameter character");
# Not sure about this yet...
# NOTE: Lots of back-and-forth mangle with definition vs cs; don't do that!
#
# \meaning<token> expands to a textual description of the token's current
# meaning, as a list of exploded character tokens:
#   * 'undefined' when the token has no meaning;
#   * for a character token, its @CATCODE_MEANING phrase followed by the character;
#   * for a register, its address;
#   * for a macro, "<prefixes> macro:<parameter text>-><expansion>";
#   * for a \chardef or \mathchardef token, \char"<hex> or \mathchar"<hex>;
#   * 'select font ...' when font info is recorded for the token.
# Definitions implemented in Perl (primitives, conditionals, constructors)
# have no body to show, so they are described via their own token.
DefMacro('\meaning Token', sub {
    my ($gullet, $tok) = @_;
    my $meaning = 'undefined';
    if (my $definition = ($tok->defined_as(T_ALIGN) ? $tok : LookupMeaning($tok))) {
      my $type = ref $definition;
      $type =~ s/^LaTeXML:://;
      # Pre-step: We can't extract the bodies of definitions which are defined via Perl subroutines.
      # So do the next best thing -- represent them as their tokens.
      if ($type =~ /(primitive|conditional|constructor)$/i) {
        $definition = $definition->getCSorAlias;
        $type       = ref $definition;
        $type =~ s/^LaTeXML:://;
        if (my $fontinfo = LookupValue('fontinfo_' . ToString($definition))) {
          $meaning = 'select font ' . ($$fontinfo{fontname} || 'fontname');
          $meaning .= ' at ' . $$fontinfo{at} if $$fontinfo{at};
          $type = 'font'; } }
      # The actual tests start here
      if ($type =~ /token$/i) {
        my $cc         = $definition->getCatcode;
        my $char       = $definition->toString;
        my $meaning_cc = $CATCODE_MEANING[$cc] || '';
        $meaning_cc .= ' ' if $meaning_cc;    # append space separator if defined
        $meaning = $meaning_cc . $char; }
      elsif ($type =~ /register$/i) {
        $meaning = $definition->getAddress; }
      elsif ($type =~ /expandable$/i) {
        # short-circuit some troublesome discrepancies with TeX, which end up macros on LaTeXML's end, but \meaning expects as primitives in the CTAN ecosystem.
        my $cs = ToString($definition->getCSorAlias);
        # These exceptions could be extended further, as we add more .sty/.cls support
        return Explode($cs) if $cs =~ /^\\(?:(?:un)?expanded|detokenize)$/;
        my $expansion  = $definition->getExpansion;
        my $ltxps      = $definition->getParameters;
        my $arg_index  = 0;
        my @spec_parts = ();
        my @params     = $ltxps ? $ltxps->getParameters : ();
        my $p_trailer  = '';
        # Rebuild TeX's parameter text (#1, delimiters, ...) from the parameter specs.
        for my $param (@params) {
          my $p_spec = $$param{spec};
          if ($p_spec eq 'RequireBrace') {
            # tex's \meaning prints out the required braces for "\def\a#{}" variants
            $p_trailer = '{';
            $p_spec    = '{'; }
          elsif ($p_spec eq 'UntilBrace') {    # should only ever be used in the last argument?
            $p_trailer = '{';
            $p_spec    = "#" . (++$arg_index) . '{'; }
          elsif ($p_spec =~ s/^Match://) { }    # just match, don't increment arg index
          elsif ($p_spec =~ s/^\w?Until(\w*)://) {    # implied argument at this slot
            $p_spec = "#" . (++$arg_index) . $p_spec; }
          else {                                      # regular parameter, increment
            next if $$param{novalue}; # skip the latexml-only requirement params, but only here, since Match also have "novalue" set.
            $p_spec = "#" . (++$arg_index); }
          push @spec_parts, $p_spec; }
        my $spec = join("", @spec_parts);
        $spec =~ s/\{\}//g;
        $spec =~ s/Token//g;
        my $prefixes = join('',
          ($definition->isProtected ? '\protected' : ()),
          ($definition->isLong      ? '\long'      : ()),
          ($definition->isOuter     ? '\outer'     : ()),
        );
        my $expansion_str = '';
        if (ref $expansion eq 'LaTeXML::Core::Tokens') {
          $expansion_str = writableTokens($expansion); }
        else {
          $expansion_str = ToString($expansion); }
        $meaning = ($prefixes ? $prefixes . ' ' : '') .
          "macro:$spec->$expansion_str$p_trailer"; }
      elsif ($type =~ /chardef$/i) {    # from \chardef or \mathchardef
        my $prefix = ($$definition{mathglyph} ? '\mathchar' : '\char');
        # The '"' prefix announces a hexadecimal number in TeX, so the code
        # must be printed in (uppercase) hex, not decimal: \chardef\x=65 means \char"41.
        $meaning = $prefix . '"' . sprintf('%X', $definition->valueOf->valueOf); } }
    # One catch: make sure all \s in the meaning string are normalized to a simple space ' '
    $meaning =~ s/\s/ /g;
    return Explode($meaning); });
# 'CSName' parameter: whatever the gullet's readCSName returns.
DefParameterType('CSName', sub {
    my ($gullet) = @_;
    return $gullet->readCSName; });
# \csname ... \endcsname expands to the named token; if that token has no
# meaning yet, it is first given the meaning of \relax.
DefMacro('\csname CSName', sub {
    my ($gullet, $token) = @_;
    if (!defined LookupMeaning($token)) {
      $STATE->assignMeaning($token, $STATE->lookupMeaning(T_CS('\relax'))); }
    return $token; });
# An \endcsname that reaches the stomach has no matching \csname: report it.
DefPrimitive('\endcsname', sub {
    my ($stomach) = @_;
    Error('unexpected', '\endcsname', $stomach, "Extra \\endcsname",
      $stomach->getGullet->showUnexpected);
    return; });
# \expandafter <token> <xtoken>
# Expands <xtoken> exactly once and puts <token> back in front of the result.
# Chains such as \expandafter\a\expandafter\b\c are handled within a single
# invocation: as long as the token to be expanded is itself \expandafter, the
# token that it would skip over is collected and the token after that becomes
# the new expansion target.
#   * If the target is expandable, the result is the skipped tokens followed
#     by the target's one-step expansion.
#   * If the target has no meaning at all, an error stub is generated and the
#     target is returned unconsumed after the skipped tokens.
#   * Otherwise (defined but not expandable) the tokens are returned unchanged.
use constant T_expandafter => T_CS('\expandafter');
DefMacro('\expandafter Token Token', sub {
    no warnings 'recursion';
    my ($gullet, $tok, $xtok) = @_;
    my $defn;
    my @skipped = ($tok);
    while ($xtok->defined_as(T_expandafter)) {
      push(@skipped, $gullet->readToken);
      $xtok = $gullet->readToken; }
    if (defined($defn = $STATE->lookupExpandable($xtok))) {
      my @x = ();
      {
        local $LaTeXML::CURRENT_TOKEN = $xtok;
        @x = $defn->invoke($gullet, 1);    # Expand $xtok ONCE ONLY!
      }
      (@skipped, @x); }
    elsif (!$STATE->lookupMeaning($xtok)) {
      # Undefined token is an error, as expansion is expected.
      # BUT The unknown token is NOT consumed, (see TeX B book, item 367)
      # since probably in a real TeX run it would have been defined.
      $STATE->generateErrorStub($gullet, $xtok);
      (@skipped, $xtok); }
    else {
      (@skipped, $xtok); } });
# If next token is expandable, prefix it with the internal marker \dont_expand
# That token is never defined, explicitly handled in Gullet & should never escape the Gullet
DefMacroI('\noexpand', undef, sub {
    my ($gullet) = @_;
    my $token = $gullet->readToken;
    # Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed
    return () unless $token;
    my $catcode = $$token[1];
    if ((($catcode == CC_CS) || ($catcode == CC_ACTIVE)) && $STATE->isDontExpandable($token)) {
      return (T_CS('\dont_expand'), $token); }
    return $token; });
# Seeing the marker in the stomach means it escaped the gullet: report it.
DefPrimitiveI('\dont_expand', undef, sub {
    my ($stomach) = @_;
    Error('misdefined', '\dont_expand', $stomach,
      "The token \\dont_expand should never reach Stomach!"); });
# The mark macros all expand to an empty token list.
foreach my $mark ('\topmark', '\firstmark', '\botmark', '\splitfirstmark', '\splitbotmark') {
  DefMacroI($mark, undef, Tokens()); }
# \input<filename>: read the named file via Input(), marked as reloadable.
# The name may be given TeX-style (bare) or LaTeX-style (in braces).
DefMacro('\input TeXFileName', sub {
my $filename = $_[1];
my @t = $filename->unlist;
# If given a LaTeX-style argument, strip braces
# (recognized by a first token of catcode CC_BEGIN and a last one of CC_END)
if (@t && $t[0] && $t[0]->getCatcode == CC_BEGIN && $t[-1]->getCatcode == CC_END) {
$filename = Tokens(@t[1 .. $#t - 1]);
# and load LaTeX.pool if not already
if (!LookupValue('LaTeX.pool_loaded')) {
LoadPool("LaTeX"); } }
Input($filename, reloadable => 1); });
# Note that TeX doesn't actually close the mouth;
# it just flushes it so that it will close the next time it's read!
DefMacroI('\endinput', undef, sub { $_[0]->flushMouth; });
# \the<internal quantity>
# Expands to the tokens that represent the current value of a register.
# Returns an empty list when nothing was read or when an Error was signalled.
DefMacro('\the Register', sub {
my ($gullet, $variable) = @_;
return () unless $variable;
# $variable is an array reference: the definition, followed by any arguments for it.
my ($defn, @args) = @$variable;
if (!$defn || $defn eq 'missing') {
Error('expected', "<register>", $gullet, "a register was expected to be here"); return (); }
# isRegister doubles as the register's type; it is compared with 'Tokens' below.
my $type = $defn->isRegister;
if (!$type) {
my $cs = ToString($defn->getCS);
# Not a register: \font is special-cased to yield \tenrm; anything else is an error.
if ($cs eq '\font') { # what to do here?
return T_CS('\tenrm'); }
Error('unexpected', "\\the$cs", $gullet, "You can't use $cs after \\the"); return (); }
my $value = $defn->valueOf(@args);
## In all cases, these should be OTHER, except for space. (!?)
# A Tokens register yields its tokens as they are (none if it has no value);
# every other type yields the exploded string form of its value.
my @tokens = ($type eq 'Tokens' ? ($value ? $value->unlist : ()) : Explode(ToString($value)));
return @tokens; });
#**********************************************************************
# Primitives
# See The TeXBook, Chapter 24, Summary of Vertical Mode
# and Chapter 25, Summary of Horizontal Mode.
# Parsing of basic types (pp.268--271) is (mostly) handled in Gullet.pm
#**********************************************************************
#======================================================================
# Registers & Parameters
# See Chapter 24, Summary of Vertical Mode
# Define a whole mess of useless registers here ...
# Values are from Appendix B, pp. 348-349 (for whatever it's worth)
#======================================================================
#======================================================================
# Integer registers; TeXBook p. 272-273
# \tracingmacros and \tracingcommands are not stored as ordinary registers:
# each one reads and writes its own bit of the 'TRACING' value.
foreach my $tracer (['\tracingmacros', TRACE_MACROS], ['\tracingcommands', TRACE_COMMANDS]) {
  my ($register, $flag) = @$tracer;
  DefRegister($register, Number(0),
    # Reading gives the register's bit as currently set in TRACING.
    getter => sub { Number((LookupValue('TRACING') || 0) & $flag); },
    # Writing a non-zero value sets the bit; writing zero clears it.
    setter => sub {
      my ($value) = @_;
      my $current = (LookupValue('TRACING') || 0);
      AssignValue(TRACING => ($value->valueOf ? $current | $flag : $current & ~$flag)); }); }
{
# Integer parameters and their initial values.
# Each key becomes a register named \<key> that holds Number(<value>).
# \time, \day, \month and \year start at 0 here; they are assigned from the
# clock further down in this file.
my %iparms = (
pretolerance => 100, tolerance => 200, hbadness => 1000, vbadness => 1000,
linepenalty => 10, hyphenpenalty => 50, exhyphenpenalty => 50,
binoppenalty => 700, relpenalty => 500,
clubpenalty => 150, widowpenalty => 150, displaywidowpenalty => 50,
brokenpenalty => 100, predisplaypenalty => 10000,
postdisplaypenalty => 0, interlinepenalty => 0,
floatingpenalty => 0, outputpenalty => 0,
doublehyphendemerits => 10000, finalhyphendemerits => 5000, adjdemerits => 10000,
looseness => 0, pausing => 0,
holdinginserts => 0, tracingonline => 0, tracingstats => 0,
tracingparagraphs => 0, tracingpages => 0, tracingoutput => 0,
tracinglostchars => 1,
tracingrestores => 0, language => 0, uchyph => 1, lefthyphenmin => 0,
righthyphenmin => 0, globaldefs => 0, defaulthyphenchar => ord('-'), defaultskewchar => -1,
escapechar => ord('\\'), endlinechar => ord("\r"), newlinechar => -1, maxdeadcycles => 0, hangafter => 0,
fam => -1, mag => 1000, magnification => 1000, delimiterfactor => 0,
time => 0, day => 0, month => 0, year => 0,
showboxbreadth => 5, showboxdepth => 3, errorcontextlines => 5);
# The order of definition is irrelevant: every register is independent.
foreach my $p (keys %iparms) {
DefRegister("\\$p", Number($iparms{$p})); }
}
# Most of these are ignored, but...
# \tracingall expands to a series of assignments to the tracing registers,
# followed by \errorstopmode.
DefMacro('\tracingall',
'\tracingonline=1 \tracingcommands=2 \tracingstats=2'
. ' \tracingpages=1 \tracingoutput=1 \tracinglostchars=1'
. ' \tracingmacros=2 \tracingparagraphs=1 \tracingrestores=1'
. ' \showboxbreadth=\maxdimen \showboxdepth=\maxdimen \errorstopmode');
# \tracingnone and \hideoutput expand to nothing.
DefMacroI('\tracingnone', undef, Tokens());
DefMacroI('\hideoutput', undef, Tokens());
# This may mess up Daemon state?
# Set \day, \month, \year and \time (minutes since midnight) globally.
# The clock is taken from SOURCE_DATE_EPOCH (as GMT) when that environment
# variable is defined, and from the local time otherwise.
{
  my @clock = (defined $ENV{SOURCE_DATE_EPOCH} ? gmtime($ENV{SOURCE_DATE_EPOCH}) : localtime());
  my (undef, $minute, $hour, $mday, $month, $year) = @clock;
  AssignValue('\day'   => Number($mday),                'global');
  AssignValue('\month' => Number($month + 1),           'global');
  AssignValue('\year'  => Number(1900 + $year),         'global');
  AssignValue('\time'  => Number(60 * $hour + $minute), 'global');
}
# English month names, indexed from 0 (January) to 11 (December).
our @MonthNames = qw(
  January February March     April   May      June
  July    August   September October November December);
# Return a string for today's date, in the form "Month D, YYYY",
# built from the current values of \month, \day and \year.
sub today {
  my $month = LookupValue('\month')->valueOf;
  my $day   = LookupValue('\day')->valueOf;
  my $year  = LookupValue('\year')->valueOf;
  return join('', $MonthNames[$month - 1], " ", $day, ', ', $year); }
# Read-only Integer registers, all initially 0.
foreach my $name (qw(lastpenalty badness)) {
  DefRegister("\\$name", Number(0), readonly => 1); }
# Special integer registers (?), all initially 0.
# <special integer> = \spacefactor | \prevgraf | \deadcycles | \insertpenalties
foreach my $name (qw(spacefactor prevgraf deadcycles insertpenalties)) {
  DefRegister("\\$name", Number(0)); }
#======================================================================
# Dimen registers; TeXBook p. 274
{
# Dimension parameters and their initial values.
# Each key becomes a register named \<key> that holds Dimension(<value>).
my %dparms = (
hfuzz => '0.1pt', vfuzz => '0.1pt', overfullrule => '5pt',
emergencystretch => 0,
hsize => '6.5in', vsize => '8.9in',
maxdepth => '4pt', splitmaxdepth => '16383.99999pt', boxmaxdepth => '16383.99999pt',
lineskiplimit => 0,
delimitershortfall => '5pt', nulldelimiterspace => '1.2pt', scriptspace => '0.5pt',
mathsurround => 0,
predisplaysize => 0, displaywidth => 0, displayindent => 0, parindent => '20pt',
hangindent => 0, hoffset => 0, voffset => 0,);
# The order of definition is irrelevant: every register is independent.
foreach my $p (keys %dparms) {
DefRegister("\\$p", Dimension($dparms{$p})); }
}
# Special dimension registers (?), all initially 0.
# <special dimen> = \prevdepth | \pagegoal | \pagetotal | \pagestretch | \pagefilstretch
# | \pagefillstretch | \pagefilllstretch | pageshrink | \pagedepth
foreach my $name (qw(
  prevdepth       pagegoal         pagetotal  pagestretch pagefilstretch
  pagefillstretch pagefilllstretch pageshrink pagedepth)) {
  DefRegister("\\$name", Dimension(0)); }
#======================================================================
# Glue registers; TeXBook p.274
{
# Glue parameters and their initial values.
# Each key becomes a register named \<key> that holds Glue(<value>).
my %gparms = (
baselineskip => '12pt', lineskip => '1pt',
parskip => '0pt plus 1pt',
abovedisplayskip => '12pt plus 3pt minus 9pt',
abovedisplayshortskip => '0pt plus 3pt',
belowdisplayskip => '12pt plus 3pt minus 9pt',
belowdisplayshortskip => '0pt plus 3pt',
leftskip => 0, rightskip => 0, topskip => '10pt', splittopskip => '10pt',
tabskip => 0, spaceskip => 0, xspaceskip => 0, parfillskip => '0pt plus 1fil');
# The order of definition is irrelevant: every register is independent.
foreach my $p (keys %gparms) {
DefRegister("\\$p", Glue($gparms{$p})); }
}
#======================================================================
# MuGlue registers; TeXBook p.274
# The three math spacing registers with their initial MuGlue values.
foreach my $muskip (
  ['\thinmuskip',  "3mu"],
  ['\medmuskip',   "4mu plus 2mu minus 4mu"],
  ['\thickmuskip', "5mu plus 5mu"]) {
  my ($register, $initial) = @$muskip;
  DefRegister($register => MuGlue($initial)); }
#======================================================================
# Token registers; TeXBook p.275
# Each one starts out holding an empty token list.
foreach my $name (qw(
  output   everypar everymath everydisplay everyhbox everyvbox
  everyjob everycr  everyhelp)) {
  DefRegister("\\$name", Tokens()); }
#======================================================================
# Assignment, TeXBook Ch.24, p.275
#======================================================================
# <assignment> = <non-macro assignment> | <macro assignment>
#======================================================================
# Macros
# See Chapter 24, p.275-276
# <macro assignment> = <definition> | <prefix><macro assignment>
# <definition> = <def><control sequence><definition text>
# <def> = \def | \gdef | \edef | \xdef
# <definition text> = <register text><left brace><balanced text><right brace>
# Convert the parameter text of a \def-style definition into a
# LaTeXML::Core::Parameters object.
#   $cs     : the token being defined (used only in the error message)
#   $params : the parameter text as a Tokens object
# Returns the Parameters object, or undef when there are no parameters at all.
# Raises a Fatal error when the parameter numbers are not in order (#1, #2, ...).
sub parseDefParameters {
my ($cs, $params) = @_;
my @tokens = $params->packParameters->unlist;
# Now, recognize parameters and delimiters.
my @params = ();
my $n = 0;
while (@tokens) {
my $t = shift(@tokens);
my $cc = $$t[1];
# A parameter starts either with a CC_PARAM token (to be followed by its
# number) or with a single CC_ARG token that carries the number itself.
if ($cc == CC_PARAM || $cc == CC_ARG) {
if ($cc == CC_PARAM) {
if (!@tokens) { # Special case: lone # NOT following a numbered parameter
# Note that we require a { to appear next, but do NOT read it!
push(@params, LaTeXML::Core::Parameter->new('RequireBrace', 'RequireBrace'));
last; }
else {
$n++; $t = shift(@tokens); } }
else { # CC_ARG case, keep looking at this token
$n++; }
# $t is now the token holding the parameter number; it must equal the running count.
Fatal('expected', "#$n", $STATE->getStomach,
"Parameters for '" . ToString($cs) . "' not in order in " . ToString($params))
unless (defined $t) && ($n == int($$t[0]));
# Check for delimiting text following the parameter #n
my @delim = ();
my $pc = -1;    # catcode of the previous delimiter token (-1: none yet)
INNER_DELIM: while (@tokens) {
my $inner_cc = $tokens[0]->getCatcode;
# The delimiter ends where the next parameter begins.
last INNER_DELIM if $inner_cc == CC_PARAM || $inner_cc == CC_ARG;
my $d = shift(@tokens);
push(@delim, $d) unless $pc == CC_SPACE && $inner_cc == CC_SPACE; # BUT collapse whitespace!
$pc = $inner_cc; }
# Found text that marks the end of the parameter
if (@delim) {
my $expected = Tokens(@delim);
push(@params, LaTeXML::Core::Parameter->new('Until',
'Until:' . ToString($expected),
extra => [$expected])); }
# Special case: trailing sole # => delimited by next opening brace.
elsif ((scalar(@tokens) == 1) && ($tokens[0]->getCatcode == CC_PARAM)) {
shift(@tokens);
push(@params, LaTeXML::Core::Parameter->new('UntilBrace', 'UntilBrace')); }
# Nothing? Just a plain parameter.
else {
push(@params, LaTeXML::Core::Parameter->new('Plain', '{}')); } }
else {
# Initial delimiting text is required.
# Collect the literal tokens up to the next parameter; they must be matched
# in the input but supply no argument value (novalue).
my @lit = ($t);
my $lit_cc;
while (@tokens && ($lit_cc = $tokens[0]->getCatcode) &&
($lit_cc != CC_PARAM && $lit_cc != CC_ARG)) {
push(@lit, shift(@tokens)); }
my $expected = Tokens(@lit);
push(@params, LaTeXML::Core::Parameter->new('Match',
'Match:' . ToString($expected),
extra => [$expected],
novalue => 1)); }
}
return (@params ? LaTeXML::Core::Parameters->new(@params) : undef); }
# Shared implementation of \def, \gdef, \edef and \xdef.
#   $globally : true to install the definition globally
#   $gullet   : passed to Error as the location of a problem
#   $cs       : the token being defined
#   $params   : the raw parameter text; converted by parseDefParameters
#   $body     : the replacement text, as already read by the caller
# Reports an Error and returns without defining anything when $cs or $params is missing.
sub do_def {
  my ($globally, $gullet, $cs, $params, $body) = @_;
  unless ($cs) {
    Error('expected', 'Token', $gullet, "Expected definition token");
    return; }
  unless ($params) {
    Error('misdefined', $cs, $gullet, "Expected definition parameter list");
    return; }
  my $parsed = parseDefParameters($cs, $params);
  my $scope  = ($globally ? 'global' : undef);
  # nopackParameters => 1 : the definition is created with parameter packing switched off.
  $STATE->installDefinition(
    LaTeXML::Core::Definition::Expandable->new($cs, $parsed, $body, nopackParameters => 1),
    $scope);
  AfterAssignment();
  return; }
# The four definition primitives all delegate to do_def; its first argument is
# 1 for the global variants (\gdef, \xdef) and 0 otherwise.
# \edef and \xdef read the body with the DefExpanded parameter type, \def and \gdef with DefPlain.
DefPrimitive('\def SkipSpaces Token UntilBrace DefPlain', sub { do_def(0, @_); }, locked => 1);
DefPrimitive('\gdef SkipSpaces Token UntilBrace DefPlain', sub { do_def(1, @_); }, locked => 1);
DefPrimitive('\edef SkipSpaces Token UntilBrace DefExpanded', sub { do_def(0, @_); }, locked => 1);
DefPrimitive('\xdef SkipSpaces Token UntilBrace DefExpanded', sub { do_def(1, @_); }, locked => 1);
# <prefix> = \global | \long | \outer
# See Stomach.pm
# Each prefix primitive only records its own name through $STATE->setPrefix
# and returns nothing; all three are declared with isPrefix => 1.
DefPrimitiveI('\global', undef, sub { $STATE->setPrefix('global'); return; }, isPrefix => 1);
DefPrimitiveI('\long', undef, sub { $STATE->setPrefix('long'); return; }, isPrefix => 1);
DefPrimitiveI('\outer', undef, sub { $STATE->setPrefix('outer'); return; }, isPrefix => 1);
#======================================================================
# Non-Macro assignments; TeXBook Ch.24, pp 276--277
# <non-macro assignment> = <simple assignment> | \global <non-macro assignment>
# <filler> = <optional spaces> | <filler>\relax<optional spaces>
# <general text> = <filler>{<balanced text><right brace>
# <simple assignment> = <variable assignment> | <arithmetic>
# | <code assignment> | <let assignment> | <shorthand definition>
# | <fontdef token> | <family assignment> | <shape assignment>
# | \read <number> to <optional spaces><control sequence>
# | \setbox<8bit><equals><filler><box>
# | \font <control sequence><equals><file name><at clause>
# | <global assignment>
# <variable assignment> = <integer variable><equals><number>
# | <dimen variable><equals><dimen>
# | <glue variable><equals><glue>
# | <muglue variable><equals><muglue>
# | <token variable><equals><general text>
# | <token variable><equals><filler><token variable>
# <at clause> = at <dimen> | scaled <number> | <optional spaces>
# <code assignment> = <codename><8bit><equals><number>
# Fetch the font information stored under 'fontinfo_<name>' for a font token.
# <name> is the CS name of the token's current definition when it has one,
# and otherwise the string form of the token itself.
sub lookupFontinfo {
  my ($token) = @_;
  my $defn = LookupDefinition($token);
  # (an older variant keyed the value as ToString(...) . '_fontinfo')
  my $name = ($defn ? $defn->getCSName : ToString($token));
  return LookupValue('fontinfo_' . $name); }
# This should eventually actually load the font metrics,
# and tie-in to the FontMetrics data used by Font.
# \font<cs>=<file name><at clause>
# Decodes the font name into font properties, attaches a small table of scaled
# metrics, stores the result under 'fontinfo_<cs>' and defines <cs> as a
# primitive that carries that font.
DefPrimitive('\font SkipSpaces Token SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $cs, $name) = @_;
    my $gullet = $stomach->getGullet;
    $name = ToString($name);
    # Optional size clause; each keyword is tried in turn.
    my ($at, $scaled);
    $at     = $gullet->readDimension if $gullet->readKeyword('at');
    $scaled = $gullet->readNumber    if $gullet->readKeyword('scaled');
    my %props = LaTeXML::Common::Font::decodeFontname($name,
      $at && $at->ptValue, $scaled && $scaled->valueOf / 1000);
    if (keys %props) {
      $props{fontname} = $name; }
    else {    # Decoding gave nothing: the font switch will do nothing
      Info('unexpected', $name, $stomach, "Unrecognized font name '$name'",
        "Font switch macro " . ToString($cs) . " will have no effect"); }
    # Scale factor applied to the nominal metrics
    my $factor = 1;
    if    ($at)     { $factor = $at->divide(Dimension('1em'))->valueOf; }
    elsif ($scaled) { $factor = $scaled->valueOf / 1000; }
    my @nominal = (Dimension(0), Dimension('0.5em'), Dimension(0),
      Dimension(0), Dimension('1ex'), Dimension('1em'));
    $props{data} = [map { $_->multiply($factor); } @nominal];
    $gullet->skipSpaces;
    # Store the font info & metrics; the same hash is shared with the font switch
    my $fontinfo = \%props;
    AssignValue('fontinfo_' . ToString($cs) => $fontinfo);
    DefPrimitiveI($cs, undef, undef, font => $fontinfo);
    return; });
# Not sure what this should be...
# For now: a primitive with no replacement code that carries a font whose family is 'nullfont'.
DefPrimitiveI('\nullfont', undef, undef, font => { family => 'nullfont' });
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
# \lastpenalty is a read-only register that always holds 0.
DefRegister('\lastpenalty', Number(0), readonly => 1);
# \parshape=<n> followed by 2n dimensions: the dimensions are read and thrown away.
DefPrimitive('\parshape SkipSpaces SkipMatch:= Number', sub {
    my ($stomach, $n) = @_;
    my $remaining = $n->valueOf;
    my $gullet    = $stomach->getGullet;
    while ($remaining-- > 0) {
      $gullet->readDimension;
      $gullet->readDimension; }
    # we _could_ conceivably store this somewhere for some attempt at stylistic purpose...
    return; });
# \inputlineno reports the fromLine entry of the gullet's current locator, or 0 without one.
DefRegister('\inputlineno', Number(0),
  getter => sub {
    my $locator = $STATE->getStomach->getGullet->getLocator();
    return Number($$locator{fromLine}) if $locator;
    return Number(0); },
  readonly => 1);
# \badness is a read-only register that always holds 0.
DefRegister('\badness', Number(0), readonly => 1);
# <codename> = \catcode | \mathcode | \lccode | \uccode | \sfcode | \delcode
# The code tables are exposed as registers indexed by a character code.
# Getters receive the index as $_[0]; setters receive (value, scope, index)
# as ($_[0], $_[1], $_[2]).  The index is converted to a character with chr
# before the table is consulted.
# \catcode: an unset entry reads as CC_OTHER.
DefRegister('\catcode Number', Number(0),
getter => sub { my $code = LookupCatcode(chr($_[0]->valueOf));
Number(defined $code ? $code : CC_OTHER); },
setter => sub { AssignCatcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
# # Only used for active math characters, so far
DefRegister('\mathcode Number', Number(0),
getter => sub {
my $ch = $_[0]->valueOf;
my $code = $STATE->lookupMathcode(chr($ch));
Number(defined $code ? $code : $ch); }, # defaults to the char's code itself(?)
setter => sub { $STATE->assignMathcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
# Not used anywhere (yet)
# \sfcode: an unset entry reads as 0.
DefRegister('\sfcode Number', Number(0),
getter => sub { my $code = $STATE->lookupSFcode(chr($_[0]->valueOf));
Number(defined $code ? $code : 0); },
setter => sub { $STATE->assignSFcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
# \lccode: an unset entry reads as 0.
DefRegister('\lccode Number', Number(0),
getter => sub { my $code = $STATE->lookupLCcode(chr($_[0]->valueOf));
Number(defined $code ? $code : 0); },
setter => sub { $STATE->assignLCcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
# \uccode: an unset entry reads as 0.
DefRegister('\uccode Number', Number(0),
getter => sub { my $code = $STATE->lookupUCcode(chr($_[0]->valueOf));
Number(defined $code ? $code : 0); },
setter => sub { $STATE->assignUCcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
# Not used anywhere (yet)
# \delcode: an unset entry reads as 0.
DefRegister('\delcode Number', Number(0),
getter => sub { my $code = $STATE->lookupDelcode(chr($_[0]->valueOf));
Number(defined $code ? $code : 0); },
setter => sub { $STATE->assignDelcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
# Remember, we're assigning a NUMBER (codepoint) to a CHARACTER!
# For both cases of every ASCII letter, globally set its \lccode to the
# lowercase code point and its \uccode to the uppercase code point.
foreach my $upper ('A' .. 'Z') {
  my $lower = lc($upper);
  foreach my $char ($upper, $lower) {
    $STATE->assignLCcode($char, ord($lower), 'global');
    $STATE->assignUCcode($char, ord($upper), 'global'); } }
# Stub definitions ???
# Both macros read their single argument and expand to an empty token list.
DefMacro('\hyphenation GeneralText', Tokens());
DefMacro('\patterns{}', Tokens());
# <font> = <fontdef token> | \font | <family member>
# <family member> = <font range><4bit>
# <font range> = \textfont | \scriptfont | \scriptscriptfont
# Doubtful that we can do anything useful with these.
# These look essentially like Registers, although Knuth doesn't call them that.
# NOTE: These should just point to a CS token, right????
# (although it SHOULD be one defined to be a font switch??)
# NOTE: These should NOT be global(?)
# \textfont, \scriptfont and \scriptscriptfont, each indexed by a family number.
# A value is kept under '<register>_<family>'; the default is the CS token listed here.
foreach my $spec (['textfont', '\tenrm'], ['scriptfont', '\sevenrm'], ['scriptscriptfont', '\fiverm']) {
  my ($register, $default) = @$spec;
  my $prefix = $register . '_';
  DefRegister("\\$register Number" => T_CS($default),
    getter => sub {
      my ($family) = @_;
      LookupValue($prefix . $family->valueOf); },
    setter => sub {
      my ($font, $scope, $family) = @_;
      AssignValue($prefix . $family->valueOf, $font, $scope); }); }
# <internal dimen> = <dimen parameter> | <special dimen> | \lastkern
# | <dimendef token> | \dimen<8bit> | <box dimension><8bit> | \fontdimen<number><font>
# \lastkern is a read-only register that always holds a zero dimension.
DefRegister('\lastkern' => Dimension(0), readonly => 1);
# <box dimension> = \ht | \wd | \dp
# \ht<n>: height of whatever is stored under the value 'box<n>'.
# Reading gives a zero dimension when nothing is stored there;
# assigning is silently ignored in that case.
DefRegister('\ht Number', Dimension(0),
getter => sub {
my ($n) = @_;
my $stuff = $n && LookupValue('box' . $n->valueOf);
return ($stuff ? $stuff->getHeight : Dimension(0)); },
setter => sub {
my ($value, $scope, $n) = @_;
my $stuff = $n && LookupValue('box' . $n->valueOf);
$stuff->setHeight($value) if $stuff;
return; });
# \wd<n>: same as \ht, but for the width of the stored box.
DefRegister('\wd Number', Dimension(0),
getter => sub {
my ($n) = @_;
my $stuff = $n && LookupValue('box' . $n->valueOf);
return ($stuff ? $stuff->getWidth : Dimension(0)); },
setter => sub {
my ($value, $scope, $n) = @_;
my $stuff = $n && LookupValue('box' . $n->valueOf);
$stuff->setWidth($value) if $stuff;
return; });
DefRegister('\dp Number', Dimension(0),
getter => sub {
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
return ($data && $$data[$p - 1]) || Dimension(0); },
setter => sub {
my ($value, $scope, $p, $font) = @_;
my $info = lookupFontinfo($font);
$p = ToString($p);
if (my $data = $info && $$info{data}) {
$$data[$p - 1] = $value; } }
);
# \hyphenchar<font>: kept in the 'hyphenchar' slot of the font's info hash
# (see lookupFontinfo).  Reads as the code of '-' when there is no font info
# or the slot is unset; an assignment is dropped when there is no font info.
DefRegister('\hyphenchar FontToken' => Number(ord('-')),
getter => sub {
my ($font) = @_;
my $info = lookupFontinfo($font);
return ($info && $$info{hyphenchar}) || Number(ord('-')); },
setter => sub {
my ($value, $scope, $font) = @_;
if (my $info = lookupFontinfo($font)) {
$$info{hyphenchar} = $value; } }
);
# \skewchar<font>: same scheme, using the 'skewchar' slot and a default of 0.
DefRegister('\skewchar FontToken' => Number(0),
getter => sub {
my ($font) = @_;
my $info = lookupFontinfo($font);
return ($info && $$info{skewchar}) || Number(0); },
setter => sub {
my ($value, $scope, $font) = @_;
if (my $info = lookupFontinfo($font)) {
$$info{skewchar} = $value; } }
);
# Could be handled by setting dimensions whenever the box itself is set?
# <internal glue> = <glue parameter> | \lastskip | <skipdef token> | \skip<8bit>
# \lastskip is a read-only register that always holds zero glue.
DefRegister('\lastskip' => Glue(0), readonly => 1);
# <internal muglue> = <muglue parameter> | \lastskip | <muskipdef token> | \muskip<8bit>
# <family assignment> = <family member><equals><font>
# <shape assignment> = \parshape<equals><number><shape dimensions>
# <shape dimensions> is 2n <dimen>
# <global assignment> = <font assignment> | <hyphenation assignment>
# | <box size assignment> | <interaction mode assignment>
# | <intimate assignment>
# <font assignment> = \fontdimen <number><font><equals><dimen>
# | \hyphenchar<font><equals><number> | \skewchar<font><equals><number>
# <hyphenation assignment> = \hyphenation<general text>
# | \patterns<general text>
# <box size assignment> = <box dimension><8bit><equals><dimen>
# <interaction mode assignment> = \errorstopmode | \scrollmode | \nonstopmode | \batchmode
# These are no-ops; Basically, LaTeXML runs in scrollmode
# The four interaction mode switches are defined as primitives with no replacement code.
foreach my $mode (qw(errorstopmode scrollmode nonstopmode batchmode)) {
  DefPrimitiveI("\\$mode", undef, undef); }
# <intimate assignment> = <special integer><equals><number>
# | <special dimension><equals><dimen>
# \fontencoding{enc} hands its argument on to the primitive \@@@fontencoding.
DefMacro('\fontencoding{}', '\@@@fontencoding{#1}');
# Merge the named encoding into the current font when a font map can be
# loaded for it; otherwise merge OT1 instead.
DefPrimitive('\@@@fontencoding{}', sub {
    my ($stomach, $encoding) = @_;
    my $name = ToString(Expand($encoding));
    # Default to OT1 encoding if no map found
    MergeFont(encoding => (LoadFontMap($name) ? $name : 'OT1'));
    return; });
# Both macros expand to the characters of the current font's encoding name.
foreach my $cs ('\f@encoding', '\cf@encoding') {
  DefMacroI($cs, undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); }
# Used for SemiVerbatim text
# 128-entry map: positions 0x00-0x1F and 0x7F are undefined;
# every position from 0x20 through 0x7E maps to the character with that code.
DeclareFontMap('ASCII',
  [(undef) x 32, (map { chr($_) } 0x20 .. 0x7E), undef]);
# Note that several entries are used for accents, and in practice will actually
# be used in something like an m:mover; thus they needn't (shouldn't?) be "small"
# There are also some questions about which choices are best
# grave & acute accents (entry 0x12 & 0x13) (often typed using 0x60 & 0x27)
# are probably best using U+60(grave accent) & U+B4(acute accent)
# but could be U+2035 (reversed prime) & U+2032 (prime). (particularly for math?)
# [we do use these for \prime, however!]
# or U+02CB (modifier letter grave accent) & U+02CA (modifier letter acute accent)
# Similarly, hat & tilde (entries 0x5E & 0x7E)
# typed using ^ (0x5E circumflex accent) & ~ (0x7E tilde)
# are probably best just sticking with U+5E & U+7E
# but could be U+02C6 (modifier letter circumflex accent) U+02DC (small tilde)
# [Note that generally we're using codepoints characterized as "modifier letter"
# only when no other spacing point is available.]
# The OT1 map: 128 entries, laid out 8 per row, so the first entry of row r
# (counting from 0) is position 8*r.  Each entry is the Unicode string used
# for that font position.
DeclareFontMap('OT1',
["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
"\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{FB00}", "\x{FB01}", "\x{FB02}", "\x{FB03}", "\x{FB04}",
"\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}",
UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8),
UTF(0xA0) . "\x{0335}", '!', "\x{201D}", '#', '$', '%', '&', "\x{2019}",
'(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', ':', ';', UTF(0xA1), '=', UTF(0xBF), '?',
'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', '[', "\x{201C}", ']', "^", "\x{02D9}",
"\x{2018}", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', "\x{2013}", "\x{2014}", "\x{02DD}", UTF(0x7E), UTF(0xA8)]);
DeclareFontMap('OT1',
["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
"\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{2191}", "\x{2193}", "'", UTF(0xA1), UTF(0xBF),
"\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}",
UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8),
"\x{2423}", '!', "\"", '#', '$', '%', '&', "\x{2019}",
'(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7',
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
elsif ($cc == CC_SPACE) { (T_SPACE); }
elsif ($cc == CC_PARAM) { ($_, $_); }
elsif ($cc == CC_ARG) { (T_PARAM, T_OTHER($$_[0])); }
else { $_; }
} @tokens;
return UnTeX(Tokens(@tokens), 1); }
# \message{text}: expand the text, convert it with writableTokens and pass it to NoteLog.
DefPrimitive('\message{}', sub {
my ($stomach, $stuff) = @_;
NoteLog(writableTokens(Expand($stuff)));
return; });
# \errhelp is a token register, initially empty.
DefRegister('\errhelp' => Tokens());
# \errmessage{text}: issue a Note made of the expanded text, ": ",
# and the expansion of \the\errhelp.
DefPrimitive('\errmessage{}', sub {
my ($stomach, $stuff) = @_;
Note(ToString(Expand($stuff)) . ": " . ToString(Expand(Tokens(T_CS('\the'), T_CS('\errhelp')))));
return; });
# TeX I/O primitives
# \openin<port>=<file name>
# When FindFile locates the file, a Mouth is created for it (seeded with any
# content cached under '<path>_contents') and stored globally under 'input_file:<port>'.
# When the file is not found, nothing is stored.
DefPrimitive('\openin Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
my ($stomach, $port, $filename) = @_;
# possibly should close $port if it's already been opened?
$port = ToString($port);
$filename = ToString($filename);
# Rely on FindFile to enforce any access restrictions
# It's tempting to put noltxml=>1 here, since who would want to read in an .ltxml file's perl?
# However, \openin is often used by low-level code to check for existence of a file
# when we SHOULD find an .ltxml version!
# Hopefully, if they get one, they won't actually try to read its content...
if (my $path = FindFile($filename)) {
my $mouth = LaTeXML::Core::Mouth->create($path,
content => LookupValue($path . '_contents'));
AssignValue('input_file:' . $port => $mouth, 'global'); }
return; });
# \closein<port>
# Finish the Mouth stored under 'input_file:<port>' (if any) and globally clear that value.
# The signature has only the port argument, so nothing else is unpacked.
DefPrimitive('\closein Number', sub {
    my ($stomach, $port) = @_;
    # close the mouth (if any) and clear the variable
    $port = ToString($port);
    if (my $mouth = LookupValue('input_file:' . $port)) {
      $mouth->finish;
      AssignValue('input_file:' . $port => undef, 'global'); }
    return; });
# \read<port> to <cs>
# Reads tokens from the Mouth stored under 'input_file:<port>' and defines <cs>
# as a parameterless macro expanding to them.  Does nothing if the port has no Mouth.
DefPrimitive('\read Number SkipKeyword:to SkipSpaces Token', sub {
my ($stomach, $port, $token) = @_;
$port = ToString($port);
if (my $mouth = LookupValue('input_file:' . $port)) {
# The two settings below are made inside a group, which is closed again before defining the macro.
$stomach->bgroup;
AssignValue(PRESERVE_NEWLINES => 2); # Special EOL/EOF treatment for \read
AssignValue(INCLUDE_COMMENTS => 0);
my @tokens = ();
# $level counts currently open braces
my ($t, $level) = (undef, 0);
while ($t = $mouth->readToken) {
my $cc = $t->getCatcode;
push(@tokens, $t) unless $cc == CC_MARKER; # End of line marker
$level++ if $cc == CC_BEGIN;
$level-- if $cc == CC_END;
# Stop at the end of a line, but only when all braces are balanced
last if !$level && $mouth->isEOL; }
$stomach->egroup;
DefMacroI($token, undef, Tokens(@tokens), nopackParameters => 1); }
return; });
# \ifeof<port>: true when no Mouth is stored under 'input_file:<port>';
# otherwise the value of that Mouth's at_eof field.
DefConditional('\ifeof Number', sub {
    my ($gullet, $port) = @_;
    my $mouth = LookupValue('input_file:' . ToString($port));
    return ($mouth ? $$mouth{at_eof} : 1); });
# For output files, we'll write the data to a cached internal copy
# rather than to the actual file system.
# \openout<port>=<file name>
# Globally records the file name under 'output_file:<port>' and
# resets the cached content '<file name>_contents' to the empty string.
DefPrimitive('\openout Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
my ($stomach, $port, $filename) = @_;
$port = ToString($port);
$filename = ToString($filename);
AssignValue('output_file:' . $port => $filename, 'global');
AssignValue($filename . '_contents' => "", 'global');
return; });
# \closeout<port>: globally clears 'output_file:<port>'; the cached content is left alone.
DefPrimitive('\closeout Number', sub {
my ($stomach, $port) = @_;
$port = ToString($port);
AssignValue('output_file:' . $port => undef, 'global');
return; });
# \write<port>{tokens}
# With a file open on the port, the expanded text plus a newline is appended to
# that file's cached content; with none, the expanded text is issued as a Note.
DefPrimitive('\write Number {}', sub {
    my ($stomach, $port, $tokens) = @_;
    my $filename = LookupValue('output_file:' . ToString($port));
    if (!$filename) {
      Note(UnTeX(Expand($tokens)));
      return; }
    my $key   = $filename . '_contents';
    my $sofar = LookupValue($key);
    AssignValue($key => $sofar . UnTeX(Expand($tokens), 1) . "\n", 'global');
    return; });
# Since we don't paginate, we're effectively always "shipping out",
# so all operations are \immediate
# \immediate is defined with no replacement code.
DefPrimitive('\immediate', undef);
#======================================================================
# Remaining semi- Vertical Mode primitives in Ch.24, pp.280--281
DefPrimitive('\special {}', sub {
my ($stomach, $arg) = @_;
my $special_str = ToString($arg);
# recognize one special graphics inclusion case
if ($special_str =~ /\bpsfile=(.+?)(?:\s|\})/) {
my $graphic = $1;
RequirePackage('graphicx', searchpaths_only => 1);
my @kv;
for my $prop (qw(voffset hoffset hscale vscale hsize vsize angle)) {
if ($special_str =~ /\b$prop=(.+?)(?:\s|\})/) {
push(@kv, T_OTHER(',')) if @kv;
push(@kv, T_OTHER($prop), T_OTHER("="), T_OTHER($1)); } }
@kv = (T_OTHER("["), @kv, T_OTHER("]")) if @kv;
$stomach->getGullet->unread(
T_CS('\ltx@special@graphics'), @kv, T_BEGIN, T_OTHER($graphic), T_END); }
else {
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
#======================================================================
# Run trimNodeWhitespace on every ltx:td element when it is closed.
Tag('ltx:td', afterClose => \&trimNodeWhitespace);
#----------------------------------------------------------------------
# Primitive column types;
# This is really LaTeX, but the mechanisms are used behind-the-scenes here, too.
# Each handler modifies the template being built, $LaTeXML::BUILD_TEMPLATE.
# '|' : put \vrule\relax between columns.
DefColumnType('|', sub {
$LaTeXML::BUILD_TEMPLATE->addBetweenColumn(T_CS('\vrule'), T_CS('\relax')); return; });
# 'l' : a column with \hfil after the cell.
DefColumnType('l', sub {
$LaTeXML::BUILD_TEMPLATE->addColumn(after => Tokens(T_CS('\hfil'))); return; });
# 'c' : a column with \hfil on both sides of the cell.
DefColumnType('c', sub {
$LaTeXML::BUILD_TEMPLATE->addColumn(before => Tokens(T_CS('\hfil')),
after => Tokens(T_CS('\hfil'))); return; });
# 'r' : a column with \hfil before the cell.
DefColumnType('r', sub {
$LaTeXML::BUILD_TEMPLATE->addColumn(before => Tokens(T_CS('\hfil'))); return; });
# 'p{width}' : the cell is wrapped as \vtop{\hbox to<width>\relax{ ... }},
# attached at the top and justified.
DefColumnType('p{Dimension}', sub {
$LaTeXML::BUILD_TEMPLATE->addColumn(
before => Tokens(T_CS('\vtop'), T_BEGIN, T_CS('\hbox'),
T_LETTER('t'), T_LETTER('o'), $_[1]->revert, T_CS('\relax'),
T_BEGIN),
after => Tokens(T_END, T_END),
vattach => 'top',
align => 'justify',
); return; });
# '*{n}{pattern}' : returns the tokens of the pattern repeated n times.
DefColumnType('*{Number}{}', sub {
my ($gullet, $n, $pattern) = @_;
map { $pattern->unlist } 1 .. $n->valueOf; });
# '@{filler}' : put the filler between columns; disableIntercolumn is called
# both before and after adding it.
DefColumnType('@{}', sub {
my ($gullet, $filler) = @_;
$LaTeXML::BUILD_TEMPLATE->disableIntercolumn;
$LaTeXML::BUILD_TEMPLATE->addBetweenColumn($filler->unlist);
$LaTeXML::BUILD_TEMPLATE->disableIntercolumn;
return; });
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Alignment code
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#----------------------------------------------------------------------
# This is where ALL alignments start & finish
# This creates the object representing the entire alignment!
# The constructed content, the reversion and the size are all taken from
# the 'alignment' property of the whatsit.
# After digestion, and inside a group of its own: if an Alignment is currently
# bound, it is stored as that property, the whatsit is registered as the
# alignment's body, and the body is digested by digestAlignmentBody.
DefConstructor('\@start@alignment',
"#alignment",
reversion => sub { Revert($_[0]->getProperty('alignment')); },
sizer => '#alignment',
# beforeDigest => sub { $_[0]->bgroup; },
afterDigest => sub {
my ($stomach, $whatsit) = @_;
$stomach->bgroup;
if (my $alignment = LookupValue('Alignment')) {
$whatsit->setProperty(alignment => $alignment);
$alignment->setBody($whatsit);
digestAlignmentBody($stomach, $whatsit); }
$stomach->egroup;
return; });
# Seems odd to need both end markers here...
# \@finish@alignment expands to \hidden@crcr\@close@alignment.
DefMacroI('\@finish@alignment', undef,
'\hidden@crcr\@close@alignment');
# \@close@alignment itself does nothing when executed.
DefPrimitive('\@close@alignment', sub { });
#======================================================================
# Low-level bits that appear within alignments or \halign
# \cr and \crcr each construct a newline.
DefConstructorI('\cr', undef, "\n");
DefConstructorI('\crcr', undef, "\n");
# These are useful for reversion of higher-level macros that use alignment
# internally, but don't use explicit &,\cr in the user markup
# (their alias is the empty string)
DefConstructorI('\hidden@cr', undef, "\n", alias => '');
DefConstructorI('\hidden@crcr', undef, "\n", alias => '');
DefConstructorI('\hidden@align', undef, "", alias => '');
# Handled directly in alignments, but must be defined as non-macros
# When one of these primitives is actually executed, it reports an 'unexpected'
# Error, opens a group, and rebinds the alignment tab character, \noalign,
# \omit and \span to \relax.
# NOTE(review): no matching egroup is visible here, and \omit reports the Error
# before bgroup whereas \noalign and \span report it after -- confirm both are intended.
DefPrimitiveI('\noalign', undef, sub {
$_[0]->bgroup;
Error('unexpected', '\noalign', $_[0], "\\noalign cannot be used here");
Let(T_ALIGN, T_CS('\relax'));
Let(T_CS('\noalign'), T_CS('\relax'));
Let(T_CS('\omit'), T_CS('\relax'));
Let(T_CS('\span'), T_CS('\relax'));
return; });
DefPrimitiveI('\omit', undef, sub {
Error('unexpected', '\omit', $_[0], "\\omit cannot be used here");
$_[0]->bgroup;
Let(T_ALIGN, T_CS('\relax'));
Let(T_CS('\noalign'), T_CS('\relax'));
Let(T_CS('\omit'), T_CS('\relax'));
Let(T_CS('\span'), T_CS('\relax'));
return; });
DefPrimitiveI('\span', undef, sub {
$_[0]->bgroup;
Error('unexpected', '\span', $_[0], "\\span cannot be used here");
Let(T_ALIGN, T_CS('\relax'));
Let(T_CS('\noalign'), T_CS('\relax'));
Let(T_CS('\omit'), T_CS('\relax'));
Let(T_CS('\span'), T_CS('\relax'));
return; });
#########
# Support for \\[dim] .... TO BE WORKED OUT!
# NOTE that this does NOT skip spaces before * or []!!!!!
# As if: \@alignment@newline OptionalMatch:* [Dimension]
# Read arguments for \\, namely * and/or [Dimension]
# BUT optionally do it while skipping spaces (latex style) or not (ams style)
# Read the optional arguments that may follow \\ : a star and/or a [..] group.
#   $gullet     : the gullet to read from
#   $skipspaces : when true, spaces are skipped before looking for * and again before [
# Returns ($star, $optional): $star is 1 if a * was present (else undef);
# $optional is whatever was read up to the closing ] (else undef).
# A token that was read but turned out to be neither * nor [ is put back.
sub readNewlineArgs {
  my ($gullet, $skipspaces) = @_;
  # NOTE(review): presumably the large ALIGN_STATE keeps & and \cr from being
  # acted upon as alignment delimiters while we peek ahead -- confirm.
  local $LaTeXML::ALIGN_STATE = 1000000;
  $gullet->skipSpaces if $skipspaces;
  my $next = $gullet->readToken;
  my ($star, $optional);
  if ($next && $next->equals(T_OTHER('*'))) {
    $star = 1;
    $gullet->skipSpaces if $skipspaces;
    $next = $gullet->readToken; }
  if ($next && $next->equals(T_OTHER('['))) {
    $optional = $gullet->readUntil(T_OTHER(']'));
    $next     = undef; }    # the [ has been consumed; nothing to put back
  $gullet->unread($next) if $next;
  return ($star, $optional); }
# VERY tricky (and mostly Wrong).
# The issue is for \\ to look ahead for * and [],
# Eventually we'll expand into \cr (which should be preceded by the RHS of the template)
# BUT it should NOT trigger the template if it bumps into a &
# which happens when the 1st column of an alignment is empty.
# In proper LaTeX this is inhibited by a curious construct
# {\ifnum0='}
# and possibly by proper tracking of a Master Counter !?!?!?
# But we're not there (yet)
# This is the internal macro for \\[dim] used by LaTeX for various arrays, tabular, etc
# Reads the optional * and [dimension] (skipping spaces) and expands to
#   \hidden@cr{\@alignment@newline@markertall{<dimension>}}  when [..] was given, or
#   \hidden@cr{\@alignment@newline@marker}                   otherwise.
# The star is read but not otherwise used.
DefMacroI('\@alignment@newline', undef, sub {
my ($gullet) = @_;
my ($star, $optional) = readNewlineArgs($gullet, 1);
return (T_CS('\hidden@cr'), T_BEGIN,
($optional
? (T_CS('\@alignment@newline@markertall'), T_BEGIN, $optional, T_END)
: T_CS('\@alignment@newline@marker')),
T_END); });
# However, the above will skip spaces --AND a newline! -- looking for [],
# which is kinda weird in math, since there may be a reasonable math [ in the 1st column!
# AMS kindly avoids that, by using a special version of \\
# Same expansion as \@alignment@newline, but readNewlineArgs is called without the skip-spaces flag.
DefMacroI('\@alignment@newline@noskip', undef, sub {
my ($gullet) = @_;
my ($star, $optional) = readNewlineArgs($gullet);
return (T_CS('\hidden@cr'), T_BEGIN,
($optional
? (T_CS('\@alignment@newline@markertall'), T_BEGIN, $optional, T_END)
: T_CS('\@alignment@newline@marker')),
T_END); });
# These are the markers that produce \\ in the reversion,
# and (eventually will) add vertical space to the row!
# Constructs nothing; reverts to \\ followed by a line end.
DefConstructor('\@alignment@newline@marker', '',
reversion => Tokens(T_CS("\\\\"), T_CR));
# AND add the spacing to the alignment!!!
# Constructs nothing; after digestion the dimension is stored as the
# 'padding' of the alignment's current row (when an Alignment is bound).
# Reverts to \\[<dimension>] followed by a line end.
DefConstructor('\@alignment@newline@markertall {Dimension}', '',
afterDigest => sub {
if (my $alignment = LookupValue('Alignment')) {
$alignment->currentRow->{padding} = $_[1]->getArg(1); }
return; },
reversion => sub {
Tokens(T_CS("\\\\"), T_OTHER('['), Revert($_[1]), T_OTHER(']'), T_CR); });
# \tabularnewline simply expands to \cr.
DefMacroI('\tabularnewline', undef, '\cr'); # ???
# \lx@intercol is our replacement for LaTeX's \@acol which places intercolumn space in tabular
# (but NOT used by TeX's \halign!)
# By default \lx@intercol expands to nothing.
DefMacro('\lx@intercol', '');
# Candidates for binding \lx@intercol for LaTeX tabular or math arrays
# These provide "padding" of half tabcolsep, since added before & after columns
# [these could be \hskip\tabcolsep, but the expansion confounds trimColumnSpec]
# Text version: absorbs DimensionToSpaces of its width into the document.
# The width is the value of \tabcolsep when that is defined as a register, else 0.
DefConstructor('\lx@text@intercol', sub {
my ($document, %props) = @_;
$document->absorb(DimensionToSpaces($props{width})); },
reversion => '\lx@intercol',
properties => sub {
my $defn;
my $w = (($defn = $STATE->lookupDefinition(T_CS('\tabcolsep'))) && $defn->isRegister
? $defn->valueOf : Dimension(0));
(width => $w, isSpace => 1); });
# Math version: constructs nothing; its width comes from \arraycolsep in the same way.
DefConstructor('\lx@math@intercol', "", # mspace ???
reversion => '\lx@intercol',
properties => sub {
my $defn;
my $w = (($defn = $STATE->lookupDefinition(T_CS('\arraycolsep'))) && $defn->isRegister
? $defn->valueOf : Dimension(0));
(width => $w, isSpace => 1); });
#======================================================================
# Various decorations within alignments, rules, headers, etc
# Like \noalign, takes an arg; handled within alignment processing.
# But doesn't create a pseudo-row (??? Or does it?; is it still needed?)
# Constructs its argument, but reverts to nothing.
# Always marked alignmentSkippable; alignmentPreserve is the number of boxes
# in the argument that themselves carry the alignmentPreserve property.
DefConstructor('\hidden@noalign{}', '#1',
reversion => '',
properties => sub {
# Sometimes, we're smuggling stuff that needs to be carried into the XML.
my $preserve = grep { $_->getProperty('alignmentPreserve'); } $_[1]->unlist;
(alignmentSkippable => 1, alignmentPreserve => $preserve); });
# \hline expands to \noalign{\@@alignment@hline}.
DefMacro('\hline', '\noalign{\@@alignment@hline}');
# Constructs nothing; after digestion it calls addLine('t') on the bound Alignment, if any.
# Flagged as a horizontal rule, sized as 0, and aliased to \hline.
DefConstructorI('\@@alignment@hline', undef, '',
afterDigest => sub {
if (my $alignment = LookupValue('Alignment')) {
$alignment->addLine('t'); }
return; },
properties => { isHorizontalRule => 1 },
sizer => 0, alias => '\hline');
# These two macros expand to nothing; they switch the in_tabular_head flag
# of the current Alignment on and off.
# NOTE(review): the result of LookupValue('Alignment') is not checked before use -- confirm
# these are only ever expanded while an Alignment is bound.
DefMacroI('\@tabular@begin@heading', undef, sub {
my $alignment = LookupValue('Alignment');
$$alignment{in_tabular_head} = 1;
return; });
DefMacroI('\@tabular@end@heading', undef, sub {
my $alignment = LookupValue('Alignment');
$$alignment{in_tabular_head} = 0;
return; });
#======================================================================
# Math mode in alignment
# Special forms for $ appearing within alignments.
# Note that $ within a math alignment (eg array environment),
# switches to text mode! There's no $$ for display math.
# This is the "normal" case: $ appearing with an alignment that is in text mode.
# It's just like regular $, except it doesn't look for $$ (no display math).
# Invokes \@@ENDINLINEMATH when IN_MATH is set, and \@@BEGININLINEMATH otherwise.
# Recursion warnings are silenced for this call.
DefPrimitiveI('\@dollar@in@textmode', undef, sub {
no warnings 'recursion';
$_[0]->invokeToken(T_CS((LookupValue('IN_MATH') ? '\@@ENDINLINEMATH' : '\@@BEGININLINEMATH'))); });
# This one is for $ appearing within an alignment that's already math.
# This should switch to text mode (because it's balancing the hidden $
# wrapping each alignment cell!!!!!!)
# However, it should be like a normal $ if it's inside something like \mbox
# that itself makes a text box!!!!!!
# Thus, we need to know at what boxing level we started the last math or text.
# This is all complicated by the need to know _how_ we got into or out of math mode!
# Gawd, this is awful!
# NOTE: Probably the most "Right" thing to do would be to process
# alignments in text mode only (like TeX), sneaking $'s in where needed,
# but then afterwards, morph them into math arrays?
# This would be complicated by the need to hide these $ from untex.
# The value MATH_ALIGN_$_BEGUN remembers the boxing level (plus one) at which
# an opening $ was seen.  When the current boxing level equals it, this $ is
# the closing one: the inline math or text is ended and the value is reset to 0.
# Otherwise this $ is an opening one: the level is recorded, and inline text
# (when in math) or inline math (when not) is begun.
DefPrimitiveI('\@dollar@in@mathmode', undef, sub {
my ($stomach) = @_;
my $level = $stomach->getBoxingLevel;
if ((LookupValue('MATH_ALIGN_$_BEGUN') || 0) == $level) { # If we're begun making _something_ with $.
my @l = ();
if (LookupValue('IN_MATH')) { # But we're somehow in math?
@l = $stomach->invokeToken(T_CS('\@@ENDINLINEMATH')); }
else {
@l = $stomach->invokeToken(T_CS('\@@ENDINLINETEXT')); }
AssignValue('MATH_ALIGN_$_BEGUN' => 0); # Reset this AFTER finishing the something
# The boxes produced by the closing token are the result
@l; }
else {
AssignValue('MATH_ALIGN_$_BEGUN' => $level + 1); # Note that we've begun something
if (LookupValue('IN_MATH')) { # If we're "still" in math
$stomach->invokeToken(T_CS('\@@BEGININLINETEXT')); }
else {
$stomach->invokeToken(T_CS('\@@BEGININLINEMATH')); } } });
# Opens a text insertion within math: switches to text mode before digestion,
# captures the following body and wraps it in ltx:XMText. Aliased to $.
DefConstructorI('\@@BEGININLINETEXT', undef,
  '<ltx:XMText>#body</ltx:XMText>',
  alias        => T_MATH,
  captureBody  => 1,
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->beginMode('text'); });
# Closes the text insertion: ends text mode before digestion and
# constructs nothing itself. Aliased to $.
DefConstructorI('\@@ENDINLINETEXT', undef, "",
  alias        => T_MATH,
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->endMode('text'); });
# Globally sets the EQUATIONROW_NUMBER value to 0.
DefPrimitiveI('\@LTX@nonumber', undef, sub { AssignValue(EQUATIONROW_NUMBER => 0, 'global'); });
# \hidewidth expands to an empty token list here.
DefMacroI('\hidewidth', undef, Tokens());
#======================================================================
# Multicolumn support
# \multispan{n}: expands to \omit followed by (n-1) copies of \span\omit.
DefMacro('\multispan{Number}', sub {
    my ($gullet, $span) = @_;
    my $count  = $span->valueOf;
    my @tokens = (T_CS('\omit'));
    foreach my $i (1 .. $count - 1) {
      push(@tokens, T_CS('\span'), T_CS('\omit')); }
    return @tokens; });
# Read-only style register: the number of columns in the current
# alignment's template, or 0 when no alignment is active.
DefRegisterI('\@alignment@ncolumns', undef, Dimension(0),
  getter => sub {
    my $alignment = LookupValue('Alignment');
    return Number(0) unless $alignment;
    return Number(scalar($alignment->getTemplate->columns)); });
# Read-only style register: the current column number of the active
# alignment, or 0 when no alignment is active.
DefRegisterI('\@alignment@column', undef, Dimension(0),
  getter => sub {
    my $alignment = LookupValue('Alignment');
    return Number(0) unless $alignment;
    return Number($alignment->currentColumnNumber); });
# \@multicolumn{n}<template>{content}:
# spans n columns (as \multispan does), then wraps the content in the
# before/after tokens of the template's first column, if there is one.
DefMacro('\@multicolumn {Number} AlignmentTemplate {}', sub {
    my ($gullet, $span, $template, $tokens) = @_;
    my $colspec = $template->column(1);
    my $ncols   = $span->valueOf;
    # Spanning part
    my @expansion = (T_CS('\omit'));
    foreach my $i (1 .. $ncols - 1) {
      push(@expansion, T_CS('\span'), T_CS('\omit')); }
    # The template is only used once, so it is simply expanded in-line.
    push(@expansion, beforeCellUnlist($$colspec{before})) if $colspec;
    push(@expansion, $tokens->unlist);
    push(@expansion, afterCellUnlist($$colspec{after})) if $colspec;
    return @expansion; });
# Conditional that tests whether an 'Alignment' value is currently bound.
DefConditionalI('\if@in@alignment', undef, sub { LookupValue('Alignment'); });
# TeX-level entry to alignmentBindings: takes an alignment template and an
# optional argument that is passed on as the mode.
DefPrimitive('\@alignment@bindings AlignmentTemplate []', sub {
my ($stomach, $template, $mode) = @_;
alignmentBindings($template, $mode); });
# Utility, not really TeX, but used by LaTeX, AmSTeX...
# Convert a vertical positioning, optional argument.
# t = "top", b = "bottom"; default is "middle".
# Note that the default for vattach attribute is "baseline".
# Map a positional argument to an attachment keyword:
# 't' gives 'top', 'b' gives 'bottom'; anything else, including
# a missing or empty argument, gives 'middle'.
sub translateAttachment {
  my ($pos) = @_;
  my $key = ($pos ? ToString($pos) : '');
  return 'top'    if $key eq 't';
  return 'bottom' if $key eq 'b';
  return 'middle'; }
# This trims trailing whitespace from the current digested list,
# for use within latex tabular-style columns.
# But note that \halign does NOT remove this trailing space!
# Works backwards from the end of @LaTeXML::LIST (the list currently being
# digested), dropping boxes that contain only whitespace.
# Skippable, fill and empty boxes are set aside and put back afterwards;
# the scan stops at the first box holding anything else. Returns nothing.
DefPrimitiveI('\lx@column@trimright', undef, sub {
my $box;
my @save = ();
my $s;
while ($box = $LaTeXML::LIST[-1]) {
# Set these aside; they are restored once the scan is over.
if ($box->getProperty('alignmentSkippable')
|| $box->getProperty('isFill')
|| IsEmpty($box)) {
push(@save, pop(@LaTeXML::LIST)); }
elsif (ref $box eq 'LaTeXML::Core::List') { # Unwrap and continue
pop(@LaTeXML::LIST);
push(@LaTeXML::LIST, $box->unlist); }
elsif ((ref $box eq 'LaTeXML::Core::Box')
&& defined($s = $box->getString) && ($s =~ /^\s*$/)) {
pop(@LaTeXML::LIST); } # remove any box containing only spaces
else {
last; } }
# NOTE(review): the set-aside boxes were collected last-to-first and are
# pushed back in that order, i.e. reversed -- confirm this is intended.
push(@LaTeXML::LIST, @save);
return; });
# The \hfil token, built once; used when reordering $ and \hfil in column templates.
use constant T_hfil => T_CS('\hfil');
# Yet more special case hacking. Sometimes the order of tokens works for
# TeX, but confuses us... In particular the order of $ and \hfil!
sub beforeCellUnlist {
my ($tokens) = @_;
return () unless $tokens;
my @toks = $tokens->unlist;
my @new = ();
while (my $t = shift(@toks)) {
if ($t->defined_as(T_MATH) && @toks && $toks[0]->defined_as(T_hfil)) {
push(@new, shift(@toks)); unshift(@toks, $t); }
else {
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
elsif ($cc == CC_PARAM) { # Found the template's column slot
$before = 0;
push(@tokens, $t); }
elsif (($cc == CC_ALIGN)
|| $t->equals(T_CS('\cr')) || $t->equals(T_CS('\crcr'))) { # End the column
if ($before) { # Leading & ?
$repeated = 1;
@nonreps = @cols; @cols = (); } # A & while we're before a column means Repeated columns
else { # Finished column spec; add it
## How should we be handling tabskip? An attribute on the cell or spacing?
push(@cols, {
tabskip => $tabskip,
before => Tokens(beforeCellUnlist(Tokens(@pre))),
after => Tokens(afterCellUnlist(Tokens(@post))) });
$tabskip = $nexttabskip;
@pre = @post = (); $before = 1; }
last unless $cc == CC_ALIGN;
push(@tokens, $t); }
elsif ($before) { # Other random tokens go into the column's pre-template
push(@pre, $t) if @pre || ($cc != CC_SPACE);
push(@tokens, $t); }
else { # Or the post-template
push(@post, $t) if @post || ($cc != CC_SPACE);
push(@tokens, $t); } }
# Now create & return the template object
my $template = LaTeXML::Core::Alignment::Template->new(
($repeated
? (columns => [@nonreps], repeated => [@cols])
: (columns => [@cols])),
tokens => [@tokens]);
$whatsit->setProperty(template => $template);
return $template; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# And the general alignment processing.
# If the Template is appropriately constructed, either by \halign or various \begin{tabular}
# the body of the alignment is processed the same way.
# Establish the bindings used while processing the body of an alignment.
#   $template   : the alignment template. Normally a template object, but
#                 some callers in this file pass a plain string (eg. 'l').
#   $mode       : optional; defaults to the current MODE. A mode ending in
#                 "math" selects the math elements (XMArray/XMRow/XMCell),
#                 otherwise the text ones (tabular/tr/td) are used.
#   %properties : stored as the properties of the new Alignment.
# Side effects: assigns the new LaTeXML::Core::Alignment to the value
# 'Alignment' and rebinds $ (T_MATH) to the alignment-specific handler.
# Returns nothing.
sub alignmentBindings {
  my ($template, $mode, %properties) = @_;
  $mode = LookupValue('MODE') unless $mode;
  my $ismath    = $mode =~ /math$/;
  my $container = ($ismath ? 'ltx:XMArray' : 'ltx:tabular');
  my $rowtype   = ($ismath ? 'ltx:XMRow'   : 'ltx:tr');
  my $coltype   = ($ismath ? 'ltx:XMCell'  : 'ltx:td');
  my $alignment = LaTeXML::Core::Alignment->new(
    template       => $template,
    openContainer  => sub { $_[0]->openElement($container, @_[1 .. $#_]); },
    closeContainer => sub { $_[0]->closeElement($container); },
    openRow        => sub { $_[0]->openElement($rowtype, @_[1 .. $#_]); },
    closeRow       => sub { $_[0]->closeElement($rowtype); },
    openColumn     => sub { $_[0]->openElement($coltype, @_[1 .. $#_]); },
    closeColumn    => sub { $_[0]->closeElement($coltype); },
    isMath         => $ismath,
    properties     => {%properties});
  AssignValue(Alignment => $alignment);
  if ($LaTeXML::DEBUG{halign}) {
    # Only a template object can ->show itself; calling it on a plain
    # string template would die, so fall back to the string itself.
    my $shown = (ref $template ? $template->show
      : (defined $template ? "$template" : 'undef'));
    Debug("Halign $alignment: New " . $shown); }
  Let(T_MATH, ($ismath ? '\@dollar@in@mathmode' : '\@dollar@in@textmode'));
  return; }
# Hook macros around alignment rows and columns, defined here with an
# undefined expansion.
# NOTE(review): presumably placeholders to be redefined elsewhere -- confirm.
DefMacroI('\@row@before', undef, undef);
DefMacroI('\@row@after', undef, undef);
DefMacroI('\@column@before', undef, undef);
DefMacroI('\@column@after', undef, undef);
# Revert the argument with $LaTeXML::DUAL_BRANCH temporarily set to 'presentation'.
sub pRevert {
  my $thing = shift;
  local $LaTeXML::DUAL_BRANCH = 'presentation';
  return Revert($thing); }
# Revert the argument with $LaTeXML::DUAL_BRANCH temporarily set to 'content'.
sub cRevert {
  my $thing = shift;
  local $LaTeXML::DUAL_BRANCH = 'content';
  return Revert($thing); }
# The \@close@alignment token, built once; its appearance marks the end of an alignment body.
use constant T_close_alignment => T_CS('\@close@alignment');
sub digestAlignmentBody {
my ($stomach, $whatsit) = @_;
my $gullet = $stomach->getGullet;
local $LaTeXML::ALIGN_STATE = 0;
# Now read & digest the body.
# Note that the body MUST end with a \cr, and that we've made Special Arrangements
# with \alignment@cr to recognize the end of the \halign
my $alignment = LookupValue('Alignment');
local $LaTeXML::READING_ALIGNMENT = $alignment;
if (!$alignment) {
Error('missing', 'alignment', $stomach, "There is no open alignment structure here");
return; }
$whatsit->setProperty(alignment => $alignment);
$alignment->setBody($whatsit);
Debug("Halign $alignment: BODY Processing...") if $LaTeXML::DEBUG{halign};
my $lastwascr = undef;
my @reversion = ();
my @creversion = ();
while (1) {
my ($cell, $next, $type, $hidden) = digestAlignmentColumn($stomach, $alignment, $lastwascr);
Debug("Halign $alignment: BODY got CELL"
. "[" . $alignment->currentRowNumber . "," . $alignment->currentColumnNumber . "]"
. ToString($cell) . " ended at " . Stringify($next)) if $LaTeXML::DEBUG{halign};
if (!$cell) {
Debug("Halign $alignment: BODY DONE!") if $LaTeXML::DEBUG{halign};
last; }
if ($cell) {
push(@reversion, trimColumnTemplate($alignment, pRevert($cell)));
push(@creversion, trimColumnTemplate($alignment, cRevert($cell))); }
extractAlignmentColumn($alignment, $cell);
$lastwascr = undef;
if (!$type && (!$next
|| $next->defined_as(T_END) # End of alignment
|| $next->defined_as(T_close_alignment))) { # End of alignment
$alignment->endRow();
last; }
elsif ($type eq 'align') {
$alignment->endColumn();
if (!$hidden) {
push(@reversion, $next); # and record the &
push(@creversion, $next); } } # and record the &
elsif ($type eq 'insert') {
$alignment->endColumn(); }
elsif (($type eq 'cr') || ($type eq 'crcr')) {
$alignment->endRow();
if (!$hidden) {
push(@reversion, $next);
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
|| (ref $boxes[0] eq 'LaTeXML::Core::Comment')) {
push(@saveleft, shift(@boxes)); }
else {
last; } }
while (@boxes) {
if (ref $boxes[-1] eq 'LaTeXML::Core::List') {
push(@boxes, pop(@boxes)->unlist); }
elsif ($boxes[-1]->getProperty('isFill')) {
if ($align eq 'right') { $align = 'center'; }
pop(@boxes);
last; }
elsif ($boxes[-1]->getProperty('isVerticalRule')) {
$border .= 'r';
@rspaces = (); # discard spacing after rule!!! (should save for next column?)
pop(@boxes); }
elsif ($boxes[-1]->getProperty('isSpace')) {
unshift(@rspaces, pop(@boxes)); }
elsif ($boxes[-1]->getProperty('isHorizontalRule')
|| $boxes[-1]->getProperty('alignmentSkippable')
|| (ref $boxes[-1] eq 'LaTeXML::Core::Comment')) {
unshift(@saveright, pop(@boxes)); }
else {
last; } }
delete $$colspec{width} unless $align eq 'justify';
# Replacing boxes with the fil padding & vertical rules stripped off
@boxes = (@saveleft, @boxes, @saveright);
$boxes = List(@boxes, mode => ($boxes->isMath ? 'math' : 'text'));
# record relevant info in the Alignment.
$$colspec{align} = $align;
$$colspec{border} = $border = ($$colspec{border} || '') . $border;
$$colspec{boxes} = $boxes;
$$colspec{lspaces} = List(@lspaces) if @lspaces;
$$colspec{rspaces} = List(@rspaces) if @rspaces;
$$colspec{colspan} = $n1 - $n0 + 1;
if ($$alignment{in_tabular_head} || $$alignment{in_tabular_foot}) {
$$colspec{thead}{column} = 1; }
for (my $i = $n0 + 1 ; $i <= $n1 ; $i++) {
my $c = $alignment->getColumn($i);
$$c{skipped} = 1 if $c; }
Debug("Halign $alignment: INSTALL column " . join(',', map { $_ . "=" . ToString($$colspec{$_}); } sort keys %$colspec)) if $LaTeXML::DEBUG{halign};
return $boxes; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Cleanup the pre & post tokens for halign columns in math mode.
# If a pair of $..$ enclose stuff that is "OK" in math mode, we don't need the $.
# Note that the 1st $ is switching OUT of math mode!
# Remove pairs of directly adjacent math-shift tokens from a token list.
# The positions of all T_MATH tokens are paired up from the right (when
# their count is odd, the leftmost is left unpaired); a pair is removed
# only when its two tokens are neighbours. Returns the resulting list.
sub stripDupMath {
  my (@tokens) = @_;
  my @math_at = ();
  foreach my $i (0 .. $#tokens) {
    push(@math_at, $i) if $tokens[$i]->defined_as(T_MATH); }
  shift(@math_at) if @math_at % 2;    # Get pairs!
  # Working from the right keeps the remaining indices valid after a splice.
  while (@math_at) {
    my $second = pop(@math_at);
    my $first  = pop(@math_at);
    if ($second == $first + 1) {
      splice(@tokens, $first, 2); } }
  return @tokens; }
# "Initialized" alignment; presets spacing, but since we're ignoring it anyway...
Let('\ialign', '\halign');
# Overlapping alignments ???
# \oalign{body}: wraps the body between \@start@alignment and
# \@finish@alignment and hands it to \@@oalign, which digests it in a
# bounded text-mode context and reverts to \oalign{...}.
DefMacro('\oalign{}',
'\@@oalign{\@start@alignment#1\@finish@alignment}');
# NOTE(review): alignmentBindings is given the plain string 'l' here, whereas
# \@alignment@bindings passes it a parsed AlignmentTemplate -- confirm that a
# string template is acceptable.
DefConstructor('\@@oalign{}',
'#1',
reversion => '\oalign{#1}', bounded => 1, mode => 'text',
beforeDigest => sub { alignmentBindings('l'); });
# This is actually different; the lines should lie on top of each other.
# How should this be represented?
# \ooalign{body}: currently handled exactly like \oalign, via \@@ooalign,
# reverting to \ooalign{...}.
DefMacro('\ooalign{}',
'\@@ooalign{\@start@alignment#1\@finish@alignment}');
# NOTE(review): as with \@@oalign, the template passed is the plain string 'l'
# rather than a template object -- confirm.
DefConstructor('\@@ooalign{}',
'#1',
reversion => '\ooalign{#1}', bounded => 1, mode => 'text',
beforeDigest => sub { alignmentBindings('l'); });
#----------------------------------------------------------------------
# These determine whether the _next_ paragraph gets indented!
# thus it needs \par to check whether such indentation has been set.
# \indent: mark the current (or a newly opened) ltx:para as indented.
DefConstructorI('\indent', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    return unless $current;
    # Already within a para: just set its class.
    if ($document->getNodeQName($current) eq 'ltx:para') {
      $current->setAttribute(class => "ltx_indent");
      return; }
    # Otherwise, ignore unless a paragraph could be started here.
    return unless $document->canContainSomehow($current, "ltx:para");
    # Start the paragraph; but, perversely, the indent is ignored on the
    # first para following a sectioning title.
    my $last        = $current->lastChild;
    my $after_title = $last && ($document->getNodeQName($last) =~ /^ltx:(?:toc)?title$/);
    if ($after_title) {
      $document->openElement("ltx:para"); }
    else {
      $document->openElement("ltx:para", class => "ltx_indent"); }
    return; });
# \noindent: mark the current (or a newly opened) ltx:para as not indented.
DefConstructorI('\noindent', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    return unless $current;
    if ($document->getNodeQName($current) eq 'ltx:para') {
      # Already within a para: just set its class.
      $current->setAttribute(class => "ltx_noindent"); }
    elsif ($document->canContainSomehow($current, "ltx:para")) {
      # A paragraph can be started here, so start one.
      $document->openElement("ltx:para", class => "ltx_noindent"); }
    # In any other position it is ignored.
    return; });
# <ltx:para> represents a Logical Paragraph, whereas <ltx:p> is a `physical paragraph'.
# A para can contain both p and displayed equations and such.
# Remember; \par _closes_, not opens, paragraphs!
# Here, we want to close both an open p and para (if either are open).
# NOTE Also that the whole inPreamble bit is, I think, overused.
# For example, \par should be a NOOP in vertical mode, and that would generally make it
# ignored in the preamble.
# \normal@par: the normal meaning of \par.
# Construction: does nothing if the whatsit was digested in the preamble;
#   otherwise closes any open ltx:p, transfers the 'class' property (if any)
#   onto an open ltx:para that has no class yet, then closes the ltx:para.
# afterDigest: records inPreamble on the whatsit, or else
#   - moves a pending next_para_class value onto this whatsit,
#   - sets next_para_class to ltx_noindent when \parindent is zero,
#   - digests and returns any accumulated vAdjust material.
DefConstructorI('\normal@par', undef, sub {
    my ($document, %props) = @_;
    if ($props{inPreamble}) { }
    else {
      $document->maybeCloseElement('ltx:p');
      my $node  = $document->getElement;
      my $qname = ($node && $document->getNodeQName($node)) || '';
      if ($qname eq 'ltx:para' && !$node->getAttribute("class")) { # Only set on the para about to close, if unknown!
        if (my $c = $props{class}) {
          $document->setAttribute($node, class => $c); } }
      $document->maybeCloseElement('ltx:para'); } },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    if (LookupValue('inPreamble')) {
      $whatsit->setProperty(inPreamble => 1);
      # Return explicitly, so that whatever setProperty returns can never
      # be taken as the result of this hook.
      return; }
    # Check if flags were set by prior \par:
    if (my $c = LookupValue("next_para_class")) {
      $whatsit->setProperty(class => $c);
      AssignValue(next_para_class => undef); }
    # Fish out flags for next ltx:para, to be used when the next \par closes:
    if (!LookupRegister('\parindent')->valueOf) {
      # respect \parindent if no overrides are given
      AssignValue(next_para_class => "ltx_noindent"); }
    # Vertical adjustments
    if (my $vadj = LookupValue('vAdjust')) {
      AssignValue(vAdjust => [], 'global');
      return Digest(Tokens(@$vadj)); }
    return; },
  properties => { alignmentSkippable => 1 },
  alias      => '\par');
# \par initially means \normal@par.
Let('\par', '\normal@par');
# Takes an optional * and an optional Glue argument, and expands to \normal@par.
DefMacro('\inner@par OptionalMatch:* [Glue]', '\normal@par'); # Obsolete, but in case still used...
# ltx:para opens and closes automatically; pruneEmpty runs after each one closes.
Tag('ltx:para', autoClose => 1, autoOpen => 1, afterClose => \&pruneEmpty);
# afterClose handler: remove a node that ended up with no element children.
# This can happen when, e.g., a \noindent is followed by a {table}, which
# leaves an empty ltx:para behind. Unless the preceding element sibling is
# an ltx:para, the parent is given the class ltx_pruned_first.
sub pruneEmpty {
  my ($document, $node) = @_;
  return if scalar(element_nodes($node));
  my $prev         = element_prev($node);
  my $follows_para = $prev && ($document->getNodeQName($prev) eq 'ltx:para');
  $document->addClass($node->parentNode, 'ltx_pruned_first') unless $follows_para;
  $node->unlinkNode;
  return; }
# Trim whitespace at both ends of a node's content: left first, then right.
sub trimNodeWhitespace {
  my ($document, $node) = @_;
  foreach my $trimmer (\&trimNodeLeftWhitespace, \&trimNodeRightWhitespace) {
    $trimmer->($document, $node); }
  return; }
# Strip leading space characters from the start of a node's content,
# descending into the first child as long as that is an element.
# Only plain spaces are removed (not \s): with some trepidation,
# we don't want to trim nbsp!
sub trimNodeLeftWhitespace {
  my ($document, $node) = @_;
  my @kids = $node->childNodes;
  return unless @kids;
  my $first = $kids[0];
  my $kind  = $first->nodeType;
  if ($kind == XML_ELEMENT_NODE) {
    trimNodeLeftWhitespace($document, $first); }
  elsif ($kind == XML_TEXT_NODE) {
    my $text = $first->data;
    $first->setData($text) if $text =~ s/^ +//; }
  return; }
# Strip trailing whitespace from the end of a node's content,
# descending into the last child as long as that is an element.
# NOTE(review): unlike the left-hand trimmer this matches \s, which may
# also cover non-breaking spaces -- confirm that is intended.
sub trimNodeRightWhitespace {
  my ($document, $node) = @_;
  my @kids = $node->childNodes;
  return unless @kids;
  my $last = $kids[-1];
  my $kind = $last->nodeType;
  if ($kind == XML_ELEMENT_NODE) {
    trimNodeRightWhitespace($document, $last); }
  elsif ($kind == XML_TEXT_NODE) {
    my $text = $last->data;
    $last->setData($text) if $text =~ s/\s+$//; }
  return; }
# ltx:p opens and closes automatically; its content is whitespace-trimmed on close.
Tag('ltx:p', autoClose => 1, autoOpen => 1, afterClose => \&trimNodeWhitespace);
# \dump ???
# \end: flushes the gullet and produces nothing.
DefPrimitiveI('\end', undef, sub { $_[0]->getGullet->flush; return; });
#======================================================================
# Horizontal Mode primitives in Ch.25, pp.285--287
# The following cause tex to start a new paragraph -- they switch to horizontal mode.
# <horizontal command> = <letter> | <other> | \char | <chardef token>
# | \noboundary | \unhbox | \unhcopy | \valign | \vrule
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
my ($stomach, $length) = @_;
my $s = DimensionToSpaces($length);
Box($s, undef, undef, Invocation(T_CS('\mkern'), $length),
width => $length, isSpace => 1); });
# \hss and \hfilneg are defined with no implementation.
DefPrimitiveI('\hss', undef, undef);
DefPrimitiveI('\hfilneg', undef, undef);
# \hfil and \hfill each produce a single-space Box flagged both as space
# (isSpace) and as fill (isFill), which reverts to the original token.
DefPrimitiveI('\hfil', undef, sub {
Box(' ', undef, undef, T_CS('\hfil'), isSpace => 1, isFill => 1); });
DefPrimitiveI('\hfill', undef, sub {
Box(' ', undef, undef, T_CS('\hfill'), isSpace => 1, isFill => 1); });
# \lower <dimen> <box>
# \raise <dimen> <box>
# But <box> apparently must really explicitly be an \hbox, \vbox or \vtop (?)
# OR something that expands into one!!
# Size of a box after being shifted vertically by $y:
# the width is unchanged, the height grows by $y and the depth shrinks
# by $y, each being clamped to be at least zero.
# Returns (width, height, depth).
sub raisedSizer {
  my ($box, $y) = @_;
  my ($width, $height, $depth) = $box->getSize;
  my $zero          = Dimension(0);
  my $raised_height = $height->add($y)->larger($zero);
  my $raised_depth  = $depth->subtract($y)->larger($zero);
  return ($width, $raised_height, $raised_depth); }
# \lower <dimen> <box>: the box shifted down by the dimension.
# Within SVG it becomes an svg:g with a translate transform,
# otherwise an ltx:text with a yoffset. The offset stored is the
# negated dimension.
DefConstructor('\lower Dimension MoveableBox',
  "?&inSVG()(<svg:g transform='#transform' _noautoclose='1'>#2</svg:g>)"
    . "(<ltx:text yoffset='#y' _noautoclose='1'>#2</ltx:text>)",
  sizer => sub {
    my ($whatsit) = @_;
    return raisedSizer($whatsit->getArg(2), $whatsit->getArg(1)->negate); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $y   = $whatsit->getArg(1)->multiply(-1);
    my $ypx = $y->pxValue;
    my $transform;
    $transform = "translate(0,$ypx)" if $ypx;
    $whatsit->setProperties(y => $y, transform => $transform); });
# \raise <dimen> <box>: the box shifted up by the dimension.
# Within SVG it becomes an svg:g with a translate transform,
# otherwise an ltx:text with a yoffset.
DefConstructor('\raise Dimension MoveableBox',
  "?&inSVG()(<svg:g transform='#transform' _noautoclose='1'>#2</svg:g>)"
    . "(<ltx:text yoffset='#y' _noautoclose='1'>#2</ltx:text>)",
  sizer => sub {
    my ($whatsit) = @_;
    return raisedSizer($whatsit->getArg(2), $whatsit->getArg(1)); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $y   = $whatsit->getArg(1);
    my $ypx = $y->pxValue;
    my $transform;
    $transform = "translate(0,$ypx)" if $ypx;
    $whatsit->setProperties(y => $y, transform => $transform); });
# \unhbox<8bit>, \unhcopy<8bit>
# \unhbox<number>: yields the unlisted contents of the box register
# (after adjusting its color) and clears the register.
# Yields an empty List when the register holds nothing.
DefPrimitive('\unhbox Number', sub {
    my ($stomach, $number) = @_;
    my $key      = 'box' . $number->valueOf;
    my $contents = LookupValue($key);
    adjustBoxColor($contents);
    AssignValue($key, undef);
    return List() unless defined $contents;
    return $contents->unlist; });
# \unhcopy<number>: like \unhbox, but the register keeps its contents.
DefPrimitive('\unhcopy Number', sub {
    my ($stomach, $number) = @_;
    my $key      = 'box' . $number->valueOf;
    my $contents = LookupValue($key);
    adjustBoxColor($contents);
    return List() unless defined $contents;
    return $contents->unlist; });
# Implement ???
# DefMacro('\vrule','\relax');
# \valign expands to nothing.
DefMacro('\valign', '');
# \vspace{x} is expressed through \vskip.
DefMacro('\vspace{}', '\vskip#1\relax');
# \indent, \noindent, \par; see above.
# Only the third argument of \discretionary is kept.
DefMacro('\discretionary{}{}{}', '#3'); # No hyphenation here!
# \- and \setlanguage are defined with no implementation.
DefPrimitiveI('\-', undef, undef);
DefPrimitive('\setlanguage Number', undef);
#======================================================================
# Math mode stuff
# See TeXBook Ch.26
#======================================================================
# Decide whether we're going into or out of math, inline or display.
# ltx:XMText elements may be opened and closed automatically.
Tag('ltx:XMText', autoOpen => 1, autoClose => 1);
# This really should be T_MATH
# and it should (or not) check for a second $ only if not in restricted horizontal mode!
# (and then all the \@dollar@in@(text|math|normal)mode defns would not be needed.
# The conventional $: decides from the current MODE, and from whether another
# $ follows, which of the begin/end inline/display math tokens to invoke.
#   inline_math           : ends inline math
#   display_math, next $  : consumes it and ends display math
#   display_math, no $    : reports an Error and invokes nothing
#   otherwise, next $     : consumes it and begins display math
#   otherwise             : begins inline math
# (A variant that skips the $$ check inside alignments was considered:
#  !LookupValue('Alignment') && $gullet->ifNext(T_MATH).)
DefPrimitiveI('\@dollar@in@normalmode', undef, sub {
    my ($stomach) = @_;
    my $gullet    = $stomach->getGullet;
    my $mode      = LookupValue('MODE');
    my $op;
    if ($mode eq 'inline_math') {
      $op = '\@@ENDINLINEMATH'; }
    elsif ($mode eq 'display_math') {
      if ($gullet->ifNext(T_MATH)) {
        $gullet->readToken;
        $op = '\@@ENDDISPLAYMATH'; }
      else {
        # Avoid a Fatal, but we're likely in trouble.
        # Should we switch to text mode? (LaTeX normally wouldn't)
        # Did we miss something, and should already have been in text mode? Possibly...
        # OR, were we in a lenient package that allowed inline math mixed in with display?
        # Either way nothing gets invoked: $op stays undefined.
        Error('expected', '$', $stomach,
          "Missing \$ closing display math.",
          "Ignoring; expect to be in wrong math/text mode."); } }
    elsif ($gullet->ifNext(T_MATH)) {
      $gullet->readToken;
      $op = '\@@BEGINDISPLAYMATH'; }
    else {
      $op = '\@@BEGININLINEMATH'; }
    $stomach->invokeToken(T_CS($op)) if $op; });
# Let this be the default, conventional $
Let(T_MATH, T_CS('\@dollar@in@normalmode'));
# Effectively these are the math hooks, redefine these to do what you want with math?
# Begins display math: the captured body becomes an ltx:equation holding a
# display-mode ltx:Math. Before digesting, display_math mode is begun and
# any tokens held by \everymath, then \everydisplay, are pushed back onto
# the input. Reverts to $$.
DefConstructorI('\@@BEGINDISPLAYMATH', undef,
  "<ltx:equation><ltx:Math mode='display'><ltx:XMath>"
    . "#body"
    . "</ltx:XMath></ltx:Math></ltx:equation>",
  reversion    => Tokens(T_MATH, T_MATH),
  captureBody  => 1,
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->beginMode('display_math');
    foreach my $hook ('\everymath', '\everydisplay') {
      my @hook_toks = $STATE->lookupDefinition(T_CS($hook))->valueOf->unlist();
      $stomach->getGullet->unread(@hook_toks) if @hook_toks; }
    return; });
# Ends display math: constructs nothing itself, ends display_math mode
# before digestion, and reverts to $$.
DefConstructorI('\@@ENDDISPLAYMATH', undef, "",
reversion => Tokens(T_MATH, T_MATH),
beforeDigest => sub { $_[0]->endMode('display_math'); });
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
my ($labels, $id, $idctr, $idctrm, $tags);
foreach my $eq (@equations) {
if (my $l = $eq->getAttribute('labels')) {
$labels = ($labels ? "$labels $l" : $l); }
$id = $eq->getAttribute('xml:id') if $eq->hasAttribute('xml:id');
$eq->removeAttribute('xml:id') if $id;
$tags = $document->findnode('ltx:tags', $eq);
# Annoying bookkeeping (should be more built in?)
$idctr = $eq->getAttribute('_ID_counter_') if $eq->hasAttribute('_ID_counter_');
$idctrm = $eq->getAttribute('_ID_counter_m_') if $eq->hasAttribute('_ID_counter_m_'); }
$document->unRecordID($id) if $id;
$document->setAttribute($equation, labels => $labels) if $labels;
$document->setAttribute($equation, 'xml:id' => $id) if $id;
$document->setAttribute($equation, '_ID_counter_' => $idctr) if $idctr;
$document->setAttribute($equation, '_ID_counter_m_' => $idctrm) if $idctrm;
$equation->appendChild($tags) if $tags;
# Scan equations to see which ones likely are continuations of previous
my ($mainfork, $branch) = openMathFork($document, $equation);
foreach my $eq (@equations) {
# remove equation; parts will be added in by adding to mathfork (hopefully taking care of ids)
$eq->unbindNode;
my $tr = $document->openElementAt($branch, 'ltx:tr');
my @cells = $document->findnodes('ltx:_Capture_', $eq);
$document->setAttribute($tr, class => 'ltx_eqn_lefteqn')
if ($cells[0]->getAttribute('class') || '') =~ /\blefteqn\b/;
foreach my $cell (@cells) {
addColumnToMathFork($document, $mainfork, $tr, $cell); }
$document->closeElementAt($tr); }
closeMathFork($document, $equation, $mainfork, $branch);
$document->closeElementAt($equation);
return; }
# Given an equation generated in an equationgroup,
# collect each $ncols columns into a MathFork structure,
# with the formatted portion being the columns.
# This is typically useful for AMS's align structures,
# which contain several columns, each pair of which represent a semantic equation.
# Group the captured cells of an equation into MathFork structures,
# starting a new MathFork for every $ncols cells. Each cell is added
# as a column of the MathFork that is currently open.
sub equationgroupJoinCols {
  my ($document, $ncols, $equation) = @_;
  my ($mainfork, $branch);
  my $seen = 0;
  foreach my $cell ($document->findnodes('ltx:_Capture_', $equation)) {
    next unless $document->getNodeQName($cell) =~ /(.*?:)?_Capture_$/;
    my $starts_group = (($seen % $ncols) == 0);
    $seen++;
    if ($starts_group) {    # Time for a new MathFork; finish the previous one first.
      closeMathFork($document, $equation, $mainfork, $branch) if $mainfork;
      ($mainfork, $branch) = openMathFork($document, $equation); }
    addColumnToMathFork($document, $mainfork, $branch, $cell); }
  closeMathFork($document, $equation, $mainfork, $branch) if $mainfork;
  return; }
#**********************************************************************
# \vcenter is simply treated as \vbox.
Let('\vcenter', '\vbox');
# \eqno & \leqno are really bizarre.
# They should seemingly digest until $ (or while still in math mode),
# and use that stuff as the reference number.
# However, since people abuse this, and we're really not quite TeX,
# we really can't do it Right.
# Even a \begin{array} ends up expanding into a $ !!!
# \eqno: collects the following (expanded) tokens as the equation tag.
# Balanced {...} groups are taken whole. Collection stops at the first token
# that looks like it ends the formula; the result is then \@@eqno{<collected>}
# followed by that stopping token. If the input runs out first, an Error is
# reported and the collected tokens are returned as they are.
DefMacroI('\eqno', undef, sub {
    my ($gullet) = @_;
    my $locator = $gullet->getLocator;
    # Tokens that end the tag; built once rather than on every iteration.
    # What do I need to explicitly list here!?!?!? UGGH!
    my @stoppers = (
      T_MATH,
      T_CS('\]'),
      T_CS('\cr'),                  # UGH from 2022: also don't jump over rows
      T_CS('\hidden@cr'),           # see arXiv:math/0001062, for one example
      T_CS('\@@ENDDISPLAYMATH'),
      T_CS('\begingroup'),          # Totally wrong, but to catch expanded environments
    );
    my $ends_tag = sub {
      my ($tok) = @_;
      foreach my $stopper (@stoppers) {
        return 1 if $tok->defined_as($stopper); }
      # any sort of environ begin or end???
      # This seems needed within AmSTeX environs
      return (ToString($tok) =~ /^\\(?:begin|end)\{/) ? 1 : 0; };
    my @stuff = ();
    # This is risky!!!
    while (my $t = $gullet->readXToken(0)) {
      if ($t->defined_as(T_BEGIN)) {
        push(@stuff, $t, $gullet->readBalanced, T_END); }
      elsif ($ends_tag->($t)) {
        return (Invocation(T_CS('\@@eqno'), Tokens(@stuff)), $t); }
      else {
        push(@stuff, $t); } }
    Error('unexpected', '\eqno', $gullet, "Fell off the end reading tag for \\eqno!",
      "started " . ToString($locator));
    return Tokens(@stuff); });
# \leqno is handled exactly like \eqno.
Let('\leqno', '\eqno');
# Revert to nothing, since it really doesn't belong in the TeX string(?)
# Constructs the tag as ltx:tags/ltx:tag wrapping the argument in an ltx:Math.
DefConstructor('\@@eqno{}',
"^ <ltx:tags><ltx:tag><ltx:Math><ltx:XMath>#1</ltx:XMath></ltx:Math></ltx:tag></ltx:tags>",
reversion => '');
#======================================================================
# Scripts are a bit of a strange beast, with respect to when the arguments
# are processed, and what kind of object should be created.
#
# While scripts look like they take a normal TeX argument, they really
# take the next BOX (AFTER expansion & digestion)! Thus, while
# a^\frac{b}{c} and a^\mathcal{B}
# DO work in TeX, other things like
# a^\sqrt{3} or a^\acute{b}
# DO NOT! (Hint: consider the expansions)
# Note that with
# \def\xyz{xyz}
# a^\xyz => a^{x}yz
# So, we try to mimic, but note that our boxes don't correspond 100% to TeX's
#
# Normally, sub/super scripts should be turned into a sort of postfix operator:
# The parser will attach the script to the appropriate preceding object.
# However, there are a few special cases involving empty boxes {}.
# If the argument is an empty box $x^{}$, the whole script should just disappear.
# If the PRECEDING box is {} (in ${}^{p}$, a sort of `floating' script should be created.
# This may combine, in the parser, with the following object to generate
# a prescript.
# Remember a "safe" way to test a script Whatsit.
# Returns [ (FLOATING|POST) , (SUBSCRIPT|SUPERSCRIPT) ] or nothing
sub IsScript {
my ($object) = @_;
if (ref $object eq 'LaTeXML::Core::List') {
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
# Estimate the extra (width, height, depth) contributed by a script.
#   $script : the digested script; its size is scaled by 0.8
#   $base   : the object the script attaches to, or undef, in which case
#             the nominal size of the current font is used instead
#   $prev   : a preceding script, or undef
#   $op     : 'SUPERSCRIPT'; any other value is sized as a subscript
#   $pos    : 'mid' for a script placed over/under the base; anything else
#             is sized as a trailing script. When undefined, the base's
#             'scriptpos' property is consulted, defaulting to 'post'.
# Returns three Dimension objects.
sub scriptSizer {
  my ($script, $base, $prev, $op, $pos) = @_;
  # NOTE: Currently, the mathstyle is NOT reflected in the font of the script!!!!
  # Or is it now ?????
  # [unless it's different from the 'expected' style!!!]
  my ($ws, $hs, $ds) = map { $_->valueOf } $script->getSize;
  $ws *= 0.8; $hs *= 0.8; $ds *= 0.8;    # HACK!@!!
  my ($wb, $hb, $db) = map { $_->valueOf } ($base ? $base->getSize
    : LookupValue('font')->getNominalSize);
  my ($w, $h, $d) = (0, 0, 0);
  # Fishing for the scriptpos on the base (if any)
  $pos = $base->getProperty('scriptpos') if !defined $pos && defined $base;
  $pos = 'post'                          if !defined $pos;
  if ($pos eq 'mid') {
    $w = max(0, $ws - $wb);              # as if max width of base & script
    if ($op eq 'SUPERSCRIPT') {
      $h = $hb + $ds + $hs; }
    else {
      $d = $db + $hs + $ds; } }
  else {
    my $wp = ($prev && $prev->getWidth) || 0;    # as if max of width & prev script's width
    # The width may come back as a Dimension-like object rather than a plain
    # number; reduce it to its numeric value, as is done for the other sizes.
    $wp = $wp->valueOf if ref $wp;
    $w  = max(0, $ws - $wp);
    if ($op eq 'SUPERSCRIPT') {
      $h = $hb + $hs / 2; }
    else {
      $d = $hs / 2 + $ds; } }
  $w = Dimension($w); $h = Dimension($h); $d = Dimension($d);
  return ($w, $h, $d); }
# NOTE: The When reverting these, the
# The four script constructors below share one shape: an ltx:XMApp carrying
# the script's role, with the argument inside an ltx:XMArg. The scriptpos
# attribute uses the scriptpos property when present, else scriptlevel.

# Superscript following its base; reverts to ^ and the script,
# sized relative to the 'base' and 'prevscript' properties.
DefConstructor('\@@POSTSUPERSCRIPT InScriptStyle',
"<ltx:XMApp role='POSTSUPERSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
. "<ltx:XMArg rule='Superscript'>#1</ltx:XMArg>"
. "</ltx:XMApp>",
reversion => sub { (T_SUPER, revertScript($_[1])); },
sizer => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'),
$_[0]->getProperty('prevscript'), 'SUPERSCRIPT', 'post'); });
# Subscript following its base; reverts to _ and the script.
DefConstructor('\@@POSTSUBSCRIPT InScriptStyle',
"<ltx:XMApp role='POSTSUBSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
. "<ltx:XMArg rule='Subscript'>#1</ltx:XMArg>"
. "</ltx:XMApp>",
reversion => sub { (T_SUB, revertScript($_[1])); },
sizer => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'),
$_[0]->getProperty('prevscript'),
'SUBSCRIPT', 'post'); });
# Floating superscript; reverts to {}^ and the script, and is sized
# without any base or previous script.
DefConstructor('\@@FLOATINGSUPERSCRIPT InScriptStyle',
"<ltx:XMApp role='FLOATSUPERSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
. "<ltx:XMArg rule='Superscript'>#1</ltx:XMArg>"
. "</ltx:XMApp>",
reversion => sub { (T_BEGIN, T_END, T_SUPER, revertScript($_[1])); },
sizer => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUPERSCRIPT', 'post'); });
# Floating subscript; reverts to {}_ and the script.
DefConstructor('\@@FLOATINGSUBSCRIPT InScriptStyle',
"<ltx:XMApp role='FLOATSUBSCRIPT' scriptpos='?#scriptpos(#scriptpos)(#scriptlevel)'>"
. "<ltx:XMArg rule='Subscript'>#1</ltx:XMArg>"
. "</ltx:XMApp>",
reversion => sub { (T_BEGIN, T_END, T_SUB, revertScript($_[1])); },
sizer => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUBSCRIPT', 'post'); });
# The active ' in math: expands to a superscript group holding one \prime
# for this ' and for each ' that directly follows. If a ^ comes next, it is
# consumed and its argument is appended within the same superscript group.
DefMacroI('\active@math@prime', undef, sub {
    my ($gullet) = @_;
    my @sup = (T_CS('\prime'));
    # Collect up all ', converting to \prime
    while ($gullet->ifNext(T_OTHER('\''))) {
      $gullet->readToken;
      push(@sup, T_CS('\prime')); }
    # Combine with any following superscript!
    # However, this is semantically screwed up!
    # We really need to set up separate superscripts, but at same level!
    if ($gullet->ifNext(T_SUPER)) {
      $gullet->readToken;
      # Guard against there being no argument left to read (eg. the input
      # ends right after the ^), rather than calling a method on undef.
      my $arg = $gullet->readArg;
      push(@sup, $arg->unlist) if defined $arg; }
    (T_SUPER, T_BEGIN, @sup, T_END); },
  locked => 1);    # Only in math!
# Give ' the mathcode 0x8000 and bind it to \active@math@prime.
AssignMathcode("'" => 0x8000);
Let("'", '\active@math@prime');
# Experiment: When we detect a math element containing solely a floating superscript in the
# *Frontmatter* of a document, assume it is a note mark, and normalize it down to
# plain text.
# Matches an ltx:Math whose ltx:XMath has, as its only child, an ltx:XMApp
# with role FLOATSUPERSCRIPT or FLOATSUBSCRIPT whose argument holds nothing
# but ltx:XMTok elements. The match is replaced by an ltx:sup or ltx:sub
# containing the text content of that argument.
DefRewrite(xpath => 'descendant::ltx:Math[child::ltx:XMath[child::ltx:XMApp[' .
'(@role="FLOATSUPERSCRIPT" or @role="FLOATSUBSCRIPT") and ' .
'not(preceding-sibling::*) and not(following-sibling::*) ' .
'and not(./*/*[not(self::ltx:XMTok)]) ]]]',
replace => sub {
my ($document, $math) = @_;
# We can assume the grandchild of the XMath node is the XMArg,
# which we need to normalize to scripted Unicode.
# Walk down through the first element child at each level:
# Math -> XMath -> XMApp -> XMArg.
if (my @xmath = element_nodes($math)) {
if (my @xmapp = element_nodes($xmath[0])) {
if (my @xmarg = element_nodes($xmapp[0])) {
if (my $role = $xmapp[0]->getAttribute('role')) {
my $text = $xmarg[0]->textContent;
# The box of the original argument is made current while inserting.
local $LaTeXML::BOX = $document->getNodeBox($xmarg[0]);
if ($role eq 'FLOATSUPERSCRIPT') {
$document->insertElement('ltx:sup', $text);
return; }
elsif ($role eq 'FLOATSUBSCRIPT') {
$document->insertElement('ltx:sub', $text);
return; }
} } } }
# should never happen, but just in case:
# the math node is put back unchanged at the current insertion point.
Info("rewrite", "footnotemark", "Failed to find floating node in: " . $math->toString(1));
$document->getNode->appendChild($math);
return; });
#======================================================================
# \choose & friends, also need VERY special argument handling
# After digesting the \choose (or whatever), grab the previous and following material
# and store as args in the whatsit.
# Increment the mathstyle stored in any boxes & whatsits.
# The tricky part is to know when NOT to increment!
# \displaystyle, constructors that set their own specific style,...
# And, any collateral adjustments that had been done in digestion depending on mathstyle
# WONT be adjusted!
# We don't have a clear API to find the displayable Boxes within;
# and we don't have a good handle on grouping...
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
. "<ltx:XMRef _xmkey='#xmkey1'/>"
. "<ltx:XMRef _xmkey='#xmkey2'/>"
. "</ltx:XMApp>"
. "<ltx:XMWrap>"
. "#left)()"
. "<ltx:XMApp>"
. "<ltx:XMTok _xmkey='#xmkey0' role='#role' meaning='#meaning' mathstyle='#mathstyle' thickness='#thickness'/>"
. "<ltx:XMArg _xmkey='#xmkey1'>#top</ltx:XMArg>"
. "<ltx:XMArg _xmkey='#xmkey2'>#bottom</ltx:XMArg>"
. "</ltx:XMApp>"
. "?#needXMDual(#right"
. "</ltx:XMWrap>"
. "</ltx:XMDual>)()",
afterDigest => sub {
my ($stomach, $whatsit) = @_;
my $kv = $whatsit->getArg(2);
# Really, we want the mathstyle that was in effect BEFORE the group starting the numerator!
# (there could be a \displaystyle INSIDE the numerator, but that's not the one we want)
# Of course the group that started the numerator may be the start of the Math, itself!
# AND, the numerator, which was already digested, needs it's mathstyle ADJUSTED
my $font = ($STATE->isValueBound('MODE', 0) # Last stack frame was a mode switch!?!?!
? $STATE->lookupValue('font') # then just use whatever font we've got
: ($STATE->isValueBound('font', 0) # else if font was set in numerator
&& $STATE->valueInFrame('font', 1))
|| $STATE->lookupValue('font') # then just use whatever font we've got
);
my $style = $font->getMathstyle;
my $role = ToString($kv->getValue('role'));
my $meaning = ToString($kv->getValue('meaning'));
my $thickness = ToString($kv->getValue('thickness'));
$role = 'FRACOP' unless $role;
$meaning = 'divide' if (!$meaning) && ($thickness ne '0pt');
# Unfortunately, the numerator's already digested! We have to adjust it's mathstyle
my @top = $stomach->regurgitate;
# really have to pass +/-1, +/-2 etc..!
adjustMathstyle($style, {}, @top);
MergeFont(fraction => 1);
my @bot = $stomach->digestNextBody();
my $closing = pop(@bot); # We'll leave whatever closed the list (endmath, endgroup...)
$whatsit->setProperties(
top => List(@top, mode => 'math'),
bottom => List(@bot, mode => 'math'),
role => $role,
meaning => $meaning,
thickness => $thickness,
mathstyle => $style);
if ($kv->getValue('left') || $kv->getValue('right')) {
$whatsit->setProperties(needXMDual => 1,
xmkey0 => LaTeXML::Package::getXMArgID(),
xmkey1 => LaTeXML::Package::getXMArgID(),
xmkey2 => LaTeXML::Package::getXMArgID()); }
return $closing; }, # and leave the closing bit, whatever it is.
properties => sub { %{ $_[2]->getKeyVals }; },
sizer => sub { fracSizer($_[0]->getProperty('top'), $_[0]->getProperty('bottom')); },
reversion => sub {
my ($whatsit) = @_;
(Revert($whatsit->getProperty('top')),
$whatsit->getArg(1)->unlist,
Revert($whatsit->getProperty('bottom'))); });
# Plain TeX's infix fraction-like operators.
# Each one expands into \lx@generalized@over with two arguments:
#  (1) the tokens of the original command (including any delimiter/dimension arguments),
#  (2) a key=value list (meaning, thickness, left, right) describing the variant.
DefMacro('\choose',
'\lx@generalized@over{\choose}{meaning=binomial,thickness=0pt,left=\@left(,right=\@right)}');
DefMacro('\brace',
'\lx@generalized@over{\brace}{thickness=0pt,left=\@left\{,right=\@right\}}');
DefMacro('\brack',
'\lx@generalized@over{\brack}{thickness=0pt,left=\@left[,right=\@right]}');
# \atop: no rule (thickness=0pt) and no meaning given.
DefMacro('\atop',
'\lx@generalized@over{\atop}{thickness=0pt}');
# The ...withdelims forms read two Tokens to be used as the left & right delimiters.
DefMacro('\atopwithdelims Token Token',
'\lx@generalized@over{\atopwithdelims #1 #2}{thickness=0pt,left={\@left#1},right={\@right#2}}');
DefMacro('\over',
'\lx@generalized@over{\over}{meaning=divide}');
DefMacro('\overwithdelims Token Token',
'\lx@generalized@over{\overwithdelims #1 #2}{left={\@left#1},right={\@right#2},meaning=divide}');
# My thinking was that this is a "fraction" providing the dimension is > 0!
# The \above forms additionally read a Dimension, passed on as the thickness.
DefMacro('\above Dimension',
'\lx@generalized@over{\above #1}{meaning=divide,thickness=#1}');
DefMacro('\abovewithdelims Token Token Dimension',
'\lx@generalized@over{\abovewithdelims #1 #2 #3}{left={\@left#1},right={\@right#2},meaning=divide,thickness=#3}');
#======================================================================
# \cal: switch to the calligraphic family, with medium series and upright shape.
DefPrimitiveI('\cal', undef, undef,
font => { family => 'caligraphic', series => 'medium', shape => 'upright' });
# In principle, <ltx:emph> is a nice markup for emphasized.
# Unfortunately, TeX really just treats it as a font switch.
# Something like: \em et.al. \rm more stuff
# works in TeX, but in our case, since there is no explicit {},
# the <ltx:emph> stays open! Ugh!
# This could still be made to work, but merge font would
# need to look at any open <ltx:emph>, and then somehow close it!
# \em toggles emphasis relative to the current font:
# if the current shape is italic the new shape is upright, otherwise it is italic.
# The non-italic shape is named 'upright', the same name used by every other
# font switch in this file (\rm, \sf, \bf, \tt, \cal).
DefPrimitiveI('\em', undef, undef,
  beforeDigest => sub {
    my $font    = LookupValue('font');
    my $toggled = ($font->getShape eq 'italic' ? 'upright' : 'italic');
    # The merged font is assigned with 'local' scope.
    AssignValue(font => $font->merge(shape => $toggled), 'local'); });
# Change math font while still in text!
# \boldmath sets, and \unboldmath clears, the forcebold flag on the 'mathfont' value
# (assigned with 'local' scope).  Both are declared with forbidMath => 1.
DefPrimitiveI('\boldmath', undef, undef,
beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 1), 'local'); },
forbidMath => 1);
DefPrimitiveI('\unboldmath', undef, undef,
beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 0), 'local'); },
forbidMath => 1);
#======================================================================
# Alignments
# & gives an error except within the right context
# (which should redefine it!)
# Default definition of '&': its constructor only reports an 'unexpected' Error.
DefConstructorI('&', undef, sub { Error('unexpected', '&', $_[0], "Stray alignment \"&\""); });
#**********************************************************************
# Plain; Extracted from Appendix B.
#**********************************************************************
#======================================================================
# TeX Book, Appendix B, p. 344
#======================================================================
# Define the form-feed character (^^L) as an \outer macro expanding to \par.
RawTeX('\outer\def^^L{\par}');
# \dospecials: the list of special characters, each one preceded by \do.
DefMacro('\dospecials', '\do\ \do\\\do\{\do\}\do\$\do\&\do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~');
# Normally, the content branch contains the pure structure and meaning of a construct,
# and the presentation is generated from lower level TeX macros that only concern
# themselves with how to display the object.
# Nevertheless, it is sometimes useful to know where the tokens in the presentation branch
# came from; particularly what their presumed "meaning" is.
# For example, when search-indexing pmml, or providing links to definitions from the pmml.
#
# The following constructor (see how it's used in DefMath), adds meaning attributes
# wherever it seems sensible on the presentation branch, after it has been generated.
# \@ASSERT@MEANING{meaning}{presentation}
# Constructs only the 2nd argument; afterwards the 1st argument (as a string)
# is pushed, as a meaning attribute, onto suitable tokens below the current node.
DefConstructor('\@ASSERT@MEANING{}{}', '#2',
  reversion      => '#2',
  afterConstruct => sub {
    my ($document, $whatsit) = @_;
    # The current node should be the wrapper just added.
    my $wrapper = $document->getNode;
    addMeaningRec($document, $wrapper, ToString($whatsit->getArg(1)));
    $wrapper; });
# addMeaningRec($document, $node, $meaning)
# Recursively walk the element tree under $node and set meaning => $meaning on
# every ltx:XMTok that has no meaning attribute and whose role is absent or 'UNKNOWN'.
# The walk never descends into ltx:XMArg, and ignores non-element nodes.
# Returns nothing.
sub addMeaningRec {
  my ($document, $node, $meaning) = @_;
  return unless $node->nodeType == XML_ELEMENT_NODE;
  my $qname = $document->getModel->getNodeQName($node);
  # DONT cross through into arguments!
  return if $qname eq 'ltx:XMArg';
  if ($qname ne 'ltx:XMTok') {
    # Any other container: recurse into each child.
    foreach my $child ($node->childNodes) {
      addMeaningRec($document, $child, $meaning); }
    return; }
  # A token: only annotate if it has no known role and no meaning yet.
  my $role = $node->getAttribute('role') || 'UNKNOWN';
  if (($role eq 'UNKNOWN') && !$node->getAttribute('meaning')) {
    $document->setAttribute($node, meaning => $meaning); }
  return; }
#======================================================================
# Properties for plain characters.
# These are allowed in plain text, but need to act a bit special in math.
# Each entry gives: the character, (no parameters), its presentation,
# and the grammatical role (plus meaning, name or stretchiness where given).
# Relations & arithmetic operators
DefMathI('=', undef, '=', role => 'RELOP', meaning => 'equals');
DefMathI('+', undef, '+', role => 'ADDOP', meaning => 'plus');
DefMathI('-', undef, '-', role => 'ADDOP', meaning => 'minus');
## Redefine, if we want Unicode minus
##DefMathI('-', undef, "\x{2212}", role => 'ADDOP', meaning => 'minus');
# '*' is presented as U+2217.
DefMathI('*', undef, "\x{2217}", role => 'MULOP', meaning => 'times');
DefMathI('/', undef, '/', role => 'MULOP', meaning => 'divide');
DefMathI('!', undef, '!', role => 'POSTFIX', meaning => 'factorial');
# Punctuation
DefMathI(',', undef, ',', role => 'PUNCT');
DefMathI('.', undef, '.', role => 'PERIOD');
DefMathI(';', undef, ';', role => 'PUNCT');
# Delimiters: marked as not stretchy
DefMathI('(', undef, '(', role => 'OPEN', stretchy => 'false');
DefMathI(')', undef, ')', role => 'CLOSE', stretchy => 'false');
DefMathI('[', undef, '[', role => 'OPEN', stretchy => 'false');
DefMathI(']', undef, ']', role => 'CLOSE', stretchy => 'false');
DefMathI('|', undef, '|', role => 'VERTBAR', stretchy => 'false');
DefMathI(':', undef, ':', role => 'METARELOP', name => 'colon'); # Seems like good default role
DefMathI('<', undef, '<', role => 'RELOP', meaning => 'less-than');
DefMathI('>', undef, '>', role => 'RELOP', meaning => 'greater-than');
# NOTE: Need to evolve Ligatures to be easier to write.
# rough draft of tool to make ligatures more sane to write...
# It is tempting to handle these with macros,
# But that tends to run afoul of tricky packages like babel that make : active as well!
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
#======================================================================
# TeX Book, Appendix B, p. 346
# Scratch register aliases (\count@, \toks@, \skip@, \dimen@, ...) followed by
# the allocation bookkeeping counts \count10 .. \count20; the TeX comment on each
# line says which kind of register (and starting from which number) it allocates.
# Finally \insc@unt, \allocationnumber and \m@ne (set to -1) are defined as count registers.
RawTeX(<<'EoTeX');
\countdef\count@=255
\toksdef\toks@=0
\skipdef\skip@=0
\dimendef\dimen@=0
\dimendef\dimen@i=1
\dimendef\dimen@ii=2
\count10=22 % allocates \count registers 23, 24, ...
\count11=9 % allocates \dimen registers 10, 11, ...
\count12=9 % allocates \skip registers 10, 11, ...
\count13=9 % allocates \muskip registers 10, 11, ...
\count14=9 % allocates \box registers 10, 11, ...
\count15=9 % allocates \toks registers 10, 11, ...
\count16=-1 % allocates input streams 0, 1, ...
\count17=-1 % allocates output streams 0, 1, ...
\count18=3 % allocates math families 4, 5, ...
\count19=0 % allocates \language codes 1, 2, ...
\count20=255 % allocates insertions 254, 253, ...
\countdef\insc@unt=20
\countdef\allocationnumber=21
\countdef\m@ne=22 \m@ne=-1
EoTeX
# Various \count's are set; should we?
#======================================================================
# TeX Book, Appendix B, p. 347
# \wlog{message}: expand the argument and write it (as a string) to the log.
# Produces no boxes; the definition is locked.
DefPrimitive('\wlog{}', sub {
    my ($stomach, $message) = @_;
    NoteLog(ToString(Expand($message)));
    return; },
  locked => 1);
# From plain.tex
# \newcount, \newdimen, \newskip, \newmuskip:
# each defines the given control sequence as a register, initialized to a zero
# of the appropriate type, and names the register family in the allocate option.
DefPrimitive('\newcount DefToken', sub {
DefRegisterI($_[1], undef, Number(0), allocate => '\count'); });
DefPrimitive('\newdimen DefToken', sub {
DefRegisterI($_[1], undef, Dimension(0), allocate => '\dimen'); });
DefPrimitive('\newskip DefToken', sub {
DefRegisterI($_[1], undef, Glue(0), allocate => '\skip'); });
DefPrimitive('\newmuskip DefToken', sub {
DefRegisterI($_[1], undef, MuGlue(0), allocate => '\muskip'); });
# Box allocation: 'allocated_boxes' holds the number of the next box to hand out.
AssignValue(allocated_boxes => 0);
# \newbox<cs>: take the next box number, bump the (global) counter,
# initialize the value "box<n>" to an empty List, and define <cs> as a
# read-only register holding that number.
DefPrimitive('\newbox DefToken', sub {
    my ($stomach, $cs) = @_;
    my $boxnum = LookupValue('allocated_boxes');
    AssignValue(allocated_boxes => $boxnum + 1, 'global');
    AssignValue('box' . $boxnum, List());
    DefRegisterI($cs, undef, Number($boxnum), readonly => 1); });
# \newhelp<cs>{text}: store the text as the value named by the control sequence's string.
DefPrimitive('\newhelp DefToken {}', sub { AssignValue(ToString($_[1]) => $_[2]); });
# \newtoks<cs>: define <cs> as a register holding an empty token list.
# NOTE(review): unlike \newcount etc., no allocate option is passed here -- confirm that is intended.
DefPrimitive('\newtoks DefToken', sub { DefRegisterI($_[1], undef, Tokens()); });
# the next 4 actually work by doing a \chardef instead of \countdef, etc.
# which means they actually work quite differently
# \alloc@@{type}: hand out the next number for the given type.
# The per-type counter is kept in the value 'allocation @<type>' (starting from 0);
# the number handed out is left in \allocationnumber, and the counter is
# incremented; both assignments are global.
DefPrimitive('\alloc@@ {}', sub {
    my ($stomach, $type) = @_;
    my $key     = 'allocation @' . ToString($type);
    my $current = LookupValue($key) || '0';
    if (ref $current) {    # stored as an object? use its plain value
      $current = $current->valueOf; }
    AssignValue($key => $current + 1, 'global');
    AssignRegister('\allocationnumber' => Number($current), 'global'); });
# \newread, \newwrite, \newfam, \newlanguage:
# allocate a number via \alloc@@ and globally \chardef the given token to it.
DefMacro('\newread DefToken', '\alloc@@{read}\global\chardef#1=\allocationnumber');
DefMacro('\newwrite DefToken', '\alloc@@{write}\global\chardef#1=\allocationnumber');
DefMacro('\newfam DefToken', '\alloc@@{fam}\global\chardef#1=\allocationnumber');
DefMacro('\newlanguage DefToken', '\alloc@@{language}\global\chardef#1=\allocationnumber');
# \e@alloc{#1}{#2}{#3}{#4}{#5}{#6}:
# globally advance the count #3 by one, copy it to \allocationnumber,
# and globally apply the defining command #2 to the token #6.
# (the range check and logging lines are commented out within the TeX code.)
DefMacro('\e@alloc{}{}{}{}{}{}',
'\global\advance#3\@ne
% \e@ch@ck{#3}{#4}{#5}#1%
\allocationnumber#3\relax
\global#2#6\allocationnumber
% \wlog{\string#6=\string#1\the\allocationnumber}
');
# \alloc@: the older 4 argument interface, expressed using \e@alloc.
DefMacro('\alloc@{}{}{}{}', '\e@alloc#2#3{\count1#1}#4\float@count');
# NOTE(review): \newread and \newwrite were already defined above (via \alloc@@);
# presumably these later definitions replace them -- confirm that is intended.
DefMacro('\newread', '\e@alloc\read \chardef{\count16}\m@ne\sixt@@n');
DefMacro('\newwrite', '\e@alloc\write
{\ifnum\allocationnumber=18
\advance\count17\@ne
\allocationnumber\count17 %
\fi
\global\chardef}%
{\count17}%
\m@ne
{128}');
# This implementation is quite wrong
# Simply defines the given token as a register holding Number(0).
DefPrimitive('\newinsert Token', sub { DefRegisterI($_[1], undef, Number(0)); });
# \alloc@, \ch@ck
# TeX plain uses \newdimen, etc. for these.
# Is there any advantage to that?
# Handy constants, defined directly as registers.
DefRegister('\maxdimen', Dimension(16383.99999 * $UNITY));
DefRegister('\hideskip', Glue('-1000pt plus 1fill'));
DefRegister('\centering', Glue('0pt plus 1000pt minus 1000pt'));
# \p@ is a dimension of $UNITY; \z@ and \z@skip are the zero dimension and zero glue.
DefRegister('\p@', Dimension($UNITY));
DefRegister('\z@', Dimension(0));
DefRegister('\z@skip', Glue(0, 0, 0));
# First approximation. till I figure out \newbox
RawTeX('\newbox\voidb@x');
#======================================================================
# TeX Book, Appendix B, p. 348
# \newif<cs>: define <cs> as a new conditional (no parameters, no test code given).
# Produces no boxes.
DefPrimitive('\newif DefToken', sub {
    my (undef, $conditional) = @_;
    DefConditionalI($conditional, undef);
    return; });
# See the section Registers & Parameters, above for setting default values.
#======================================================================
# TeX Book, Appendix B, p. 349
# See the section Registers & Parameters, above for setting default values.
# These are originally defined with \newskip, etc
# Vertical skip amounts used by \smallskip, \medskip and \bigskip.
DefRegister('\smallskipamount' => Glue('3pt plus 1pt minus 1pt'));
DefRegister('\medskipamount' => Glue('6pt plus 2pt minus 2pt'));
DefRegister('\bigskipamount' => Glue('12pt plus 4pt minus 4pt'));
# The "normal" line spacing values, as installed by \normalbaselines.
DefRegister('\normalbaselineskip' => Glue('12pt'));
DefRegister('\normallineskip' => Glue('1pt'));
DefRegister('\normallineskiplimit' => Dimension('0pt'));
DefRegister('\jot' => Dimension('3pt'));
# Records the value \jot has at this point (3pt).
DefRegister('\lx@default@jot' => LookupRegister('\jot'));
DefRegister('\interdisplaylinepenalty' => Number(100));
DefRegister('\interfootnotelinepenalty' => Number(100));
# \magstephalf expands to 1095.
DefMacroI('\magstephalf', undef, '1095');
# Magnifications for \magstep 0 through 5.
our @mags = (1000, 1200, 1440, 1728, 2074, 2488);
# \magstep{n} expands to the digits of the n-th magnification.
# Anything other than a single digit indexing @mags is treated as step 0.
DefMacro('\magstep{}', sub {
    my ($gullet, $arg) = @_;
    my $step  = ToString($arg);
    my $index = 0;
    if ($step =~ /^\d$/) {
      my $n = int($step);
      $index = $n if ($n >= 0) && ($n < 6); }
    Explode($mags[$index]); });
#======================================================================
# TeX Book, Appendix B, p. 350
# Font stuff ...
# Preload the plain TeX fonts: roman, math italic, math symbols (each in 10, 7 and 5pt),
# math extension, bold, slanted, typewriter and text italic;
# allocate the families \itfam, \slfam, \bffam, \ttfam;
# and assign the text/script/scriptscript fonts for math families 0-3.
# Note that the 5pt math italic font \fivei is cmmi5 (as in plain.tex),
# parallel to \fiverm=cmr5, \fivesy=cmsy5 and \fivebf=cmbx5.
RawTeX(<<'EoTeX');
\font\tenrm=cmr10
\font\sevenrm=cmr7
\font\fiverm=cmr5
\font\teni=cmmi10
\font\seveni=cmmi7
\font\fivei=cmmi5
\font\tensy=cmsy10
\font\sevensy=cmsy7
\font\fivesy=cmsy5
\font\tenex=cmex10
\font\tenbf=cmbx10
\font\sevenbf=cmbx7
\font\fivebf=cmbx5
\font\tensl=cmsl10
\font\tentt=cmtt10
\font\tenit=cmti10
\newfam\itfam
\newfam\slfam
\newfam\bffam
\newfam\ttfam
\textfont0=\tenrm\scriptfont0=\sevenrm\scriptscriptfont0=\fiverm
\textfont1=\teni\scriptfont1=\seveni\scriptscriptfont1=\fivei
\textfont2=\tensy\scriptfont2=\sevensy\scriptscriptfont2=\fivesy
\textfont3=\tenex
EoTeX
# Note: \newfam in math should be font switching(?)
#======================================================================
# TeX Book, Appendix B, p. 351
# Old style font styles.
# The trick is to create an empty Whatsit preserved till assimilation (for reversion'ing)
# but to change the current font used in boxes.
# (some of these were defined on different pages? or even latex...)
# ltx:text may be opened and closed automatically.
Tag('ltx:text', autoOpen => 1, autoClose => 1);
# Note that these, unlike \rmfamily, should set the other attributes to the defaults!
# Hence each switch specifies all three of family, series and shape.
DefPrimitiveI('\rm', undef, undef,
font => { family => 'serif', series => 'medium', shape => 'upright' });
DefPrimitiveI('\sf', undef, undef,
font => { family => 'sansserif', series => 'medium', shape => 'upright' });
DefPrimitiveI('\bf', undef, undef,
font => { series => 'bold', family => 'serif', shape => 'upright' });
DefPrimitiveI('\it', undef, undef,
font => { shape => 'italic', family => 'serif', series => 'medium' });
DefPrimitiveI('\tt', undef, undef,
font => { family => 'typewriter', series => 'medium', shape => 'upright' });
# No effect in math for the following 2 ?
DefPrimitiveI('\sl', undef, undef,
font => { shape => 'slanted', family => 'serif', series => 'medium' });
DefPrimitiveI('\sc', undef, undef,
font => { shape => 'smallcaps', family => 'serif', series => 'medium' });
# Ideally, we should set these sizes from class files
# The nominal document font size; the size switches below set explicit sizes.
AssignValue(NOMINAL_FONT_SIZE => 10);
DefPrimitiveI('\tiny', undef, undef, font => { size => 5 });
DefPrimitiveI('\scriptsize', undef, undef, font => { size => 7 });
DefPrimitiveI('\footnotesize', undef, undef, font => { size => 8 });
DefPrimitiveI('\small', undef, undef, font => { size => 9 });
DefPrimitiveI('\normalsize', undef, undef, font => { size => 10 });
DefPrimitiveI('\large', undef, undef, font => { size => 12 });
DefPrimitiveI('\Large', undef, undef, font => { size => 14.4 });
DefPrimitiveI('\LARGE', undef, undef, font => { size => 17.28 });
DefPrimitiveI('\huge', undef, undef, font => { size => 20.74 });
# NOTE(review): LaTeX's standard 10pt classes use 24.88pt for \Huge; confirm 29.8 is intended.
DefPrimitiveI('\Huge', undef, undef, font => { size => 29.8 });
# \mit is only allowed in math (requireMath) and selects the italic family.
DefPrimitiveI('\mit', undef, undef, requireMath => 1, font => { family => 'italic' });
# Defined with no replacement code.
DefPrimitiveI('\frenchspacing', undef, undef);
DefPrimitiveI('\nonfrenchspacing', undef, undef);
# \normalbaselines: restore the line spacing parameters from their \normal... registers.
DefMacroI('\normalbaselines', undef,
'\lineskip=\normallineskip\baselineskip=\normalbaselineskip\lineskiplimit=\normallineskiplimit');
# \space is a single space token; \lq and \rq are the left and right quote characters.
DefMacroI('\space', undef, Tokens(T_SPACE));
DefMacroI('\lq', undef, "`");
DefMacroI('\rq', undef, "'");
Let('\empty', '\@empty');
# \null is an empty hbox.
DefMacroI('\null', undef, '\hbox{}');
# Implicit begin & end group.
Let('\bgroup', T_BEGIN);
Let('\egroup', T_END);
Let('\endgraf', '\par');
# NOTE(review): \endline is Let to \cr and then immediately redefined as a primitive
# with no replacement; presumably the later definition wins -- confirm which is intended.
Let('\endline', '\cr');
DefPrimitiveI('\endline', undef, undef);
# Use \r for the newline from TeX!!!
DefMacroI("\\\r", undef, '\ '); # \<cr> == \<space> Interesting (see latex.ltx)
Let(T_ACTIVE("\r"), '\par'); # (or is this just LaTeX?)
Let("\\\t", "\\\r"); # \<tab> == \<space>, also
#======================================================================
# TeX Book, Appendix B, p. 352
# \obeyspaces: give the space character catcode 13 (active), with active space meaning \space.
DefPrimitiveI('\obeyspaces', undef, sub {
AssignCatcode(" " => 13);
Let(T_ACTIVE(" "), '\space');
return });
# Curiously enough, " " (a space) is ALREADY defined to be the same as "\space"
# EVEN before it is made active. (see p.380)
Let(T_ACTIVE(" "), '\space');
# \obeylines: give the carriage return catcode 13 (active), with active return meaning \@break.
DefPrimitiveI('\obeylines', undef, sub {
AssignCatcode("\r" => 13);
Let(T_ACTIVE("\r"), '\@break'); # More appropriate than \par, I think?
return });
# \@break creates an ltx:break element; the whatsit carries the property isBreak.
DefConstructor('\@break', "<ltx:break/>", properties => { isBreak => 1 });
# \loop ... \repeat:
# \loop saves everything up to \repeat as \body and starts \iterate;
# \iterate runs \body, whose final conditional decides whether \next is
# \iterate (go around again) or \relax (stop).  \repeat itself is let to \fi.
RawTeX(<<'EoTeX');
\def\loop#1\repeat{\def\body{#1}\iterate}
\def\iterate{\body \let\next=\iterate \else\let\next=\relax\fi \next}
\let\repeat=\fi
EoTeX
# Fixed horizontal spaces: each produces a Box containing a Unicode space character
# (or nothing, for the negative space), recording the originating control sequence,
# a name, a width, and isSpace => 1.
# \enskip & \enspace: U+2002, 0.5em
DefPrimitiveI('\enskip', undef, sub {
Box("\x{2002}", undef, undef, T_CS('\enskip'),
name => 'enskip', width => Dimension('0.5em'), isSpace => 1); });
# NOTE(review): the box for \enspace is also named 'enskip' -- confirm that is intended.
DefPrimitiveI('\enspace', undef, sub {
Box("\x{2002}", undef, undef, T_CS('\enspace'),
name => 'enskip', width => Dimension('0.5em'), isSpace => 1); });
# \quad: U+2003, 1em
DefPrimitiveI('\quad', undef, sub {
Box("\x{2003}", undef, undef, T_CS('\quad'),
name => 'quad', width => Dimension('1em'), isSpace => 1); });
# Conceivably should be treated as punctuation! (but maybe even \quad should !?!)
# \qquad: two U+2003, 2em; the only one here that also sets asHint.
DefPrimitiveI('\qquad', undef, sub {
Box("\x{2003}\x{2003}", undef, undef, T_CS('\qquad'),
name => 'qquad', width => Dimension('2em'), isSpace => 1, asHint => 1); });
# \thinspace: U+2009, 0.16667em
DefPrimitiveI('\thinspace', undef, sub {
Box("\x{2009}", undef, undef, T_CS('\thinspace'),
name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); });
# \negthinspace: empty content, with a negative width.
DefPrimitiveI('\negthinspace', undef, sub {
Box("", undef, undef, T_CS('\negthinspace'),
name => 'negthinspace', width => Dimension('-0.16667em'), isSpace => 1); });
# DefConstructor('\hglue Glue', "?#isMath(<ltx:XMHint name='hglue' width='#width'/>)(\x{2003})",
# properties => sub { (isSpace => 1, width => $_[1]); });
# \hglue<glue>: produce a space Box whose content is the string that
# DimensionToSpaces gives for the glue; produces nothing if that is undefined.
# The box reverts to \hglue with its argument, and records the glue as its width.
DefPrimitive('\hglue Glue', sub {
    my ($stomach, $glue) = @_;
    my $spaces = DimensionToSpaces($glue);
    if (!defined $spaces) {
      return; }
    return Box($spaces, undef, undef, Invocation(T_CS('\hglue'), $glue),
      name => 'hglue', width => $glue, isSpace => 1); });
# The following are defined with no replacement code (\vglue still reads its Glue argument).
DefPrimitive('\vglue Glue', undef);
DefPrimitiveI('\topglue', undef, undef);
DefPrimitiveI('\nointerlineskip', undef, undef);
DefPrimitiveI('\offinterlineskip', undef, undef);
# \smallskip, \medskip, \bigskip: a \vskip by the corresponding ...skipamount register.
DefMacroI('\smallskip', undef, '\vskip\smallskipamount');
DefMacroI('\medskip', undef, '\vskip\medskipamount');
DefMacroI('\bigskip', undef, '\vskip\bigskipamount');
#======================================================================
# TeX Book, Appendix B, p. 353
# Line & page breaking hints: defined with no replacement code.
DefPrimitiveI('\break', undef, undef);
DefPrimitiveI('\nobreak', undef, undef);
DefPrimitiveI('\allowbreak', undef, undef);
# \nobreakspace: a Box containing a non-breaking space (0xA0), which reverts to the active ~.
DefPrimitiveI('\nobreakspace', undef, sub {
Box(UTF(0xA0), undef, undef, T_ACTIVE("~"),
width => Dimension('0.333em'), isSpace => 1); });
DefMacro("~", '\nobreakspace{}');
DefMacroI('\slash', undef, '/');
DefPrimitiveI('\filbreak', undef, undef);
# The page breaking macros all end the paragraph;
# \eject (hence \newpage) and \supereject additionally insert a pagination marker.
DefMacroI('\goodbreak', undef, '\par');
DefMacroI('\eject', undef, '\par\LTX@newpage');
Let('\newpage', '\eject');
DefConstructorI('\LTX@newpage', undef, "^<ltx:pagination role='newpage'/>");
DefMacroI('\supereject', undef, '\par\LTX@newpage');
DefPrimitiveI('\removelastskip', undef, undef);
DefMacroI('\smallbreak', undef, '\par');
DefMacroI('\medbreak', undef, '\par');
DefMacroI('\bigbreak', undef, '\par');
# \line: an hbox as wide as \hsize.
DefMacroI('\line', undef, '\hbox to \hsize');
# \leftline, \rightline, \centerline:
# each wraps its (undigested) argument in an \hbox and passes it to an internal
# constructor, which hands the line and the alignment name to alignLine.
# The internal constructors are bounded and are aliased to the user-level names.
DefMacro('\leftline Undigested', '\ltx@leftline{\hbox{#1}}');
DefMacro('\rightline Undigested', '\ltx@rightline{\hbox{#1}}');
DefMacro('\centerline Undigested', '\ltx@centerline{\hbox{#1}}');
DefConstructor('\ltx@leftline{}', sub {
alignLine($_[0], $_[1], 'left'); },
alias => '\leftline',
bounded => 1);
DefConstructor('\ltx@rightline{}', sub {
alignLine($_[0], $_[1], 'right'); },
alias => '\rightline',
bounded => 1);
DefConstructor('\ltx@centerline{}', sub {
alignLine($_[0], $_[1], 'center'); },
alias => '\centerline',
bounded => 1);
# alignLine($document, $line, $alignment)
# Insert $line into the document, aligned according to $alignment
# ('left', 'right' or 'center'), using the class ltx_align_<alignment>:
#  * as an ltx:p, if one can be opened at the current point;
#  * else as an ltx:text followed by an ltx:break, if ltx:text can be opened;
#  * otherwise the line is simply absorbed, without any alignment.
# Returns nothing.
sub alignLine {
  my ($document, $line, $alignment) = @_;
  my $class = 'ltx_align_' . $alignment;
  if ($document->isOpenable('ltx:p')) {
    $document->insertElement('ltx:p', $line, class => $class);
    return; }
  if ($document->isOpenable('ltx:text')) {
    $document->insertElement('ltx:text', $line, class => $class);
    $document->insertElement('ltx:break');
    return; }
  $document->absorb($line);
  return; }
# These should be 0 width, but perhaps also shifted?
# \llap & \rlap: a zero-width hbox, with \hss before (resp. after) the content.
DefMacro('\llap{}', '\hbox to 0pt{\hss#1}');
DefMacro('\rlap{}', '\hbox to 0pt{#1\hss}');
DefMacroI('\m@th', undef, '\mathsurround=0pt ');
# \strutbox
# \strut expands to nothing here; \strutbox is still allocated as a box.
DefMacroI('\strut', undef, Tokens());
RawTeX('\newbox\strutbox');
#======================================================================
# TeX Book, Appendix B. p. 354
# TODO: Not yet done!!
# tabbing stuff!!!
# Placeholder: \settabs is defined with no parameters and no expansion given.
DefMacroI('\settabs', undef, undef);
#======================================================================
# TeX Book, Appendix B. p. 355
# TODO: \item, \itemitem not done!
# This could probably be adopted from LaTeX, if the <itemize> could auto-open
# and close!
# Plain TeX's simple itemization, built from hanging indentation:
# \hang sets the hanging indentation to \parindent;
# \item and \itemitem start a new paragraph and put their label via \textindent;
# \textindent puts its argument (followed by \enspace) to the left, using \llap.
DefMacro('\hang', '\hangindent\parindent');
DefMacro('\item', '\par\hang\textindent');
DefMacro('\itemitem', '\par\indent \hangindent2\parindent \textindent');
DefMacro('\textindent{}', '\indent\llap{#1\enspace}\ignorespaces');
# \narrower: increase both \leftskip and \rightskip by \parindent.
DefMacro('\narrower', '\advance\leftskip by\parindent'
. '\advance\rightskip by\parindent');
#----------------------------------------------------------------------
# General support for Front Matter.
# Not (yet) used by TeX (finish plain?)
# But provides support for LaTeX (and other formats?) for handling frontmatter.
#
# The idea is to accumulate any frontmatter material (title, author,...)
# rather than directly drop it into the digested stream.
# When we begin constructing the document, all accumulated material is output.
# See LaTeX.ltxml for usage.
# Note: could be circumstances where you'd want modular frontmatter?
# (ie. frontmatter for each sectional unit)
# The accumulated frontmatter: a (global) hash, initially empty.
AssignValue(frontmatter => {}, 'global');
# \if@in@preamble tests the value 'inPreamble'.
DefConditionalI('\if@in@preamble', undef, sub { LookupValue('inPreamble'); });
# Add a new frontmatter item that will be enclosed in <$tag %attr>...</$tag>
# The content is the result of digesting $tokens.
# \@add@frontmatter[keys]{tag}[attributes]{content}
# keys can have
# replace (to replace the current entry, if any)
# ifnew (only add if no previous entry)
# Entries are stored in the 'frontmatter' hash, keyed by tag; each value is a list
# of entries of the form [tag, attribute-hash (or undef), digested content].
# The whole primitive runs within a group (bgroup before, egroup after digestion).
DefPrimitive('\@add@frontmatter OptionalKeyVals {} OptionalKeyVals {}', sub {
my ($stomach, $keys, $tag, $attr, $tokens) = @_;
# Digest this as if we're already in the document body!
my $frontmatter = LookupValue('frontmatter');
my $inpreamble = LookupValue('inPreamble');
AssignValue(inPreamble => 0);
# Be careful since the contents may also want to add frontmatter
# (which should be inside or after this one!)
# So, we append this entry before digesting
$tag = ToString($tag);
if ($keys && $keys->hasKey('replace') && $$frontmatter{$tag}) { # if replace and previous entries
$$frontmatter{$tag} = []; } # Remove previous entries
if ($keys && $keys->hasKey('ifnew') && $$frontmatter{$tag}) { # if ifnew and previous entries
return; } # Skip this one.
# The content slot holds a placeholder until the digestion below fills it in.
my $entry = [$tag, undef, 'to-be-filled-in'];
push(@{ $$frontmatter{$tag} }, $entry);
if ($attr) {
# Digest the attribute keyvals, and store them as a plain hash.
$$entry[1] = { $attr->beDigested($stomach)->getHash }; }
# The content is digested within its own pair of braces.
$$entry[2] = Digest(Tokens(T_BEGIN, $tokens, T_END));
# Restore the saved inPreamble value.
AssignValue(inPreamble => $inpreamble);
return; },
beforeDigest => sub {
$_[0]->bgroup; },
afterDigest => sub {
$_[0]->egroup; });
# Append a piece of data to an existing frontmatter item that is contained in <$tag>
# If $label is given, look for an item which has label=>$label,
# otherwise, just append to the last item in $tag.
# \@add@to@frontmatter{tag}[label]{content}
DefPrimitive('\@add@to@frontmatter {} [] {}', sub {
my ($stomach, $tag, $label, $tokens) = @_;
$tag = ToString($tag);
$label = ToString($label) if $label;
my $frontmatter = LookupValue('frontmatter');
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
# The known frontmatter elements, in the order in which they will be output.
my @frontmatter_elements = qw(
  ltx:title ltx:toctitle ltx:subtitle
  ltx:creator ltx:date
  ltx:abstract ltx:keywords ltx:classification ltx:acknowledgements);
# The same names as a lookup table (name => 1).
my %frontmatter_elements;
@frontmatter_elements{@frontmatter_elements} = (1) x scalar(@frontmatter_elements);
# Insert FrontMatter into document, if not already added
# insertFrontMatter($document)
# Output all the accumulated frontmatter entries into $document at the current point.
# Does nothing if the frontmatter has already been output (value 'frontmatter_done').
# The known frontmatter elements are output first, in their listed order,
# followed by any other tags that have entries.  Returns nothing.
sub insertFrontMatter {
my ($document) = @_;
return if LookupValue('frontmatter_done');
my $frontmatter = LookupValue('frontmatter');
my @set_keys = $frontmatter ? (keys %$frontmatter) : ();
# if doc ONLY has abstract as frontmatter, defer until abstract's document location
# (only the first time: once 'frontmatter_deferred' is set, a later call will output it)
if ((scalar(@set_keys) == 1) && ($set_keys[0] eq 'ltx:abstract') &&
!LookupValue('frontmatter_deferred')) {
AssignValue(frontmatter_deferred => 1, 'global');
return; }
AssignValue(frontmatter_done => 1, 'global'); # OK, we're placing FrontMatter here, now.
foreach my $key (@frontmatter_elements, grep { !$frontmatter_elements{$_} } @set_keys) {
if (my $list = $$frontmatter{$key}) {
# Dubious, but assures that frontmatter appears in text mode...
local $LaTeXML::BOX = Box('', $STATE->lookupValue('font'), '', T_SPACE);
foreach my $item (@$list) {
# Each entry is [tag, attributes, content...]
my ($tag, $attr, @stuff) = @$item;
# add a dedicated class for frontmatter notes,
# in the case we want to style those uniformly.
if ($tag eq 'ltx:note') {
$attr ||= {};
$$attr{class} = ($$attr{class} ? $$attr{class} . ' ' : '') . 'ltx_note_frontmatter'; }
# Open the element; if there is content and the element allows a font attribute,
# force the font to be that of the first piece of content.
$document->openElement($tag, ($attr ? %$attr : ()),
(scalar(@stuff) && $document->canHaveAttribute($tag, 'font')
? (font => $stuff[0]->getFont, _force_font => 'true') : ()));
map { $document->absorb($_) } @stuff;
my $completed_node = $document->closeElement($tag);
# At this time, the frontmatter element should really carry the actual literal values intended.
# Thus, if we see an empty element, something went wrong -- including our bindings are too verbose,
# as e.g. \preprint{} always generates a ltx:note element.
#
# To solve this in a single location: prune here!
# (ltx:rdf is exempt from this pruning)
if (($tag ne "ltx:rdf") && !scalar($completed_node->childNodes)) {
$document->removeNode($completed_node); } } } }
return; }
# Add FrontMatter at document begin, unless deferred to a better position.
# Hook run (late) after ltx:document is opened; the document is the hook's first argument.
Tag('ltx:document', 'afterOpen:late' => sub {
insertFrontMatter($_[0]) unless LookupValue('frontmatter_deferred'); });
# Request Frontmatter to appear HERE (if not already done),
# deferring it from document begin.
# At digestion, mark the frontmatter as deferred (globally);
# at construction, output the frontmatter at this point.
DefConstructor('\lx@frontmatterhere', sub { insertFrontMatter($_[0]); },
afterDigest => sub { AssignValue(frontmatter_deferred => 1, 'global'); });
# Maintain a list of classes that apply to the document root.
# This might involve global style options, like leqno.
# Hook run (late) after ltx:document is opened: the keys of the mapping
# 'DOCUMENT_CLASSES', joined by spaces, are added as classes on the root (if there are any).
Tag('ltx:document', 'afterOpen:late' => sub {
my ($document, $root) = @_;
if (my $classes = join(' ', LookupMappingKeys('DOCUMENT_CLASSES'))) {
$document->addClass($root, $classes); } });
# If folks start using plain TeX macros, and never load LaTeX.pool,
# they might benefit from a ltx-plain.css?
# \beginsection: everything up to the \par becomes the (bold) title of a new section.
DefMacro('\beginsection Until:\par', '\@beginsection{{\bf #1}}');
# Opens an ltx:section and gives it a title; the section is not closed here.
DefConstructor('\@beginsection {}',
"<ltx:section><ltx:title>#1</ltx:title>");
# POSSIBLY #1 is a name or reference number and #2 is the theorem TITLE
# If so, how do we know when the theorem ends?
# \proclaim #1. #2\par : #1 is everything up to the ". ", #2 everything up to the \par.
# #1 is set in bold, and #2 slanted, as the arguments to \@proclaim.
DefMacroI('\proclaim', parseDefParameters('\proclaim', Tokenize('#1. #2\par')),
'\@proclaim{{\bf #1}}{{\sl #2}}');
# \@proclaim{title}{body}: opens an ltx:theorem with the 1st argument as its title
# (forcing the title's font), followed by the body;
# after construction, the theorem is closed, if possible.
DefConstructor('\@proclaim{}{}',
"<ltx:theorem>"
. "<ltx:title font='#titlefont' _force_font='true' >#title</ltx:title>"
. "#2",
afterConstruct => sub { $_[0]->maybeCloseElement('ltx:theorem'); },
properties => sub {
# The 1st argument is the title; record it, and its font, as properties.
my $title = $_[1];
(title => $title, titlefont => $title->getFont); });
#======================================================================
# Tags & Titles
# The reference numbers, titles, captions etc, for various objects have
# different styling conventions, and the styling varies depending on context.
# We'll use ltx:tags as a container for the various forms of ltx:tag with different @role's.
# The role=refnum form is simply formatted by \the<counter> and used by \ref;
# An ltx:tag w/o @role are for the numbers, often formatted differently, which
# appear alongside the object; Such a tag also may be embedded within the title or caption.
# Cross-references automatically generated by LaTeXML benefit from a bit more context:
# these are the role=typerefnum forms.
# Additional forms are needed for bibliographies, hyperref's autoref, etc.
# An additional complication is that while the "type" determines the formatting
# of the various forms, some types (eg. theorems) share the same counter.
# LaTeX defines this handling on an adhoc basis; defines \fnum@table, \fnum@figure for some types
# but \labelenumi, etc for others.
# This section synthesizes a more uniform support for reference numbers,
# references to reference numbers, title formatting etc.
# It allows you to customize each of the forms for each type encountered.
# The design reflects LaTeX needs, more than TeX, but support starts here!
# This collects up the various declared ltx:tag's into an ltx:tags
# \lx@make@tags{type}
# Expands into \lx@tags{...} containing one \lx@tag@intags[role]{formatter{type}}
# for each role (in sorted order) registered in the value 'type_tag_formatter'.
DefMacro('\lx@make@tags {}', sub {
    my ($gullet, $type) = @_;
    my $formatter_for = LookupValue('type_tag_formatter');
    my @tags          = map {
      my $role = $_;
      Invocation(T_CS('\lx@tag@intags'), T_OTHER($role),
        Invocation($$formatter_for{$role}, $type));
    } sort keys %{$formatter_for};
    return (T_CS('\lx@tags'), T_BEGIN, @tags, T_END); });
# Remove the last closed node, if it's empty.
# removeEmptyElement($document, $whatsit)
# Used as an afterConstruct hook: look at the last child of the document's
# current node (which should be the wrapper just added) and remove it from
# the document if it has no child nodes.
# If the current node has no last child at all, there is nothing to remove
# and nothing is done.  $whatsit is not used.  Returns nothing.
sub removeEmptyElement {
  my ($document, $whatsit) = @_;
  my $node = $document->getNode->lastChild;    # This should be the wrapper just added.
  return unless defined $node;                 # Nothing was added? Nothing to prune.
  if (!$node->childNodes) {
    $document->removeNode($node); }
  return; }
# \lx@tag[open][close]{stuff}
# Creates an ltx:tag with open & close attributes from the first two optional
# arguments, and content from the required argument (#4); an empty tag is removed again.
# NOTE(review): the parameter list has three optional arguments, while the comment
# above documents only [open][close]; the third one is not used -- confirm intended.
DefConstructor('\lx@tag[][][]{}',
"<ltx:tag open='#1' close='#2'>#4</ltx:tag>",
bounded => 1, mode => 'text',
afterConstruct => \&removeEmptyElement);
# \lx@tag@intags{role}{stuff}
# Creates an ltx:tag with the given role (read here as an optional argument);
# before digestion, text mode is re-entered and the font neutralized.
DefConstructor('\lx@tag@intags[]{}',
"<ltx:tag role='#1'>#2</ltx:tag>",
bounded => 1, mode => 'text',
beforeDigest => sub { reenterTextMode(); neutralizeFont() },
afterConstruct => \&removeEmptyElement);
# \lx@tags{tags}: the ltx:tags container; removed again if it ends up empty.
DefConstructor('\lx@tags{}',
"<ltx:tags>#1</ltx:tags>",
afterConstruct => \&removeEmptyElement);
#----------------------------------------------------------------------
# "refnum" is the lowest level reference number for an object; it is typically \the<counter>
# but be sure to use the right counter! This is how \ref will show the number.
# You'll typically customize this by defining \the<counter> (and \p@<counter>) as in LaTeX.
# \lx@counterfor{type}
# Expands to the counter registered for the type in the mapping 'counter_for_type',
# or to the type itself, when no counter is registered for it.
DefMacro('\lx@counterfor{}', sub {
    my ($gullet, $type) = @_;
    if (my $counter = LookupMapping('counter_for_type', ToString($type))) {
      return T_OTHER($counter); }
    return $type; });
# \lx@the@@{type}: \the<counter>, for the counter associated with the type.
# (\lx@counterfor is expanded first, so that the helper sees the counter's name)
DefMacro('\lx@the@@{}', '\expandafter\lx@@the@@\expandafter{\lx@counterfor{#1}}');
DefMacro('\lx@@the@@{}', '\csname the#1\endcsname');
# \lx@therefnum@@{type}: \p@<counter>\the<counter>, within a group, in \normalfont.
DefMacro('\lx@therefnum@@{}', '\expandafter\lx@@therefnum@@\expandafter{\lx@counterfor{#1}}');
DefMacro('\lx@@therefnum@@{}',
'{\normalfont\csname p@#1\endcsname\csname the#1\endcsname}');
# Register it as the formatter for tags with role 'refnum'.
AssignMapping('type_tag_formatter', 'refnum' => '\lx@therefnum@@');
#----------------------------------------------------------------------
# \lx@fnum@@{type} Gets the formatted form of the refnum, as part of the object, (no @role).
# Customize by defining \fnum@<type> or \<type>name and \fnum@font@<type>
# Default uses \fnum@font@<type> \<type>name prefix + space (if any) and \the<counter>.
# When using the "name", \lx@name@<type> (if defined) is used in preference to \<type>name;
# see the note before \lx@@fnum@@ for why.
# \lx@refnum@compose{name}{number}: expands the first token of number once, then
# hands over to \lx@refnum@compose@{number}{name} (note the swapped order).
DefMacro('\lx@refnum@compose{}{}', '\expandafter\lx@refnum@compose@\expandafter{#2}{#1}');
# \lx@refnum@compose@{number}{name}: the \if.#1. idiom tests for an empty number;
# if empty, just the name, otherwise name, \space, number.
DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2\space#1\fi');
####DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2~#1\fi');
# Inside a group: \normalfont, then \fnum@font@<type> if defined, then either the
# user's \fnum@<type> or the default \lx@@fnum@@{type}.
DefMacro('\lx@fnum@@{}',
'{\normalfont\@ifundefined{fnum@font@#1}{}{\csname fnum@font@#1\endcsname}'
. '\@ifundefined{fnum@#1}{\lx@@fnum@@{#1}}{\csname fnum@#1\endcsname}}');
# Really seems like <type>name should take precedence over \lx@name@<type>,
# since users might define it.
# BUT amsthm defines \thmname{}!
# So: \lx@name@<type> if defined; else \<type>name if defined; else the bare number.
DefMacro('\lx@@fnum@@ {}',
'\@ifundefined{lx@name@#1}{'
. '\@ifundefined{#1name}{'
. '\lx@the@@{#1}'
. '}{'
. '\lx@refnum@compose{\csname #1name\endcsname}{\lx@the@@{#1}}'
. '}}{'
. '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\lx@the@@{#1}}'
. '}');
# The empty role is the default tag formatter.
AssignMapping('type_tag_formatter', '' => '\lx@fnum@@'); # Default!
#----------------------------------------------------------------------
# \lx@fnum@toc@@{type} is similar, but formats the number for use within \toctitle
# Customize by defining \fnum@toc@<type> or \fnum@tocfont@<type>
# Default uses just \the<counter> (no name prefix).
# Inside a group: \normalfont, then \fnum@tocfont@<type> if defined, then either
# the user's \fnum@toc@<type> or \lx@the@@{type}.
DefMacro('\lx@fnum@toc@@{}',
'{\normalfont\@ifundefined{fnum@tocfont@#1}{}{\csname fnum@tocfont@#1\endcsname}'
. '\@ifundefined{fnum@toc@#1}{\lx@the@@{#1}}{\csname fnum@toc@#1\endcsname}}');
#----------------------------------------------------------------------
# "typerefnum" form is used by automatic cross-references, typically "type number" or similar.
# Customize by defining \typerefnum@<type> or \typerefnum@font@<type>
# Default uses the first defined of \<type>typerefname, \lx@name@<type> or \<type>name,
# followed by a space and then \p@<type>\the<counter>.
# If none of those names is defined, the default expands to nothing.
DefMacro('\lx@typerefnum@@{}',
'{\normalfont\@ifundefined{typerefnum@font@#1}{}{\csname typerefnum@font@#1\endcsname}'
. '\@ifundefined{typerefnum@#1}{\lx@@typerefnum@@{#1}}{\csname typerefnum@#1\endcsname}}');
# The default: nested \@ifundefined tests implement the precedence described above;
# the innermost "undefined" branch is deliberately empty.
DefMacro('\lx@@typerefnum@@{}',
'\@ifundefined{#1typerefname}{'
. '\@ifundefined{lx@name@#1}{'
. '\@ifundefined{#1name}{'
. '}{'
. '\lx@refnum@compose{\csname #1name\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}'
. '}}{'
. '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}'
. '}}{'
. '\lx@refnum@compose{\csname #1typerefname\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}'
. '}');
# Register it as the formatter for the 'typerefnum' tag role.
AssignMapping('type_tag_formatter', 'typerefnum' => '\lx@typerefnum@@');
#----------------------------------------------------------------------
# The following macros provide similar customization for titles & toctitles
# in particular for supporting localization for different languages.
# Redefine these if you want to assemble the name (eg. \chaptername), refnum and titles differently
#----------------------------------------------------------------------
# \lx@format@title@@{type}{title}
# Format a title (or caption) appropriately for type.
# Customize by defining \format@title@<type>{title}
# Default composes \lx@fnum@@{type} space title.
# The title is first wrapped in a group that applies \format@title@font@<type> (if defined).
DefMacro('\lx@format@title@@{}{}',
'\lx@@format@title@@{#1}'
. '{{\lx@format@title@font@@{#1}#2}}');
# Chooses between the user's \format@title@<type> and the default composition.
DefMacro('\lx@@format@title@@{}{}',
'{\@ifundefined{format@title@#1}'
. '{\lx@@compose@title{\lx@fnum@@{#1}}{#2}}'
. '{\csname format@title@#1\endcsname{#2}}}');
# \lx@format@toctitle@@{type}{toctitle}
# Similar for toctitle, typically briefer
# Customize by defining \format@toctitle@<type>{title}
# Default composes \lx@fnum@toc@@{type} space title.
DefMacro('\lx@format@toctitle@@{}{}',
'\lx@@format@toctitle@@{#1}'
. '{{\lx@format@toctitle@font@@{#1}#2}}');
DefMacro('\lx@@format@toctitle@@{}{}',
'{\@ifundefined{format@toctitle@#1}'
. '{\lx@@compose@title{\lx@fnum@toc@@{#1}}{#2}}'
. '{\csname format@toctitle@#1\endcsname{#2}}}');
# \lx@@compose@title{tag}{title}: the tag goes into \lx@tag with an empty "open"
# and a single space as "close", followed by the title.
DefMacro('\lx@@compose@title{}{}', '\lx@tag[][ ]{#1}#2');
# Font hooks: expand to \format@title@font@<type> (resp. \format@toctitle@font@<type>)
# when defined, otherwise to nothing.
DefMacro('\lx@format@title@font@@{}',
'\@ifundefined{format@title@font@#1}{}{\csname format@title@font@#1\endcsname}');
DefMacro('\lx@format@toctitle@font@@{}',
'\@ifundefined{format@toctitle@font@#1}{}{\csname format@toctitle@font@#1\endcsname}');
## NOTE that a 3rd form seems desirable: a concise form that cannot rely on context for the type.
## This would be useful for the titles in links; thus can be plain (unicode) text.
#======================================================================
# TeX Book, Appendix B. p. 356
# These layout commands are defined with an undef body (presumably so that they
# are accepted but produce nothing -- TODO confirm against DefPrimitiveI).
DefPrimitiveI('\raggedright', undef, undef);
DefPrimitiveI('\raggedleft', undef, undef); # this is actually LaTeX
DefPrimitiveI('\ttraggedright', undef, undef);
DefPrimitiveI('\leavevmode', undef, undef);
# \mathhexbox{a}{b}{c}: an hbox containing the math character \mathchar"abc.
DefMacro('\mathhexbox{}{}{}', '\leavevmode\hbox{$\m@th \mathchar"#1#2#3$}');
#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.2. Non-English Symbols, p.39
# The following shouldn't appear in math.
# Each command is simply bound to the corresponding Unicode character.
DefPrimitiveI('\OE', undef, "\x{0152}"); # LATIN CAPITAL LIGATURE OE
DefPrimitiveI('\oe', undef, "\x{0153}"); # LATIN SMALL LIGATURE OE
DefPrimitiveI('\AE', undef, UTF(0xC6)); # LATIN CAPITAL LETTER AE
DefPrimitiveI('\ae', undef, UTF(0xE6)); # LATIN SMALL LETTER AE
DefPrimitiveI('\AA', undef, UTF(0xC5)); # LATIN CAPITAL LETTER A WITH RING ABOVE
DefPrimitiveI('\aa', undef, UTF(0xE5)); # LATIN SMALL LETTER A WITH RING ABOVE
DefPrimitiveI('\O', undef, UTF(0xD8)); # LATIN CAPITAL LETTER O WITH STROKE
DefPrimitiveI('\o', undef, UTF(0xF8)); # LATIN SMALL LETTER O WITH STROKE
DefPrimitiveI('\L', undef, "\x{0141}"); # LATIN CAPITAL LETTER L WITH STROKE
DefPrimitiveI('\l', undef, "\x{0142}"); # LATIN SMALL LETTER L WITH STROKE
DefPrimitiveI('\ss', undef, UTF(0xDF)); # LATIN SMALL LETTER SHARP S
# apparently the rest can appear in math.
# \S and \P are macros expanding to internal primitives; the primitives carry
# alias => '\S' / '\P'.
DefPrimitiveI('\lx@sectionsign', undef, UTF(0xa7), alias => '\S'); # SECTION SIGN
DefPrimitiveI('\lx@paragraphsign', undef, UTF(0xB6), alias => '\P'); # PILCROW SIGN
DefMacroI('\S', undef, '\lx@sectionsign');
DefMacroI('\P', undef, '\lx@paragraphsign');
DefPrimitiveI('\dag', undef, "\x{2020}"); # DAGGER
DefPrimitiveI('\ddag', undef, "\x{2021}"); # DOUBLE DAGGER
DefPrimitiveI('\copyright', undef, UTF(0xA9)); # COPYRIGHT SIGN
DefPrimitiveI('\pounds', undef, UTF(0xA3)); # POUND SIGN
#----------------------------------------------------------------------
# Accents. LaTeX Table 3.1, p.38
#----------------------------------------------------------------------
# All of TeX's accents can (sorta) be handled by Unicode's combining accents
# (which follow the character to be accented).
# We'll let unicode normalization do the combination, if needed.
# Also, note that \t is intended to combine multiple chars, but it appears to
# work (via mozilla !?) best when the combining char is after the 1st char.
# Further, the accents \d and \b seem to center the under dot or bar under multiple
# chars --- how should this be handled in Unicode?
# Since people sometimes try to get fancy by using an empty argument,
# for each, I'm providing the combining code and an equivalent(?) spacing one.
# (doesn't look quite the same to use a combining char after a space)
# Build a Box holding $letter with an accent applied to it.
# $letter is digested (hopefully yielding a box with a plain string); dotless i/j
# are mapped back to plain i/j and every whitespace character becomes a plain space.
# If nothing but whitespace remains, the box holds $standalonechar; otherwise
# $combiningchar is inserted right after the first character and the result is
# NFC-normalized.  The box keeps the digested letter's font and locator, and
# $reversion is passed through to Box.
sub applyAccent {
  my ($stomach, $letter, $combiningchar, $standalonechar, $reversion) = @_;
  my $digested = $stomach->digest($letter);
  my $locator  = $digested->getLocator;
  my $font     = $digested->getFont;
  my $text     = $digested->toString;
  $text =~ tr/\x{0131}\x{0237}/ij/;
  $text =~ s/\s/ /g;
  my $accented;
  if ($text =~ /^\s*$/) {
    $accented = $standalonechar; }
  else {
    my ($first, @rest) = split(//, $text);
    $accented = NFC($first . $combiningchar . join('', @rest)); }
  return Box($accented, $font, $locator, $reversion); }
# Define the accent command $accent.  The command takes one argument and puts
# $combiningchar after that argument's first character; with an empty argument
# $standalonechar is used instead (both via \lx@applyaccent).
# Options: above => 1 or below => 1 say where the accent sits; "above" is assumed
# when neither is given.  The standalone => combining pair is recorded in the
# matching 'accent_combiner_above' / 'accent_combiner_below' mapping (See \accent).
sub DefAccent {
  my ($accent, $combiningchar, $standalonechar, %options) = @_;
  my $is_above = (defined $options{above} ? $options{above} : !$options{below});
  my $table    = ($is_above ? 'accent_combiner_above' : 'accent_combiner_below');
  AssignMapping($table, $standalonechar => $combiningchar);
  my $expansion = Tokens(T_CS('\lx@applyaccent'), T_OTHER($accent),
    T_OTHER($combiningchar), T_OTHER($standalonechar),
    T_BEGIN, T_ARG(1), T_END);
  DefMacroI($accent, "{}", $expansion, protected => 1);
  return; }
# \lx@applyaccent <accent> <combiningchar> <standalonechar> {letter}
# The worker behind every DefAccent'd command: hands the letter and the two accent
# characters to applyAccent, with \<accent>{letter} as the reversion.
DefPrimitiveI('\lx@applyaccent', "DefToken Token Token {}", sub {
my ($stomach, $accent, $combiningchar, $standalonechar, $letter) = @_;
applyAccent($stomach, $letter, $combiningchar->getString, $standalonechar->getString,
Tokens(T_CS($accent->getString), T_BEGIN, $letter, T_END)); },
mode => 'text');
# The accent table: command, combining character, standalone (spacing) character.
DefAccent('\`', "\x{0300}", UTF(0x60)); # COMBINING GRAVE ACCENT & GRAVE ACCENT
DefAccent("\\'", "\x{0301}", UTF(0xB4)); # COMBINING ACUTE ACCENT & ACUTE ACCENT
DefAccent('\^', "\x{0302}", UTF(0x5E)); # COMBINING CIRCUMFLEX ACCENT & CIRCUMFLEX ACCENT
DefAccent('\"', "\x{0308}", UTF(0xA8)); # COMBINING DIAERESIS & DIAERESIS
DefAccent('\~', "\x{0303}", "~"); # COMBINING TILDE
DefAccent('\=', "\x{0304}", UTF(0xAF)); # COMBINING MACRON & MACRON
DefAccent('\.', "\x{0307}", "\x{02D9}"); # COMBINING DOT ABOVE & DOT ABOVE
DefAccent('\u', "\x{0306}", "\x{02D8}"); # COMBINING BREVE & BREVE
DefAccent('\v', "\x{030C}", "\x{02C7}"); # COMBINING CARON & CARON
DefAccent('\@ringaccent', "\x{030A}", "o"); # COMBINING RING ABOVE & non-combining
DefAccent('\r', "\x{030A}", "o"); # COMBINING RING ABOVE & non-combining
DefAccent('\H', "\x{030B}", "\x{02DD}"); # COMBINING DOUBLE ACUTE ACCENT & non-combining
DefAccent('\c', "\x{0327}", UTF(0xB8), below => 1); # COMBINING CEDILLA & CEDILLA
# NOTE: The next two get defined for math, as well; See below
DefAccent('\@text@daccent', "\x{0323}", '.', below => 1); # COMBINING DOT BELOW & DOT (?)
DefAccent('\@text@baccent', "\x{0331}", UTF(0xAF), below => 1); # COMBINING MACRON BELOW & MACRON
DefAccent('\t', "\x{0361}", "-"); # COMBINING DOUBLE INVERTED BREVE & ???? What????
# this one's actually defined in mathscinet.sty, but just stick it here!
DefAccent('\lfhook', "\x{0326}", ",", below => 1); # COMBINING COMMA BELOW
# I doubt that latter covers multiple chars...?
#DefAccent('\bar',"\x{0304}", ?); # COMBINING MACRON or is this the longer overbar?
# \accent <number> {letter}
# This will fail if there really are "assignments" after the number!
# We're given a number pointing into the font, from which we can derive the standalone char.
# From that, we want to figure out the combining character, but there could be one for
# both the above & below cases! We'll prefer the above case.
DefPrimitive('\accent Number {}', sub {
my ($stomach, $num, $letter) = @_;
my $n = $num->valueOf;
# Decode $n through the encoding of text font 0 when one is known, else fall back to chr($n).
my $fontinfo = lookupFontinfo(LookupValue('textfont_0'));
my $acc = ($fontinfo && $$fontinfo{encoding} ? FontDecode($n, $$fontinfo{encoding}) : chr($n));
my $reversion = Invocation(T_CS('\accent'), $num, $letter);
# NOTE: REVERSE LOOKUP in above accent list for the non-spacing accent char
# BUT, \accent always (?) makes an above type accent... doesn't it?
if (my $combiner = LookupMapping('accent_combiner_above', $acc)
|| LookupMapping('accent_combiner_below', $acc)) {
applyAccent($stomach, $letter, $combiner, $acc, $reversion); }
else {
# Unknown accent: warn and emit the letter unaccented.
Warn('unexpected', "accent$n", $stomach, "Accent '$n' not recognized");
Box(ToString($letter), undef, undef, $reversion); } });
# Note that these two apparently work in Math? BUT the argument is treated as text!!!
# \d and \b dispatch on \ifmmode to a math constructor or to the text accent.
DefMacro('\d{}', '\ifmmode\@math@daccent{#1}\else\@text@daccent{#1}\fi');
DefMacro('\b{}', '\ifmmode\@math@baccent{#1}\else\@text@baccent{#1}\fi');
# Math form of \d: an XMApp applying an UNDERACCENT dot operator (U+22C5) to the argument.
# The argument is digested in text mode; afterDigest records it as the 'matharg'
# property (its body) when it is math, otherwise as 'textarg', and the template
# emits XMArg or XMText accordingly.
DefConstructor('\@math@daccent {}',
"<ltx:XMApp><ltx:XMTok role='UNDERACCENT'>\x{22c5}</ltx:XMTok>"
. "?#textarg(<ltx:XMText>#textarg</ltx:XMText>)(<ltx:XMArg>#matharg</ltx:XMArg>)"
. "</ltx:XMApp>",
mode => 'text', alias => '\d',
afterDigest => sub {
my ($stomach, $whatsit) = @_;
my $arg = $whatsit->getArg(1);
if ($arg->isMath) {
$whatsit->setProperty(matharg => $arg->getBody); }
else {
$whatsit->setProperty(textarg => $arg); }
return; });
# Math form of \b: identical in structure, with a macron (U+00AF) as the UNDERACCENT.
DefConstructor('\@math@baccent {}',
"<ltx:XMApp><ltx:XMTok role='UNDERACCENT'>" . UTF(0xAF) . "</ltx:XMTok>"
. "?#textarg(<ltx:XMText>#textarg</ltx:XMText>)(<ltx:XMArg>#matharg</ltx:XMArg>)"
. "</ltx:XMApp>",
mode => 'text', alias => '\b',
afterDigest => sub {
my ($stomach, $whatsit) = @_;
my $arg = $whatsit->getArg(1);
if ($arg->isMath) {
$whatsit->setProperty(matharg => $arg->getBody); }
else {
$whatsit->setProperty(textarg => $arg); }
return; });
#======================================================================
# TeX Book, Appendix B. p. 357
# The rule/leader fills are defined with an undef body.
foreach my $op ('\hrulefill', '\dotfill', '\rightarrowfill', '\leftarrowfill',
'\upbracefill', '\downbracefill') {
DefPrimitiveI($op, undef, undef); }
Let('\bye', '\end');
Let('\sp', T_SUPER);
Let('\sb', T_SUB);
# Explicit spaces.  Each yields a Box holding a Unicode space character, flagged
# isSpace, with a width and \<cs> as reversion.
# \, is a protected macro choosing the math (\thinmuskip) or text (0.16667em) variant;
# both revert to \,.
DefPrimitiveI('\lx@thinmuskip', undef, sub {
Box("\x{2009}", undef, undef, T_CS('\,'),
name => 'thinspace', isSpace => 1,
width => LookupRegister('\thinmuskip')); });
DefPrimitiveI('\lx@thinspace', undef, sub {
Box("\x{2009}", undef, undef, T_CS('\,'),
name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); });
DefMacroI('\,', undef, '\ifmmode\lx@thinmuskip\else\lx@thinspace\fi', protected => 1);
# Negative thin space: width is the negated \thinmuskip.
DefPrimitiveI('\!', undef, sub {
Box("\x{200B}", undef, undef, T_CS('\!'), # zero width space
name => 'negthinspace', isSpace => 1,
width => LookupRegister('\thinmuskip')->negate); });
DefPrimitiveI('\>', undef, sub {
Box("\x{2005}", undef, undef, T_CS('\>'),
name => 'medspace', isSpace => 1,
width => LookupRegister('\medmuskip')); });
DefPrimitiveI('\;', undef, sub {
Box("\x{2004}", undef, undef, T_CS('\;'),
name => 'thickspace', isSpace => 1,
width => LookupRegister('\thickmuskip')); });
Let('\:', '\>');
# Control-space and control-tab both produce a non-breaking space (U+00A0).
DefPrimitiveI('\ ', undef, sub {
Box(UTF(0xA0), undef, undef, T_CS('\ '),
name => 'space', isSpace => 1, width => Dimension('0.5em')); });
DefPrimitiveI("\\\t", undef, sub {
Box(UTF(0xA0), undef, undef, T_CS("\\\t"),
isSpace => 1, width => Dimension('1em')); });
# Italic correction: an empty, zero-width box.
DefPrimitiveI('\/', undef, sub {
Box("", undef, undef, T_CS('\/'),
isSpace => 1, name => 'italiccorr', width => Dimension('0em')); });
#======================================================================
# TeX Book, Appendix B. p. 358
#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.3, Greek, p.41
#----------------------------------------------------------------------
# Lowercase Greek: each command maps to a single Unicode character.
# Note the plain/var pairs, where the "symbol" code points are used for one of the two.
DefMathI('\alpha', undef, "\x{03B1}");
DefMathI('\beta', undef, "\x{03B2}");
DefMathI('\gamma', undef, "\x{03B3}");
DefMathI('\delta', undef, "\x{03B4}");
DefMathI('\epsilon', undef, "\x{03F5}"); # GREEK LUNATE EPSILON SYMBOL
DefMathI('\varepsilon', undef, "\x{03B5}"); # GREEK SMALL LETTER EPSILON
DefMathI('\zeta', undef, "\x{03B6}");
DefMathI('\eta', undef, "\x{03B7}");
DefMathI('\theta', undef, "\x{03B8}");
DefMathI('\vartheta', undef, "\x{03D1}"); # GREEK THETA SYMBOL
DefMathI('\iota', undef, "\x{03B9}");
DefMathI('\kappa', undef, "\x{03BA}");
DefMathI('\lambda', undef, "\x{03BB}");
DefMathI('\mu', undef, "\x{03BC}");
DefMathI('\nu', undef, "\x{03BD}");
DefMathI('\xi', undef, "\x{03BE}");
DefMathI('\pi', undef, "\x{03C0}");
DefMathI('\varpi', undef, "\x{03D6}"); # GREEK PI SYMBOL
DefMathI('\rho', undef, "\x{03C1}");
DefMathI('\varrho', undef, "\x{03F1}"); # GREEK RHO SYMBOL
DefMathI('\sigma', undef, "\x{03C3}");
DefMathI('\varsigma', undef, "\x{03C2}"); # GREEK SMALL LETTER FINAL SIGMA
DefMathI('\tau', undef, "\x{03C4}");
DefMathI('\upsilon', undef, "\x{03C5}");
DefMathI('\phi', undef, "\x{03D5}"); # GREEK PHI SYMBOL
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
my %roles = ();
map { $roles{ $_->getAttribute('role') } = 1 } @rels;
my $role = (scalar(keys %roles) == 1 ? [keys %roles]->[0] : ($roles{ARROW} ? 'ARROW' : 'RELOP'));
map { $node->removeChild($_) } @rels;
$document->insertElement('ltx:XMTok', [map { $_->textContent } @rels], role => $role);
} } },
reversion => '#1\joinrel #2');
#----------------------------------------------------------------------
# LaTeX; Table 3.6. Arrow Symbols, p.43
#----------------------------------------------------------------------
# Arrows get treated somewhat like relations (or meta-relations),
# but it's hard to associate any particular "meaning" to them.
# One-directional arrows get role ARROW; the two-way arrows below get METARELOP.
DefMathI('\leftarrow', undef, "\x{2190}", role => 'ARROW'); # LEFTWARDS ARROW
DefMathI('\Leftarrow', undef, "\x{21D0}", role => 'ARROW'); # LEFTWARDS DOUBLE ARROW
DefMathI('\rightarrow', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW
DefMathI('\Rightarrow', undef, "\x{21D2}", role => 'ARROW'); # RIGHTWARDS DOUBLE ARROW
DefMathI('\leftrightarrow', undef, "\x{2194}", role => 'METARELOP'); # LEFT RIGHT ARROW
DefMathI('\Leftrightarrow', undef, "\x{21D4}", role => 'METARELOP'); # LEFT RIGHT DOUBLE ARROW
DefMathI('\iff', undef, "\x{21D4}", role => 'METARELOP', meaning => 'iff'); # LEFT RIGHT DOUBLE ARROW
DefMathI('\mapsto', undef, "\x{21A6}", role => 'ARROW', meaning => 'maps-to'); # RIGHTWARDS ARROW FROM BAR
DefMathI('\hookleftarrow', undef, "\x{21A9}", role => 'ARROW'); # LEFTWARDS ARROW WITH HOOK
DefMathI('\leftharpoonup', undef, "\x{21BC}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB UPWARDS
DefMathI('\leftharpoondown', undef, "\x{21BD}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB DOWNWARDS
DefMathI('\rightleftharpoons', undef, "\x{21CC}", role => 'METARELOP'); # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
DefMathI('\longleftarrow', undef, "\x{27F5}", role => 'ARROW'); # LONG LEFTWARDS ARROW
DefMathI('\Longleftarrow', undef, "\x{27F8}", role => 'ARROW'); # LONG LEFTWARDS DOUBLE ARROW
DefMathI('\longrightarrow', undef, "\x{27F6}", role => 'ARROW'); # LONG RIGHTWARDS ARROW
DefMathI('\Longrightarrow', undef, "\x{27F9}", role => 'ARROW'); # LONG RIGHTWARDS DOUBLE ARROW
DefMathI('\longleftrightarrow', undef, "\x{27F7}", role => 'METARELOP'); # LONG LEFT RIGHT ARROW
DefMathI('\Longleftrightarrow', undef, "\x{27FA}", role => 'METARELOP'); # LONG LEFT RIGHT DOUBLE ARROW
DefMathI('\longmapsto', undef, "\x{27FC}", role => 'ARROW'); # LONG RIGHTWARDS ARROW FROM BAR
DefMathI('\hookrightarrow', undef, "\x{21AA}", role => 'ARROW'); # RIGHTWARDS ARROW WITH HOOK
DefMathI('\rightharpoonup', undef, "\x{21C0}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB UPWARDS
DefMathI('\rightharpoondown', undef, "\x{21C1}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
DefMathI('\leadsto', undef, "\x{219D}", role => 'ARROW', meaning => 'leads-to'); # RIGHTWARDS WAVE ARROW
DefMathI('\uparrow', undef, "\x{2191}", role => 'ARROW'); # UPWARDS ARROW
DefMathI('\Uparrow', undef, "\x{21D1}", role => 'ARROW'); # UPWARDS DOUBLE ARROW
DefMathI('\downarrow', undef, "\x{2193}", role => 'ARROW'); # DOWNWARDS ARROW
DefMathI('\Downarrow', undef, "\x{21D3}", role => 'ARROW'); # DOWNWARDS DOUBLE ARROW
DefMathI('\updownarrow', undef, "\x{2195}", role => 'ARROW'); # UP DOWN ARROW
DefMathI('\Updownarrow', undef, "\x{21D5}", role => 'ARROW'); # UP DOWN DOUBLE ARROW
DefMathI('\nearrow', undef, "\x{2197}", role => 'ARROW'); # NORTH EAST ARROW
DefMathI('\searrow', undef, "\x{2198}", role => 'ARROW'); # SOUTH EAST ARROW
DefMathI('\swarrow', undef, "\x{2199}", role => 'ARROW'); # SOUTH WEST ARROW
DefMathI('\nwarrow', undef, "\x{2196}", role => 'ARROW'); # NORTH WEST ARROW
# \mapstochar (3237), \lhook(312C), \rhook(312D)
# These are really wrong; I can't find the right Unicode Glyphs.
# These are only fragments intended to be assembled into meaningful(?) symbols.
# The stand-ins used: U+2E20 LEFT VERTICAL BAR WITH QUILL, U+2E26/U+2E27 LEFT/RIGHT SIDEWAYS U BRACKET.
DefMathI('\mapstochar', undef, "\x{2E20}"); # TeX 3237
DefMathI('\lhook', undef, "\x{2E26}"); # TeX 312C
DefMathI('\rhook', undef, "\x{2E27}"); # TeX 312D
#======================================================================
# TeX Book, Appendix B. p. 359
# Ah, since \ldots can appear in text and math....
# \ldots expands to \lx@ldots, whose template emits an XMTok in math and the bare
# character (U+2026) in text.  In math, the properties hook supplies a font: the
# current font merged with serif/medium/upright and specialized to the character.
DefMacroI('\ldots', undef, '\lx@ldots');
DefConstructorI('\lx@ldots', undef,
"?#isMath(<ltx:XMTok name='ldots' font='#font' role='ID'>\x{2026}</ltx:XMTok>)(\x{2026})",
sizer => "\x{2026}",
reversion => '\ldots',
properties => sub {
(LookupValue('IN_MATH')
? (font => LookupValue('font')->merge(family => 'serif',
series => 'medium', shape => 'upright')->specialize("\x{2026}"))
: ()); }); # Since not DefMath!
# And so can \vdots
# Same scheme, with U+22EE.
DefConstructorI('\vdots', undef,
"?#isMath(<ltx:XMTok name='vdots' font='#font' role='ID'>\x{22EE}</ltx:XMTok>)(\x{22EE})",
sizer => "\x{22EE}",
properties => sub {
(LookupValue('IN_MATH')
? (font => LookupValue('font')->merge(family => 'serif',
series => 'medium', shape => 'upright')->specialize("\x{22EE}"))
: ()); }); # Since not DefMath!
# But not these!
DefMathI('\cdots', undef, "\x{22EF}", role => 'ID'); # MIDLINE HORIZONTAL ELLIPSIS
DefMathI('\ddots', undef, "\x{22F1}", role => 'ID'); # DOWN RIGHT DIAGONAL ELLIPSIS
DefMathI('\colon', undef, ':', role => 'METARELOP'); # Seems like good default role
# Note that amsmath redefines \dots to be `smart'.
# Aha, also can be in text...
# Same scheme as \lx@ldots (U+2026), but named 'dots'.
DefConstructorI('\dots', undef,
"?#isMath(<ltx:XMTok name='dots' font='#font' role='ID'>\x{2026}</ltx:XMTok>)(\x{2026})",
sizer => "\x{2026}",
properties => sub {
(LookupValue('IN_MATH')
? (font => LookupValue('font')->merge(family => 'serif',
series => 'medium', shape => 'upright')->specialize("\x{2026}"))
: ()); }); # Since not DefMath!
# And while we're at it...
# Pretest for XMath to keep from interpreting math that the DOM may not allow!!
##DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'\cdot\cdot\cdot',replace=>'\cdots');
# Ligatures: three centered dots => cdots (math); three periods => ldots
# (in text, except in typewriter fonts; and in math).
DefMathLigature("\x{22C5}\x{22C5}\x{22C5}" => "\x{22EF}", role => 'ID', name => 'cdots');
DefLigature(qr{\.\.\.}, "\x{2026}", fontTest => sub { $_[0]->getFamily ne 'typewriter'; }); # ldots
#DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'...',replace=>'\ldots');
DefMathLigature("..." => "\x{2026}", role => 'ID', name => 'ldots');
#----------------------------------------------------------------------
# Math Accents.
#----------------------------------------------------------------------
# LaTeX; Table 3.11. Math Mode Accents, p.50.
# Are these all TeX (or LaTeX)?
# Note that most of these should NOT be stretchy, by default!
# Each accent is a DefMath taking one (Digested) argument; the accent character is
# the operator, with operator_role OVERACCENT.
DefMath('\hat Digested', UTF(0x5E),
operator_role => 'OVERACCENT', operator_stretchy => 'false');
DefMath('\check Digested', "\x{02C7}",
operator_role => 'OVERACCENT', operator_stretchy => 'false'); # CARON
DefMath('\breve Digested', "\x{02D8}", operator_role => 'OVERACCENT'); # BREVE
DefMath('\acute Digested', UTF(0xB4), operator_role => 'OVERACCENT'); # ACUTE ACCENT
DefMath('\grave Digested', UTF(0x60), operator_role => 'OVERACCENT'); # GRAVE ACCENT
DefMath('\tilde Digested', UTF(0x7E),
operator_role => 'OVERACCENT', operator_stretchy => 'false'); # TILDE
DefMath('\bar Digested', UTF(0xAF),
operator_role => 'OVERACCENT', operator_stretchy => 'false'); # MACRON
DefMath('\vec Digested', "\x{2192}",
operator_role => 'OVERACCENT', operator_stretchy => 'false'); # RIGHTWARDS ARROW
DefMath('\dot Digested', "\x{02D9}", operator_role => 'OVERACCENT'); # DOT ABOVE
DefMath('\ddot Digested', UTF(0xA8), operator_role => 'OVERACCENT'); # DIAERESIS
# The wide forms use the same characters as \bar, \hat and \tilde, but without
# operator_stretchy => 'false'.
DefMath('\overline Digested', UTF(0xAF), operator_role => 'OVERACCENT'); # MACRON
DefMath('\widehat Digested', UTF(0x5E), operator_role => 'OVERACCENT'); # CIRCUMFLEX ACCENT [plain? also amsfonts]
DefMath('\widetilde Digested', UTF(0x7E), operator_role => 'OVERACCENT'); # TILDE [plain? also amsfonts]
# These aren't handled as simple accents by TeX, so no Digested
DefMath('\overbrace {}', "\x{23DE}", operator_role => 'OVERACCENT', # TOP CURLY BRACKET
scriptpos => 'mid', robust => 1);
DefMath('\underbrace {}', "\x{23DF}", operator_role => 'UNDERACCENT', # BOTTOM CURLY BRACKET
scriptpos => 'mid', robust => 1);
# NOTE that all the above accents REQUIRE math mode
# EXCEPT underline, overrightarrow and overleftarrow!
# Those three are macros dispatching on \ifmmode to internal math (and, for
# \underline, text) implementations; the internal ones are aliased to the user-level name.
DefMath('\math@underline{}', UTF(0xAF), operator_role => 'UNDERACCENT',
name => 'underline', alias => '\underline');
DefConstructor('\text@underline{}', "<ltx:text framed='underline' _noautoclose='1'>#1</ltx:text>");
DefMath('\math@overrightarrow{}', "\x{2192}", operator_role => 'OVERACCENT',
name => 'overrightarrow', alias => '\overrightarrow');
DefMath('\math@overleftarrow{}', "\x{2190}", operator_role => 'OVERACCENT',
name => 'overleftarrow', alias => '\overleftarrow');
# Careful: Use \protect so that it doesn't expand too early in alignments, etc.
DefMacro('\underline{}', '\protect\ifmmode\math@underline{#1}\else\text@underline{#1}\fi');
Let('\underbar', '\underline'); # Will anyone notice?
# Outside math, the arrows wrap the math form in $...$.
DefMacro('\overrightarrow{}', '\protect\ifmmode\math@overrightarrow{#1}\else$\math@overrightarrow{#1}$\fi');
DefMacro('\overleftarrow{}', '\protect\ifmmode\math@overleftarrow{#1}\else$\math@overleftarrow{#1}$\fi');
# \skew{amount}{accent}{base}
DefMacro('\skew{}{}{}', '{#2{#3\mkern#1mu}\mkern-#1mu}{}'); # ignore the subtle spacing for now?
#----------------------------------------------------------------------
# LaTeX; Table 3.10. Delimiters, p.47
#----------------------------------------------------------------------
# The meaning of OPEN/CLOSE tends to depend upon the pairing,
# rather than the individual tokens.
# This meaning is handled in MathParser (for now)
# \{ and \} are protected macros dispatching on \ifmmode to a math symbol
# (role OPEN/CLOSE, not stretchy) or to a text primitive in an upright font.
DefMacroI('\{', undef, '\ifmmode\lx@math@lbrace\else\lx@text@lbrace\fi', protected => 1);
DefMacroI('\}', undef, '\ifmmode\lx@math@rbrace\else\lx@text@rbrace\fi', protected => 1);
DefMathI('\lx@math@lbrace', undef, '{', role => 'OPEN', stretchy => 'false', alias => '\{');
DefMathI('\lx@math@rbrace', undef, '}', role => 'CLOSE', stretchy => 'false', alias => '\}');
DefPrimitiveI('\lx@text@lbrace', undef, '{', alias => '\{',
# font => { specialize => "{" });
font => { shape => 'upright' }, bounded => 1); # Since not DefMath!
DefPrimitiveI('\lx@text@rbrace', undef, '}', alias => '\}',
# font => { specialize => "}" }); # Since not DefMath!
font => { shape => 'upright' }, bounded => 1); # Since not DefMath!
Let('\lbrace', '\{');
Let('\lbrack', T_OTHER('['));
Let('\rbrace', '\}');
Let('\rbrack', T_OTHER(']'));
DefMathI('\lceil', undef, "\x{2308}", role => 'OPEN', stretchy => 'false'); # LEFT CEILING
DefMathI('\rceil', undef, "\x{2309}", role => 'CLOSE', stretchy => 'false'); # RIGHT CEILING
DefMathI('\lfloor', undef, "\x{230A}", role => 'OPEN', stretchy => 'false'); # LEFT FLOOR
DefMathI('\rfloor', undef, "\x{230B}", role => 'CLOSE', stretchy => 'false'); # RIGHT FLOOR
# Note: We should be using 27E8,27E9, which are "mathematical", not 2329,232A
DefMathI('\langle', undef, "\x{27E8}", role => 'OPEN', stretchy => 'false'); # LEFT-POINTING ANGLE BRACKET
DefMathI('\rangle', undef, "\x{27E9}", role => 'CLOSE', stretchy => 'false'); # RIGHT-POINTING ANGLE BRACKET
# Not sure these should be defined here, or latex, or even latex compat mode.
# Approximated by bold versions of ordinary parentheses and vertical bar.
DefMathI('\lgroup', undef, "(", font => { series => 'bold' }, role => 'OPEN', stretchy => 'false');
DefMathI('\rgroup', undef, ")", font => { series => 'bold' }, role => 'CLOSE', stretchy => 'false');
DefMathI('\bracevert', undef, "|", font => { series => 'bold' }, role => 'VERTBAR');
## DefMath('\lmoustache',"???", font=>{series=>'bold'}, role=>'OPEN');
## DefMath('\rmoustache',"???", font=>{series=>'bold'}, role=>'OPEN');
# TeX marks some symbols as delimiters which can be used with \left,\right,
# but many of which have different grammatical roles otherwise, eg. arrows, <, >.
# Short of setting up TeX's complicated encoding machinery, I need an explicit
# mapping. Unfortunately, this doesn't (yet) support people declaring their own delimiters!
# This duplicates in slightly different way what DefMath has put together.
# Keys are the delimiter as written after \left or \right.  Each entry gives:
#   char  : the character to emit,
#   lrole : the role to use when it follows \left,
#   rrole : the role to use when it follows \right,
#   name  : (optional) the name attribute.
# Note that the roles depend only on left/right position, so eg. ')' after \left is still OPEN.
our %DELIMITER_MAP =
('(' => { char => "(", lrole => 'OPEN', rrole => 'CLOSE' },
')' => { char => ")", lrole => 'OPEN', rrole => 'CLOSE' },
'[' => { char => "[", lrole => 'OPEN', rrole => 'CLOSE' },
']' => { char => "]", lrole => 'OPEN', rrole => 'CLOSE' },
'\{' => { char => "{", lrole => 'OPEN', rrole => 'CLOSE' },
'\}' => { char => "}", lrole => 'OPEN', rrole => 'CLOSE' },
'\lfloor' => { char => "\x{230A}", lrole => 'OPEN', rrole => 'CLOSE', name => 'lfloor' },
'\rfloor' => { char => "\x{230B}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rfloor' },
'\lceil' => { char => "\x{2308}", lrole => 'OPEN', rrole => 'CLOSE', name => 'lceil' },
'\rceil' => { char => "\x{2309}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rceil' },
'\langle' => { char => "\x{27E8}", lrole => 'OPEN', rrole => 'CLOSE', name => 'langle' },
'\rangle' => { char => "\x{27E9}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rangle' },
'<' => { char => "\x{27E8}", lrole => 'OPEN', rrole => 'CLOSE', name => 'langle' },
'>' => { char => "\x{27E9}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rangle' },
'/' => { char => "/", lrole => 'MULOP', rrole => 'MULOP' },
'\backslash' => { char => UTF(0x5C), lrole => 'MULOP', rrole => 'MULOP', name => 'backslash' },
'|' => { char => "|", lrole => 'VERTBAR', rrole => 'VERTBAR' },
'\|' => { char => "\x{2225}", lrole => 'VERTBAR', rrole => 'VERTBAR' },
'\uparrow' => { char => "\x{2191}", lrole => 'OPEN', rrole => 'CLOSE', name => 'uparrow' }, # ??
'\Uparrow' => { char => "\x{21D1}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Uparrow' }, # ??
'\downarrow' => { char => "\x{2193}", lrole => 'OPEN', rrole => 'CLOSE', name => 'downarrow' }, # ??
'\Downarrow' => { char => "\x{21D3}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Downarrow' }, # ??
'\updownarrow' => { char => "\x{2195}", lrole => 'OPEN', rrole => 'CLOSE', name => 'updownarrow' }, # ??
'\Updownarrow' => { char => "\x{21D5}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Updownarrow' }, # ??
);
# With new treatment of Simple Symbols as just Box's with assigned attributes,
# we're not getting whatsits, and so we're not looking them up the same way!!!
# TEMPORARILY (?) hack the Delimiter map:
# make every entry reachable under its output character as well as its TeX name.
{ my @entries = values %DELIMITER_MAP;
  $DELIMITER_MAP{ $$_{char} } = $_ for @entries; }
# Return the %DELIMITER_MAP entry (a hash ref with char, lrole, rrole and possibly name)
# for the given delimiter string, or undef when the delimiter is not known.
sub lookup_delimiter {
  my ($key) = @_;
  my $entry = $DELIMITER_MAP{$key};
  return $entry; }
# This is a little messier than you'd think.
# These effectively create a group between the \left,\right.
# And this also gives us a single list of things to parse separately.
# Since \left,\right are TeX primitives, and must be paired up,
# we use a bit of macro trickery to simulate.
# [The \@hidden@bgroup/egroup keep from putting a {} into the UnTeX]
# HOWEVER, an additional complication is that it is a common mistake to omit the balancing \right!
# Using an \egroup (or hidden) makes it hard to recover, so use a special egroup
# \left<delim> => \@left<delim> followed by a hidden group opener.
DefMacro('\left XToken', '\@left #1\@hidden@bgroup');
# Like \@hidden@egroup, but softer about missing \left
# Produces no output.  After digestion it closes the group opened by \left, unless
# the innermost frame is a mode switch or a \begingroup group, in which case it
# reports an unbalanced \right instead of closing anything.
DefConstructor('\right@hidden@egroup', '',
afterDigest => sub {
my ($stomach) = @_;
if ($STATE->isValueBound('MODE', 0) # Last stack frame was a mode switch!?!?!
|| $STATE->lookupValue('groupNonBoxing')) { # or group was opened with \begingroup
Error('unexpected', '\right', undef, "Unbalanced \\right, no balancing \\left."); }
else {
$stomach->egroup; } },
reversion => '');
# \right<delim> => close the hidden group, then \@right<delim>.
DefMacro('\right XToken', '\right@hidden@egroup\@right #1');
# \@left <delim> : the opening half of a \left ... \right pair.
# afterDigest classifies the delimiter:
#  * '.'                    : sets the 'hint' property, giving an ltx:XMHint;
#  * known in %DELIMITER_MAP : sets role (the entry's lrole), char, name and
#                              stretchy='true', giving an ltx:XMTok; font taken from the argument;
#  * argument has role OPEN : the argument itself is marked stretchy and emitted as is;
#  * anything else          : a warning is issued and the argument is emitted as is.
DefConstructor('\@left Token',
"?#char(<ltx:XMTok role='#role' name='#name' stretchy='#stretchy'>#char</ltx:XMTok>)"
. "(?#hint(<ltx:XMHint/>)(#1))",
afterDigest => sub { my ($stomach, $whatsit) = @_;
my $arg = $whatsit->getArg(1);
my $delim = ToString($arg);
if ($delim eq '.') {
$whatsit->setProperty(hint => 1); }
elsif (my $entry = $DELIMITER_MAP{$delim}) {
$whatsit->setProperties(role => $$entry{lrole},
char => $$entry{char},
name => $$entry{name},
stretchy => 'true');
$whatsit->setFont($arg->getFont()); }
elsif (($arg->getProperty('role') || '') eq 'OPEN') {
$arg->setProperty(stretchy => 'true'); }
else {
Warn('unexpected', $delim, $stomach,
"Missing delimiter; '.' inserted"); }
return; },
alias => '\left');
# \@right <delim> : the closing half of a \left ... \right pair.
# afterDigest classifies the delimiter:
#  * '.'                    : sets the 'hint' property, giving an ltx:XMHint;
#  * known in %DELIMITER_MAP : sets role (the entry's rrole), char, name and
#                              stretchy='true', giving an ltx:XMTok; font taken from the argument;
#  * argument has role CLOSE: the argument itself is marked stretchy and emitted as is;
#  * anything else          : a warning is issued and the argument is emitted as is.
DefConstructor('\@right Token',
  "?#char(<ltx:XMTok role='#role' name='#name' stretchy='#stretchy'>#char</ltx:XMTok>)"
    . "(?#hint(<ltx:XMHint/>)(#1))",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $arg   = $whatsit->getArg(1);
    my $delim = ToString($arg);
    if ($delim eq '.') {
      $whatsit->setProperty(hint => 1); }
    elsif (my $entry = $DELIMITER_MAP{$delim}) {
      $whatsit->setProperties(role => $$entry{rrole},
        char     => $$entry{char},
        name     => $$entry{name},
        stretchy => 'true');
      $whatsit->setFont($arg->getFont()); }
    elsif (($arg->getProperty('role') || '') eq 'CLOSE') {
      $arg->setProperty(stretchy => 'true'); }
    else {
      # Same wording as the \@left warning (the message used to end in a stray ')').
      Warn('unexpected', $delim, $stomach,
        "Missing delimiter; '.' inserted"); }
    return; },
  alias => '\right');
# These originally had Token as parameter, rather than {}..... Why?
# Note that in TeX, \big{((} will only enlarge the 1st paren!!!
# Each command just constructs its argument in a bounded group
# whose font size is named after the command.
foreach my $size (qw(big Big bigg Bigg)) {
  DefConstructor('\\' . $size . ' {}', '#1', bounded => 1, font => { size => $size }); }
# Assign $role to the node just constructed by one of the \bigl-like commands.
# The target is the last child element of the document's current node,
# or the current node itself when there is no such child.
# The role is only overwritten when the target has no role yet,
# or already has a delimiter-like one; otherwise it is left alone.
sub addDelimiterRole {
  my ($document, $role) = @_;
  my $parent = $document->getNode;
  my $target = $document->getLastChildElement($parent) || $parent;
  return unless $target;
  my $existing = '<none>';
  if ($target->nodeType == XML_ELEMENT_NODE) {
    $existing = $target->getAttribute('role') || '<none>'; }
  ## Maybe we shouldn't switch VERTBAR ?
  ## The catch is that occasionally people use a single \Bigl (or whatever)
  ## where they should have used a \Big
  $document->setAttribute($target, role => $role)
    if $existing =~ /^(OPEN|MIDDLE|CLOSE|VERTBAR|<none>)$/;
  return; }
# The "m" versions are defined in e-Tex and other places.
# For every size, the l/m/r variants construct their argument at that size
# and then try to assign the OPEN/MIDDLE/CLOSE role to what was built.
foreach my $size (qw(big Big bigg Bigg)) {
  foreach my $variant ([l => 'OPEN'], [m => 'MIDDLE'], [r => 'CLOSE']) {
    my ($suffix, $role) = @$variant;
    DefConstructor('\\' . $size . $suffix . ' {}', '#1', bounded => 1, font => { size => $size },
      afterConstruct => sub { addDelimiterRole($_[0], $role); }); } }
Let('\vert', T_OTHER('|'));
Let('\Vert', '\|');
#======================================================================
# TeX Book, Appendix B. p. 360
# \choose, et al, already handled above.
# Note that in TeX, all 4 args get digested(!)
# and the choice is made when absorbing!
# All four alternatives have been digested; the one matching the math style
# recorded at digestion time is the only one absorbed into the document.
DefConstructor('\mathchoice Digested Digested Digested Digested', sub {
    my ($document, $display, $text, $script, $scriptscript, %props) = @_;
    my $style  = $props{mathstyle};
    my $chosen = $scriptscript;    # fallback for any other style
    if    ($style eq 'display') { $chosen = $display; }
    elsif ($style eq 'text')    { $chosen = $text; }
    elsif ($style eq 'script')  { $chosen = $script; }
    $document->absorb($chosen); },
  properties => { mathstyle => sub { LookupValue('font')->getMathstyle; } });
# \mathpalette{cmd}{arg}: a \mathchoice applying cmd with each explicit style.
DefMacro('\mathpalette{}{}',
  '\mathchoice'
    . join('', map { '{#1\\' . $_ . 'style{#2}}' } qw(display text script scriptscript)));
# The phantoms: in math they leave an ltx:XMHint carrying (some of) the size
# of the argument; in text the argument is wrapped with class ltx_phantom.
# Each records width, height and depth of its digested argument as properties.
DefConstructor('\phantom{}',
  "?#isMath(<ltx:XMHint width='#width' height='#height' depth='#depth' name='phantom'/>)"
    . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",    # !?!?!?!
  properties  => { isSpace => 1 },
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    my ($width, $height, $depth) = $whatsit->getArg(1)->getSize;
    $whatsit->setProperties(width => $width, height => $height, depth => $depth);
    return; });
DefConstructor('\hphantom{}',
  "?#isMath(<ltx:XMHint width='#width' name='hphantom'/>)"
    . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",    # !?!?!?!
  properties  => { isSpace => 1 },
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    my ($width, $height, $depth) = $whatsit->getArg(1)->getSize;
    $whatsit->setProperties(width => $width, height => $height, depth => $depth);
    return; });
DefConstructor('\vphantom{}',
  "?#isMath(<ltx:XMHint height='#height' depth='#depth' name='vphantom'/>)"
    . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",    # !?!?!?!
  properties  => { isSpace => 1 },
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    my ($width, $height, $depth) = $whatsit->getArg(1)->getSize;
    $whatsit->setProperties(width => $width, height => $height, depth => $depth);
    return; });
# Only leaves a hint in math; produces nothing otherwise.
DefConstructor('\mathstrut', "?#isMath(<ltx:XMHint name='mathstrut'/>)()",
  properties => { isSpace => 1 });
DefConstructor('\smash{}', "#1");    # well, what?
#======================================================================
# TeX Book, Appendix B. p. 361
# This is actually LaTeX's definition, but let's just do it this way.
# With the optional argument: nth-root applied to (index, radicand);
# without it: square-root applied to the radicand.
DefConstructor('\sqrt OptionalInScriptStyle Digested',
  '?#1('
    . "<ltx:XMApp><ltx:XMTok meaning='nth-root'/><ltx:XMArg>#1</ltx:XMArg><ltx:XMArg>#2</ltx:XMArg></ltx:XMApp>"
    . ')('
    . "<ltx:XMApp><ltx:XMTok meaning='square-root'/><ltx:XMArg>#2</ltx:XMArg></ltx:XMApp>"
    . ')');
# Parameter type: reads tokens up to the given delimiter, and digests them
# inside a group whose font has been switched to script math style.
# It reverts to the argument wrapped in braces.
DefParameterType('ScriptStyleUntil',
  sub {
    my ($gullet, $delimiter) = @_;
    $gullet->readUntil($delimiter); },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    MergeFont(mathstyle => 'script'); },
  afterDigest => sub {
    my ($stomach) = @_;
    $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    (T_BEGIN, Revert($arg), T_END); });
DefConstructor('\root ScriptStyleUntil:\of {}',
"<ltx:XMApp><ltx:XMTok meaning='nth-root'/>"
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
scriptpos => \&doScriptpos);
# Trigonometric functions: presentation is the plain name.
foreach my $trig ([tan => 'tangent'], [tanh => 'hyperbolic-tangent']) {
  my ($name, $meaning) = @$trig;
  DefMathI('\\' . $name, undef, $name, role => 'TRIGFUNCTION', meaning => $meaning); }
#----------------------------------------------------------------------
# Modulo
DefMath('\pmod{}', '\;\;(\mathop{{\rm mod}} #1)',
  role => 'MODIFIER');    # , meaning=>'modulo');
DefMath('\bmod', 'mod',
  role    => 'MODIFIEROP',
  meaning => 'modulo');
#======================================================================
# TeX Book, Appendix B. p. 362
#----------------------------------------------------------------------
# Matrices; Generalized
# The delimiters around a matrix may simply be notational, or for readability,
# and don't affect the "meaning" of the array structure as a matrix.
# In that case, we'll use an XMDual to indicate the content is simply the matrix,
# but the presentation includes the delimiters.
# HOWEVER, the delimiters may also signify an OPERATION on the matrix
# in which case the application & meaning of that operator must be supplied.
# keys are
# name : the name of the environment (for reversion)
# datameaning: the (presumed) meaning of the array construct (typically 'matrix')
# delimitermeaning : the operator meaning due to delimiters (eg. norm)(as applied to the array)
# style : typically \displaystyle, \textstyle...
# left : TeX code for left of matrix
# right : TeX code for right
# ncolumns : the number of columns (default is not limited)
# The 'style' key of the lx@GEN keyset is declared with type UndigestedKey.
DefKeyVal('lx@GEN', 'style', 'UndigestedKey');
# \lx@gen@matrix@bindings{keyvals}
# Opens a group and installs the alignment bindings for a generalized matrix.
# Keys read here: style (default \textstyle), alignment (default 'c'),
# ncolumns, and rowsep (passed on as an alignment attribute).
# The group is left open; \lx@end@gen@matrix closes it.
DefPrimitive('\lx@gen@matrix@bindings RequiredKeyVals:lx@GEN', sub {
my ($stomach, $kv) = @_;
$stomach->bgroup;
my $style = $kv->getValue('style') || T_CS('\textstyle');
my $align = ToString($kv->getValue('alignment')) || 'c';
# We really should be using ReadAlignmentTemplate (LaTeXML::Core::Alignment)
# but we'd have to convert it to a repeating spec somehow.
# Column spec: \hfil goes before the cell for c or r, and after it for c or l;
# the style token always follows the leading \hfil (if any).
my @colspec = (before => Tokens(($align =~ /^(?:c|r)/ ? (T_CS('\hfil')) : ()), $style),
after => Tokens(($align =~ /^(?:c|l)/ ? (T_CS('\hfil')) : ())));
my $ncols = ToString($kv->getValue('ncolumns'));
my %attributes = ();
foreach my $key (qw(rowsep)) { # Probably more?
if (my $value = $kv->getValue($key)) {
$attributes{$key} = $value; } }
# With ncolumns: exactly that many copies of the column spec;
# otherwise a single spec marked as repeated.
alignmentBindings(LaTeXML::Core::Alignment::Template->new(
($ncols ? (columns => [map { { @colspec } } 1 .. $ncols])
: (repeated => [{@colspec}]))),
'math',
(keys %attributes ? (attributes => {%attributes}) : ())); # });
Let("\\\\", '\@alignment@newline');
Let('\lx@intercol', '\lx@math@intercol');
Let('\@row@before', '\@empty'); # Disable special row treatment (eg. numbering) unless requested
Let('\@row@after', '\@empty');
});
# Closes the group opened by \lx@gen@matrix@bindings.
DefPrimitive('\lx@end@gen@matrix', sub { $_[0]->egroup; });
# \lx@gen@plain@matrix{keyvals}{body}:
# set up the bindings, construct the matrix from the alignment, close the group.
DefMacro('\lx@gen@plain@matrix{}{}',
  join('',
    '\lx@gen@matrix@bindings{#1}',
    '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\@finish@alignment}',
    '\lx@end@gen@matrix'));
# The delimiters on a matrix are presumably just for notation or readability (not an operator);
# the array data itself is the matrix.
# When a datameaning or delimitermeaning was given, the result is an ltx:XMDual
# whose content side refers (by XMRef) to the array inside the presentation side;
# otherwise only the array itself is constructed.
DefConstructor('\lx@gen@plain@matrix@ RequiredKeyVals:lx@GEN {}',
  join('',
    "?#needXMDual(",
    "<ltx:XMDual>",
    "?#delimitermeaning(<ltx:XMApp><ltx:XMTok meaning='#delimitermeaning'/>)()",
    "?#datameaning(<ltx:XMApp><ltx:XMTok meaning='#datameaning'/>)()",
    "<ltx:XMRef _xmkey='#xmkey'/>",
    "?#delimitermeaning(</ltx:XMApp>)()",
    "?#datameaning(</ltx:XMApp>)()",
    "<ltx:XMWrap>#left<ltx:XMArg _xmkey='#xmkey'>#2</ltx:XMArg>#right</ltx:XMWrap>",
    "</ltx:XMDual>",
    ")(",
    "#2",
    ")"),
  # Every key/value pair becomes a property of the whatsit.
  properties => sub {
    my (undef, $kv) = @_;
    return %{ $kv->getKeyVals }; },
  # Reverts to \<name>{...}, with the contents taken from the recorded alignment.
  reversion => sub {
    my ($whatsit, $kv, $body) = @_;
    my $csname    = '\\' . ToString($kv->getValue('name'));
    my $alignment = $whatsit->getProperty('alignment');
    return (T_CS($csname), T_BEGIN, $alignment->revert, T_END); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $kv          = $whatsit->getArg(1);
    my $has_meaning = $kv->getValue('datameaning') || $kv->getValue('delimitermeaning');
    $whatsit->setProperties(
      needXMDual => 1,
      xmkey      => LaTeXML::Package::getXMArgID())
      if $has_meaning;
    # Remember the alignment, which the reversion needs.
    $whatsit->setProperties(alignment => LookupValue('Alignment'));
    return; });
# \matrix{body}: a generalized matrix named 'matrix', whose data means 'matrix'.
DefMacro('\matrix{}',
'\lx@gen@plain@matrix{name=matrix,datameaning=matrix}{#1}');
# \bordermatrix{body}: a generalized matrix (no meaning given), post-processed
# by \lx@hack@bordermatrix to insert the parentheses.
DefMacro('\bordermatrix{}', # Semantics?
'\lx@hack@bordermatrix{\lx@gen@plain@matrix{name=bordermatrix}{#1}}');
# HACK the newly created border matrix to add columns for the (spanned) parentheses!!!
# Assume (for now) that there's no XMDual structure here.
# What is the semantics, anyway?
# Steps: absorb the matrix; append two empty cells to every row, moving one of them
# to 2nd position; then, in the 2nd row, give the 2nd and last cells a rowspan
# and fill them with '(' and ')' tokens plus a strut-like blank token.
DefConstructor('\lx@hack@bordermatrix{}', sub {
my ($document, $matrix) = @_;
$document->absorb($matrix);
# The array is taken to be the last child of the current node.
my $marray = $document->getNode->lastChild;
my @rows = $document->findnodes('ltx:XMRow', $marray);
my ($h, $d) = (10.0 * $UNITY, 0); # 10pts.
# Contrived, since $matrix may be a List or...
my ($alignment) = grep { $_ } map { $_->getProperty('alignment') } $matrix->unlist;
if ($alignment) {
# NOTE(review): $arrayh comes from ptValue while $h comes from valueOf;
# confirm that both are expressed in the same units before subtracting.
my $arrayh = $alignment->getHeight->ptValue;
my ($row0, $row1) = $alignment->rows; # What's row 0 ?
$h = $$row1{y}->valueOf;
$d = $h - $arrayh; }
my $md = Dimension(-$d);
$h = Dimension($h); $d = Dimension($d);
foreach my $row (@rows) { # Add empty cells for 2nd & last column
$document->openElementAt($row, 'ltx:XMCell');
$document->openElementAt($row, 'ltx:XMCell');
$row->insertAfter($row->lastChild, $row->firstChild); # Move to 2nd pos!
}
# NOTE(review): this indexes $rows[1] unconditionally; presumably a bordermatrix
# always has at least two rows -- confirm what happens otherwise.
my @cols = element_nodes($rows[1]);
my $col1 = $cols[1];
my $coln = $cols[-1];
# The parentheses span every row except the first.
my $n = scalar(@rows) - 1;
$col1->setAttribute(rowspan => $n);
$coln->setAttribute(rowspan => $n);
my $pfont = $STATE->lookupValue('font')->specialize('(');
$document->appendTree($col1,
['ltx:XMWrap', { depth => $d },
['ltx:XMTok', { role => 'OPEN', height => 0, depth => $d, yoffset => $md, font => $pfont }, '('],
['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]); # Effectively, a strut
$document->appendTree($coln,
['ltx:XMWrap', {},
['ltx:XMTok', { role => 'CLOSE', height => 0, depth => $d, yoffset => $md, font => $pfont }, ')'],
['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]);
return; },
reversion => '#1');
# \pmatrix{body}: a matrix whose presentation is wrapped in stretchy parentheses.
DefMacro('\pmatrix{}',
'\lx@gen@plain@matrix{name=pmatrix,datameaning=matrix,left=\@left(,right=\@right)}{#1}');
#----------------------------------------------------------------------
# Cases: Generalized
# keys are
# name : the name of the command (for reversion)
# meaning: the (presumed) meaning of the construct
# style : \textstyle or \displaystyle
# conditionmode : mode of 2nd column, text or math
# left : TeX code for left of cases
# right : TeX code for right
# Brackets the text-mode condition column of a cases construct:
# the first enters text mode and captures what follows into an ltx:XMText;
# the second leaves text mode again.  Neither appears in the reversion.
DefConstructorI('\lx@cases@condition', undef,
  "<ltx:XMText>#body</ltx:XMText>",
  alias        => '',
  captureBody  => 1,
  beforeDigest => sub { my ($stomach) = @_; $stomach->beginMode('text'); });
DefConstructorI('\lx@cases@end@condition', undef, "",
  alias        => '',
  beforeDigest => sub { my ($stomach) = @_; $stomach->endMode('text'); });
# \lx@gen@cases@bindings{keyvals}
# Opens a group and installs the two-column alignment bindings for a cases construct.
# Keys read here:
#   style         : style token put before each cell (default \textstyle);
#                   a plain string is converted to a control sequence token.
#   conditionmode : when 'text', the 2nd column is bracketed by
#                   \lx@cases@condition ... \lx@cases@end@condition.
# The group is left open; \lx@end@gen@cases closes it.
DefPrimitive('\lx@gen@cases@bindings RequiredKeyVals:lx@GEN', sub {
    my ($stomach, $kv) = @_;
    $stomach->bgroup;
    my $style = $kv->getValue('style') || T_CS('\textstyle');
    $style = T_CS($style) unless ref $style;
    # Is the condition (2nd) column to be set in text mode?
    my $condtext = ToString($kv->getValue('conditionmode')) eq 'text';
    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
        columns => [
          # 1st column: the value
          { before => Tokens($style), after => Tokens(T_CS('\hfil')) },
          # 2nd column: the condition
          { before => Tokens($style,
              ($condtext ? (T_CS('\lx@cases@condition')) : ())),
            after => Tokens(T_CS('\lx@column@trimright'),
              ($condtext ? (T_CS('\lx@cases@end@condition')) : ()),
              T_CS('\hfil')) }]),
      'math');
    Let("\\\\",         '\@alignment@newline');
    Let('\lx@intercol', '\lx@math@intercol');
    DefMacro('\@row@before', '');    # Don't inherit counter stepping from containing environments
    DefMacro('\@row@after',  '');
});
# \lx@gen@plain@cases{keyvals}{body}:
# set up the bindings, construct the cases from the alignment, close the group.
DefMacro('\lx@gen@plain@cases{}{}',
  '\lx@gen@cases@bindings{#1}'
    . '\lx@gen@plain@cases@{#1}{\@start@alignment#2\@finish@alignment}'
    . '\lx@end@gen@cases');
# Closes the group opened by \lx@gen@cases@bindings.
DefPrimitive('\lx@end@gen@cases', sub { $_[0]->egroup; });
# The logical structure for cases extracts the columns of the alignment
# to give alternating value,condition (an empty condition is replaced by "otherwise" !?!?!)
DefConstructor('\lx@gen@plain@cases@ RequiredKeyVals:lx@GEN {}',
  '<ltx:XMWrap>#left#2#right</ltx:XMWrap>',
  # Every key/value pair becomes a property of the whatsit (eg. #left, #right).
  properties     => sub { %{ $_[1]->getKeyVals }; },
  afterConstruct => sub {
    my ($document) = @_;
    if (my $point = $document->getElement->lastChild) {
      # Get the sequence of alternating (case, condition).
      # Expecting ltx:XMArray/ltx:XMRow/ltx:XMCell [should have /ltx:XMArg, but could be empty!!!]
      my @cells = $document->findnodes('ltx:XMArray/ltx:XMRow/ltx:XMCell', $point);
      my @stuff = map { ($_->hasChildNodes ? createXMRefs($document, element_nodes($_))
          : ['ltx:XMText', {}, 'otherwise']) } @cells;
      # Replace the presentation by a dual: the content side applies 'cases'
      # to the collected cells; the presentation side is the original node.
      $document->replaceTree(['ltx:XMDual', {},
          ['ltx:XMApp', {}, ['ltx:XMTok', { meaning => 'cases' }], @stuff],
          $point],
        $point); } },
  # NOTE: the reversion is always \cases{...}; the 'name' key is not consulted here.
  reversion => sub {
    my (undef, undef, $body) = @_;
    (T_CS('\cases'), T_BEGIN, Revert($body), T_END); });
# Note that 2nd column in \cases is in text mode!
DefMacro('\cases{}',
  '\lx@gen@plain@cases{meaning=cases,left=\@left\{,conditionmode=text,style=\textstyle}{#1}');
#----------------------------------------------------------------------
DefPrimitive('\openup Dimension', undef);
# What should this do? (needs to work with alignments..)
# see https://www.tug.org/TUGboat/tb07-1/tb14beet.pdf
# use in arXiv:hep-th/0001208
DefMacro('\displaylines{}', '\halign{\hbox to\displaywidth{$\hfil\displaystyle##\hfil$}\crcr#1\crcr}');
# \eqalign and its numbered variants share one pattern:
# the user-level macro wraps its body as an alignment and hands it to an
# internal \@@<name> constructor, which installs the column template
# before digesting, and reverts to the user-level form.
foreach my $variant ([eqalign => 'rl'], [eqalignno => 'rll'], [leqalignno => 'rll']) {
  my ($name, $template) = @$variant;
  my $inner = '\@@' . $name;
  DefMacro('\\' . $name . '{}',
    $inner . '{\@start@alignment#1\@finish@alignment}');
  DefConstructor($inner . '{}',
    '#1',
    reversion    => '\\' . $name . '{#1}', bounded => 1,
    beforeDigest => sub { alignmentBindings($template, 'math',
        attributes => { vattach => 'baseline' }); }); }
# Page bookkeeping: registers exist so that documents can assign to them.
DefRegister('\pageno' => Number(0));
foreach my $line (qw(\headline \footline)) {
  DefRegister($line => Tokens()); }
DefMacroI('\folio', undef, "1");    # What else?
DefPrimitiveI('\nopagenumbers', undef, undef);
DefMacroI('\advancepageno', undef, '\advance\pageno1\relax');
#======================================================================
# TeX Book, Appendix B. p. 363
foreach my $bottom (qw(\raggedbottom \normalbottom)) {
  DefPrimitive($bottom, undef); }
# A "simple" mark (reverting to only letters, spaces and other characters)
# is stored in the mark attribute of the note;
# any other mark is instead placed at the start of the note's content.
DefConstructor('\footnote{}{}',
  "^<ltx:note role='footnote' ?#mark(mark='#mark')()>?#prenote(#prenote )()#2</ltx:note>",
  mode         => 'text', bounded => 1,
  beforeDigest => sub { reenterTextMode(1); neutralizeFont(); },
  afterDigest  => sub {
    my ($stomach, $whatsit) = @_;
    my $mark = $whatsit->getArg(1);
    # Tokens of the reverted mark that are not letter, space or other.
    my @complex = grep {
      my $catcode = $_->getCatcode;
      !($catcode == CC_LETTER || $catcode == CC_SPACE || $catcode == CC_OTHER) }
      Revert($mark);
    my $property = (@complex ? 'prenote' : 'mark');
    $whatsit->setProperty($property => $mark);
    return; });
# Until we can do the "v" properly:
DefMacro('\vfootnote', '\footnote');
# Plain TeX's footnote-body helpers.
DefMacro('\fo@t',  '\ifcat\bgroup\noexpand\next \let\next\f@@t \else\let\next\f@t\fi \next');
DefMacro('\f@@t',  '\bgroup\aftergroup\@foot\let\next');
DefMacro('\f@t{}', '#1\@foot');
DefMacro('\@foot', '\strut\egroup');
DefPrimitiveI('\footstrut', undef, undef);
DefRegister('\footins' => Number(0));
# The insertion commands are accepted, but do nothing.
foreach my $insert (qw(\topinsert \midinsert \pageinsert \endinsert)) {
  DefPrimitiveI($insert, undef, undef); }
# \topins ?
#======================================================================
# TeX Book, Appendix B. p. 364
# Let's hope nobody is messing with the output routine...
DefPrimitiveI('\footnoterule', undef, undef);
#======================================================================
# End of TeX Book definitions.
#======================================================================
#**********************************************************************
# Stray stuff .... where to ?
#**********************************************************************
# Mostly ignorable, although it could add an attribute to an ancestor
# to record the desired justification.
# Spacing stuff
DefConstructor('\@', '');
# Math spacing.
# Math style.
# Each \<style>style command merges that math style into the current font,
# and leaves an empty box flagged as explicitly setting the mathstyle
# (support for \over, etal)
foreach my $style (qw(display text script scriptscript)) {
  my $csname = '\\' . $style . 'style';
  DefPrimitiveI($csname, undef, sub {
      MergeFont(mathstyle => $style);
      Box(undef, undef, undef, T_CS($csname), explicit_mathstyle => 1); }); }
#======================================================================
# Special Characters.
# Try to give them some sense in math...
# For each special character: a protected macro dispatching on math mode,
# a text-mode primitive producing the character, and a math-mode token
# (with role & meaning where one makes sense). All revert to the escaped character.
foreach my $special (
  [hash       => '#', {}],
  [amp        => '&', { role => 'ADDOP', meaning => 'and' }],
  [percent    => '%', { role => 'POSTFIX', meaning => 'percent' }],
  [dollar     => '$', { role => 'OPERATOR', meaning => 'currency-dollar' }],
  [underscore => '_', {}]) {
  my ($name, $char, $math) = @$special;
  my $cs = '\\' . $char;
  DefMacroI($cs, undef,
    '\ifmmode\lx@math@' . $name . '\else\lx@text@' . $name . '\fi', protected => 1);
  DefPrimitiveI('\lx@text@' . $name, undef, $char, alias => $cs);
  DefMathI('\lx@math@' . $name, undef, $char, %$math, alias => $cs); }
# Discretionary times; just treat as invisible ?
DefMathI('\*', undef, "\x{2062}",    # INVISIBLE TIMES (or MULTIPLICATION SIGN = 00D7)
  role => 'MULOP', name => '', meaning => 'times');
# These 3 should have some `name' assigned ... but what???
# Is XMWrap the right thing to wrap with (instead of XMArg)?
# We can't really assume that the stuff inside is sensible math.
# NOTE that \mathord and \mathbin aren't really right here.
# We need a finer granularity than TeX does: an ORD could be several things,
# a BIN could be a MULOP or ADDOP.
# AND, rarely, they're empty.... Is it wrong to drop them?
# Each wraps a non-empty argument in an ltx:XMWrap carrying the corresponding role;
# an empty argument produces nothing.
DefConstructor('\mathord{}',
  q{?#1(<ltx:XMWrap role='ID' >#1</ltx:XMWrap>)()},
  bounded => 1);
DefConstructor('\mathop{}',
  q{?#1(<ltx:XMWrap role='BIGOP' scriptpos='#scriptpos'>#1</ltx:XMWrap>)()},
  bounded    => 1,
  properties => { scriptpos => \&doScriptpos });
DefConstructor('\mathbin{}',
  q{?#1(<ltx:XMWrap role='BINOP'>#1</ltx:XMWrap>)()},
  bounded => 1);
DefConstructor('\mathrel{}',
  q{?#1(<ltx:XMWrap role='RELOP'>#1</ltx:XMWrap>)()},
  bounded => 1);
DefConstructor('\mathopen{}',
  q{?#1(<ltx:XMWrap role='OPEN' >#1</ltx:XMWrap>)()},
  bounded => 1);
DefConstructor('\mathclose{}',
  q{?#1(<ltx:XMWrap role='CLOSE'>#1</ltx:XMWrap>)()},
  bounded => 1);
DefConstructor('\mathpunct{}',
  q{?#1(<ltx:XMWrap role='PUNCT'>#1</ltx:XMWrap>)()},
  bounded => 1);
DefConstructor('\mathinner{}',
  q{?#1(<ltx:XMWrap role='ATOM'>#1</ltx:XMWrap>)()},
  bounded => 1);
# Collapse an operator-like XMWrap (presumably from \mathop, \mathbin, etc)
# into a single XMTok holding the concatenated text, provided that it has
# more than one child, all children are XMTok, and none of those tokens
# has a role outside of (roughly) the same operator-like set.
DefMathRewrite(
  xpath => join('',
    'descendant-or-self::ltx:XMWrap[',
    # Only XMWrap's from the above class of operators
    '(@role="OP" or @role="BIGOP" or @role="RELOP" ',
    'or @role="ADDOP" or @role="MULOP" or @role="BINOP" ',
    'or @role="OPEN" or @role="CLOSE")',
    ' and count(child::*) > 1 ',
    # with only XMTok as children with the roles in (roughly) the same set
    ' and not(child::*[local-name() != "XMTok"])',
    ' and not(ltx:XMTok[',
    '@role !="OP" and @role!="BIGOP" and @role!="RELOP" and @role!="METARELOP" ',
    'and @role!="ADDOP" and @role!="MULOP" and @role!="BINOP" ',
    'and @role!="OPEN" and @role!="CLOSE"',
    '])]'),
  replace => sub {
    my ($document, $wrap) = @_;
    # Copy of the wrapper without its children, refilled with the text,
    # and renamed to a token before being added at the current node.
    my $token = $wrap->cloneNode(0);
    $token->appendText($wrap->textContent);
    $token->setName('ltx:XMTok');
    $document->getNode->appendChild($token);
  });
DefMacro('\hiderel{}', "#1");    # Just ignore, for now...
DefMathI('\to', undef, "\x{2192}",    # RIGHTWARDS ARROW??? a bit more explicitly relation-like?
  role => 'ARROW');
# TeX's ligatures handled by rewrite regexps.
# Note: applied in reverse order of definition (latest defined applied first!)
# Note also, these are only applied in text content, not in attributes!
# Explicit dashes: boxes holding the Unicode dash, reverting to their own command.
foreach my $dash ([endash => "\x{2013}"], [emdash => "\x{2014}"]) {
  my ($name, $char) = @$dash;
  my $csname = '\@@' . $name;
  DefPrimitive($csname, sub { Box($char, undef, undef, T_CS($csname)); }); }
# Font test for ligatures: true unless the font's family is 'typewriter'.
sub nonTypewriter {
  my ($font) = @_;
  my $family = $font->getFamily;
  return ($family ne 'typewriter'); }
# Font test for ligatures: true when the font's family is not 'typewriter'
# AND its encoding (taken to be OT1 when unset) is OT1 or T1.
sub nonTypewriterT1 {
  my ($font) = @_;
  my $not_typewriter = ($font->getFamily ne 'typewriter');
  return $not_typewriter unless $not_typewriter;
  my $encoding = $font->getEncoding || 'OT1';
  return $encoding =~ /^(OT1|T1)$/; }
# Text ligatures. The definition order below is significant: ligatures are applied
# latest-defined first, so '---' (defined after '--') gets the first chance to match.
# The dash ligatures apply in any non-typewriter font; the quote and inverted
# punctuation ligatures additionally require an OT1 or T1 encoding (see nonTypewriterT1).
# EN DASH (NOTE: With digits before & aft => \N{FIGURE DASH})
DefLigature(qr{--}, "\x{2013}", fontTest => \&nonTypewriter); # EN dash
DefLigature(qr{---}, "\x{2014}", fontTest => \&nonTypewriter); # EM dash
# Ligatures for doubled single left & right quotes to convert to double quotes
# [should ligatures be part of a font, in the first place? (it is in TeX!)
DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}", fontTest => \&nonTypewriterT1); # double left quote
DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}", fontTest => \&nonTypewriterT1); # double right quote
DefLigature(qr{\?\x{2018}}, UTF(0xBF), fontTest => \&nonTypewriterT1); # ? backquote
DefLigature(qr{!\x{2018}}, UTF(0xA1), fontTest => \&nonTypewriterT1); # ! backquote
# These ligatures are also handled by TeX.
# However, it appears that decent modern fonts in modern browsers handle these at that level.
# So it's likely not worth doing it at the conversion level, possibly adversely affecting search.
# DefLigature(qr{ff}, "\x{FB00}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fi}, "\x{FB01}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fl}, "\x{FB02}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffi}, "\x{FB03}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffl}, "\x{FB04}", fontTest => \&nonTypewriterT1);
# The TeX logo: styled text with a lowered, small-caps "e"; its size is fixed.
DefConstructor('\TeX',
  join('',
    "<ltx:text class='ltx_TeX_logo' cssstyle='letter-spacing:-0.2em; margin-right:0.2em'>",
    "T",
    "<ltx:text cssstyle='font-variant:small-caps;font-size:120%;' yoffset='-0.2ex'>e</ltx:text>",
    "X",
    "</ltx:text>"),
  sizer => sub {
    return (Dimension('1.9em'), Dimension('1.6ex'), Dimension('0.5ex')); });
DefPrimitiveI('\i', undef, "\x{0131}");    # LATIN SMALL LETTER DOTLESS I
DefPrimitiveI('\j', undef, "\x{0237}");
# \buildrel <top> \over {<relation>}: a relation with <top> set over it,
# represented as a superscript-like application (relation first, then the top).
DefConstructor('\buildrel Until:\over {}',
  join('',
    "<ltx:XMApp role='RELOP'>",
    "<ltx:XMTok role='SUPERSCRIPTOP' scriptpos='#scriptpos'/>",
    "<ltx:XMArg>#2</ltx:XMArg>",
    "<ltx:XMArg>#1</ltx:XMArg>",
    "</ltx:XMApp>"),
  properties => {
    scriptpos => sub {
      my ($stomach) = @_;
      return "mid" . $stomach->getScriptLevel; } });
#**********************************************************************
# LaTeX Hook
#**********************************************************************
# This is used for plain TeX, but needs to be undone for LaTeX (or...)!
RelaxNGSchema("LaTeXML");
Tag('ltx:section',  autoClose => 1);
Tag('ltx:document', autoClose => 1, autoOpen => 1);
# When the document element opens, record a non-white background color
# (taken from the font of the root node) as its backgroundcolor attribute.
Tag('ltx:document', afterOpen => sub {
    my ($document, $root) = @_;
    my $font       = $document->getNodeFont($root);
    my $background = $font && $font->getBackground;
    if ($background && ($background ne 'white')) {
      $document->setAttribute($root, backgroundcolor => $background); } });
# No, \documentclass isn't really a primitive -- It's not even TeX!
# But we define a number of stubs here that will automatically load
# the LaTeX pool (or AmSTeX.pool) (which will presumably redefine them), and then
# stuff the token back to be reexecuted.
# Each group below is a pool file followed by the commands that trigger loading it.
foreach my $group (
  ['LaTeX.pool.ltxml',
    qw(documentclass
      newcommand renewcommand newenvironment renewenvironment
      NeedsTeXFormat ProvidesFile
      ProvidesPackage RequirePackage PassOptionsToPackage
      makeatletter makeatother
      typeout begin listfiles nofiles)],
  # DG: note that these auto-loads are not perfect --
  # if they are triggered with a raw .sty file for example,
  # the expl3 support will "expire" at the end of the current scope,
  # and e.g. \ExplSyntaxOn will once again be undefined.
  ['expl3.pool.ltxml',
    qw(ExplSyntaxOn
      ProvidesExplClass ProvidesExplPackage)],
  # Seemingly good candidates to trigger AmSTeX ??
  ['AmSTeX.pool.ltxml',
    qw(BlackBoxes NoBlackBoxes
      TagsAsMath TagsAsText TagsOnLeft TagsOnRight CenteredTagsOnSplits TopOrBottomTagsOnSplits
      LimitsOnInts NoLimitsOnInts LimitsOnNames NoLimitsOnNames LimitsOnSums NoLimitsOnSums
      loadbold loadeufb loadeufm loadeurb loadeurm loadeusb
      loadeusm loadmathfont loadmsam loadmsbm)]) {
  my ($pool, @triggers) = @$group;
  foreach my $trigger (@triggers) {
    DefAutoload($trigger, $pool); } }
# Darn; we need to be even more clever, since we need to simulate an amstex command, as well.
# For example \documentstyle[...]{amsppt} must switch to AMSTeX mode, _NOT_ LaTeX mode!!!!
# After loading the appropriate pool, the very same command (with its options
# and class) is returned, so that it gets executed again under the new definitions.
DefMacro('\documentstyle OptionalSemiverbatim SkipSpaces Semiverbatim', sub {
    my ($gullet, $options, $class) = @_;
    my $pool = (ToString($class) =~ /^amsppt$/) ? "AmSTeX" : "LaTeX";
    LoadPool($pool);
    my @bracketed = $options ? (T_OTHER('['), $options->unlist, T_OTHER(']')) : ();
    return (T_CS('\\documentstyle'), @bracketed, T_BEGIN, $class->unlist, T_END); });
# Technically should be in LaTeX.pool, but we try to maintain the bookkeeping from the very start,
# in order to avoid partially defined behavior when --preload directives are mixed with \usepackage{} loads
# \@pushfilename: globally prepend {name}{extension}{catcode of @} to \@currnamestack.
DefMacro('\@pushfilename', '\xdef\@currnamestack{{\@currname}{\@currext}{\the\catcode`\@}\@currnamestack}');
# \@popfilename: expand the stack, and let \@p@pfilename take its first entry apart.
DefMacro('\@popfilename', '\expandafter\@p@pfilename\@currnamestack\@nil');
# \@p@pfilename {name}{ext}{catcode} rest \@nil:
# globally restore name and extension, reset the catcode of @, and keep the rest as the new stack.
DefMacro('\@p@pfilename {}{}{} Until:\@nil',
'\gdef\@currname{#1}%
\gdef\@currext{#2}%
\catcode`\@#3\relax
\gdef\@currnamestack{#4}');
# Initially the stack is empty, and there is no current name or extension.
DefMacroI(T_CS('\@currnamestack'), undef, Tokens());
Let('\@currname', '\@empty');
Let('\@currext', '\@empty');
#**********************************************************************
# LaTeXML Specific.
# Support for Declarations & Presentation/Semantic Duality
#**********************************************************************
#======================================================================
# Normally definitions disappear; the macros are expanded or have their expected effect.
# But in a few cases (eg tabular column definitions, or LaTeX \Declarexxxx)
# they will need declarations in the (La)TeX preamble to allow (La)TeX to process snippets
# (eg. math) in order to create images.
# Returning a call to this utility from Primitives will add a preamble Processing Instruction
# AddToPreamble($cs, @args)
# Digests \lx@add@Preamble@PI applied to the invocation of $cs on @args,
# and returns the result. $cs may be a token, or the name of a control sequence.
sub AddToPreamble {
  my ($cs, @args) = @_;
  my $token = (ref $cs ? $cs : T_CS($cs));
  return Digest(Invocation(T_CS('\lx@add@Preamble@PI'), Invocation($token, @args))); }
# Records its (undigested) argument in a latexml processing instruction.
DefConstructor('\lx@add@Preamble@PI Undigested',
  "<?latexml preamble='#1'?>");
#======================================================================
# Support for constructing mathematical expressions
# Common XMath pattern for assigning attributes from Whatsit properties.
# Attribute list for interpolation into XMath constructor templates:
# each attribute takes its value from the Whatsit property of the same name.
# (Note that the string starts with a space.)
our $XMath_attributes = join('',
  map { sprintf(" %s='#%s'", $_, $_) }
    qw(role name meaning omcd
    width height xoffset yoffset
    lpadding rpadding));
# afterDigest helper: copy all pairs from the KeyVals in the Whatsit's
# first argument (if there is one) onto the Whatsit as properties.
# Returns nothing.
sub XMath_copy_keyvals {
  my ($stomach, $whatsit) = @_;
  if (my $keyvals = $whatsit->getArg(1)) {
    $whatsit->setProperties($keyvals->getPairs); }
  return; }
# \lx@apply[keyvals]{operator}{arguments}
# Builds an ltx:XMApp, the application of a function/operator to arguments;
# the first piece of (TeX) argument is expected to be the operator.
# Usually used on content side, but at least the arguments should be properly encapsulated:
# They should build individual subtrees; use ltx::XMArg, ltx:XMWrap ... if needed
DefConstructor('\lx@apply OptionalKeyVals:XMath {}{}',
  '<ltx:XMApp ' . $XMath_attributes . '>#2#3</ltx:XMApp>',
  reversion   => '#2#3',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    return XMath_copy_keyvals($stomach, $whatsit); });
# \lx@symbol[keyvals]{content}
# Builds an ltx:XMTok, a mathematical symbol, with the given attributes;
# the argument should create text to be the content of the token.
# The token takes over the font of its content.
DefConstructor('\lx@symbol OptionalKeyVals:XMath {}',
  '<ltx:XMTok ' . $XMath_attributes . '>#2</ltx:XMTok>',
  reversion   => '#2',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    $whatsit->setFont($whatsit->getArg(2)->getFont);
    return XMath_copy_keyvals($stomach, $whatsit); });
# \lx@wrap[keyvals]{content}
# Wraps the contents in an ltx:XMWrap, to stand as a single subtree & providing attributes
# The ltx:XMWrap may be collapsed, later, by parsing
DefConstructor('\lx@wrap OptionalKeyVals:XMath {}',
  '<ltx:XMWrap ' . $XMath_attributes . '>#2</ltx:XMWrap>',
  reversion   => '#2',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    return XMath_copy_keyvals($stomach, $whatsit); });
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
# \lx@superscript[keyvals]{base}{script}   (and likewise \lx@subscript)
# These two accept the keys operator_meaning & operator_omcd to give a meaning
# to the sub/superscript operator.
# NOTE (BUG): We SHOULD nest paired sub/superscripts, but avoid conflicting double scripts.
# To do that we need to sniff at the base, whether it already contains scripts.
# However, IsScript isn't quite sufficient if the scripts are hidden within Whatsits, duals, etc.
# Currently, LaTeXML manages to deal with the double scripts anyway;
# the reversion ALWAYS wraps the base (which will render non-optimally in images but avoids Errors).
DefConstructor('\lx@superscript OptionalKeyVals:XMath {} InScriptStyle',
  "<ltx:XMApp $XMath_attributes>"
    . "<ltx:XMTok role='SUPERSCRIPTOP' meaning='#operator_meaning' omcd='#operator_omcd' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#2</ltx:XMArg>"
    . "<ltx:XMArg rule='Superscript'>#3</ltx:XMArg>"
    . "</ltx:XMApp>",
  # Records scriptpos ("post" followed by the script level, plus one when the base
  # is itself reported as a SUPERSCRIPT by IsScript) and that 'bump' flag.
  properties => sub {
    my ($stomach, $kv, $base, $script) = @_;
    my $base_script = IsScript($base);
    my $bump        = 0;
    if ($base_script && ($base_script->[1] eq 'SUPERSCRIPT')) {
      $bump = 1; }
    return (scriptpos => "post" . ($stomach->getScriptLevel + $bump),
      bump => $bump); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    return XMath_copy_keyvals($stomach, $whatsit); },
  # Reverts to {base}^script; an empty script reverts to just the base.
  reversion => sub {
    my ($whatsit, $kv, $base, $sup) = @_;
    my $wrap_base = $whatsit->getProperty('bump');
    $wrap_base = 1;    # For now: ALWAYS {} wrap base in the reversion!
    return Revert($base) if IsEmpty($sup);
    if ($wrap_base) {
      return (T_BEGIN, Revert($base), T_END, T_SUPER, revertScript($sup)); }
    else {
      return (Revert($base), T_SUPER, revertScript($sup)); } },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(3), $whatsit->getArg(2), undef, 'SUPERSCRIPT', 'post'); });
# \lx@subscript[keyvals]{base}{script}
# Builds an ltx:XMApp whose operator is an ltx:XMTok with role SUBSCRIPTOP;
# the keys operator_meaning & operator_omcd give a meaning to that operator.
DefConstructor('\lx@subscript OptionalKeyVals:XMath {} InScriptStyle',
  "<ltx:XMApp $XMath_attributes>"
    . "<ltx:XMTok role='SUBSCRIPTOP' meaning='#operator_meaning' omcd='#operator_omcd' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#2</ltx:XMArg>"
    . "<ltx:XMArg rule='Subscript'>#3</ltx:XMArg>"
    . "</ltx:XMApp>",
  # Records scriptpos ("post" followed by the script level, plus one when the base
  # is itself reported as a SUBSCRIPT by IsScript) and that 'bump' flag.
  properties => sub {
    my ($stomach, $kv, $base, $script) = @_;
    my $base_script = IsScript($base);
    my $bump        = 0;
    if ($base_script && ($base_script->[1] eq 'SUBSCRIPT')) {
      $bump = 1; }
    return (scriptpos => "post" . ($stomach->getScriptLevel + $bump),
      bump => $bump); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    return XMath_copy_keyvals($stomach, $whatsit); },
  # Reverts to {base}_script; an empty script reverts to just the base.
  reversion => sub {
    my ($whatsit, $kv, $base, $sub) = @_;
    my $wrap_base = $whatsit->getProperty('bump');
    $wrap_base = 1;    # For now: ALWAYS {} wrap base in the reversion!
    return Revert($base) if IsEmpty($sub);
    if ($wrap_base) {
      return (T_BEGIN, Revert($base), T_END, T_SUB, revertScript($sub)); }
    else {
      return (Revert($base), T_SUB, revertScript($sub)); } },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(3), $whatsit->getArg(2), undef, 'SUBSCRIPT', 'post'); });
# Build the Tokens that invoke \lx@subscript on $base with $script.
# $kv is handed to I_keyvals and its result is placed right after the control sequence.
# (An older remark here wondered about ignoring $kv; the code does pass it along.)
sub I_subscript {
  my ($kv, $base, $script) = @_;
  my @invocation = (T_CS('\lx@subscript'), I_keyvals($kv),
    T_BEGIN, $base,   T_END,
    T_BEGIN, $script, T_END);
  return Tokens(@invocation); }
# Build the Tokens that invoke \lx@superscript on $base with $script.
# $kv is handed to I_keyvals and its result is placed right after the control sequence.
sub I_superscript {
  my ($kv, $base, $script) = @_;
  my @invocation = (T_CS('\lx@superscript'), I_keyvals($kv),
    T_BEGIN, $base,   T_END,
    T_BEGIN, $script, T_END);
  return Tokens(@invocation); }
# Shorthands for \lx@superscript with a preset operator_meaning key.
# \lx@power{base}{exponent}: superscript meaning power.
DefMacro('\lx@power{}{}', '\lx@superscript[operator_meaning=power]{#1}{#2}');
# \lx@functionalpower{function}{n}: superscript meaning functional (or applicative) power,
# i.e. iterated function/operator application.
DefMacro('\lx@functionalpower{}{}', '\lx@superscript[operator_meaning=functional-power]{#1}{#2}');
# Invisible operators, to be used on the presentation side.
# Each has an empty reversion and an empty name; the content is the corresponding
# Unicode invisible operator character:
#   U+2061 FUNCTION APPLICATION, U+2062 INVISIBLE TIMES,
#   U+2063 INVISIBLE SEPARATOR,  U+2064 INVISIBLE PLUS.
DefMathI('\lx@ApplyFunction', undef, "\x{2061}", reversion => '', name => '', role => 'APPLYOP');
DefMathI('\lx@InvisibleTimes', undef, "\x{2062}", reversion => '', name => '', meaning => 'times', role => 'MULOP');
DefMathI('\lx@InvisibleComma', undef, "\x{2063}", reversion => '', name => '', role => 'PUNCT');
DefMathI('\lx@InvisiblePlus', undef, "\x{2064}", reversion => '', name => '', meaning => 'plus', role => 'ADDOP');
# \lx@kludged{content}
# Conditional on the isMath property: when set, the content is wrapped in an
# ltx:XMWrap marked rule='kludge'; otherwise the content is emitted as-is.
# Reverts to just the content.
DefConstructor('\lx@kludged{}',
"?#isMath(<ltx:XMWrap rule='kludge'>#1</ltx:XMWrap>)(#1)",
reversion => '#1');
# \lx@padded[lpadding]{rpadding}{content}
# Constructs the content unchanged, then records the given paddings as
# lpadding/rpadding attributes on the last child element of the current node.
# When that element is an ltx:XMDual, the attributes go on its second child node instead.
DefConstructor('\lx@padded[MuDimension]{MuDimension}{}',
  '#3',
  reversion      => '#3',
  afterConstruct => sub {
    my ($document, $whatsit) = @_;
    my $target = $document->getLastChildElement($document->getNode);
    if ($document->getNodeQName($target) eq 'ltx:XMDual') {
      $target = ($target->childNodes)[1]; }
    # Argument 1 is the (optional) left padding, argument 2 the right padding;
    # an absent or false value leaves the attribute unset.
    foreach my $side ([lpadding => 1], [rpadding => 2]) {
      my ($attribute, $argno) = @$side;
      if (my $amount = $whatsit->getArg($argno)) {
        $document->setAttribute($target, $attribute => $amount); } } });
#======================================================================
# Building XMDuals for Mathematical Parallel markup
# Used when the content and presentation forms have different structure.
# Declare the reversion-related keys of the XMath keyval set;
# all three use the UndigestedDefKey parameter type.
DefKeyVal('XMath', 'reversion', 'UndigestedDefKey');
DefKeyVal('XMath', 'content_reversion', 'UndigestedDefKey');
DefKeyVal('XMath', 'presentation_reversion', 'UndigestedDefKey');
DefConstructor('\lx@dual OptionalKeyVals:XMath {}{}',
"<ltx:XMDual $XMath_attributes>#2<ltx:XMWrap>#3</ltx:XMWrap></ltx:XMDual>",
beforeDigest => sub {
PushValue(PENDING_DUAL_XMARGS => {});
return; },
afterDigest => sub {
my ($stomach, $whatsit) = @_;
my $kv = $whatsit->getArg(1);
my $xmargs = PopValue('PENDING_DUAL_XMARGS'); # Really SHOULD be a hash
$whatsit->setProperties(%$xmargs) if $xmargs; # Hopefully no name class with XM<digits>
$whatsit->setProperties($kv->getPairs) if $kv;
my %props = $whatsit->getProperties;
my $cr = $props{content_reversion};
my $pr = $props{presentation_reversion};
my $r = ToString($props{revert_as}) || 'content'; # ?????
if (!defined $props{reversion}) {
$whatsit->setProperty(reversion => sub {
my ($self, $kvs, $c, $p) = @_;
($r eq 'content' ? $cr || Revert($c)
: ($r eq 'presentation' ? $pr || Revert($p)
: ($r eq 'dual'
? Tokens(T_CS('\lx@dual'), I_keyvals($kvs),
T_BEGIN, ($cr || Revert($c)), T_END,
T_BEGIN, ($pr || Revert($p)), T_END)
: (($LaTeXML::DUAL_BRANCH || '') eq 'presentation' # Context dependent reversion
? $pr || Revert($p)
: $cr || Revert($c))))); }); }
return; },
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
my $key = $n->getAttribute('_xmkey');
if (!$ids{$key}) {
GenerateID($document, $n, undef, ''); # Generate id if none already.
$ids{$key} = $n->getAttribute('xml:id'); } } }
foreach my $r (@refs) { # Now fill in the references
$document->setAttribute($r, idref => $ids{ $r->getAttribute('_xmkey') });
$r->removeAttribute('_xmkey'); }
});
# Construction aids
# Build an XMDual (via \lx@dual) given the content & presentation forms.
# These forms are provided as Tokens (or as strings, which are tokenized with
# TokenizeInternal), invoking the appropriate constructor macros,
# and referring to any arguments using #1, #2.... (see T_XMArg for syntactic sugar)
# The arguments (if any) are given separately; within the content & presentation
# they are replaced by \lx@xmref and \lx@xmarg, appropriately,
# so that they will be linked/shared in the XML tree.
# The keyvals argument is a hash (reference) containing any properties of the construct,
# along with reversion, content_reversion & presentation_reversion, which are
# substituted for arguments as well.
#   $keyvals      : hash ref of options, or a false value for none.
#   $content      : content form (required).
#   $presentation : presentation form (required).
#   @args         : the arguments shared between both forms.
# Returns the Invocation of \lx@dual.
# Options are emitted in sorted key order, so the result is reproducible and does
# not depend on the state of the hash's each() iterator; undefined values are skipped.
sub I_dual {
  my ($keyvals, $content, $presentation, @args) = @_;
  $content      = TokenizeInternal($content)      if $content      && !ref $content;
  $presentation = TokenizeInternal($presentation) if $presentation && !ref $presentation;
  my (@revargs, @pargs, @cargs);
  foreach my $arg (@args) {
    my $id = LaTeXML::Package::getXMArgID();
    push(@revargs, Tokens(I_arg(ToString($id))));
    push(@pargs,   Invocation(T_CS('\lx@xmarg'), $id, $arg));
    push(@cargs,   Invocation(T_CS('\lx@xmref'), $id)); }
  my $optional = undef;
  if ($keyvals) {
    my @options = ();
    # keys() resets the hash iterator (each() would resume wherever a caller left it);
    # sorting makes the generated key=value list deterministic.
    foreach my $key (sort keys %$keyvals) {
      my $value = $$keyvals{$key};
      next unless defined $value;    # nothing sensible to emit for an undefined value
      # Tokenize every plain string, including '' and '0'.
      $value = TokenizeInternal($value) unless ref $value;
      if ($key =~ /^(?:presentation_|content_|)reversion$/) {
        $value = $value->substituteParameters(@revargs); }
      push(@options, T_OTHER(',')) if @options;
      push(@options, T_OTHER($key), T_OTHER('='), T_BEGIN, $value, T_END); }
    $optional = Tokens(@options); }
  return
    Invocation(T_CS('\lx@dual'), $optional,
    $content->substituteParameters(@cargs),
    I_wrap({}, $presentation->substituteParameters(@pargs))); }
# A little helper to shorten things up a bit; simply generates #1 (or whatever):
# unconditionally creates an argument token (catcode CC_ARG) whose text is the
# stringified first argument.
sub I_arg {
  my ($n) = @_;
  my $token = ["$n", CC_ARG];
  return bless $token, 'LaTeXML::Core::Token'; }
# Build the Tokens for \lx@xmarg{id}{arg}.
# A non-reference $id is turned into a single T_OTHER token; a reference is used as given.
sub I_xmarg {
  my ($id, $arg) = @_;
  my $id_tokens = (ref $id ? $id : T_OTHER($id));
  return Tokens(T_CS('\lx@xmarg'),
    T_BEGIN, $id_tokens, T_END,
    T_BEGIN, $arg,       T_END); }
# Build the Tokens for \lx@xmref{id}.
# A non-reference $id is turned into a single T_OTHER token; a reference is used as given.
sub I_xmref {
  my ($id) = @_;
  my $id_tokens = (ref $id ? $id : T_OTHER($id));
  return Tokens(T_CS('\lx@xmref'), T_BEGIN, $id_tokens, T_END); }
#----------------------------------------------------------------------
# Legacy names for the XMath construction macros.
# This group should be renamed to \lx@somethings and deprecated
# NOTE: work through this systematically!
# The DefMacro entries expand to the \lx@... constructors with preset keys;
# the Let entries are plain aliases of the \lx@... control sequences.
DefMacro('\FCN{}', '\lx@wrap[role=FUNCTION]{#1}');
DefMacro('\ROLE{}{}', '\lx@wrap[role={#1}]{#2}');
DefMacro('\@SYMBOL{}', '\lx@wrap[role=ID]{#1}');
# \@CSYMBOL and \@TOKEN produce a symbol with empty content, carrying only meaning/name.
DefMacro('\@CSYMBOL{}', '\lx@symbol[meaning={#1}]{}');
# \@APPLY passes its argument as the operator and an empty arguments group.
DefMacro('\@APPLY{}', '\lx@apply[]{#1}{}'); # Sorta broken?
# The \ifx.#2. tests below yield just #1 when the second argument is empty.
DefMacro('\@MAYBEAPPLY{}{}', '\ifx.#2.#1\else\lx@apply{#1}{#2}\fi');
DefMacro('\@WRAP{}', '\lx@wrap[]{#1}');
DefMacro('\@TOKEN{}', '\lx@symbol[name={#1}]{}');
DefMacro('\@SUPERSCRIPT{}{}', '\ifx.#2.#1\else\lx@superscript[]{#1}{#2}\fi');
DefMacro('\@SUBSCRIPT{}{}', '\ifx.#2.#1\else\lx@subscript[]{#1}{#2}\fi');
Let('\@PADDED', '\lx@padded');
Let('\DUAL', '\lx@dual');
Let('\@XMArg', '\lx@xmarg');
Let('\@XMRef', '\lx@xmref');
Let('\@APPLYFUNCTION', '\lx@ApplyFunction');
Let('\@INVISIBLETIMES', '\lx@InvisibleTimes');
Let('\@INVISIBLECOMMA', '\lx@InvisibleComma');
Let('\@INVISIBLEPLUS', '\lx@InvisiblePlus');
# End of stuff to be deprecated.
#----------------------------------------------------------------------
#======================================================================
# \lx@tweaked{keyvals}{content}
# Dispatches on the current mode: \lx@math@tweaked in math mode, \lx@text@tweaked otherwise.
# We OUGHT to be able to do this using \llap,\rlap,\hss...
DefMacro('\lx@tweaked{}{}', '\ifmmode\lx@math@tweaked{#1}{#2}\else\lx@text@tweaked{#1}{#2}\fi');
# \lx@math@tweaked{keyvals}{content}  (math-mode branch of \lx@tweaked)
# Wraps the content in an ltx:XMWrap carrying the XMath attributes taken from the keyvals;
# the whatsit takes on the font of its content.  Reverts to just the content.
DefConstructor('\lx@math@tweaked RequiredKeyVals {}',
  "<ltx:XMWrap $XMath_attributes>#2</ltx:XMWrap>",
  reversion   => '#2',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my (undef, $body) = $whatsit->getArgs;
    XMath_copy_keyvals($stomach, $whatsit);
    $whatsit->setFont($body->getFont);
    return; });
# \lx@text@tweaked{keyvals}{content}  (text-mode branch of \lx@tweaked)
# Wraps the content in an ltx:text whose attributes come from the keyvals;
# the key/value pairs are also copied onto the whatsit as properties.
DefConstructor('\lx@text@tweaked RequiredKeyVals {}',
  "<ltx:text _noautoclose='1' %&GetKeyVals(#1)>#2</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my ($keyvals) = $whatsit->getArgs;
    return $whatsit->setProperties($keyvals->getPairs); });
# \lx@nounicode{cs}: marks a symbol that has no Unicode equivalent.
# Dispatches on the current mode to \lx@math@nounicode or \lx@text@nounicode.
DefMacro('\lx@nounicode {}', '\ifmmode\lx@math@nounicode#1\else\lx@text@nounicode#1\fi');
# \lx@framed[style]{content}
# Wraps the content in an ltx:text whose framed attribute is the stringified
# optional argument, defaulting to 'rectangle' when it is absent (or false).
DefConstructor('\lx@framed[]{}',
  "<ltx:text framed='#frame' _noautoclose='1'>#2</ltx:text>",
  properties => {
    frame => sub {
      my ($stomach, $style) = @_;
      return ToString($style || 'rectangle'); } });
# \lx@hflipped{content}: wraps the content in an ltx:text with class ltx_hflipped.
DefConstructor('\lx@hflipped{}',
"<ltx:text class='ltx_hflipped' _noautoclose='1'>#1</ltx:text>");
# Warn that the symbol $cs has no Unicode equivalent, but only once per symbol:
# each reported symbol is recorded in the 'missing_unicode' mapping,
# and an already-recorded symbol is skipped silently.
# Always returns nothing.
sub reportNoUnicode {
  my ($cs) = @_;
  $cs = ToString($cs);
  return if LookupMapping('missing_unicode' => $cs);
  Warn('expected', 'unicode', $cs,
    "There's no Unicode equivalent for the symbol '$cs'");
  AssignMapping('missing_unicode' => $cs => 1);
  return; }
# \lx@math@nounicode <token>  (math-mode branch of \lx@nounicode)
# Slightly contrived so that this can be used within a DefMath
# and still declare & get the semantic properties.
# Reports the missing Unicode equivalent, then yields a Box whose text is the
# stringified token, with the token itself as 4th argument and class ltx_nounicode.
DefPrimitive('\lx@math@nounicode DefToken', sub {
    my ($stomach, $cs) = @_;
    reportNoUnicode($cs);
    return Box(ToString($cs), undef, undef, $cs, class => 'ltx_nounicode'); });
# \lx@text@nounicode <token>  (text-mode branch of \lx@nounicode)
# Wraps the token in an ltx:text with class ltx_nounicode and reports
# (once per symbol) that it has no Unicode equivalent.
# The internal attribute is spelled _noautoclose='1', as on the other ltx:text
# constructors in this file (it was previously written _no_autoclose='true').
DefConstructor('\lx@text@nounicode DefToken',
  "<ltx:text _noautoclose='1' class='ltx_nounicode'>#1</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # Arguments are numbered from 1 elsewhere in this file; this used getArg(0).
    # NOTE(review): getArg(0) presumably only worked because there is a single argument -- confirm.
    reportNoUnicode(ToString($whatsit->getArg(1))); });
# \@ERROR{type}{content}: emits an ltx:ERROR element with class ltx_<type> around the content.
DefConstructor('\@ERROR{}{}', "<ltx:ERROR class='ltx_#1'>#2</ltx:ERROR>");
#**********************************************************************
# Wildcard placeholders: each constructs a _WildCard_ element.
# \WildCard takes an optional argument that becomes the element's content;
# \WildCardA, \WildCardB and \WildCardC construct an empty one.
DefConstructor('\WildCard[]', "<_WildCard_>#1</_WildCard_>");
DefConstructorI('\WildCardA', undef, "<_WildCard_/>");
DefConstructorI('\WildCardB', undef, "<_WildCard_/>");
DefConstructorI('\WildCardC', undef, "<_WildCard_/>");
#**********************************************************************
# After all other rewrites have acted, a little cleanup
DefRewrite(xpath => 'descendant-or-self::ltx:XMWrap[count(child::*)=1]',
replace => sub { my ($document, $wrap) = @_;
if (my $node = $document->getFirstChildElement($wrap)) {
# Copy attributes but NOT internal ones,
# NOR xml:id, else we get clashes
foreach my $attribute ($wrap->attributes) {
if ($attribute->nodeType == XML_ATTRIBUTE_NODE) {
my $attr = $document->getNodeQName($attribute);
$document->setAttribute($node, $attr => $attribute->getValue)
unless ($attr eq 'xml:id') || $attr =~ /^_/;
if ($attr =~ /^_/) { }
elsif ($attr eq 'xml:id') {
my $id = $attribute->getValue;
if (my $previd = $node->getAttribute('xml:id')) { # Keep original id
# but swap any references to the one on the wrapper!
foreach my $ref ($document->findnodes("//*[\@idref='$id']")) {
$ref->setAttribute(idref => $previd); }
$wrap->removeAttribute('xml"id');
$document->unRecordID($id); }
else {
$wrap->removeAttribute('xml:id');
$document->unRecordID($id);
$document->setAttribute($node, 'xml:id' => $id); } }
lib/LaTeXML/Package/TeX.pool.ltxml view on Meta::CPAN
my ($document, $node, $align, $class) = @_;
my $model = $document->getModel;
my $qname = $model->getNodeQName($node);
if ($qname eq 'ltx:tag') { } # HACK
elsif ($align && $document->canHaveAttribute($qname, 'align')) {
$node->setAttribute(align => $align); }
elsif ($class && $document->canHaveAttribute($qname, 'class')) {
$document->addClass($node, $class); }
return; }
#======================================================================
# A random collection of Tokens utility functions.
# [probably should be exported from Tokens.pm ?]
# [maybe need to do some reorganization?]
# Split $tokens at every token that Equals one of @delims.
# Returns a list of array references, one per piece (the delimiters themselves are
# dropped); there is always at least one piece, possibly empty.
# Since this is used for textual tokens, typically to split author lists,
# we don't split within braces or math: a {...} group (tracking nesting by catcode)
# or a span from one math-shift token to the next is copied whole into the current piece.
sub SplitTokens {
  my ($tokens, @delims) = @_;
  my @pieces  = ();
  my @current = ();
  my @queue   = ($tokens ? $tokens->unlist : ());
  while (my $token = shift(@queue)) {
    if (grep { Equals($token, $_) } @delims) {
      # Delimiter: close off the current piece and start a new one.
      push(@pieces, [@current]);
      @current = ();
      next; }
    push(@current, $token);
    if ($token->defined_as(T_BEGIN)) {
      # Copy through to the matching close brace (or the end of input).
      my $depth = 1;
      while ($depth && defined(my $inner = shift(@queue))) {
        my $cc = $inner->getCatcode;
        if    ($cc == CC_BEGIN) { $depth++; }
        elsif ($cc == CC_END)   { $depth--; }
        push(@current, $inner); } }
    elsif ($token->defined_as(T_MATH)) {
      # Copy through to the next math-shift token (or the end of input).
      while (defined(my $inner = shift(@queue))) {
        push(@current, $inner);
        last if $inner->getCatcode == CC_MATH; } } }
  return (@pieces, [@current]); }
# Split $tokens at each \and (using SplitTokens) and return the flat list
# $cs { piece } $cs { piece } ..., i.e. $cs applied to every piece.
sub andSplit {
  my ($cs, $tokens) = @_;
  my @wrapped = ();
  foreach my $piece (SplitTokens($tokens, T_CS('\and'))) {
    push(@wrapped, $cs, T_BEGIN, @$piece, T_END); }
  return @wrapped; }
# Return the arguments unchanged if at least one of them is defined;
# otherwise return a single undef (deliberately a one-element list in list
# context, also when called with no arguments at all).
sub orNull {
  foreach my $item (@_) {
    return @_ if defined $item; }
  return undef; }
# Inverse operation of splitting: interleave $conjunction between the @things
# and return the result as Tokens.
# With no things at all, returns the empty list.
# NOTE: joining stops at the first thing (after the first) that is false.
sub JoinTokens {
  my ($conjunction, $first, @rest) = @_;
  return () if @_ < 2;
  my @joined = ($first);
  foreach my $thing (@rest) {
    last unless $thing;
    push(@joined, $conjunction, $thing); }
  return Tokens(@joined); }
# \dump is not supported: it only issues a warning.
# The handler's first argument is passed to Warn as the location/context object
# (presumably the gullet -- TODO confirm); whatever Warn returns is the handler's result.
DefMacro('\dump', sub {
    my ($where) = @_;
    return Warn('unexpected', 'dump', $where, "Do not know how to \\dump yet, sorry"); });
#**********************************************************************
# Finally, load the eTeX and pdfTeX pools on top of the core TeX definitions.
LoadPool('eTeX'); # unless.... ?
LoadPool('pdfTeX'); # unless.... ?
#**********************************************************************
# The file must end with a true value.
1;