Alien-Judy
view release on metacpan or search on metacpan
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
// Warning: This cannot be used around compiler directives, such as
// "#include", nor in the case where Code contains a comma other than nested
// within parentheses or quotes.
// DBGCODE(Code) expands to Code itself when DEBUG is defined at compile time,
// and to nothing at all otherwise.
#ifdef DEBUG
#define DBGCODE(Code) Code
#else
#define DBGCODE(Code) // null.
#endif
// ****************************************************************************
// MISCELLANEOUS GLOBAL VALUES:
// FUNCTION expands to nothing; it is written in front of each function
// definition purely as a marker that is easy to search for.
#define FUNCTION // null; easy to find functions.
// Boolean constants, for use with bool_t:
#define FALSE 0
#define TRUE 1
// CHNULL is the NUL character that terminates a string; PCNULL is a null
// pointer already cast to (char *).
#define CHNULL ('\0')
#define PCNULL ((char *) NULL)
typedef int bool_t; // for clarity with Boolean values.
// Usage message as a list of strings ended by a PCNULL sentinel.  The "%s" in
// the first string is a printf-style placeholder; presumably it receives
// gc_myname and each element is printed as one line -- confirm against the
// code that prints the usage.

char *gc_usage[] = {
    "usage: %s filename.htm[l]",
    "",
    "Reads restricted (Judy-specific) HTML from filename.htm[l] and emits",
    "equivalent nroff -man to stdout.",
    PCNULL,
};

char *gc_myname;                // how program was invoked.
// Exit codes and Error() arguments.  In the calls visible in this file,
// Error() takes NOEXIT or ERREXIT as its first argument and NOERRNO as its
// second; after an Error(NOEXIT, ...) call the caller carries on, so NOEXIT
// marks a warning.
// NOTE(review): OKEXIT is presumably the success exit status and USAGE
// presumably another first-argument value for Error() that also prints
// gc_usage -- confirm against Error() and main().
#define OKEXIT 0
#define NOEXIT 0 // values for Error().
#define ERREXIT 1
#define USAGE 2
#define NOERRNO 0
// Prefix for printf formats:
//
// Consumes two arguments: a string (the file name) and then an int (the line
// number).
#define FILELINE "File \"%s\", line %d: "
// Common error string:
//
// Begins with FILELINE, so whoever uses it as a format must supply the file
// name and line number as the arguments.
char * FmtErrLineEnds = FILELINE "Input line ends within an HTML tag; for this "
"translator, all tags must be on a single input line";
// Macros for advancing a char pointer in place.  SKIPSPACE moves forward while
// ISSPACE() is true of the current character.  SKIPNONSPACE moves forward
// until it reaches a character for which ISSPACE() is true, the end of the
// string, or the '>' that ends a tag, whichever comes first.
//
// NOTE(review): each macro expands to a bare { } block, so "SKIPSPACE(p);"
// placed directly before an "else" does not parse; wrap such a call in braces.
// Pch is evaluated repeatedly, so pass a plain lvalue with no side effects.

#define SKIPSPACE(Pch) { for ( ; ISSPACE(*(Pch)); ++(Pch)) { } }

#define SKIPNONSPACE(Pch) { while (! (ISSPACE(*(Pch)) \
                                   || (*(Pch) == CHNULL) \
                                   || (*(Pch) == '>'))) ++(Pch); }
// Line-number bookkeeping: g_linenumlim is the highest line number + 1, and
// g_prevlinenum is the last input line number that caused output.

int g_linenumlim;
int g_prevlinenum = 0;

// Bit flags describing how text inside a <PRE> block must be treated; <PRE>
// equivalents in nroff need special handling for bold font and for continuing
// a tagged paragraph.  One bit apiece, so the flags can be OR'ed together:

#define INPRE_BLOCK  (1 << 0)   // came from <PRE>.
#define INPRE_BOLD   (1 << 1)   // came from <B><PRE>.
#define INPRE_INDENT (1 << 2)   // under <DL> below top level.
// ****************************************************************************
// DOCUMENT NODE TYPES:
//
// If an HTML tag is not in this list, it's unrecognized and causes a fatal
// error. Otherwise the tag type (dn_type) is one of DN_TYPE_*, which are
// defined so the code can use them, but they MUST match the order of
// initialization of g_dntype[].
//
// Note: The default node type is DN_TYPE_TEXT, that is, text outside of any
// tag.
// Each value is spelled out because it doubles as the index of the matching
// entry in g_dntype[]; keep the two lists in step when adding a node type.

enum {
    DN_TYPE_TEXT  =  0,     // text outside of any tag (the default).
    DN_TYPE_HTML  =  1,
    DN_TYPE_HEAD  =  2,
    DN_TYPE_TITLE =  3,
    DN_TYPE_BODY  =  4,
    DN_TYPE_COMM  =  5,
    DN_TYPE_TABLE =  6,
    DN_TYPE_TR    =  7,
    DN_TYPE_TD    =  8,
    DN_TYPE_DL    =  9,
    DN_TYPE_DT    = 10,
    DN_TYPE_DD    = 11,     // descriptive list datum.
    DN_TYPE_A     = 12,     // anchor.
    DN_TYPE_B     = 13,     // bold text.
    DN_TYPE_I     = 14,     // italic text.
    DN_TYPE_PRE   = 15,     // preformatted text.
    DN_TYPE_P     = 16,     // paragraph break.
    DN_TYPE_BR    = 17      // line break.
};
// Regarding dnt_nest: If an HTML tag type is marked as nesting, that means
// it is required not to be a singleton in this context; it must have a closing
// tag, and when the tree is built, the intervening text is nested as a child.
// Otherwise, intervening text is a sibling; a closing tag is allowed (whether
// or not this makes sense), but is not required; however, if present, it must
// match.
struct docnode_type {
char * dnt_tag; // HTML tag.
bool_t dnt_savetag; // flag: save HTML tag.
bool_t dnt_nest; // flag: see comments above.
int dnt_type; // corresponding number.
} g_dntype[] = {
// Note: HTML is case-insensitive, but for expediency this program is
// case-sensitive. Tags must be as shown below.
{ "", FALSE, FALSE, DN_TYPE_TEXT, }, // special, see above.
{ "HTML", FALSE, TRUE, DN_TYPE_HTML, },
{ "HEAD", FALSE, TRUE, DN_TYPE_HEAD, },
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
{
PUTS("\n.SH ");
if ((Pdn->dn_Pchild->dn_type) == DN_TYPE_B)
(Pdn->dn_Pchild->dn_noemit) = TRUE; // skip <B>...</B>.
}
// If a <DT> immediately follows a previous <DT>, use .PD 0 for the successive
// .TP to join lines:
else
{
if (((Pdn->dn_Pprev) != PDNNULL)
&& ((Pdn->dn_Pprev->dn_type) == DN_TYPE_DT))
{
PUTS("\n.PD 0\n");
suffix = "\n.PD\n";
}
PUTS("\n.TP 15\n.C ");
}
SETPREVNONL;
break;
// DESCRIPTIVE LIST DATUM:
//
// Just proceed to dump the embedded text.
case DN_TYPE_DD: break;
// ANCHOR:
//
// Ignore inbound ("name") anchors and process outbound ("href") anchor labels
// into appropriately highlighted text.
case DN_TYPE_A:
{
size_t len; // of substring.
Pdn_t Pdn2; // child node.
char * Pch; // place in text.
assert((Pdn->dn_text) != PCNULL);
if (strstr(Pdn->dn_text, "name=") != PCNULL) break;
if (strstr(Pdn->dn_text, "href=") == PCNULL)
{
Error(NOEXIT, NOERRNO, "Unrecognized HTML anchor type \"%s\" "
"at input line %d ignored; only \"name=\" and \"href=\" "
"are allowed by this translator",
Pdn->dn_text, Pdn->dn_linenum);
break;
}
// Check for nested text (anchor label):
//
// TBD: The error message lies a little. If the text is something like,
// "foo<B>bar</B>", it passes this test; and later, all font tags in the anchor
// label are marked no-emit; and any other embedded tags, who knows what
// happens?
if (((Pdn2 = Pdn->dn_Pchild)->dn_type) != DN_TYPE_TEXT)
{
Error(ERREXIT, NOERRNO, "HTML \"href\" anchor at input line "
"%d lacks a directly nested anchor label, with no "
"further nested tags; this translator cannot support "
"nested tags in anchor labels", Pdn->dn_linenum);
}
assert((Pdn2->dn_text) != PCNULL);
// If the anchor is within a <B><PRE>, do nothing special with fonts, as
// explained earlier:
if (ParentPre(Pdn, /* BoldOnly = */ TRUE)) break;
// Since anchor label text font will be forced in a moment, ignore any nested
// font directives so they don't mess up nroff:
MarkNoEmit(Pdn->dn_Pchild, /* Font = */ TRUE);
// See if anchor label appears to be a reference to the current page, to some
// other page, or else just make it italicized text:
//
// TBD: This is pretty shaky, hope it's close enough.
len = strlen(PageName);
if (strncmp(Pdn2->dn_text, PageName, len) == 0) // self-reference.
{
CHECKPREV;
PUTS("\\fB"); // bold font.
SETPREVNONL;
suffix = "\\fP"; // revert to previous font.
break;
}
// Contains '(' and no whitespace => appears to reference some other page:
//
// Emit revised, tagged anchor label text immediately.
if (((Pch = strchr(Pdn2->dn_text, '(')) != PCNULL)
&& NoWhiteSpace(Pdn2->dn_text))
{
CHECKPREV;
PUTS("\\fI"); // italic font.
*Pch = CHNULL; // terminate briefly.
PUTS(Pdn2->dn_text);
*Pch = '(';
PUTS("\\fP"); // revert to previous font.
PUTS(Pch);
SETPREV(Pdn2->dn_text);
(Pdn2->dn_noemit) = TRUE; // skip later.
break;
}
// Just make the anchor label italicized text:
CHECKPREV;
PUTS("\\fI"); // italic font.
SETPREVNONL;
suffix = "\\fP"; // revert to previous font.
break;
} // case DN_TYPE_A
// BOLD TEXT:
//
// If the first child is <PRE>, use a "hard" font change; otherwise an in-line
// change.
//
// Note: For <DT><B>, this node is already marked dn_noemit and not seen here.
//
// Note: For <B><PRE>, nroff seems to reset font upon .PP, so mark the bold
// for later emission.
case DN_TYPE_B:
if (((Pdn->dn_Pchild) != PDNNULL)
&& ((Pdn->dn_Pchild->dn_type) == DN_TYPE_PRE))
{
(Pdn->dn_Pchild->dn_bold) = TRUE; // see above.
break;
}
CHECKPREV;
PUTS("\\fB"); // bold font.
SETPREVNONL;
suffix = "\\fP"; // revert to previous font.
break;
// ITALIC TEXT:
case DN_TYPE_I:
CHECKPREV;
PUTS("\\fI"); // italic font.
SETPREVNONL;
suffix = "\\fP"; // revert to previous font.
break;
// PREFORMATTED TEXT:
//
// Emit prefix/suffix directives based on example in strchr(3C).
case DN_TYPE_PRE:
PUTS(UNDER_DL ? "\n.IP\n.nf\n.ps +1\n" : "\n.PP\n.nf\n.ps +1\n");
if (Pdn->dn_bold) PUTS(".ft B\n"); // deferred bold.
SETPREVNONL;
suffix = ((Pdn->dn_bold) ? "\n.ft P\n.ps\n.fi\n" : "\n.ps\n.fi\n");
// set for all children:
InPRE = INPRE_BLOCK
| ((Pdn->dn_bold) ? INPRE_BOLD : 0)
| (UNDER_DL ? INPRE_INDENT : 0);
break;
// PARAGRAPH BREAK:
//
// If the parent is a <DL> below the top level, use .IP to continue a .TP
// (tagged paragraph); otherwise emit a standard .PP.
case DN_TYPE_P:
PUTS(UNDER_DL ? "\n.IP\n" : "\n.PP\n");
SETPREVNONL;
break;
// LINE BREAK:
case DN_TYPE_BR: PUTS("\n.br\n"); SETPREVNONL; break;
// UNRECOGNIZED DOCNODE TYPE:
default:
Error(ERREXIT, NOERRNO, "Internal error: Unexpected docnode type "
"%d in docnodes tree", Pdn->dn_type);
} // end switch on dn_type
// VISIT CHILD AND SIBLING DOCNODES:
//
// If this was a <DL> here, pass an incremented value to child nodes, but not
// to sibling nodes.
NextNode:
if ((Pdn->dn_Pchild) != PDNNULL)
EmitNroffBody(Pdn->dn_Pchild, DLLevel + DLcount, InPRE, PageName);
if (suffix != PCNULL) PUTS(suffix);
if ((Pdn->dn_Pnext) != PDNNULL)
EmitNroffBody(Pdn->dn_Pnext, DLLevel, InPRE, PageName);
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
assert( Pch != PCNULL);
assert(*Pch != CHNULL);
// Save type:
(Pdn->dn_type) = DN_Type;
// Pass whitespace and then find the end of the tag:
SKIPSPACE(Pch);
if ((*Pch == CHNULL) || ((Pch2 = strchr(Pch, '>')) == PCNULL))
Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);
// Optionally save tag text:
if (SAVETAG(DN_Type))
{
*Pch2 = CHNULL; // temporarily terminate.
(Pdn->dn_text) = StrSave(Pch);
*Pch2 = '>';
}
return(Pch2 + 1);
} // SaveDocNode()
// ****************************************************************************
// P A R E N T P R E
//
// Walk up the dn_Pparent chain from the given docnode (which can be null) and
// report whether any ancestor is a <PRE> node (DN_TYPE_PRE).  When BoldOnly is
// set, only a <PRE> whose dn_bold flag is set counts.  The starting node
// itself is never tested.  Returns TRUE on a match, otherwise FALSE, including
// when the starting node is null.

FUNCTION bool_t ParentPre(
    Pdn_t  Pdn,             // starting node.
    bool_t BoldOnly)        // flag: only care about bold <PRE>.
{
    Pdn_t Pancestor;        // ancestor currently under test.

    if (Pdn == PDNNULL) return(FALSE);          // no parent.

    Pancestor = Pdn->dn_Pparent;

    while (Pancestor != PDNNULL)
    {
        if ((Pancestor->dn_type) == DN_TYPE_PRE)
        {
            if (! BoldOnly)            return(TRUE);    // any <PRE> will do.
            if (Pancestor->dn_bold)    return(TRUE);    // bold <PRE> found.
        }

        Pancestor = Pancestor->dn_Pparent;
    }

    return(FALSE);

} // ParentPre()
// ****************************************************************************
// M A R K N O E M I T
//
// Given a docnode (can be null), visit that node, every following sibling
// (dn_Pnext), and all of their descendants (dn_Pchild), setting dn_noemit on
// the nodes visited.  If Font is set, only font docnodes (DN_TYPE_B and
// DN_TYPE_I) are marked, although the whole tree is still traversed; if Font
// is clear, every visited node is marked.

FUNCTION void MarkNoEmit(
    Pdn_t  Pdn,             // top node to mark.
    bool_t Font)            // flag: only mark font docnodes.
{
    // Siblings are handled by this loop; only children need recursion:

    for ( ; Pdn != PDNNULL; Pdn = Pdn->dn_Pnext)
    {
        bool_t isfont = (((Pdn->dn_type) == DN_TYPE_B)
                      || ((Pdn->dn_type) == DN_TYPE_I));

        if (isfont || (! Font)) (Pdn->dn_noemit) = TRUE;

        MarkNoEmit(Pdn->dn_Pchild, Font);   // returns at once if no child.
    }

} // MarkNoEmit()
// ****************************************************************************
// E M I T T E X T
//
// Given a text string, a bitflag for <PRE> status, and an input line number
// for error reporting, copy the text string to stdout with no added newlines,
// but translating selected HTML escape codes to simple characters, doubling
// any backslashes, and if InPRE, inserting .IP (if INPRE_INDENT) or .PP at
// blank lines (between successive newlines), and if INPRE_BOLD, putting back
// bold font since .IP/.PP seems to reset the font. Warn about unrecognized
// escape codes.
// Table of recognized HTML escape codes.  Each et_escape is the text that
// follows the '&', including the closing ';'.  The lengths are taken from the
// string literals themselves so they cannot drift out of step with the text.
// A PCNULL et_escape marks the end of the table.

struct et_list {
    char * et_escape;       // expected text.
    size_t et_len;          // of expected text.
    char   et_emit;         // equivalent char.
} et_list[] = {
    { "amp;",  sizeof("amp;") - 1, '&' },
    { "gt;",   sizeof("gt;")  - 1, '>' },
    { "lt;",   sizeof("lt;")  - 1, '<' },
    { PCNULL,  0,                  ' ' },   // end of list.
};
// Emit Pch piecewise: each stretch of text up to the next '&' goes through
// EmitTextPRE(), then the escape code after the '&' is looked up in et_list[]
// and replaced by its single-character equivalent.  An unknown escape code
// draws a warning and is passed through unaltered.  The string is modified
// briefly while being emitted but is restored before returning.

FUNCTION void EmitText(
    char * Pch,             // text to emit.
    int    InPRE,           // bitflag for <PRE> status.
    int    Linenum)         // for error reporting.
{
    char *           Pamp;  // next '&' in text.
    struct et_list * Pet;   // place in et_list[].

    for (;;)
    {
        Pamp = strchr(Pch, '&');
        if (Pamp == PCNULL) break;              // no more escape codes.

        // Emit the part preceding the '&' by terminating there briefly:

        *Pamp = CHNULL;
        EmitTextPRE(Pch, InPRE);
        *Pamp = '&';

        Pch = Pamp + 1;                         // past '&'.

        // Search the table; Pet stops on the match or on the end marker:

        Pet = et_list;

        while ((Pet->et_escape != PCNULL)
            && (strncmp(Pch, Pet->et_escape, Pet->et_len) != 0))
        {
            ++Pet;
        }

        if (Pet->et_escape != PCNULL)           // match found.
        {
            PUTC(Pet->et_emit);                 // translate.
            Pch += Pet->et_len;                 // skip escape code.
        }
        else                                    // no match found.
        {
            Error(NOEXIT, NOERRNO, "Unrecognized HTML escape code in "
                  "line %d (or text beginning on that line): \"%.4s...\", "
                  "passed through unaltered", Linenum, Pamp);

            PUTC('&');              // emit start of escape code.
            // continue with Pch just after the '&'.
        }
    }

    EmitTextPRE(Pch, InPRE);                    // emit remaining part.

} // EmitText()
// ****************************************************************************
// E M I T T E X T P R E
//
// Given a text string with no HTML escape codes in it and a bitflag for <PRE>
// status, emit the string with any backslashes doubled (via EmitTextBS()).
// If InPRE is zero that is all that happens.  Otherwise the string is emitted
// line by line, and wherever a newline is immediately followed by another
// newline (a blank line), .IP (if INPRE_INDENT) or .PP is emitted between
// them, followed by a bold-font reset if INPRE_BOLD.  The string is modified
// briefly while being emitted but is restored before returning.

FUNCTION void EmitTextPRE(
    char * Pch,             // string to emit.
    int    InPRE)           // bitflag for <PRE> status.
{
    char * Pnl;             // next newline in string.
    char * Pbreak;          // directive to emit at a blank line.

    if (InPRE)
    {
        Pbreak = ((InPRE & INPRE_INDENT) ? ".IP" : ".PP");

        for (Pnl = strchr(Pch, '\n'); Pnl != PCNULL; Pnl = strchr(Pch, '\n'))
        {
            *Pnl = CHNULL;                      // briefly terminate.
            EmitTextBS(Pch);                    // emit preceding part.
            *Pnl = '\n';
            PUTC('\n');                         // emit current newline.

            Pch = Pnl + 1;
            if (*Pch != '\n') continue;         // not a blank line.

            PUTS(Pbreak);                       // emit before next newline.

            if (InPRE & INPRE_BOLD) { PUTS("\n.ft B"); }   // also reset font.
        }
    }

    EmitTextBS(Pch);                            // emit trailing part.

} // EmitTextPRE()
// ****************************************************************************
// E M I T T E X T B S
//
// Given a text string with no HTML escape codes in it, emit it one character
// at a time, following each backslash with a second backslash.

FUNCTION void EmitTextBS(
    char * Pch)             // string to emit.
{
    char * Pcur;            // place in string.

    for (Pcur = Pch; *Pcur != CHNULL; ++Pcur)
    {
        PUTC(*Pcur);

        if (*Pcur == '\\') { PUTC('\\'); }      // double it.
    }

} // EmitTextBS()
// ****************************************************************************
// N O W H I T E S P A C E
//
// Given a string (must not be null), return TRUE if ISSPACE() is false for
// every character in it, otherwise FALSE.  An empty string yields TRUE.

FUNCTION bool_t NoWhiteSpace(
    char * Pch)             // string to check.
{
    assert(Pch != PCNULL);

    // Advance to the first whitespace char or to the end of the string:

    while ((*Pch != CHNULL) && (! ISSPACE(*Pch))) ++Pch;

    // Reaching the terminator means no whitespace was met along the way:

    return((*Pch == CHNULL) ? TRUE : FALSE);

} // NoWhiteSpace()
// ****************************************************************************
// C O U N T N E W L I N E S
//
// Return the number of newline chars in a string (must not be null); zero for
// an empty string.

FUNCTION int CountNewlines(
    char * Pch)             // in which to count newlines.
{
    int newlines = 0;       // running total.

    assert(Pch != PCNULL);

    for ( ; *Pch != CHNULL; ++Pch)
    {
        if (*Pch == '\n') ++newlines;
    }

    return(newlines);

} // CountNewlines()
( run in 1.190 second using v1.01-cache-2.11-cpan-5735350b133 )