Alien-Judy
view release on metacpan or search on metacpan
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
// "#include", nor in the case where Code contains a comma other than nested
// within parentheses or quotes.
// DBGCODE(Code) expands to its argument only when DEBUG is defined; otherwise
// it expands to nothing, so debug-only declarations and statements disappear
// from non-debug builds.
#ifndef DEBUG
#define DBGCODE(Code) // null.
#else
#define DBGCODE(Code) Code
#endif
// ****************************************************************************
// MISCELLANEOUS GLOBAL VALUES:
// FUNCTION expands to nothing; it only tags function definitions so they can
// be located with a text search.
#define FUNCTION // null; easy to find functions.
// Boolean constants for use with bool_t:
#define FALSE 0
#define TRUE 1
// The null character, and a null pointer already cast to (char *):
#define CHNULL ('\0')
#define PCNULL ((char *) NULL)
typedef int bool_t; // for clarity with Boolean values.
// Usage message, one output line per array element; the list is terminated
// by a PCNULL entry.  The first line contains a "%s" conversion.
// NOTE(review): presumably Usage() prints these with the program name
// substituted for "%s" -- Usage() is not visible here; confirm.
char * gc_usage[] = {
"usage: %s filename.htm[l]",
"",
"Reads restricted (Judy-specific) HTML from filename.htm[l] and emits",
"equivalent nroff -man to stdout.",
PCNULL,
};
char * gc_myname; // how program was invoked.
// Values passed as the first (Exitvalue) argument of Error(), or used as exit
// codes.  Callers in this file continue after Error(NOEXIT, ...) and treat
// Error(ERREXIT, ...) as not returning.
// NOTE(review): OKEXIT and USAGE are not used in the visible code; presumably
// the success exit status and a "print usage, then exit" request -- confirm.
#define OKEXIT 0
#define NOEXIT 0 // values for Error().
#define ERREXIT 1
#define USAGE 2
// Passed as the second (MyErrno) argument of Error() by every visible caller.
// NOTE(review): presumably means "no errno value to report" -- confirm.
#define NOERRNO 0
// Prefix for printf formats:
// (consumes two arguments: a filename string, then an int line number.)
#define FILELINE "File \"%s\", line %d: "
// Common error string:
// (a printf format; because it begins with FILELINE it takes the same two
// arguments, filename and line number.)
char * FmtErrLineEnds = FILELINE "Input line ends within an HTML tag; for this "
"translator, all tags must be on a single input line";
// Macros for skipping whitespace or non-whitespace; in the latter case,
// stopping at end of line or end of tag:
// SKIPSPACE(Pch):    advance the pointer Pch while ISSPACE(*Pch) is true.
// SKIPNONSPACE(Pch): advance Pch until ISSPACE(*Pch) is true, or *Pch is the
//                    terminating null character, or *Pch is '>'.
// Both modify Pch in place and evaluate it several times, so the argument
// should be a plain pointer variable without side effects.
// NOTE(review): each macro expands to a bare { } block rather than
// do { } while (0), so "if (x) SKIPSPACE(p); else ..." would not parse --
// confirm no call site relies on that form before changing it.
#define SKIPSPACE(Pch) { while (ISSPACE(*(Pch))) ++(Pch); }
#define SKIPNONSPACE(Pch) { while ((! ISSPACE(*(Pch))) \
&& (*(Pch) != CHNULL) \
&& (*(Pch) != '>')) ++(Pch); }
// Highest line number + 1, and last input line number that caused output:
// g_linenumlim: highest input line number + 1; SETPREVNONL stores it into
// g_prevlinenum so that the next CHECKPREV comparison cannot succeed.
int g_linenumlim;
// g_prevlinenum: last input line number that caused output; while it is 0,
// CHECKPREV never emits a newline.
int g_prevlinenum = 0;
// <PRE> block equivalents in nroff need some special handling for bold font
// and for continuing a tagged paragraph; these are bit flags:
// (they are combined in the int InPRE argument passed to the Emit*()
// functions and tested there with bitwise AND.)
#define INPRE_BLOCK 0x1 // came from <PRE>.
#define INPRE_BOLD 0x2 // came from <B><PRE>.
#define INPRE_INDENT 0x4 // under <DL> below top level.
// ****************************************************************************
// DOCUMENT NODE TYPES:
//
// If an HTML tag is not in this list, it's unrecognized and causes a fatal
// error. Otherwise the tag type (dn_type) is one of DN_TYPE_*, which are
// defined so the code can use them, but they MUST match the order of
// initialization of g_dntype[].
//
// Note: The default node type is DN_TYPE_TEXT, that is, text outside of any
// tag.
// Docnode type numbers.  Each value is also the index of the matching entry
// in g_dntype[], so the values are spelled out explicitly here; keep them
// dense, starting at 0, and in the same order as that table.
enum {
    DN_TYPE_TEXT  =  0,     // text outside of any tag (the default type).
    DN_TYPE_HTML  =  1,
    DN_TYPE_HEAD  =  2,
    DN_TYPE_TITLE =  3,
    DN_TYPE_BODY  =  4,
    DN_TYPE_COMM  =  5,     // comment.
    DN_TYPE_TABLE =  6,
    DN_TYPE_TR    =  7,
    DN_TYPE_TD    =  8,
    DN_TYPE_DL    =  9,
    DN_TYPE_DT    = 10,
    DN_TYPE_DD    = 11,
    DN_TYPE_A     = 12,
    DN_TYPE_B     = 13,
    DN_TYPE_I     = 14,
    DN_TYPE_PRE   = 15,
    DN_TYPE_P     = 16,
    DN_TYPE_BR    = 17
};
// Regarding dnt_nest: If an HTML tag type is marked as nesting, that means
// it is required not to be a singleton in this context; it must have a closing
// tag, and when the tree is built, the intervening text is nested as a child.
// Otherwise, intervening text is a sibling; a closing tag is allowed (whether
// or not this makes sense), but is not required; however, if present, it must
// match.
struct docnode_type {
char * dnt_tag; // HTML tag.
bool_t dnt_savetag; // flag: save HTML tag.
bool_t dnt_nest; // flag: see comments above.
int dnt_type; // corresponding number.
} g_dntype[] = {
// Note: HTML is case-insensitive, but for expediency this program is
// case-sensitive. Tags must be as shown below.
{ "", FALSE, FALSE, DN_TYPE_TEXT, }, // special, see above.
{ "HTML", FALSE, TRUE, DN_TYPE_HTML, },
{ "HEAD", FALSE, TRUE, DN_TYPE_HEAD, },
{ "TITLE", FALSE, TRUE, DN_TYPE_TITLE, },
{ "BODY", FALSE, TRUE, DN_TYPE_BODY, },
{ "!--", TRUE, FALSE, DN_TYPE_COMM, }, // comments are singleton tags.
{ "TABLE", FALSE, TRUE, DN_TYPE_TABLE, }, // limited understanding!
{ "TR", FALSE, TRUE, DN_TYPE_TR, },
{ "TD", TRUE, TRUE, DN_TYPE_TD, },
{ "DL", FALSE, TRUE, DN_TYPE_DL, },
{ "DT", FALSE, TRUE, DN_TYPE_DT, },
{ "DD", FALSE, FALSE, DN_TYPE_DD, }, // </DD> not req in our manuals.
{ "A", TRUE, TRUE, DN_TYPE_A, }, // either "name" or "href" type.
{ "B", FALSE, TRUE, DN_TYPE_B, },
{ "I", FALSE, TRUE, DN_TYPE_I, },
{ "PRE", FALSE, TRUE, DN_TYPE_PRE, },
{ "P", FALSE, FALSE, DN_TYPE_P, }, // </P> not req in our manuals.
{ "BR", FALSE, FALSE, DN_TYPE_BR, }, // </BR> not req in our manuals.
{ PCNULL, FALSE, FALSE, 0, }, // end of list.
};
// Convenience macros:
// Each macro looks up one field of the g_dntype[] entry indexed by DN_Type.
// No range check is made, so DN_Type must be a valid index into the table.
#define TAG(DN_Type) (g_dntype[DN_Type].dnt_tag)
#define SAVETAG(DN_Type) (g_dntype[DN_Type].dnt_savetag)
#define NEST(DN_Type) (g_dntype[DN_Type].dnt_nest)
// ****************************************************************************
// DOCUMENT NODE DATA STRUCTURES:
//
// Document nodes are saved in a doubly-linked tree of docnodes. Each docnode
// points sideways to a doubly-linked list of sibling docnodes for
// previous/successive unnested document objects, plus points to its parent and
// to the first of a sideways doubly-linked child list of nested objects. All
// data lives in malloc'd memory.
//
// The dn_text field is null for a tag node unless the tag text is worth
// saving. The field is non-null for non-tag (document) text.
// Pdn_t is a pointer to a docnode; it is declared before the struct so the
// struct's own link fields can use it.
typedef struct docnode * Pdn_t;
// One node of the document tree: either an HTML tag or a run of document
// text, linked to its siblings, parent, and first child.
struct docnode {
int dn_type; // node type, index in g_dntype[].
int dn_linenum; // where introduced, for reconstructing.
bool_t dn_closed; // flag: closing tag was seen.
bool_t dn_noemit; // flag: skip on output, for marking ahead.
bool_t dn_bold; // flag: for <PRE>, whole section is bold.
char * dn_text; // node text; see above.
Pdn_t dn_Pprev; // previous node in sibling list.
Pdn_t dn_Pnext; // next node in sibling list.
Pdn_t dn_Pparent; // up-link to parent node, if any.
Pdn_t dn_Pchild; // down-link to first node in child subtree.
};
// Null docnode pointer; the link fields above are compared against it to
// detect the absence of a sibling, parent, or child.
#define PDNNULL ((Pdn_t) NULL)
Pdn_t g_Pdnhead = PDNNULL; // head of docnode tree.
// ****************************************************************************
// FUNCTION SIGNATURES (forward declarations):
// Prototypes for every function in this file, so definitions may appear in
// any order.  Parameter names are kept the same as in the corresponding
// definitions (TagType's Pisclosing, AppDocNode's Linenum) so a prototype can
// be matched to its definition at a glance.
int    main(int argc, char ** argv);
void   ReadInputFile(  char * Filename, FILE * PFile);
void   CheckNesting(   Pdn_t Pdn);
void   EmitNroffHeader(char * Filename, char ** PPageName);
void   EmitNroffBody(  Pdn_t Pdn, int DLLevel, int InPRE, char * PageName);
void   ExtractHeader(  Pdn_t Pdn, char ** PFileRev,
                       char ** PPageName, char ** PPageSection,
                       char * PLcLetter, char ** PRevision);
char * ExtractText(    Pdn_t Pdn);
void   ExtractPageInfo(Pdn_t Pdn, char * Pch,
                       char ** PPageName, char ** PPageSection,
                       char * PLcLetter);
int    TagType(        char * Tag, bool_t * Pisclosing,
                       char * Filename, int Linenum);
Pdn_t  AppDocNode(     Pdn_t Pdn, int Linenum);
Pdn_t  NewDocNode(     Pdn_t dn_Pparent, int linenum);
char * SaveDocNode(    Pdn_t Pdn, int DN_Type, char * Pch,
                       char * Filename, int Linenum);
bool_t ParentPre(      Pdn_t Pdn, bool_t BoldOnly);
void   MarkNoEmit(     Pdn_t Pdn, bool_t Font);
void   EmitText(       char * Pch, int InPRE, int Linenum);
void   EmitTextPRE(    char * Pch, int InPRE);
void   EmitTextBS(     char * Pch);
bool_t NoWhiteSpace(   char * Pch);
int    CountNewlines(  char * Pch);
char * StrSave(        char * String);
char * StrSaveN(       char * String, ...);
void * Malloc(         size_t Size);
void   Usage(void);
void   Error(int Exitvalue, int MyErrno, char * Message, ...);
// The debug-only tree dumper is declared only when DEBUG is defined:
DBGCODE(void DumpTree(Pdn_t Pdn, int Depth, bool_t Separator);)
// ****************************************************************************
// M A I N
FUNCTION int main(
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
"from input line %d", TAG(Pdn->dn_type), Pdn->dn_linenum);
}
if ((Pdn->dn_Pchild) != PDNNULL) CheckNesting(Pdn->dn_Pchild);
if ((Pdn->dn_Pnext) != PDNNULL) CheckNesting(Pdn->dn_Pnext);
} // CheckNesting()
// ****************************************************************************
// E M I T N R O F F H E A D E R
//
// Given the input filename, a pointer to a page name string to return, and the
// docnode tree under g_Pdnhead, extract header info and emit nroff header
// lines.
FUNCTION void EmitNroffHeader(
    char *  Filename,       // input file name, echoed in the first comment.
    char ** PPageName)      // page name such as "Judy1", returned to caller.
{
    char * FileRev = PCNULL;    // stays null if ExtractHeader() leaves it.
    char * Section;             // manual section, such as "3X".
    char   TabLetter;           // manual tab section letter, such as "j".
    char * Revision;            // from centered table datum.
    time_t Now;                 // current time for the banner comment.

// Collect the header values from the docnode tree under g_Pdnhead.  Only
// FileRev is pre-initialized, so only its absence can be detected here.
// NOTE(review): Section, TabLetter and Revision are used below on the
// assumption that ExtractHeader() always sets them or exits -- confirm.

    ExtractHeader(g_Pdnhead, &FileRev,
                  PPageName, &Section, &TabLetter, &Revision);

    if (FileRev == PCNULL)
        Error(ERREXIT, NOERRNO, "HTML file lacks comment lines; it must "
              "contain at least one comment line, and the first one must "
              "contain revision information");

// Banner comment; the string returned by ctime() already ends in a newline,
// so this format has none of its own:

    Now = time((time_t *) NULL);

    (void) printf(".\\\" Auto-translated to nroff -man from %s by %s at %s",
                  Filename, gc_myname, ctime(&Now));

// Remaining header lines, one nroff request or comment per line:

    (void) printf(".\\\" %s\n", FileRev);
    (void) printf(".TA %c\n", TabLetter);
    (void) printf(".TH %s %s\n", *PPageName, Section);
    (void) puts(".ds )H Hewlett-Packard Company");
    (void) printf(".ds ]W %s\n", Revision);

} // EmitNroffHeader()
// ****************************************************************************
// E M I T N R O F F B O D Y
//
// Given a current node in the docnodes tree, the current <DL> level, a flag
// whether below a <PRE> node, the manual entry page name, and in
// g_prevlinenum, the previous input line number that resulted in output,
// recursively emit nroff body text. Translate the HTML docnodes as described
// in the comments prior to EmitNroffHeader(), and also translate certain HTML
// escaped chars back to literal form. Hope the results are legal nroff
// without spurious unintended nroff commands embedded.
//
// Note: This function recurses two ways; first, to the child subtree, and
// second, to the next sibling at the current level.
FUNCTION void EmitNroffBody(
Pdn_t Pdn, // current top of subtree.
int DLLevel, // <DL> level, top = 0.
int InPRE, // bit flags for <PRE> handling.
char * PageName) // such as "Judy1".
{
int DLcount = 0; // set to 1 if hit <DL> here.
char * suffix = PCNULL; // to print after children, before siblings.
// When about to emit text, if the previous output came from a lower input line
// number, start with a newline; otherwise do not, and let the text
// concatenate:
//
// Use CHECKPREV except when the text to be emitted is forced to a new line.
#ifdef CPPRINT // for special debugging:
#define CHECKPREVPRINT printf("\ncp %d %d\n", g_prevlinenum, Pdn->dn_linenum)
#else
#define CHECKPREVPRINT // null
#endif
#define CHECKPREV \
CHECKPREVPRINT; \
{ if (g_prevlinenum && (g_prevlinenum < (Pdn->dn_linenum))) PUTC('\n');}
// To support CHECKPREV, call SETPREV() after emitting text that might need a
// line break to a new line, or SETPREVNONL to ensure NO newline, that is, the
// next text concatenates on the same line:
//
// Note: For a correct line number, SETPREV() must account for any newlines in
// the text just emitted.
#define SETPREV(Text) g_prevlinenum = (Pdn->dn_linenum) + CountNewlines(Text)
#define SETPREVNONL g_prevlinenum = g_linenumlim // no newline.
// Check if under a lower-level <DL>, for continuing an indented paragraph:
#define UNDER_DL ((DLLevel > 1) \
&& ((Pdn->dn_Pparent) != PDNNULL) \
&& ((Pdn->dn_Pparent->dn_type) == DN_TYPE_DL))
// SWITCH ON DOCNODE TYPE:
if (Pdn->dn_noemit) // upstream node said to skip this one.
goto NextNode;
switch (Pdn->dn_type)
{
// DOCUMENT TEXT:
//
// Just emit it with HTML escaped chars modified, with backslashes doubled,
// with no trailing newline, and if not within <PRE> text, with any leading
// whitespace deleted, so that, for example, something like "\fI text\fP" does
// not result.
case DN_TYPE_TEXT:
assert((Pdn->dn_text) != PCNULL);
CHECKPREV;
EmitText(Pdn->dn_text, InPRE, Pdn->dn_linenum);
SETPREV(Pdn->dn_text);
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
TAG(Pdn->dn_type), Pdn->dn_linenum);
}
if (Pch == Pch2)
{
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
"line %d, has a child \"text\" node whose text starts with "
"'(' and lacks a leading pagename",
TAG(Pdn->dn_type), Pdn->dn_linenum);
}
// Validate the "()" suffix, such as "(1)" or "(3X)":
if ((! ISDIGIT(Pch2[1])) // not "(<digit>".
|| ((Pch3 = strchr(Pch2, ')')) == PCNULL) // no ")".
|| (Pch2 + 3 < Pch3) // too far away.
|| ((Pch2 + 3 == Pch3) // <digit><suffix>.
&& (! ISUPPER(Pch2[2])))) // not <A-Z>.
{
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
"line %d, has a child \"text\" node whose text lacks a "
"standard UNIX manual entry suffix in the form "
"\"(<digit>[<A-Z>])\", such as \"(1)\" or \"(3X)\"",
TAG(Pdn->dn_type), Pdn->dn_linenum);
}
// Break out parts:
*Pch2 = *Pch3 = CHNULL; // terminate at '(' and ')'.
*PPageName = StrSave(Pch);
*PPageSection = StrSave(Pch2 + 1);
// Look for *PLcLetter:
if (! ISUPPER(**PPageName))
{
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
"line %d, has a child \"text\" node whose text does not "
"start with an uppercase letter",
TAG(Pdn->dn_type), Pdn->dn_linenum);
}
*PLcLetter = tolower((int) (**PPageName));
} // ExtractPageInfo()
// ****************************************************************************
// T A G T Y P E
//
// Given a non-null string that should be an HTML tag type, a pointer to a
// bool_t to return whether this is a closing tag, and a filename and line
// number for error reporting, look up the tag type in g_dntype[] and return
// its index. Error out if not found.
//
// As a special case, if presented with "!---" with any number of dashes, look
// for "!--".
FUNCTION int TagType(
    char *   Tag,           // tag name to look up; non-null and non-empty.
    bool_t * Pisclosing,    // set TRUE if Tag starts with '/', else FALSE.
    char *   Filename,      // for error reporting.
    int      Linenum)       // for error reporting.
{
    int    Index;           // index in g_dntype[], returned on a match.
    char * Lookup;          // the string actually compared with the table.

    assert( Tag != PCNULL);
    assert(*Tag != CHNULL);

// A leading '/' marks a closing tag, even for types that do not really allow
// one; step over it and any following whitespace, and insist that a tag name
// follows:

    *Pisclosing = (*Tag == '/');

    if (*Pisclosing)
    {
        ++Tag;
        SKIPSPACE(Tag);

        if (*Tag == CHNULL)
            Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);
    }

// Anything that begins with "!--" (a comment, with any number of dashes) is
// looked up as exactly "!--":

    Lookup = Tag;
    if (strncmp(Tag, "!--", 3) == 0) Lookup = "!--";

// Scan the table up to its PCNULL terminator; a match returns its index:

    Index = 0;
    while (TAG(Index) != PCNULL)
    {
        if (strcmp(Lookup, TAG(Index)) == 0) return(Index);
        ++Index;
    }

// No match.  The message shows Tag as it stands after any '/' was skipped:

    Error(ERREXIT, NOERRNO, FILELINE "Unrecognized HTML tag \"%s\"; "
          "see program source file for recognized types; this is a "
          "limited, special-purpose translator", Filename, Linenum, Tag);
    /*NOTREACHED*/
    return(0);              // make some compilers happy.

} // TagType()
// ****************************************************************************
// A P P D O C N O D E
//
// Given a current docnode tree node, the input file line number, and
// g_Pdnhead, create a new docnode, append it to the tree in the right place,
// and return a pointer to it, with g_Pdnhead updated if required:
//
// * If empty tree, insert new as head of tree.
//
// * Otherwise if current node nests and is not closed, insert as its child.
//
// * Otherwise insert as a sibling of the current node.
//
// Note: Most HTML tags are non-singletons and hence nest, but if the nesting
// doesn't make sense, too bad, it's not detected, at least not here.
FUNCTION Pdn_t AppDocNode(
Pdn_t Pdn, // current docnode tree node.
int Linenum) // in input file.
src/judy-1.0.5/tool/jhton.c view on Meta::CPAN
(Pdn -> dn_Pprev) = PDNNULL;
(Pdn -> dn_Pnext) = PDNNULL;
(Pdn -> dn_Pparent) = dn_Pparent;
(Pdn -> dn_Pchild) = PDNNULL;
return(Pdn);
} // NewDocNode()
// ****************************************************************************
// S A V E D O C N O D E
//
// Given a pointer to a docnode, the docnode type, a string for the current
// location (past tag name at whitespace or ">"), and a filename and line
// number for error reporting, save the docnode type in the node, and also save
// the tag text if appropriate; then find the end of the tag (">") and return
// past that location (possibly before more whitespace). Error out in case of
// syntax error.
FUNCTION char * SaveDocNode(
    Pdn_t  Pdn,             // docnode to modify.
    int    DN_Type,         // type number to store in the node.
    char * Pch,             // current location, just past the tag name.
    char * Filename,        // for error reporting.
    int    Linenum)         // for error reporting.
{
    char * TagEnd = PCNULL; // location of the closing '>', once found.

    assert( Pch != PCNULL);
    assert(*Pch != CHNULL);

    (Pdn->dn_type) = DN_Type;

// Step over whitespace, then locate the '>' that ends the tag.  Running out
// of line before finding it is an error; TagEnd is still null in that case:

    SKIPSPACE(Pch);

    if (*Pch != CHNULL) TagEnd = strchr(Pch, '>');

    if (TagEnd == PCNULL)
        Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);

// For tag types marked as such, keep a copy of the text between the tag name
// and the '>'.  The '>' is overwritten with a terminator only for the
// duration of the copy, then put back:

    if (SAVETAG(DN_Type))
    {
        *TagEnd = CHNULL;
        (Pdn->dn_text) = StrSave(Pch);
        *TagEnd = '>';
    }

    return(TagEnd + 1);     // first character after the tag.

} // SaveDocNode()
// ****************************************************************************
// P A R E N T P R E
//
// Given a docnode (can be null) and a flag whether only bold <PRE> is of
// interest, return TRUE if any of its ancestors is a <PRE> node (DN_TYPE_PRE)
// and, when the flag is set, that node also has dn_bold set; otherwise return
// FALSE.
FUNCTION bool_t ParentPre(
    Pdn_t  Pdn,             // starting node; may be null.
    bool_t BoldOnly)        // TRUE: only a <PRE> with dn_bold set counts.
{
    Pdn_t Pancestor;        // walks up the chain of parents.

    if (Pdn == PDNNULL) return(FALSE);          // no node, so no parent.

// The starting node itself is never examined, only the nodes above it:

    Pancestor = Pdn->dn_Pparent;

    while (Pancestor != PDNNULL)
    {
        if ((Pancestor->dn_type) == DN_TYPE_PRE)
        {
            if ((! BoldOnly) || (Pancestor->dn_bold)) return(TRUE);
        }
        Pancestor = Pancestor->dn_Pparent;
    }

    return(FALSE);          // reached the top without finding one.

} // ParentPre()
// ****************************************************************************
// M A R K N O E M I T
//
// Given a docnode (can be null) and a flag, recursively mark the node and all
// of its children and following siblings as do-not-emit; if the flag is set,
// mark only font (<B> and <I>) docnodes.
FUNCTION void MarkNoEmit(
    Pdn_t  Pdn,             // first node to mark; may be null.
    bool_t Font)            // TRUE: mark only <B> and <I> docnodes.
{

// Walk Pdn and each following sibling in a loop, descending into each one's
// child subtree by recursion.  Nodes are visited in the order: node, its
// children, then its next sibling.

    for (/* Pdn */; Pdn != PDNNULL; Pdn = Pdn->dn_Pnext)
    {
        if (Font)
        {
            switch (Pdn->dn_type)
            {
            case DN_TYPE_B:
            case DN_TYPE_I:
                (Pdn->dn_noemit) = TRUE;
                break;

            default:                    // not a font node; leave unmarked.
                break;
            }
        }
        else
        {
            (Pdn->dn_noemit) = TRUE;    // mark every node.
        }

        MarkNoEmit(Pdn->dn_Pchild, Font);       // a null child is a no-op.
    }

} // MarkNoEmit()
// ****************************************************************************
// E M I T T E X T
//
// Given a text string, a bitflag for <PRE> status, and an input line number
// for error reporting, copy the text string to stdout with no added newlines,
// but translating selected HTML escape codes to simple characters, doubling
// any backslashes, and if InPRE, inserting .IP (if INPRE_INDENT) or .PP at
// blank lines (between successive newlines), and if INPRE_BOLD, putting back
// bold font since .IP/.PP seems to reset the font. Warn about unrecognized
// escape codes.
// Table of the HTML escape codes that EmitText() translates.  Each escape is
// stored WITHOUT its leading '&', because EmitText() compares the table
// entries against the text just past the '&'.  et_len is the number of
// characters compared and then skipped, so it must equal the length of
// et_escape.  The list ends with an entry whose et_escape is PCNULL.
struct et_list {
char * et_escape; // expected text.
size_t et_len; // of expected text.
char et_emit; // equivalent char.
} et_list[] = {
{ "amp;", 4, '&', },
{ "gt;", 3, '>', },
{ "lt;", 3, '<', },
{ PCNULL, 0, ' ', }, // end of list.
};
FUNCTION void EmitText(
    char * Pch,             // text to emit; briefly modified, then restored.
    int    InPRE,           // bit flags for <PRE> status.
    int    Linenum)         // for the warning message only.
{
    char *           Pamp;      // location of the current '&'.
    struct et_list * Pentry;    // place in et_list[].

// Handle one '&' per iteration:  emit the text in front of it, then either
// translate the escape code that follows or pass the '&' through.

    for (Pamp = strchr(Pch, '&'); Pamp != PCNULL; Pamp = strchr(Pch, '&'))
    {

// The '&' is overwritten with a terminator just long enough to emit the
// preceding part, then put back:

        *Pamp = CHNULL;
        EmitTextPRE(Pch, InPRE);
        *Pamp = '&';

        Pch = Pamp + 1;         // continue just past the '&'.

// Stop at the first table entry that matches the text here, or at the
// PCNULL terminator if none does:

        Pentry = et_list;

        while ((Pentry->et_escape != PCNULL)
            && (strncmp(Pch, Pentry->et_escape, Pentry->et_len) != 0))
        {
            ++Pentry;
        }

        if (Pentry->et_escape != PCNULL)        // recognized escape code.
        {
            PUTC(Pentry->et_emit);              // the equivalent character.
            Pch += Pentry->et_len;              // skip the escape code.
        }
        else                                    // unrecognized; warn only.
        {
            Error(NOEXIT, NOERRNO, "Unrecognized HTML escape code in "
                  "line %d (or text beginning on that line): \"%.4s...\", "
                  "passed through unaltered", Linenum, Pamp);

            PUTC('&');          // Pch is already just past the '&', so the
                                // rest is emitted as ordinary text.
        }
    }

    EmitTextPRE(Pch, InPRE);    // whatever follows the last '&', if any.

} // EmitText()
// ****************************************************************************
// E M I T T E X T P R E
//
// Given a text string with no HTML escape codes in it and a bitflag for <PRE>
// status (see EmitText()), emit the string with <PRE> handling, and with any
// backslashes doubled.
FUNCTION void EmitTextPRE(
char * Pch, // string to emit.
int InPRE) // bitflag for <PRE> status.
{
char * Pch2; // place in string.
if (! InPRE) { EmitTextBS(Pch); return; }
while ((Pch2 = strchr(Pch, '\n')) != PCNULL) // another newline.
{
*Pch2 = CHNULL; // briefly terminate.
EmitTextBS(Pch); // emit preceding part.
*Pch2 = '\n';
PUTC('\n'); // emit current newline.
if (*(Pch = Pch2 + 1) == '\n') // successive newline.
{
// emit before next newline:
PUTS((InPRE & INPRE_INDENT) ? ".IP" : ".PP");
// also reset font:
if (InPRE & INPRE_BOLD) PUTS("\n.ft B");
}
}
EmitTextBS(Pch); // emit trailing part.
} // EmitTextPRE()
// ****************************************************************************
// E M I T T E X T B S
//
// Given a text string with no HTML escape codes in it, emit the string with
// any backslashes doubled.
FUNCTION void EmitTextBS(
    char * Pch)             // null-terminated string to emit.
{

// Pass every character to PUTC() once, and a backslash a second time so it
// comes out doubled:

    for (/* Pch */; *Pch != CHNULL; ++Pch)
    {
        PUTC(*Pch);

        if (*Pch == '\\') { PUTC('\\'); }
    }

} // EmitTextBS()
// ****************************************************************************
// N O W H I T E S P A C E
//
// Given a string, return TRUE if it contains no whitespace, otherwise FALSE.
FUNCTION bool_t NoWhiteSpace(
    char * Pch)             // non-null, null-terminated string to check.
{
    assert(Pch != PCNULL);

// The first character for which ISSPACE() is true settles the answer; an
// empty string has no such character and so yields TRUE:

    for (/* Pch */; *Pch != CHNULL; ++Pch)
    {
        if (ISSPACE(*Pch)) return(FALSE);
    }

    return(TRUE);

} // NoWhiteSpace()
( run in 0.584 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )