sec results from the CPAN

AcePerl

*/
{
  static char filename_array[MAXPATHLEN] = "";
  char *filename = &filename_array[0];
  char *subject_copy;
  Array dirList;

  if (subject == NULL) 
    return NULL;

  if (strlen(subject) == 0) 
    return NULL;

  if (strcmp(subject, "?") == 0)
    {
      /* return ? to signal that the calling 
	 function needs to display a dynamically
	 created index or show some kind of help. 
       */
      /* if the construct
	 page = htmlPageCreate(helpGetFilename(subject_requested));
	 is used, the resulting page will therefor be a marked up
	 directory listing of helpsubjects
       */
      strcpy (filename, "?");
      return filename;
    }

  subject_copy = strnew (subject, 0);

  strcpy (filename, "");	/* intialise, if this is
				   non-empty at the end of the loop,
				   we found a matching helpfile */
  while (TRUE)
    {
      /* simple attempt to locate file - path/helpDir/subject.html */
      sprintf(filename, "%s%s%s.%s", 
	      filGetFullPath(helpGetDir()),
	      SUBDIR_DELIMITER_STR,
	      subject_copy, HELP_FILE_EXTENSION);

      if (filName(filename, 0, "r"))
	break;

      /* advanced attempt, try to find a matching file from
	 the list of available ones by scanning the directory
	 contents of the helpdirectory */
      if ((dirList = filDirectoryCreate
	   (helpGetDir(), HELP_FILE_EXTENSION, "r")) )
	{
	  int i;
	  int matches;
	  char *s;
	  
	  /* first look for an exact case-insensitive match */
	  strcpy (filename, "");
	  for (i = 0 ; i < arrayMax(dirList) ; i++)
	    {
	      s = arr(dirList,i,char*);
	      
	      if (strcasecmp (s, subject_copy) == 0)
		{
		  sprintf(filename, "%s%s%s.%s", 
			  filGetFullPath(helpGetDir()),
			  SUBDIR_DELIMITER_STR,
			  s, HELP_FILE_EXTENSION);
		  if (filName(filename, 0, "r"))
		    break;	/* exit for-loop */

		  strcpy (filename, "");
		}
	    }

	  if (strlen(filename) > 0)
	    break;		/* exit while(true) loop */

	  /* count the number of filenames starting with the
	     given subject string */
	  matches = 0;
	  for (i = 0 ; i < arrayMax(dirList) ; i++)
	    {
	      s = arr(dirList,i,char*);
	      
	      if (strncasecmp (s, subject_copy, 
			       strlen(subject_copy)) == 0)
		{
		  sprintf(filename, "%s%s%s.%s", 
			  filGetFullPath(helpGetDir()),
			  SUBDIR_DELIMITER_STR,
			  s, HELP_FILE_EXTENSION);
		  ++matches;
		}
	    }

	  if (matches == 0)
	    {
	      strcpy (filename, ""); /* not found */
	    }
	  else if (matches == 1)
	    {
	      /* the one exact match (already in filename string)
		 is the complete filename */
	      if (filName(filename, 0, "r"))
		break;		/* exit while(true) loop */
	    }
	  else if (matches > 1)
	    {
	      /* construct a filename that we know won't work.
		 But it may be used by the help display
		 function to give a meaningful message
		 to say that this subject is ambiguos.
		 The returned filename is then considered
		 a template, similar to 'ls subject*'
		 so the help-display function may give a list 
		 of possible matching subjects. */

	      sprintf(filename, "%s%s%s", 
		      filGetFullPath(helpGetDir()),
		      SUBDIR_DELIMITER_STR, subject_copy);
	      break;
	    }

	  filDirectoryDestroy (dirList);

	} /* endif dirList */

      /* file didn't exist, whichever way we tried so far,
	 so we try to chop off the last bit of the subject name.
	 In case trySubject was "Tree_Clone_Inside", we now
	 go through the look again with "Tree_Clone" and re-try. */

      if (strchr (subject_copy, '_'))
	{
	  int j;

	  j = strlen (subject_copy);
	  while (subject_copy[j--] != '_') ; /* find the last _ char */
	  subject_copy[j + 1] = '\0';
	}
      else
	{
	  /* If we run out of trailing components, then we exit
	   * anyway.
	   */
	  strcpy (filename, "");
	  break;		/* exit while(true)loop */
	}
    } /* end-while(true) */

  messfree (subject_copy);


  if (strcmp(filename, "") != 0)
    return filename;		/* success */

  if ((strcasecmp(subject, "index") == 0) ||
      (strcasecmp(subject, "home") == 0) ||
      (strcasecmp(subject, "toc") == 0))
    {
      /* we asked for some kind of index-page but couldn't find it,
	 so we can always try to return the question mark '?'
	 which will ask the calling function to display a
	 dynamically created index of help-subjects. */

      strcpy (filename, "?");
      return filename;
    }


  return NULL;			/* failure - no file found */
} /* helpSubjectGetFilename */


/************************************************************/
/* helpPackage utility to find out the filename of a given
   link reference. Absolute filenames are returned unchanged,
   but relative filenames are expanded to be the full path
   of the helpfile. Can be used for html/gif files referred to
   by the HREF of anchor tags or the SRC or IMG tags */

/* NOTE: the pointer returned is a static copy, which is
   re-used everytime it is called. If the calling function
   wants to mess about with the returned string, a copy
   has to be made.
   NULL is returned if the resulting file can't be opened.
   the calling function can inspect the result of
   messSysErrorText(), the report the resaon for failure */
/************************************************************/
UTIL_FUNC_DEF char *helpLinkGetFilename (char *link)
{
  static char link_path_array[MAXPATHLEN] = "";
  char *link_path = &link_path_array[0];

  if (link[0] == SUBDIR_DELIMITER) /* absolute path (UNIX) */
    {
      strcpy (link_path, link);
    }
  else				/* relative path */
    {
      strcpy (link_path, helpGetDir());
      strcat (link_path, SUBDIR_DELIMITER_STR);
      strcat (link_path, link);
    }

  if (filName(link_path, "", "r"))
    return link_path;

  return NULL;
} /* helpLinkGetFilename */


/************************************************************/
/******************                   ***********************/
/************** private helpPackage functions ***************/
/******************                   ***********************/
/************************************************************/


HtmlPage *htmlPageCreate (char *helpFilename)
/* complemeted by htmlPageDestroy */
{
  FILE *fil;
  HtmlPage *page = 0;

  if (!helpFilename)		/* we could get a NULL filename */
    return 0;                   /* here, which might come from
				   helpSubjectGetFilename() that couldn't
				   find a file matching the subject */

  /* create a page with a marked up directory listing */
  if (strcmp(helpFilename, "?") == 0)
    {
      page = messalloc (sizeof(HtmlPage));
      page->handle = handleCreate();
      page->htmlText = makeHtmlIndex(page->handle);
      if (!(page->root = parseHtmlText(page->htmlText, page->handle)))
	htmlPageDestroy(page);

      return page;
    }

  if (!(filName(helpFilename, "", "r")))
    return 0;			/* prevent error caused 
				   by unsucsessful filopen */


  /* create a page inlining the image */
  if (strcasecmp (helpFilename + (strlen(helpFilename)-4), ".gif") == 0)
    {
      page = messalloc (sizeof(HtmlPage));
      page->handle = handleCreate();
      page->htmlText = makeHtmlImagePage(helpFilename, page->handle);
      if (!(page->root = parseHtmlText(page->htmlText, page->handle)))
	htmlPageDestroy(page);

      return page;
    }


  /* assume HTML page */
  if ((fil = filopen(helpFilename, "", "r")))
    {
      page = htmlPageCreateFromFile (fil);
      filclose (fil);
    }

  return page;
} /* htmlPageCreate */
/************************************************************/

HtmlPage *htmlPageCreateFromFile (FILE *fil)
{
  HtmlPage *page;
  int fileSize;

  if (!fil)
    return (HtmlPage*)0;

  /* determine filesize */
  rewind (fil);
  fseek (fil, 0, SEEK_END);
  fileSize = ftell (fil);
  rewind (fil);
      
  if (fileSize == 0)
    return (HtmlPage*)0;
    
  /* if we have a positive fileSize, we are pretty much
     guaranteed, that we'll get some HTML text and a parsetree */

  page = messalloc (sizeof(HtmlPage));

  page->handle = handleCreate();

  /* grab the contents of the file */
  page->htmlText = halloc ((fileSize + 1) * sizeof(char), page->handle);
  fread (page->htmlText, sizeof (char), fileSize, fil);
  page->htmlText[fileSize] = '\0'; /* add string terminator */
      
  /* get parsetree */
  page->root = parseHtmlText(page->htmlText, page->handle);

  return page;
} /* htmlPageCreateFromFile */
/************************************************************/

void htmlPageDestroy (HtmlPage *page)
{

acelib/helpsubs.c view on Meta::CPAN

	       s, HELP_FILE_EXTENSION, s) ;
      cp += strlen(s)*2 + strlen(HELP_FILE_EXTENSION) + 19;
    }
  sprintf (cp, "</UL>\n") ;
  text[len] = 0 ;

  filDirectoryDestroy (dirList) ;

  return text ;
} /* makeHtmlIndex */
/************************************************************/




/*************************************************************
 *****************  HTML Parsing package *********************
 *** currently very crude parser, will fall over any bad  ****
 *** whether Mosaic, Netscape or MSIE can deal with or not. **
 ************************************************************/
 

static HtmlNode *parseHtmlText(char *text, STORE_HANDLE handle)
/* return root node of html parse-tree, 
   generated from the HTML source text */
{
  char *cp = text;
  HtmlNode *node;
  
  if (!text) return 0;

  /* start recursion */
  parseSection (&cp, &node, handle) ;

  return node;			/* return root-node */
} /* parseHtmlText */
/************************************************************/

static void skipSpaces (char **cp)
{
  while (**cp && isspace((int)**cp)) { ++(*cp) ; }
} /* skipSpaces */

/************************************************************/

static void replaceEscapeCodes (char *cp)
{
  char *s ;

/*
   quotation mark                       &#34;  --> "    &quot;   --> "
   ampersand                            &#38;  --> &    &amp;    --> &
   less-than sign                       &#60;  --> <    &lt;     --> <
   greater-than sign                    &#62;  --> >    &gt;     --> >
*/
  
  s = cp ;

  while (*s)
    {
      if (strncasecmp (s, "&#34;", 5) == 0)
	{
	  s[0] = '"' ; s[1] = 0 ;
	  strcat (s+1, s+5) ;
	}
      else if (strncasecmp (s, "&#38;", 5) == 0)
	{
	  s[0] = '&' ; s[1] = 0 ;
	  strcat (s+1, s+5) ;
	}
      else if (strncasecmp (s, "&#60;", 5) == 0)
	{
	  s[0] = '<' ; s[1] = 0 ;
	  strcat (s+1, s+5) ;
	}
      else if (strncasecmp (s, "&#62;", 5) == 0)
	{
	  s[0] = '>' ; s[1] = 0 ;
	  strcat (s+1, s+5) ;
	}
      else if (strncasecmp (s, "&quot;", 6) == 0)
	{
	  s[0] = '"' ; s[1] = 0 ;
	  strcat (s+1, s+6) ;
	}
      else if (strncasecmp (s, "&amp;", 5) == 0)
	{
	  s[0] = '&' ; s[1] = 0 ;
	  strcat (s+1, s+5) ;
	}
      else if (strncasecmp (s, "&lt;", 4) == 0)
	{
	  s[0] = '<' ; s[1] = 0 ;
	  strcat (s+1, s+4) ;
	}
      else if (strncasecmp (s, "&gt;", 4) == 0)
	{
	  s[0] = '>' ; s[1] = 0 ;
	  strcat (s+1, s+4) ;
	}
      else if (strncasecmp (s, "&nbsp;", 4) == 0)
	{
	  s[0] = ' ' ; s[1] = 0 ;
	  strcat (s+1, s+6) ;
	}

      ++s ;
    }
 
  return ;
} /* replaceEscapeCodes */
/************************************************************/

static HtmlNode *makeNode (HtmlNodeType type, STORE_HANDLE handle)
/* allocate a node and initialise the type */
{
  HtmlNode *newnode ;

  newnode = (HtmlNode*)halloc (sizeof(HtmlNode), handle) ;
  newnode->type = type ;

  return (newnode) ;
} /* makeNode */
/************************************************************/

static BOOL parseHtml (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 6 ;			/* skip <HTML> */

  skipSpaces (cp) ;

  node = makeNode (HTML_DOC, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : text inside <HTML> not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</HTML>", 7) == 0)
    {
      *cp += 7 ;
    }
  else
    {
      printf ("Warning : <HTML> tag not closed by </HTML> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseHtml */
/************************************************************/

static BOOL parseHead (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 6 ;			/* skip <HEAD> */

  skipSpaces (cp) ;

  node = makeNode (HTML_HEAD, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : HTML inside <head> not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</HEAD>", 7) == 0)
    {
      *cp += 7 ;
    }
  else
    {
      printf ("Warning : <HEAD> tag not closed by </HEAD> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseHead */
/************************************************************/

static BOOL parseBody (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 6 ;			/* skip <BODY> */

  skipSpaces (cp) ;

  node = makeNode (HTML_BODY, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : HTML inside <BODY> not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</BODY>", 7) == 0)
    {
      *cp += 7 ;
    }
  else
    {
      printf ("Warning : <BODY> tag not closed by </BODY> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseBody */
/************************************************************/

static BOOL parseComment (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node ;
  int len ;
  char *start ;

  *cp += 4 ;			/* skip <!-- */

  start = *cp ;
  while (**cp && **cp != '>') { ++(*cp) ; }
  
  if (!**cp)
    {
      *resultnode = 0 ;
      return FALSE ;
    }
  
  node = makeNode (HTML_COMMENT, handle) ;

  len = *cp-start ;

  ++(*cp) ;			/* skip '>' */

  node->text = (char*)halloc ((len+1) * sizeof(char), handle) ;
  
  strncpy (node->text, start, len) ;
  node->text[len] = 0 ;
  
  *resultnode = node ;

  return TRUE ;
} /* parseComment */
/************************************************************/

static BOOL parseTitle (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node ;
  int len, numspaces=0 ;
  char *start ;

  *cp += 7 ;			/* skip <TITLE> */

  skipSpaces (cp) ;

  start = *cp ;

  while (**cp)
    {
      if (strncasecmp (*cp, "</title>", 8) == 0)
	break ;
      if (isspace((int)**cp))
	++numspaces ;
      else
	numspaces = 0 ;
      ++(*cp) ;
    }
  
  node = makeNode (HTML_TITLE, handle) ;
  
  len = (*cp-start) - numspaces ;

  if (**cp)
    *cp += 8 ;

  node->text = (char*)halloc ((len+1) * sizeof(char), handle);
  
  strncpy (node->text, start, len) ;
  node->text[len] = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseTitle */
/************************************************************/

static BOOL parseHeader (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;
  int level ;

  level = (*cp)[2]-'0' ;

  *cp += 4 ;			/* skip <H?> */

  skipSpaces (cp) ;

  node = makeNode (HTML_HEADER, handle) ;
  node->hlevel = level ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : heading%d text not valid !!\n", level) ;
    }

  skipSpaces (cp) ;

  if ((strncasecmp (*cp, "</H", 3) == 0) &&
      (*cp)[3]-'0' == level && (*cp)[4] == '>')
    {
      *cp += 5 ;
    }
  else
    {
      printf ("Warning : <H%d> tag not closed by </H%d> !!\n", level, level) ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseHeader */
/************************************************************/

static BOOL parseCode (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 6 ;			/* skip <CODE> */

  skipSpaces (cp) ;

  node = makeNode (HTML_CODE_STYLE, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : <code> text not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</CODE>", 7) == 0)
    {
      *cp += 7 ;
    }
  else
    {
      printf ("Warning : <CODE> tag not closed by </CODE> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseCode */
/************************************************************/

static BOOL parseBold (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 3 ;			/* skip <B> */

  skipSpaces (cp) ;

  node = makeNode (HTML_BOLD_STYLE, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : HTML inside <B> not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</B>", 3) == 0)
    {
      *cp += 4 ;
    }
  else
    {
      printf ("Warning : <B> tag not closed by </B> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseBold */
/************************************************************/

static BOOL parseStrong (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 8 ;			/* skip <STRONG> */

  skipSpaces (cp) ;

  node = makeNode (HTML_STRONG_STYLE, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : strong text not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</STRONG>", 9) == 0)
    {
      *cp += 9 ;
    }
  else
    {
      printf ("Warning : <STRONG> tag not closed by </STRONG> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseStrong */
/************************************************************/

static BOOL parseItalic (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 3 ;			/* skip <I> */

  skipSpaces (cp) ;

  node = makeNode (HTML_ITALIC_STYLE, handle) ;

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : bold text not valid !!\n") ;
    }

  skipSpaces (cp) ;

  if (strncasecmp (*cp, "</I>", 3) == 0)
    {
      *cp += 4 ;
    }
  else
    {
      printf ("Warning : <I> tag not closed by </I> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseItalic */
/************************************************************/

static BOOL parseText (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node ;
  int len ;
  char *start ;

  start = *cp ;

  while (**cp)
    {
      /* read until beginning of new TAG */
      if (strncasecmp (*cp, "<", 1) == 0)
	break ;
      ++(*cp) ;
    }
  
  if (*cp == start)
    {
      /* an unknown tag had been reached, the text read until that
	 will be of length zero, because parseSection() couldn't
	 recognise it, and passed the text here, where it reads
	 until it finds a '<', which it'll find imediately,
	 so the length will be zero */

      while (**cp)
	{
	  /* read until beginning of new TAG */
	  if (strncasecmp (*cp, ">", 1) == 0)
	    break ;
	  ++(*cp) ;
	}
      ++(*cp) ;
      
      node = makeNode (HTML_UNKNOWN, handle) ;

      /* copy unknown tag into node->text */
      len = (*cp-start) ;
      node->text = (char*)halloc ((len+1) * sizeof(char), handle);
      strncpy (node->text, start, len);
      node->text[len] = 0 ;

      *resultnode = node ;
      return TRUE ;
    }

  node = makeNode (HTML_TEXT, handle) ;
  
  len = (*cp-start) ;
  
  node->text = (char*)halloc ((len+1) * sizeof(char), handle);
  
  strncpy (node->text, start, len) ;
  node->text[len] = 0 ;
  
  replaceEscapeCodes (node->text) ;

  *resultnode = node ;

  return TRUE ;
} /* parseText */
/************************************************************/

static BOOL parseHref (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;
  int hlen = -1;		/* init for compiler happiness */
  int numspaces ;
  char *hstart = NULL;		/* init for compiler happiness */
  BOOL HAVE_HREF, IS_NAME_REF ;

  *cp += 2 ;			/* skip '<A' */

  skipSpaces (cp) ;

  IS_NAME_REF = FALSE ;
  if (strncasecmp (*cp, "HREF=", 5) == 0)
    {
      HAVE_HREF = TRUE ;
      *cp += 5 ;		/* skip 'HREF=' */
    }
  else if (strncasecmp (*cp, "NAME=", 5) == 0)
    {

      HAVE_HREF = TRUE ; 
      IS_NAME_REF = TRUE ;
      *cp += 5 ;		/* skip 'NAME=' */
    }
  else
    {
      printf ("Warning : anchor tag <A without argument !!\n");
      HAVE_HREF = FALSE ;
    }

  if (HAVE_HREF)
    hstart = *cp ;

  /* parse the href destination or if no arg given
     just forward to next '>'*/
  numspaces = 0 ;
  while (**cp)
    {
      if (strncasecmp (*cp, ">", 1) == 0)
	break ;
      if (isspace((int)**cp))
	++numspaces ;
      else
	numspaces = 0 ;
      ++(*cp) ;
    }
  if (HAVE_HREF)
    hlen = (*cp-hstart) - numspaces ;

  if (**cp)
    *cp += 1 ;			/* skip '>' */

  node = makeNode (HTML_HREF, handle) ;

  if (HAVE_HREF)
    {
      if ((hstart[0] == '"') && (hstart[hlen-1] == '"'))
	{
	  ++hstart ;
	  hlen -= 2 ;
	}
      node->isNameRef = IS_NAME_REF ;

      node->link = (char*)halloc ((hlen+1) * sizeof(char), handle);

      strncpy (node->link, hstart, hlen) ;
      node->link[hlen] = 0 ;
    }
  else
    node->link = 0 ;		/* no link then */

  if (!(parseSection (cp, &leftnode, handle)))
    {
      printf ("Warning : referenced text not valid !!\n") ;
    }

  skipSpaces (cp) ;
  if (strncasecmp (*cp, "</a>", 4) == 0)
    {
      *cp += 4 ;
    }
  else
    {
      printf ("Warning : anchor tag not closed by </A> !!\n") ;
    }
  
  node->left = leftnode ;
  node->right = 0 ;

  *resultnode = node ;

  return TRUE ;
} /* parseHref */

/************************************************************/

static BOOL parseImage (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node ;
  int len, srclen, numspaces ;
  char *start, *s ;
  BOOL HAVE_SRC=FALSE ;
  *cp += 4 ;			/* skip '<IMG' */

  skipSpaces (cp) ;

  start = *cp ;

  /* read in the arguments list until next '>'*/
  numspaces = 0 ;
  while (**cp)
    {
      if (strncasecmp (*cp, ">", 1) == 0)
	break ;
      if (isspace((int)**cp))
	++numspaces ;
      else
	numspaces = 0 ;
      ++(*cp) ;
    }

  /* the length of everything between the 
     end of <IMG and the end of the args or the next > */
  len = (*cp-start) - numspaces ;

  if (**cp)
    *cp += 1 ;			/* skip '>' */

  /* now find the SRC= argument */

  s = start ;
  while (*s)
    {
      if (strncasecmp (s, "src=", 4) == 0)
	{
	  HAVE_SRC = TRUE ;
	  break ;
	}
      ++s ;
    }
  if (HAVE_SRC)
    {
      s += 4 ;			/* skip 'src=' */
      len -= 4;

      start = s ;
      srclen = 0 ;

      if (s[0] == '"')	/* if src in quotes then link ends with quote */
	{
	  s++ ; start++ ;
	  while (*s && ++srclen < len && *s != '"')
	    { ++(s) ; }
	  --srclen;		/* discard the quote */
	}
      else
	{ 
	  while (*s && ++srclen < len && !isspace((int)*s))
	    { ++(s) ; }
	}

      node = makeNode (HTML_GIFIMAGE, handle) ;

      /* save the file name of the image */
      node->link = (char*)halloc((srclen+1) * sizeof(char), handle);

      strncpy (node->link, start, srclen) ;
      node->link[srclen] = 0 ;
    }
  else
    {
      node = makeNode (HTML_UNKNOWN, handle) ;
    }

  *resultnode = node ;

  return TRUE ;
} /* parseImage */
/************************************************************/

static BOOL parseListItem (HtmlListType   style,
			   char		**cp, 
			   HtmlNode     **resultnode,
			   STORE_HANDLE   handle)
{
  HtmlNode *node, *leftnode, *rightnode ;
  int lstyle = style ;

  skipSpaces (cp) ;

  /* check, whether the next tag is a valid listitem tag */
  
  /* with <DL> list <LI> and <DD> items are allowed */
  if (lstyle == HTML_LIST_NOINDENT &&
      !(strncasecmp (*cp, "<dd>", 4) == 0 ||
	strncasecmp (*cp, "<li>", 4) == 0 ||
	strncasecmp (*cp, "<dt>", 4) == 0))
    {
      *resultnode = 0 ;
      return FALSE ;
    }
  /* only <LI> items in <UL> or <OL> lists */
  else if ((lstyle == HTML_LIST_BULLET || lstyle == HTML_LIST_NUMBER) &&
	   !(strncasecmp (*cp, "<li>", 4) == 0))
    {
      *resultnode = 0 ;
      return FALSE ;
    }

  if (lstyle == HTML_LIST_NOINDENT)
    {
      /* in <DL> list a <DD> item becomes indented but no bullet */
      if (strncasecmp (*cp, "<dd>", 4) == 0)
	lstyle = HTML_LIST_NOBULLET ;
      else if (strncasecmp (*cp, "<dt>", 4) == 0)
	lstyle = HTML_LIST_NOINDENT_NOBULLET ;
    }
  *cp += 4 ;
  /* now cp stands right after an <LI> and parses the following
     as a normal section */
  
  parseSection (cp, &leftnode, handle) ;
  
  node = makeNode (HTML_LISTITEM, handle) ;
  
  node->left = leftnode ;
  node->lstyle = lstyle ;

  if (parseListItem (style, cp, &rightnode, handle))
    {
      node->right = rightnode ;
    }
  else
    {
      node->right = 0 ;		/* no further list items */
    }

  *resultnode = node ;

  return TRUE ;
} /* parseListItem */
/************************************************************/

static BOOL parseList (int style, char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode ;

  *cp += 4 ;			/* skip <UL> */

#ifdef ALLOW_SECONDLEVEL_LIST_LIST_DOESN_T_YET_WORK
  if (strncasecmp (*cp, "<ul>", 4) == 0 ||
      strncasecmp (*cp, "<ol>", 4) == 0 ||
      strncasecmp (*cp, "<dl>", 4) == 0)
    {
      /* create list item for this list-in-list */
      node = makeNode (HTML_LISTITEM, handle) ;
      
      node->left = leftnode ;
      node->lstyle = lstyle ;

    }
#endif

  parseListItem (style, cp, &leftnode, handle);
  
  skipSpaces (cp) ;
  
  if ((style == HTML_LIST_BULLET && strncasecmp (*cp, "</ul>", 5) == 0) ||
      (style == HTML_LIST_NOINDENT && strncasecmp (*cp, "</dl>", 5) == 0) ||
      (style == HTML_LIST_NUMBER && strncasecmp (*cp, "</ol>", 5) == 0))
    {
      *cp += 5 ;		/* skip </ul> */
    }
  else
    {
      if (style == HTML_LIST_BULLET)
	printf ("Warning : found <UL> without closing </UL> tag !!\n") ;
      else if (style == HTML_LIST_NOINDENT)
	printf ("Warning : found <DL> without closing </DL> tag !!\n") ;
      else if (style == HTML_LIST_NUMBER)
	printf ("Warning : found <OL> without closing </OL> tag !!\n") ;
    }

  node = makeNode (HTML_LIST, handle) ;

  node->left = leftnode ;
  node->lstyle = style ;

  *resultnode = node ;

  return TRUE ;
} /* parseList */
/************************************************************/

static BOOL parseSection (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
{
  HtmlNode *node, *leftnode, *rightnode ;
  static BOOL MODE_PREFORMAT=FALSE, MODE_BLOCKQUOTE=FALSE ;

  if (!MODE_PREFORMAT)
    skipSpaces (cp) ;

  if (!**cp)			/* EOF */
    {
      if (MODE_PREFORMAT)
	printf ("Warning : found <PRE> tag "
		"without closing </PRE> tag !!\n") ;
      if (MODE_BLOCKQUOTE)
	printf ("Warning : found <BLOCKQUOTE> tag "
		"without closing </BLOCKQUOTE> tag !!\n") ;

      *resultnode = 0 ;
      return TRUE ;
    }

  if (strncasecmp (*cp, "<!--", 4) == 0)
    {
      if (!parseComment (cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "<html>", 6) == 0)
    {
      if (!(parseHtml (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</html>", 7) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<head>", 6) == 0)
    {
      if (!(parseHead (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</head>", 7) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<body>", 6) == 0)
    {
      if (!(parseBody (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</body>", 7) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<title>", 7) == 0)
    {
      if (!parseTitle (cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if ((strncasecmp (*cp, "<H", 2) == 0) &&
	   (*cp)[2]-'0' >= 1 && (*cp)[2]-'0' <= 7 && (*cp)[3] == '>')
    {
      if (!parseHeader (cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if ((strncasecmp (*cp, "</H", 3) == 0) &&
	   (*cp)[3]-'0' >= 1 && (*cp)[3]-'0' <= 7 && (*cp)[4] == '>')
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<a", 2) == 0 &&
	   (isspace((int)(*cp)[2]) || (*cp)[2] == '\n'))
    {
      if (!parseHref (cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</a>", 4) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<img", 4) == 0)
    {
      if (!parseImage (cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "<ul>", 4) == 0)
    {
      if (!parseList (HTML_LIST_BULLET, cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "<ol>", 4) == 0)
    {
      if (!parseList (HTML_LIST_NUMBER, cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "<dl>", 4) == 0)
    {
      if (!parseList (HTML_LIST_NOINDENT, cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "<li>", 4) == 0)
    {
      /* LI isn't a section, so we've hit the end of a section */
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<dd>", 4) == 0)
    {
      /* DD isn't a section, so we've hit the end of a section */
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<dt>", 4) == 0)
    {
      /* DT isn't a section, so we've hit the end of a section */
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "</ul>", 5) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "</ol>", 5) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "</dl>", 5) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<hr>", 4) == 0)
    {
      leftnode = makeNode (HTML_RULER, handle) ;
      *cp += 4 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "<p>", 3) == 0)
    {
      leftnode = makeNode (HTML_PARAGRAPH, handle) ;
      *cp += 3 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "</p>", 4) == 0)
    {
      leftnode = makeNode (HTML_PARAGRAPH, handle) ;
      *cp += 4 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "<br>", 4) == 0)
    {
      leftnode = makeNode (HTML_LINEBREAK, handle) ;
      *cp += 4 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "<pre>", 5) == 0)
    {
      if (MODE_PREFORMAT)
	printf ("Warning : nesting of <PRE> tags without effect !!\n") ;
      MODE_PREFORMAT = TRUE ;

      leftnode = makeNode (HTML_STARTPREFORMAT, handle) ;
      *cp += 5 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "</pre>", 6) == 0)
    {
      if (!MODE_PREFORMAT)
	printf ("Warning : found </PRE> without preceeding <PRE>\n") ;
      MODE_PREFORMAT = FALSE ;

      leftnode = makeNode (HTML_ENDPREFORMAT, handle) ;
      *cp += 6 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "<blockquote>", 12) == 0)
    {
      if (!MODE_BLOCKQUOTE)
	{
	  leftnode = makeNode (HTML_STARTBLOCKQUOTE, handle) ;
	  MODE_BLOCKQUOTE = TRUE ;
	}
      else
	printf ("Warning : nesting of <BLOCKQUOTE> tags "
		"without effect !!\n") ;

      *cp += 12 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "</blockquote>", 13) == 0)
    {
      if (MODE_BLOCKQUOTE)
	{
	  leftnode = makeNode (HTML_ENDBLOCKQUOTE, handle) ;
	  MODE_BLOCKQUOTE = FALSE ;
	}
      else
	printf ("Warning : found </BLOCKQUOTE> "
		"without preceeding <BLOCKQUOTE>\n") ;

      *cp += 13 ;
      skipSpaces (cp) ;
    }
  else if (strncasecmp (*cp, "<code>", 6) == 0)
    {
      if (!(parseCode (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</code>", 7) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<b>", 3) == 0)
    {
      if (!(parseBold (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</b>", 4) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<strong>", 8) == 0)
    {
      if (!(parseStrong (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</strong>", 9) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else if (strncasecmp (*cp, "<i>", 3) == 0)
    {
      if (!(parseItalic (cp, &leftnode, handle)))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  else if (strncasecmp (*cp, "</i>", 4) == 0)
    {
      *resultnode = 0 ;
      return TRUE ;
    }
  else
    {
      if (!parseText (cp, &leftnode, handle))
	{
	  *resultnode = 0 ;
	  return FALSE ;
	}
    }
  
  node = makeNode (HTML_SECTION, handle) ;
  node->left = leftnode ;
  if (leftnode->type == 0)
    {
      printf ("section on section \n") ;
    }
  if (parseSection (cp, &rightnode, handle))
    {
      node->right = rightnode ;
      *resultnode = node ;
      return TRUE ;
    }
  else
    {
      node->right = 0 ;
      *resultnode = node ;
      return FALSE ;
    }

} /* parseSection */
/************************************************************/

( run in 1.633 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )