XML-Bare

 view release on metacpan or  search on metacpan

parser.c  view on Meta::CPAN

  }
  return 0;
}

/*
 * Allocate a zero-initialised node whose `parent` field is set to newparent.
 *
 * Each successfully created node is stamped with a `pos` value taken from a
 * function-local static counter: the first node gets 1, the next 2, and so
 * on. The counter is shared by every call in the process and is never reset.
 *
 * newparent: value stored in the new node's `parent` field; it is not
 *            dereferenced or modified here.
 *
 * Returns the new node, or NULL when the allocation fails (the counter is
 * left untouched in that case). The node is released with del_nodec().
 */
struct nodec *new_nodecp( struct nodec *newparent ) {
  static int pos = 0;
  // calloc hands back zeroed storage, so no separate memset is required
  struct nodec *self = calloc( 1, sizeof *self );
  if( !self ) return NULL;
  self->parent = newparent;
  self->pos    = ++pos;
  return self;
}

/*
 * Allocate a node with every field zeroed (no parent, no children, no
 * attributes set).
 *
 * Returns the new node, or NULL when the allocation fails. The node is
 * released with del_nodec().
 */
struct nodec *new_nodec() {
  // calloc both allocates and zero-fills; a failed allocation yields NULL
  // instead of the undefined behaviour of memset() on a null pointer
  struct nodec *self = calloc( 1, sizeof *self );
  return self;
}

/*
 * Free a node and everything owned by it.
 *
 * Walks the child list starting at node->firstchild and frees each child
 * recursively, then frees every attribute on the node->firstatt list, and
 * finally frees the node itself.
 *
 * Only the nodec/attc structures are freed. Pointers stored inside them
 * (for example `value` and `comment`, which parserc_parse points into the
 * input buffer) are not released here.
 *
 * node: root of the subtree to delete; passing NULL is a no-op, mirroring
 *       free(NULL).
 *
 * Recursion depth equals the nesting depth of the subtree.
 */
void del_nodec( struct nodec *node ) {
  struct nodec *child;
  struct attc *att;

  if( !node ) return;

  child = node->firstchild;
  while( child ) {
    struct nodec *following = child->next; // fetch before child is freed
    del_nodec( child );
    child = following;
  }

  att = node->firstatt;
  while( att ) {
    struct attc *following = att->next; // fetch before att is freed
    free( att );
    att = following;
  }

  free( node );
}

/*
 * Allocate a zero-initialised attribute record whose `parent` field is set
 * to newparent.
 *
 * newparent: value stored in the new attribute's `parent` field; it is not
 *            dereferenced here, and the attribute is not linked into any
 *            list by this function.
 *
 * Returns the new attribute, or NULL when the allocation fails. del_nodec()
 * frees attributes reachable through a node's firstatt list with free().
 */
struct attc* new_attc( struct nodec *newparent ) {
  // calloc returns zeroed storage, replacing the malloc + memset pair
  struct attc *self = calloc( 1, sizeof *self );
  if( !self ) return NULL;
  self->parent = newparent;
  return self;
}

//#define DEBUG

#define ST_val_1 1
#define ST_val_x 2
#define ST_comment_1dash 3
#define ST_comment_2dash 4
#define ST_comment 5
#define ST_comment_x 6
#define ST_pi 7
#define ST_bang 24
#define ST_cdata 8
#define ST_name_1 9
#define ST_name_x 10
#define ST_name_gap 11
#define ST_att_name1 12
#define ST_att_space 13
#define ST_att_name 14
#define ST_att_nameqs 15
#define ST_att_nameqsdone 16
#define ST_att_eq1 17
#define ST_att_eqx 18
#define ST_att_quot 19
#define ST_att_quots 20
#define ST_att_tick 21
#define ST_ename_1 22
#define ST_ename_x 23

int parserc_parse( struct parserc *self, char *xmlin ) {
    // Variables that represent current 'state'
    struct nodec *root    = NULL;
    char  *tagname        = NULL; int    tagname_len    = 0;
    char  *attname        = NULL; int    attname_len    = 0;
    char  *attval         = NULL; int    attval_len     = 0;
    int    att_has_val    = 0;
    struct nodec *curnode = NULL;
    struct attc  *curatt  = NULL;
    int    last_state     = 0;
    self->rootpos = xmlin;
    
    // Variables used temporarily during processing
    struct nodec *temp;
    char   *cpos          = &xmlin[0];
    int    res            = 0;
    int    dent;
    register int let;
    
    if( self->last_state ) {
      #ifdef DEBUG
      printf( "Resuming parse in state %i\n", self->last_state );
      #endif
      self->err = 0;
      root = self->rootnode;
      curnode = self->curnode;
      curatt = self->curatt;
      tagname = self->tagname; tagname_len = self->tagname_len;
      attname = self->attname; attname_len = self->attname_len;
      attval = self->attval; attval_len = self->attval_len;
      att_has_val = self->att_has_val;
      switch( self->last_state ) {
        case ST_val_1: goto val_1;
        case ST_val_x: goto val_x;
        case ST_comment_1dash: goto comment_1dash;
        case ST_comment_2dash: goto comment_2dash;
        case ST_comment: goto comment;
        case ST_comment_x: goto comment_x;
        case ST_pi: goto pi;
        case ST_bang: goto bang;
        case ST_cdata: goto cdata;
        case ST_name_1: goto name_1;
        case ST_name_x: goto name_x;
        case ST_name_gap: goto name_gap;
        case ST_att_name1: goto att_name1;
        case ST_att_space: goto att_space;
        case ST_att_name: goto att_name;
        case ST_att_nameqs: goto att_nameqs;
        case ST_att_nameqsdone: goto att_nameqsdone;
        case ST_att_eq1: goto att_eq1;
        case ST_att_eqx: goto att_eqx;
        case ST_att_quot: goto att_quot;
        case ST_att_quots: goto att_quots;
        case ST_att_tick: goto att_tick;
        case ST_ename_1: goto ename_1;
        case ST_ename_x: goto ename_x;
      }
    }
    else {
      self->err = 0;
      curnode = root = self->rootnode = new_nodec();
    }
    
    #ifdef DEBUG
    printf("Entry to C Parser\n");
    #endif
    
    val_1:
      #ifdef DEBUG
      printf("val_1: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_val_1; goto done;
        case '<': goto val_x;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 1;
      }
      curnode->numvals++;
      cpos++;
      
    val_x:
      #ifdef DEBUG
      printf("val_x: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_val_x; goto done;
        case '<':
          switch( *(cpos+1) ) {
            case '!':
              if( *(cpos+2) == '[' ) { // <![
                //if( !strncmp( cpos+3, "CDATA", 5 ) ) {
                if( *(cpos+3) == 'C' &&
                    *(cpos+4) == 'D' &&
                    *(cpos+5) == 'A' &&
                    *(cpos+6) == 'T' &&
                    *(cpos+7) == 'A'    ) {
                  cpos += 9;
                  curnode->type = 1;
                  goto cdata;
                }
                else {
                  cpos++; cpos++;
                  goto val_x;//actually goto error...
                }
              }
              else if( *(cpos+2) == '-' && // <!--
                *(cpos+3) == '-' ) {
                  cpos += 4;
                  goto comment;
              }
              else {
                cpos++;
                goto bang;
              }
            case '?':
              cpos+=2;
              goto pi;
          }
          tagname_len = 0; // for safety
          cpos++;
          goto name_1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto val_x;
      
    comment_1dash:
      cpos++;
      let = *cpos;
      if( let == '-' ) goto comment_2dash;
      if( !let ) { last_state = ST_comment_1dash; goto done; }
      goto comment_x;
      
    comment_2dash:
      cpos++;
      let = *cpos;
      if( let == '>' ) {
        cpos++;
        goto val_1;
      }
      if( !let ) { last_state = ST_comment_2dash; goto done; }
      goto comment_x;
      
    comment:
      let = *cpos;
      switch( let ) {
        case 0:   last_state = ST_comment; goto done;
        case '-': goto comment_1dash;
      }
      if( !curnode->numcoms ) {
        curnode->comment = cpos;
        curnode->comlen = 1;
      }
      curnode->numcoms++;
      cpos++;
    
    comment_x:
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_comment_x; goto done;
        case '-': goto comment_1dash;
      }
      if( curnode->numcoms == 1 ) curnode->comlen++;
      cpos++;
      goto comment_x;
      
    pi:
      let = *cpos;
      if( let == '?' && *(cpos+1) == '>' ) {
        cpos += 2;
        goto val_1;
      }
      if( !let ) { last_state = ST_pi; goto done; }
      cpos++;
      goto pi;

    bang:
      let = *cpos;
      if( let == '>' ) {
        cpos++;
        goto val_1;
      }
      if( !let ) { last_state = ST_bang; goto done; }
      cpos++;
      goto bang;
    
    cdata:
      let = *cpos;
      if( !let ) { last_state = ST_cdata; goto done; }
      if( let == ']' && *(cpos+1) == ']' && *(cpos+2) == '>' ) {
        cpos += 3;
        goto val_1;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 0;
        curnode->numvals = 1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto cdata;
      
    name_1:
      #ifdef DEBUG
      printf("name_1: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_name_1; goto done;        
        case ' ':
        case 0x0d:
        case 0x0a:
          cpos++;
          goto name_1;
        case '/': // regular closing tag
          tagname_len = 0; // needed to reset
          cpos++;
          goto ename_1;
      }
      tagname       = cpos;
      tagname_len   = 1;
      cpos++;
      goto name_x;
      
    name_x:
      #ifdef DEBUG
      printf("name_x: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_name_x; goto done;
        case ' ':
        case 0x0d:
        case 0x0a:
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          attname_len = 0;
          cpos++;
          goto name_gap;
        case '>':
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          cpos++;
          goto val_1;
        case '/': // self closing
          temp = nodec_addchildr( curnode, tagname, tagname_len );
          temp->z = cpos +1 - xmlin;
          tagname_len            = 0;
          cpos+=2;
          goto val_1;
      }
      
      tagname_len++;
      cpos++;
      goto name_x;
          
    name_gap:
      let = *cpos;
      switch( *cpos ) {
        case 0: last_state = ST_name_gap; goto done;
        case ' ':
        case 0x0d:
        case 0x0a:

parser.c  view on Meta::CPAN

    struct nodec *root    = NULL;
    char  *tagname        = NULL; int    tagname_len    = 0;
    char  *attname        = NULL; int    attname_len    = 0;
    char  *attval         = NULL; int    attval_len     = 0;
    int    att_has_val    = 0;
    struct nodec *curnode = NULL;
    struct attc  *curatt  = NULL;
    int    last_state     = 0;
    self->rootpos = xmlin;
    
    // Variables used temporarily during processing
    struct nodec *temp;
    char   *cpos          = &xmlin[0];
    int    res            = 0;
    int    dent;
    register int let;
    
    if( self->last_state ) {
      return -1; // unsafe doesn't support this
    }
    else {
      self->err = 0;
      curnode = root = self->rootnode = new_nodec();
    }
    
    #ifdef DEBUG
    printf("Entry to C Parser\n");
    #endif
    
    u_val_1: // content
      #ifdef DEBUG
      printf("val_1: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case 0: last_state = ST_val_1; goto u_done;
        case '<': goto u_val_x;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 1;
      }
      curnode->numvals++;
      cpos++;
      
    u_val_x: // content
      #ifdef DEBUG
      printf("val_x: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case 0: last_state = ST_val_x; goto u_done;
        case '<':
          if( *(cpos+1) == '!' &&
              *(cpos+2) == '[' &&
              *(cpos+3) == 'C' &&
              *(cpos+4) == 'D' &&
              *(cpos+5) == 'A' &&
              *(cpos+6) == 'T' &&
              *(cpos+7) == 'A'    ) {
            cpos += 9;
            curnode->type = 1;
            goto u_cdata;
          }
          
          tagname_len = 0; // for safety
          cpos++;
          goto u_name_1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto u_val_x;
    
    u_cdata:
      if( *cpos == ']' && *(cpos+1) == ']' && *(cpos+2) == '>' ) {
        cpos += 3;
        goto u_val_1;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 0;
        curnode->numvals = 1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto u_cdata;
      
    u_name_1: // node name
      #ifdef DEBUG
      printf("name_1: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case '/': // regular closing tag
          tagname_len = 0; // needed to reset
          cpos++;
          goto u_ename_1;
      }
      tagname       = cpos;
      tagname_len   = 1;
      cpos++;
      goto u_name_x;
      
    u_name_x: // node name
      #ifdef DEBUG
      printf("name_x: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case ' ':
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          attname_len = 0;
          cpos++;
          goto u_name_gap;
        case '>':
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          cpos++;
          goto u_val_1;
        case '/': // self closing
          temp = nodec_addchildr( curnode, tagname, tagname_len );
          tagname_len = 0;
          cpos+=2;
          goto u_val_1;
      }
      
      tagname_len++;
      cpos++;
      goto u_name_x;
          
    u_name_gap: // node name gap
      switch( *cpos ) {
        case ' ':
        case '>':
          cpos++;
          goto u_val_1;
        case '/': // self closing
          curnode = curnode->parent;
          if( !curnode ) goto u_done;
          cpos += 2; // am assuming next char is >
          goto u_val_1;
      }
        
    u_att_name1:
      #ifdef DEBUG
      printf("attname1: %c\n", *cpos);
      #endif
      att_has_val = 0;
      attname = cpos;



( run in 0.633 second using v1.01-cache-2.11-cpan-39bf76dae61 )