HTML5-DOM

 view release on metacpan or  search on metacpan

third_party/modest/source/myhtml/tokenizer.c  view on Meta::CPAN

        if(html_offset < token_node->raw_length) {
            const char *tagname = myhtml_tree_incomming_buffer_make_data(tree, token_node->raw_begin, token_node->raw_length);
            token_node->tag_id = myhtml_tag_add(tags, tagname, token_node->raw_length, MyHTML_TOKENIZER_STATE_DATA, true);
        }
        else {
            token_node->tag_id = myhtml_tag_add(tags, &html[ (token_node->raw_begin - tree->global_offset) ], token_node->raw_length, MyHTML_TOKENIZER_STATE_DATA, true);
        }
        
        myhtml_tag_set_category(tags, token_node->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_ORDINARY);
    }
}

////
myhtml_token_node_t * myhtml_tokenizer_queue_create_text_node_if_need(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t absolute_html_offset, enum myhtml_token_type type)
{
    if(token_node->tag_id == MyHTML_TAG__UNDEF)
    {
        if(absolute_html_offset > token_node->raw_begin)
        {
            size_t tmp_begin = token_node->element_begin;
            
            token_node->type |= type;
            token_node->tag_id = MyHTML_TAG__TEXT;
            token_node->element_begin = token_node->raw_begin;
            token_node->raw_length = token_node->element_length = absolute_html_offset - token_node->raw_begin;
            
            if(myhtml_queue_add(tree, tmp_begin, token_node) != MyHTML_STATUS_OK) {
                return NULL;
            }
            
            return tree->current_token_node;
        }
    }
    
    return token_node;
}

void myhtml_tokenizer_set_state(myhtml_tree_t* tree, myhtml_token_node_t* token_node)
{
    if((token_node->type & MyHTML_TOKEN_TYPE_CLOSE) == 0)
    {
        if(token_node->tag_id == MyHTML_TAG_NOSCRIPT &&
           (tree->flags & MyHTML_TREE_FLAGS_SCRIPT) == 0)
        {
            myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
        }
        else {
            const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, token_node->tag_id);
            myhtml_tokenizer_state_set(tree) = tag_ctx->data_parser;
        }
    }
    else {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
    }
}

/////////////////////////////////////////////////////////
//// RCDATA
////
/////////////////////////////////////////////////////////
size_t myhtml_tokenizer_state_rcdata(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if(tree->tmp_tag_id == 0) {
        token_node->raw_begin = (html_offset + tree->global_offset);
        
        mythread_queue_node_t* prev_qnode = mythread_queue_get_prev_node(tree->current_qnode);
        
        if(prev_qnode && prev_qnode->args) {
            tree->tmp_tag_id = ((myhtml_token_node_t*)(prev_qnode->args))->tag_id;
        }
        else if(tree->fragment) {
            tree->tmp_tag_id = tree->fragment->tag_id;
        }
    }
    
    while(html_offset < html_size)
    {
        if(html[html_offset] == '<')
        {
            token_node->element_begin = (html_offset + tree->global_offset);
            
            html_offset++;
            myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA_LESS_THAN_SIGN;
            
            break;
        }
        
        html_offset++;
    }
    
    return html_offset;
}

size_t myhtml_tokenizer_state_rcdata_less_than_sign(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if(html[html_offset] == '/')
    {
        html_offset++;
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_OPEN;
    }
    else {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
    }
    
    return html_offset;
}

size_t myhtml_tokenizer_state_rcdata_end_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_A_Z_a_z)
    {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME;
    }
    else {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
    }
    
    return html_offset;
}

bool _myhtml_tokenizer_state_andata_end_tag_name(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t *html_offset, size_t tmp_begin, enum myhtml_token_type type)
{
    token_node->raw_length = (*html_offset + tree->global_offset) - token_node->raw_begin;
    myhtml_check_tag_parser(tree, token_node, html, *html_offset);
    
    if(token_node->tag_id != tree->tmp_tag_id)
    {
        token_node->raw_begin  = tmp_begin;
        token_node->raw_length = 0;
        
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
        
        (*html_offset)++;
        return false;
    }
    
    if((token_node->raw_begin - 2) > tmp_begin)
    {
        size_t tmp_element_begin = token_node->element_begin;
        size_t tmp_raw_begin     = token_node->raw_begin;
        
        token_node->raw_length      = (token_node->raw_begin - 2) - tmp_begin;
        token_node->raw_begin       = tmp_begin;
        token_node->element_begin   = tmp_begin;
        token_node->element_length  = token_node->raw_length;
        token_node->type           |= type;
        token_node->type           ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
        token_node->tag_id          = MyHTML_TAG__TEXT;
        
        /* TODO: return error */
        myhtml_queue_add(tree, *html_offset, token_node);
        
        /* return true values */
        token_node = tree->current_token_node;
        token_node->element_begin = tmp_element_begin;
        token_node->raw_begin = tmp_raw_begin;
    }
    
    token_node->tag_id         = tree->tmp_tag_id;
    token_node->type          |= MyHTML_TOKEN_TYPE_CLOSE;
    token_node->raw_length     = (tree->global_offset + *html_offset) - token_node->raw_begin;
    
    return true;
}

size_t myhtml_tokenizer_state_rcdata_end_tag_name(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    size_t tmp_begin = token_node->raw_begin;
    token_node->raw_begin = html_offset + tree->global_offset;
    
    while(html_offset < html_size)
    {
        if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_WHITESPACE)
        {
            if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) {
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
                
                tree->tmp_tag_id = 0;
                html_offset++;
                
                return html_offset;
            }
            
            break;
        }
        else if(html[html_offset] == '>')
        {
            if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) {
                html_offset++;
                
                token_node = tree->current_token_node;
                token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
                
                if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
                    myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
                    return 0;
                }
                
                tree->tmp_tag_id = 0;
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
                
                return html_offset;
            }
            
            break;
        }
        // check end of tag
        else if(html[html_offset] == '/')
        {
            if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) {
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
                
                tree->tmp_tag_id = 0;
                html_offset++;
                
                return html_offset;
            }
            
            break;
        }
        else if (mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_A_Z_a_z) {
            myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
            break;
        }
        
        html_offset++;

third_party/modest/source/myhtml/tokenizer.c  view on Meta::CPAN

                tree->tmp_tag_id = 0;
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
            }
            
            return html_offset;
        }
        // check end of tag
        else if(html[html_offset] == '/')
        {
            if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RAWTEXT)) {
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
                
                tree->tmp_tag_id = 0;
                html_offset++;
            }
            
            return html_offset;
        }
        else if (mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_A_Z_a_z) {
            token_node->raw_begin = tmp_begin;
            myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RAWTEXT;
            
            return html_offset;
        }
        
        html_offset++;
    }
    
    token_node->raw_begin = tmp_begin;
    return html_offset;
}

/////////////////////////////////////////////////////////
//// PLAINTEXT
////
/////////////////////////////////////////////////////////
size_t myhtml_tokenizer_state_plaintext(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if((token_node->type & MyHTML_TOKEN_TYPE_PLAINTEXT) == 0)
        token_node->type |= MyHTML_TOKEN_TYPE_PLAINTEXT;
    
    token_node->type      ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
    token_node->raw_begin  = (html_offset + tree->global_offset);
    token_node->raw_length = token_node->element_length = (html_size + tree->global_offset) - token_node->raw_begin;
    token_node->tag_id     = MyHTML_TAG__TEXT;
    
    myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
    
    if(myhtml_queue_add(tree, html_size, token_node) != MyHTML_STATUS_OK) {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
        return 0;
    }
    
    return html_size;
}

/////////////////////////////////////////////////////////
//// CDATA
////
/////////////////////////////////////////////////////////
size_t myhtml_tokenizer_state_cdata_section(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if((token_node->type & MyHTML_TOKEN_TYPE_CDATA) == 0)
        token_node->type |= MyHTML_TOKEN_TYPE_CDATA;
    
    while(html_offset < html_size)
    {
        if(html[html_offset] == '>')
        {
            const char *tagname;
            if(html_offset < 2)
                tagname = myhtml_tree_incomming_buffer_make_data(tree,((html_offset + tree->global_offset) - 2), 2);
            else
                tagname = &html[html_offset - 2];
            
            if(tagname[0] == ']' && tagname[1] == ']')
            {
               token_node->raw_length = (((html_offset + tree->global_offset) - 2) - token_node->raw_begin);
                html_offset++;
                
                if(token_node->raw_length) {
                    token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
                    
                    if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
                        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
                        return 0;
                    }
                    
                }
                else {
                    token_node->raw_begin = html_offset + tree->global_offset;
                }
                
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
                break;
            }
        }
        
        html_offset++;
    }
    
    return html_offset;
}

/////////////////////////////////////////////////////////
//// outside of tag
//// %HERE%<div>%HERE%</div>%HERE%
/////////////////////////////////////////////////////////
size_t myhtml_tokenizer_state_data(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    while(html_offset < html_size)
    {
        if(html[html_offset] == '<')
        {
            token_node->element_begin = (tree->global_offset + html_offset);
            
            html_offset++;
            myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_TAG_OPEN;
            
            break;
        }

third_party/modest/source/myhtml/tokenizer.c  view on Meta::CPAN

            token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
            
            if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
                return 0;
            }
            
            myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
            break;
        }
        
        html_offset++;
    }
    
    return html_offset;
}

/////////////////////////////////////////////////////////
//// Parse error
//// find >
/////////////////////////////////////////////////////////
size_t myhtml_tokenizer_state_parse_error_stop(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    tree->tokenizer_status = MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION;
    return html_size;
}

mystatus_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
{
    myhtml->parse_state_func = (myhtml_tokenizer_state_f*)mycore_malloc(sizeof(myhtml_tokenizer_state_f) *
                                                                   ((MyHTML_TOKENIZER_STATE_LAST_ENTRY *
                                                                     MyHTML_TOKENIZER_STATE_LAST_ENTRY) + 1));
    
    if(myhtml->parse_state_func == NULL)
        return MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION;
    
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DATA]                          = myhtml_tokenizer_state_data;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_TAG_OPEN]                      = myhtml_tokenizer_state_tag_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_TAG_NAME]                      = myhtml_tokenizer_state_tag_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_END_TAG_OPEN]                  = myhtml_tokenizer_state_end_tag_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SELF_CLOSING_START_TAG]        = myhtml_tokenizer_state_self_closing_start_tag;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_MARKUP_DECLARATION_OPEN]       = myhtml_tokenizer_state_markup_declaration_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME]         = myhtml_tokenizer_state_before_attribute_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_NAME]                = myhtml_tokenizer_state_attribute_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_NAME]          = myhtml_tokenizer_state_after_attribute_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_VALUE]        = myhtml_tokenizer_state_before_attribute_value;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED]  = myhtml_tokenizer_state_after_attribute_value_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED] = myhtml_tokenizer_state_attribute_value_double_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED] = myhtml_tokenizer_state_attribute_value_single_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED]      = myhtml_tokenizer_state_attribute_value_unquoted;
    
    // comments
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_START]                 = myhtml_tokenizer_state_comment_start;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_START_DASH]            = myhtml_tokenizer_state_comment_start_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT]                       = myhtml_tokenizer_state_comment;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_END]                   = myhtml_tokenizer_state_comment_end;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_END_DASH]              = myhtml_tokenizer_state_comment_end_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_END_BANG]              = myhtml_tokenizer_state_comment_end_bang;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BOGUS_COMMENT]                 = myhtml_tokenizer_state_bogus_comment;
    
    // cdata
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_CDATA_SECTION]                 = myhtml_tokenizer_state_cdata_section;
    
    // rcdata
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA]                        = myhtml_tokenizer_state_rcdata;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA_LESS_THAN_SIGN]         = myhtml_tokenizer_state_rcdata_less_than_sign;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_OPEN]           = myhtml_tokenizer_state_rcdata_end_tag_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME]           = myhtml_tokenizer_state_rcdata_end_tag_name;
    
    // rawtext
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT]                        = myhtml_tokenizer_state_rawtext;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT_LESS_THAN_SIGN]         = myhtml_tokenizer_state_rawtext_less_than_sign;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_OPEN]           = myhtml_tokenizer_state_rawtext_end_tag_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_NAME]           = myhtml_tokenizer_state_rawtext_end_tag_name;
    
    // plaintext
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_PLAINTEXT]                     = myhtml_tokenizer_state_plaintext;
    
    // doctype
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE]                                 = myhtml_tokenizer_state_doctype;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_NAME]                     = myhtml_tokenizer_state_before_doctype_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_NAME]                            = myhtml_tokenizer_state_doctype_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_NAME]                      = myhtml_tokenizer_state_after_doctype_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_CUSTOM_AFTER_DOCTYPE_NAME_A_Z]           = myhtml_tokenizer_state_custom_after_doctype_name_a_z;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER]        = myhtml_tokenizer_state_before_doctype_public_identifier;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED] = myhtml_tokenizer_state_doctype_public_identifier_double_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED] = myhtml_tokenizer_state_doctype_public_identifier_single_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER]         = myhtml_tokenizer_state_after_doctype_public_identifier;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED] = myhtml_tokenizer_state_doctype_system_identifier_double_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED] = myhtml_tokenizer_state_doctype_system_identifier_single_quoted;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER]         = myhtml_tokenizer_state_after_doctype_system_identifier;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BOGUS_DOCTYPE]                           = myhtml_tokenizer_state_bogus_doctype;
    
    // script
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA]                               = myhtml_tokenizer_state_script_data;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_LESS_THAN_SIGN]                = myhtml_tokenizer_state_script_data_less_than_sign;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_OPEN]                  = myhtml_tokenizer_state_script_data_end_tag_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_NAME]                  = myhtml_tokenizer_state_script_data_end_tag_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START]                  = myhtml_tokenizer_state_script_data_escape_start;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START_DASH]             = myhtml_tokenizer_state_script_data_escape_start_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED]                       = myhtml_tokenizer_state_script_data_escaped;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_DASH]                  = myhtml_tokenizer_state_script_data_escaped_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH]             = myhtml_tokenizer_state_script_data_escaped_dash_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN]        = myhtml_tokenizer_state_script_data_escaped_less_than_sign;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN]          = myhtml_tokenizer_state_script_data_escaped_end_tag_open;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME]          = myhtml_tokenizer_state_script_data_escaped_end_tag_name;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START]           = myhtml_tokenizer_state_script_data_double_escape_start;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED]                = myhtml_tokenizer_state_script_data_double_escaped;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH]           = myhtml_tokenizer_state_script_data_double_escaped_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH]      = myhtml_tokenizer_state_script_data_double_escaped_dash_dash;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN] = myhtml_tokenizer_state_script_data_double_escaped_less_than_sign;
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END]             = myhtml_tokenizer_state_script_data_double_escape_end;
    
    myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP]                          = myhtml_tokenizer_state_parse_error_stop;
    
    
    // ***********
    // for ends
    // *********
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DATA)]                          = myhtml_tokenizer_end_state_data;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_TAG_OPEN)]                      = myhtml_tokenizer_end_state_tag_open;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_TAG_NAME)]                      = myhtml_tokenizer_end_state_tag_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_END_TAG_OPEN)]                  = myhtml_tokenizer_end_state_end_tag_open;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SELF_CLOSING_START_TAG)]        = myhtml_tokenizer_end_state_self_closing_start_tag;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_MARKUP_DECLARATION_OPEN)]       = myhtml_tokenizer_end_state_markup_declaration_open;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME)]         = myhtml_tokenizer_end_state_before_attribute_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_ATTRIBUTE_NAME)]                = myhtml_tokenizer_end_state_attribute_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_NAME)]          = myhtml_tokenizer_end_state_after_attribute_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_VALUE)]        = myhtml_tokenizer_end_state_before_attribute_value;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED)] = myhtml_tokenizer_end_state_attribute_value_double_quoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_attribute_value_single_quoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED)]      = myhtml_tokenizer_end_state_attribute_value_unquoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED)]  = myhtml_tokenizer_end_state_after_attribute_value_quoted;
    
    // for ends comments
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_COMMENT_START)]                 = myhtml_tokenizer_end_state_comment_start;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_COMMENT_START_DASH)]            = myhtml_tokenizer_end_state_comment_start_dash;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_COMMENT)]                       = myhtml_tokenizer_end_state_comment;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_COMMENT_END)]                   = myhtml_tokenizer_end_state_comment_end;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_COMMENT_END_DASH)]              = myhtml_tokenizer_end_state_comment_end_dash;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_COMMENT_END_BANG)]              = myhtml_tokenizer_end_state_comment_end_bang;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_BOGUS_COMMENT)]                 = myhtml_tokenizer_end_state_bogus_comment;
    
    // for ends cdata
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_CDATA_SECTION)]                 = myhtml_tokenizer_end_state_cdata_section;
    
    // rcdata
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RCDATA)]                        = myhtml_tokenizer_end_state_rcdata;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RCDATA_LESS_THAN_SIGN)]         = myhtml_tokenizer_end_state_rcdata_less_than_sign;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_OPEN)]           = myhtml_tokenizer_end_state_rcdata_end_tag_open;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME)]           = myhtml_tokenizer_end_state_rcdata_end_tag_name;
    
    // rawtext
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RAWTEXT)]                        = myhtml_tokenizer_end_state_rawtext;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RAWTEXT_LESS_THAN_SIGN)]         = myhtml_tokenizer_end_state_rawtext_less_than_sign;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_OPEN)]           = myhtml_tokenizer_end_state_rawtext_end_tag_open;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_NAME)]           = myhtml_tokenizer_end_state_rawtext_end_tag_name;
    
    // for ends plaintext
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_PLAINTEXT)]                     = myhtml_tokenizer_end_state_plaintext;
    
    // for ends doctype
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DOCTYPE)]                                 = myhtml_tokenizer_end_state_doctype;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_NAME)]                     = myhtml_tokenizer_end_state_before_doctype_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DOCTYPE_NAME)]                            = myhtml_tokenizer_end_state_doctype_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_NAME)]                      = myhtml_tokenizer_end_state_after_doctype_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_CUSTOM_AFTER_DOCTYPE_NAME_A_Z)]           = myhtml_tokenizer_end_state_custom_after_doctype_name_a_z;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER)]        = myhtml_tokenizer_end_state_before_doctype_public_identifier;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_public_identifier_double_quoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_public_identifier_single_quoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER)]         = myhtml_tokenizer_end_state_after_doctype_public_identifier;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_system_identifier_double_quoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_system_identifier_single_quoted;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER)]         = myhtml_tokenizer_end_state_after_doctype_system_identifier;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_BOGUS_DOCTYPE)]                           = myhtml_tokenizer_end_state_bogus_doctype;
    
    // for ends script
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA)]                               = myhtml_tokenizer_end_state_script_data;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_LESS_THAN_SIGN)]                = myhtml_tokenizer_end_state_script_data_less_than_sign;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_OPEN)]                  = myhtml_tokenizer_end_state_script_data_end_tag_open;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_NAME)]                  = myhtml_tokenizer_end_state_script_data_end_tag_name;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START)]                  = myhtml_tokenizer_end_state_script_data_escape_start;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START_DASH)]             = myhtml_tokenizer_end_state_script_data_escape_start_dash;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED)]                       = myhtml_tokenizer_end_state_script_data_escaped;
    myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
                              + MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_DASH)]                  = myhtml_tokenizer_end_state_script_data_escaped_dash;



( run in 0.541 second using v1.01-cache-2.11-cpan-524268b4103 )