HTML5-DOM
view release on metacpan or search on metacpan
third_party/modest/source/myhtml/tokenizer.c view on Meta::CPAN
if(html_offset < token_node->raw_length) {
const char *tagname = myhtml_tree_incomming_buffer_make_data(tree, token_node->raw_begin, token_node->raw_length);
token_node->tag_id = myhtml_tag_add(tags, tagname, token_node->raw_length, MyHTML_TOKENIZER_STATE_DATA, true);
}
else {
token_node->tag_id = myhtml_tag_add(tags, &html[ (token_node->raw_begin - tree->global_offset) ], token_node->raw_length, MyHTML_TOKENIZER_STATE_DATA, true);
}
myhtml_tag_set_category(tags, token_node->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_ORDINARY);
}
}
////
/*
 * Emit the character data collected so far as a text token, if there is any.
 *
 * Nothing happens unless token_node is still untagged (MyHTML_TAG__UNDEF)
 * and absolute_html_offset lies strictly after token_node->raw_begin.
 * In that case the node becomes a MyHTML_TAG__TEXT token spanning
 * [raw_begin, absolute_html_offset), the caller supplied `type` bits are
 * OR-ed into its type, and it is passed to myhtml_queue_add().
 *
 * Returns:
 *   - tree->current_token_node after a successful myhtml_queue_add();
 *   - NULL when myhtml_queue_add() does not return MyHTML_STATUS_OK;
 *   - token_node, untouched, when there was nothing to emit.
 *
 * The html argument is not used by this function.
 */
myhtml_token_node_t * myhtml_tokenizer_queue_create_text_node_if_need(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t absolute_html_offset, enum myhtml_token_type type)
{
    /* already tagged, or no characters between raw_begin and the offset */
    if(token_node->tag_id != MyHTML_TAG__UNDEF || absolute_html_offset <= token_node->raw_begin)
        return token_node;

    /* element_begin is overwritten below; the old value goes to the queue call */
    size_t previous_element_begin = token_node->element_begin;

    token_node->tag_id        = MyHTML_TAG__TEXT;
    token_node->type         |= type;
    token_node->element_begin = token_node->raw_begin;

    token_node->raw_length     = absolute_html_offset - token_node->raw_begin;
    token_node->element_length = token_node->raw_length;

    if(myhtml_queue_add(tree, previous_element_begin, token_node) == MyHTML_STATUS_OK)
        return tree->current_token_node;

    return NULL;
}
/*
 * Choose the tokenizer state that follows the tag held in token_node.
 *
 *   - Closing tags (MyHTML_TOKEN_TYPE_CLOSE set) switch to the DATA state.
 *   - An opening <noscript> while MyHTML_TREE_FLAGS_SCRIPT is not set in
 *     tree->flags also switches to the DATA state.
 *   - Any other opening tag uses the data_parser state stored in its tag
 *     context, looked up with myhtml_tag_get_by_id().
 */
void myhtml_tokenizer_set_state(myhtml_tree_t* tree, myhtml_token_node_t* token_node)
{
    if(token_node->type & MyHTML_TOKEN_TYPE_CLOSE) {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
        return;
    }

    if(token_node->tag_id == MyHTML_TAG_NOSCRIPT && !(tree->flags & MyHTML_TREE_FLAGS_SCRIPT)) {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
        return;
    }

    /* per-tag content model: the tag context names the state to continue in */
    const myhtml_tag_context_t *context = myhtml_tag_get_by_id(tree->tags, token_node->tag_id);
    myhtml_tokenizer_state_set(tree) = context->data_parser;
}
/////////////////////////////////////////////////////////
//// RCDATA
////
/////////////////////////////////////////////////////////
/*
 * RCDATA state: scan forward until the next '<'.
 *
 * On first entry (tree->tmp_tag_id == 0) the start of the raw data is
 * recorded in token_node->raw_begin and tree->tmp_tag_id is filled in from
 * the token stored in the previous queue node, or, failing that, from
 * tree->fragment when one is set.
 *
 * When a '<' is found its absolute offset is stored in
 * token_node->element_begin, the tokenizer switches to
 * RCDATA_LESS_THAN_SIGN, and the offset just past the '<' is returned.
 * Otherwise html_size is reached and returned with the state unchanged.
 */
size_t myhtml_tokenizer_state_rcdata(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if(!tree->tmp_tag_id) {
        token_node->raw_begin = tree->global_offset + html_offset;

        mythread_queue_node_t* previous = mythread_queue_get_prev_node(tree->current_qnode);

        if(previous != NULL && previous->args != NULL) {
            myhtml_token_node_t* previous_token = (myhtml_token_node_t*)(previous->args);
            tree->tmp_tag_id = previous_token->tag_id;
        }
        else if(tree->fragment != NULL) {
            tree->tmp_tag_id = tree->fragment->tag_id;
        }
    }

    for(; html_offset < html_size; html_offset++)
    {
        if(html[html_offset] != '<')
            continue;

        token_node->element_begin = tree->global_offset + html_offset;
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA_LESS_THAN_SIGN;

        /* resume right after the '<' */
        return html_offset + 1;
    }

    return html_offset;
}
/*
 * RCDATA "less-than sign" state: inspect the character after a '<'.
 *
 * A '/' is consumed and leads to RCDATA_END_TAG_OPEN. Any other character
 * is left in place (the offset is returned unchanged) and the tokenizer
 * goes back to RCDATA. token_node and html_size are not used.
 */
size_t myhtml_tokenizer_state_rcdata_less_than_sign(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    if(html[html_offset] != '/') {
        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
        return html_offset;
    }

    myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_OPEN;
    return html_offset + 1;
}
/*
 * RCDATA "end tag open" state: decide whether "</" starts an end tag name.
 *
 * If the current character is classified as A-Z/a-z by
 * mycore_tokenizer_chars_map the tokenizer moves to RCDATA_END_TAG_NAME;
 * otherwise it falls back to RCDATA. The character itself is not consumed:
 * html_offset is returned as received. token_node and html_size are unused.
 */
size_t myhtml_tokenizer_state_rcdata_end_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    const unsigned char current = (unsigned char)html[html_offset];
    const bool is_letter = (mycore_tokenizer_chars_map[current] == MyCORE_STRING_MAP_CHAR_A_Z_a_z);

    myhtml_tokenizer_state_set(tree) = is_letter
        ? MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME
        : MyHTML_TOKENIZER_STATE_RCDATA;

    return html_offset;
}
/*
 * Shared helper for the "end tag name" states; the callers visible in this
 * file pass MyHTML_TOKEN_TYPE_RCDATA or MyHTML_TOKEN_TYPE_RAWTEXT as `type`.
 *
 * It checks whether the candidate end tag name that starts at
 * token_node->raw_begin and stops at *html_offset names the same tag as
 * tree->tmp_tag_id.
 *
 * Parameters:
 *   tree        - parser tree; tmp_tag_id, global_offset and
 *                 current_token_node are read from it.
 *   token_node  - token being built; raw_begin must already point at the
 *                 first character of the candidate tag name.
 *   html        - current input chunk, forwarded to myhtml_check_tag_parser().
 *   html_offset - in/out offset inside html; advanced by one on mismatch only.
 *   tmp_begin   - absolute offset where the pending character data started
 *                 (the raw_begin value the caller saved before overwriting it).
 *   type        - token type bits OR-ed into the text token emitted for the
 *                 pending character data.
 *
 * Returns false when the name does not match tree->tmp_tag_id; the token is
 * then rewound to tmp_begin. Returns true when it matches; the (possibly new)
 * current token is then marked as a closing tag with tag_id tree->tmp_tag_id.
 */
bool _myhtml_tokenizer_state_andata_end_tag_name(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t *html_offset, size_t tmp_begin, enum myhtml_token_type type)
{
/* length of the candidate name: from raw_begin up to the current absolute offset */
token_node->raw_length = (*html_offset + tree->global_offset) - token_node->raw_begin;
/* presumably resolves token_node->tag_id from that name span; confirm against myhtml_check_tag_parser */
myhtml_check_tag_parser(tree, token_node, html, *html_offset);
if(token_node->tag_id != tree->tmp_tag_id)
{
/* not the expected end tag: rewind the token to the start of the pending data */
token_node->raw_begin = tmp_begin;
token_node->raw_length = 0;
/* NOTE(review): the state is set to RCDATA regardless of `type`; the RAWTEXT caller also gets this value. Confirm this is intended. */
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
(*html_offset)++;
return false;
}
/* raw_begin - 2 presumably steps back over the "</" that precedes the name; anything between tmp_begin and that point is pending character data */
if((token_node->raw_begin - 2) > tmp_begin)
{
/* saved so they can be copied onto the token that follows the text token */
size_t tmp_element_begin = token_node->element_begin;
size_t tmp_raw_begin = token_node->raw_begin;
/* turn the current node into a text token covering the pending data */
token_node->raw_length = (token_node->raw_begin - 2) - tmp_begin;
token_node->raw_begin = tmp_begin;
token_node->element_begin = tmp_begin;
token_node->element_length = token_node->raw_length;
token_node->type |= type;
/* clears the WHITESPACE bit if it was set */
token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
token_node->tag_id = MyHTML_TAG__TEXT;
/* TODO: return error (the status returned by myhtml_queue_add is currently ignored) */
myhtml_queue_add(tree, *html_offset, token_node);
/* continue on tree->current_token_node and restore the saved end tag offsets on it */
token_node = tree->current_token_node;
token_node->element_begin = tmp_element_begin;
token_node->raw_begin = tmp_raw_begin;
}
/* mark the token as the closing tag that was being waited for */
token_node->tag_id = tree->tmp_tag_id;
token_node->type |= MyHTML_TOKEN_TYPE_CLOSE;
token_node->raw_length = (tree->global_offset + *html_offset) - token_node->raw_begin;
return true;
}
size_t myhtml_tokenizer_state_rcdata_end_tag_name(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
size_t tmp_begin = token_node->raw_begin;
token_node->raw_begin = html_offset + tree->global_offset;
while(html_offset < html_size)
{
if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_WHITESPACE)
{
if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
tree->tmp_tag_id = 0;
html_offset++;
return html_offset;
}
break;
}
else if(html[html_offset] == '>')
{
if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) {
html_offset++;
token_node = tree->current_token_node;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
return 0;
}
tree->tmp_tag_id = 0;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
return html_offset;
}
break;
}
// check end of tag
else if(html[html_offset] == '/')
{
if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
tree->tmp_tag_id = 0;
html_offset++;
return html_offset;
}
break;
}
else if (mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_A_Z_a_z) {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA;
break;
}
html_offset++;
third_party/modest/source/myhtml/tokenizer.c view on Meta::CPAN
tree->tmp_tag_id = 0;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
return html_offset;
}
// check end of tag
else if(html[html_offset] == '/')
{
if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RAWTEXT)) {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
tree->tmp_tag_id = 0;
html_offset++;
}
return html_offset;
}
else if (mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_A_Z_a_z) {
token_node->raw_begin = tmp_begin;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RAWTEXT;
return html_offset;
}
html_offset++;
}
token_node->raw_begin = tmp_begin;
return html_offset;
}
/////////////////////////////////////////////////////////
//// PLAINTEXT
////
/////////////////////////////////////////////////////////
/*
 * PLAINTEXT state: everything from html_offset to the end of the chunk
 * becomes a single text token.
 *
 * The token gets the PLAINTEXT type bit, loses the WHITESPACE bit, is tagged
 * MyHTML_TAG__TEXT and spans [html_offset, html_size) in absolute offsets.
 * The tokenizer state is set to DATA before the token is queued.
 *
 * Returns html_size on success. If myhtml_queue_add() fails, the state is
 * switched to PARSE_ERROR_STOP and 0 is returned. html is not read.
 */
size_t myhtml_tokenizer_state_plaintext(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    /* OR is idempotent, so no need to test the bit first */
    token_node->type |= MyHTML_TOKEN_TYPE_PLAINTEXT;
    token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);

    token_node->raw_begin      = (html_offset + tree->global_offset);
    token_node->raw_length     = (html_size + tree->global_offset) - token_node->raw_begin;
    token_node->element_length = token_node->raw_length;

    token_node->tag_id = MyHTML_TAG__TEXT;

    myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;

    if(myhtml_queue_add(tree, html_size, token_node) == MyHTML_STATUS_OK)
        return html_size;

    myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
    return 0;
}
/////////////////////////////////////////////////////////
//// CDATA
////
/////////////////////////////////////////////////////////
/*
 * CDATA section state: look for the terminating "]]>".
 *
 * The token is flagged MyHTML_TOKEN_TYPE_CDATA, then the chunk is scanned
 * for '>'. For each '>' the two preceding characters are examined; when
 * fewer than two of them are inside the current chunk they are fetched with
 * myhtml_tree_incomming_buffer_make_data() using absolute offsets.
 *
 * On "]]>":
 *   - raw_length is set to the distance from raw_begin to the first ']';
 *   - a non-empty section is queued with myhtml_queue_add() (on failure the
 *     state becomes PARSE_ERROR_STOP and 0 is returned);
 *   - an empty section just moves raw_begin past the '>';
 *   - the state becomes DATA and the offset just past '>' is returned.
 *
 * If no terminator is found the function returns html_size with the state
 * unchanged.
 */
size_t myhtml_tokenizer_state_cdata_section(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    /* OR is idempotent, so no need to test the bit first */
    token_node->type |= MyHTML_TOKEN_TYPE_CDATA;

    for(; html_offset < html_size; html_offset++)
    {
        if(html[html_offset] != '>')
            continue;

        /* the two characters right before the '>' */
        const char *before_gt;

        if(html_offset >= 2)
            before_gt = &html[html_offset - 2];
        else
            before_gt = myhtml_tree_incomming_buffer_make_data(tree,((html_offset + tree->global_offset) - 2), 2);

        if(before_gt[0] != ']' || before_gt[1] != ']')
            continue;

        token_node->raw_length = (((html_offset + tree->global_offset) - 2) - token_node->raw_begin);

        /* step over the '>' */
        html_offset++;

        if(token_node->raw_length == 0) {
            token_node->raw_begin = html_offset + tree->global_offset;
        }
        else {
            token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;

            if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
                myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
                return 0;
            }
        }

        myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
        return html_offset;
    }

    return html_offset;
}
/////////////////////////////////////////////////////////
//// outside of tag
//// %HERE%<div>%HERE%</div>%HERE%
/////////////////////////////////////////////////////////
size_t myhtml_tokenizer_state_data(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
while(html_offset < html_size)
{
if(html[html_offset] == '<')
{
token_node->element_begin = (tree->global_offset + html_offset);
html_offset++;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_TAG_OPEN;
break;
}
third_party/modest/source/myhtml/tokenizer.c view on Meta::CPAN
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
return 0;
}
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
break;
}
html_offset++;
}
return html_offset;
}
/////////////////////////////////////////////////////////
//// Parse error
//// record the error status and skip the rest of the buffer
/////////////////////////////////////////////////////////
/*
 * PARSE_ERROR_STOP state: terminal state entered by the other state
 * handlers when myhtml_queue_add() fails.
 *
 * Stores MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION in
 * tree->tokenizer_status and returns html_size, i.e. reports the whole
 * chunk as consumed. token_node, html and html_offset are ignored.
 */
size_t myhtml_tokenizer_state_parse_error_stop(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
{
    (void)token_node;
    (void)html;
    (void)html_offset;

    tree->tokenizer_status = MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION;

    return html_size;
}
mystatus_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
{
myhtml->parse_state_func = (myhtml_tokenizer_state_f*)mycore_malloc(sizeof(myhtml_tokenizer_state_f) *
((MyHTML_TOKENIZER_STATE_LAST_ENTRY *
MyHTML_TOKENIZER_STATE_LAST_ENTRY) + 1));
if(myhtml->parse_state_func == NULL)
return MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DATA] = myhtml_tokenizer_state_data;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_TAG_OPEN] = myhtml_tokenizer_state_tag_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_TAG_NAME] = myhtml_tokenizer_state_tag_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_END_TAG_OPEN] = myhtml_tokenizer_state_end_tag_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SELF_CLOSING_START_TAG] = myhtml_tokenizer_state_self_closing_start_tag;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_MARKUP_DECLARATION_OPEN] = myhtml_tokenizer_state_markup_declaration_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME] = myhtml_tokenizer_state_before_attribute_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_NAME] = myhtml_tokenizer_state_attribute_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_NAME] = myhtml_tokenizer_state_after_attribute_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_VALUE] = myhtml_tokenizer_state_before_attribute_value;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED] = myhtml_tokenizer_state_after_attribute_value_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED] = myhtml_tokenizer_state_attribute_value_double_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED] = myhtml_tokenizer_state_attribute_value_single_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED] = myhtml_tokenizer_state_attribute_value_unquoted;
// comments
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_START] = myhtml_tokenizer_state_comment_start;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_START_DASH] = myhtml_tokenizer_state_comment_start_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT] = myhtml_tokenizer_state_comment;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_END] = myhtml_tokenizer_state_comment_end;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_END_DASH] = myhtml_tokenizer_state_comment_end_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_COMMENT_END_BANG] = myhtml_tokenizer_state_comment_end_bang;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BOGUS_COMMENT] = myhtml_tokenizer_state_bogus_comment;
// cdata
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_CDATA_SECTION] = myhtml_tokenizer_state_cdata_section;
// rcdata
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA] = myhtml_tokenizer_state_rcdata;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA_LESS_THAN_SIGN] = myhtml_tokenizer_state_rcdata_less_than_sign;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_OPEN] = myhtml_tokenizer_state_rcdata_end_tag_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME] = myhtml_tokenizer_state_rcdata_end_tag_name;
// rawtext
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT] = myhtml_tokenizer_state_rawtext;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT_LESS_THAN_SIGN] = myhtml_tokenizer_state_rawtext_less_than_sign;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_OPEN] = myhtml_tokenizer_state_rawtext_end_tag_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_NAME] = myhtml_tokenizer_state_rawtext_end_tag_name;
// plaintext
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_PLAINTEXT] = myhtml_tokenizer_state_plaintext;
// doctype
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE] = myhtml_tokenizer_state_doctype;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_NAME] = myhtml_tokenizer_state_before_doctype_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_NAME] = myhtml_tokenizer_state_doctype_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_NAME] = myhtml_tokenizer_state_after_doctype_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_CUSTOM_AFTER_DOCTYPE_NAME_A_Z] = myhtml_tokenizer_state_custom_after_doctype_name_a_z;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER] = myhtml_tokenizer_state_before_doctype_public_identifier;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED] = myhtml_tokenizer_state_doctype_public_identifier_double_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED] = myhtml_tokenizer_state_doctype_public_identifier_single_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER] = myhtml_tokenizer_state_after_doctype_public_identifier;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED] = myhtml_tokenizer_state_doctype_system_identifier_double_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED] = myhtml_tokenizer_state_doctype_system_identifier_single_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER] = myhtml_tokenizer_state_after_doctype_system_identifier;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BOGUS_DOCTYPE] = myhtml_tokenizer_state_bogus_doctype;
// script
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA] = myhtml_tokenizer_state_script_data;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_LESS_THAN_SIGN] = myhtml_tokenizer_state_script_data_less_than_sign;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_OPEN] = myhtml_tokenizer_state_script_data_end_tag_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_NAME] = myhtml_tokenizer_state_script_data_end_tag_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START] = myhtml_tokenizer_state_script_data_escape_start;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START_DASH] = myhtml_tokenizer_state_script_data_escape_start_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED] = myhtml_tokenizer_state_script_data_escaped;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_DASH] = myhtml_tokenizer_state_script_data_escaped_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH] = myhtml_tokenizer_state_script_data_escaped_dash_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN] = myhtml_tokenizer_state_script_data_escaped_less_than_sign;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN] = myhtml_tokenizer_state_script_data_escaped_end_tag_open;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME] = myhtml_tokenizer_state_script_data_escaped_end_tag_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START] = myhtml_tokenizer_state_script_data_double_escape_start;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED] = myhtml_tokenizer_state_script_data_double_escaped;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH] = myhtml_tokenizer_state_script_data_double_escaped_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH] = myhtml_tokenizer_state_script_data_double_escaped_dash_dash;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN] = myhtml_tokenizer_state_script_data_double_escaped_less_than_sign;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END] = myhtml_tokenizer_state_script_data_double_escape_end;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP] = myhtml_tokenizer_state_parse_error_stop;
// ***********
// for ends
// *********
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DATA)] = myhtml_tokenizer_end_state_data;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_TAG_OPEN)] = myhtml_tokenizer_end_state_tag_open;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_TAG_NAME)] = myhtml_tokenizer_end_state_tag_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_END_TAG_OPEN)] = myhtml_tokenizer_end_state_end_tag_open;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SELF_CLOSING_START_TAG)] = myhtml_tokenizer_end_state_self_closing_start_tag;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_MARKUP_DECLARATION_OPEN)] = myhtml_tokenizer_end_state_markup_declaration_open;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME)] = myhtml_tokenizer_end_state_before_attribute_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_ATTRIBUTE_NAME)] = myhtml_tokenizer_end_state_attribute_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_NAME)] = myhtml_tokenizer_end_state_after_attribute_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_VALUE)] = myhtml_tokenizer_end_state_before_attribute_value;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED)] = myhtml_tokenizer_end_state_attribute_value_double_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_attribute_value_single_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED)] = myhtml_tokenizer_end_state_attribute_value_unquoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED)] = myhtml_tokenizer_end_state_after_attribute_value_quoted;
// for ends comments
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_COMMENT_START)] = myhtml_tokenizer_end_state_comment_start;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_COMMENT_START_DASH)] = myhtml_tokenizer_end_state_comment_start_dash;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_COMMENT)] = myhtml_tokenizer_end_state_comment;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_COMMENT_END)] = myhtml_tokenizer_end_state_comment_end;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_COMMENT_END_DASH)] = myhtml_tokenizer_end_state_comment_end_dash;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_COMMENT_END_BANG)] = myhtml_tokenizer_end_state_comment_end_bang;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_BOGUS_COMMENT)] = myhtml_tokenizer_end_state_bogus_comment;
// for ends cdata
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_CDATA_SECTION)] = myhtml_tokenizer_end_state_cdata_section;
// rcdata
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RCDATA)] = myhtml_tokenizer_end_state_rcdata;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RCDATA_LESS_THAN_SIGN)] = myhtml_tokenizer_end_state_rcdata_less_than_sign;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_OPEN)] = myhtml_tokenizer_end_state_rcdata_end_tag_open;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME)] = myhtml_tokenizer_end_state_rcdata_end_tag_name;
// rawtext
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RAWTEXT)] = myhtml_tokenizer_end_state_rawtext;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RAWTEXT_LESS_THAN_SIGN)] = myhtml_tokenizer_end_state_rawtext_less_than_sign;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_OPEN)] = myhtml_tokenizer_end_state_rawtext_end_tag_open;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_NAME)] = myhtml_tokenizer_end_state_rawtext_end_tag_name;
// for ends plaintext
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_PLAINTEXT)] = myhtml_tokenizer_end_state_plaintext;
// for ends doctype
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DOCTYPE)] = myhtml_tokenizer_end_state_doctype;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_NAME)] = myhtml_tokenizer_end_state_before_doctype_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DOCTYPE_NAME)] = myhtml_tokenizer_end_state_doctype_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_NAME)] = myhtml_tokenizer_end_state_after_doctype_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_CUSTOM_AFTER_DOCTYPE_NAME_A_Z)] = myhtml_tokenizer_end_state_custom_after_doctype_name_a_z;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER)] = myhtml_tokenizer_end_state_before_doctype_public_identifier;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_public_identifier_double_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_public_identifier_single_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER)] = myhtml_tokenizer_end_state_after_doctype_public_identifier;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_system_identifier_double_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_doctype_system_identifier_single_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER)] = myhtml_tokenizer_end_state_after_doctype_system_identifier;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_BOGUS_DOCTYPE)] = myhtml_tokenizer_end_state_bogus_doctype;
// for ends script
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA)] = myhtml_tokenizer_end_state_script_data;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_LESS_THAN_SIGN)] = myhtml_tokenizer_end_state_script_data_less_than_sign;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_OPEN)] = myhtml_tokenizer_end_state_script_data_end_tag_open;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_END_TAG_NAME)] = myhtml_tokenizer_end_state_script_data_end_tag_name;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START)] = myhtml_tokenizer_end_state_script_data_escape_start;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPE_START_DASH)] = myhtml_tokenizer_end_state_script_data_escape_start_dash;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED)] = myhtml_tokenizer_end_state_script_data_escaped;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED_DASH)] = myhtml_tokenizer_end_state_script_data_escaped_dash;
( run in 0.541 second using v1.01-cache-2.11-cpan-524268b4103 )