HTML-HTML5-Parser

 view release on metacpan or  search on metacpan

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

      $i--;
      $node = $self->{open_elements}->[$i];
      
      ## Step 17
      redo LOOP;
    } # LOOP

  ## END
} # _reset_insertion_mode

  my $parse_rcdata = sub ($$$$) {
    my ($self, $insert, $open_tables, $parse_refs) = @_;

    ## Step 1
    my $start_tag_name = $token->{tag_name};
    
    {
      my $el;
      
      $el = $self->{document}->createElementNS((HTML_NS), $token->{tag_name});
    

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

    } else {
      $self->{state} = RAWTEXT_STATE;
    }
    delete $self->{escape}; # MUST

    ## Step 3, 4
    $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

    
    $token = $self->_get_next_token;
  }; # $parse_rcdata

  my $script_start_tag = sub ($$$) {
    my ($self, $insert, $open_tables) = @_;
    
    ## Step 1
    my $script_el;
    
      $script_el = $self->{document}->createElementNS((HTML_NS), 'script');
    
        for my $attr_name (keys %{ $token->{attributes}}) {

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

            
            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after head',
                            text => $token->{tag_name}, token => $token);
            push @{$self->{open_elements}},
                [$self->{head_element}, $el_category->{head}];
          } else {
            
          }

          ## NOTE: There is an "as if in head" code clone.
          $parse_rcdata->($self, $insert, $open_tables, 1); # RCDATA

          ## NOTE: At this point the stack of open elements contains
          ## the |head| element (index == -2) and the |script| element
          ## (index == -1).  In the "after head" insertion mode the
          ## |head| element is inserted only for the purpose of
          ## providing the context for the |script| element, and
          ## therefore we can, and have to, remove the element from
          ## the stack now.
          splice @{$self->{open_elements}}, -2, 1, () # <head>
              if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

          ## NOTE: There is an "as if in head" code clone.
          if ($self->{insertion_mode} == AFTER_HEAD_IM) {
            
            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after head',
                            text => $token->{tag_name}, token => $token);
            push @{$self->{open_elements}},
                [$self->{head_element}, $el_category->{head}];
          } else {
            
          }
          $parse_rcdata->($self, $insert, $open_tables, 0); # RAWTEXT
          splice @{$self->{open_elements}}, -2, 1, () # <head>
              if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
          next B;
        } elsif ($token->{tag_name} eq 'noscript') {
              if ($self->{insertion_mode} == IN_HEAD_IM) {
                
                ## NOTE: and scripting is disabled
                
    {
      my $el;

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

              pop @{$open_tables};

              $self->_reset_insertion_mode; 

          ## reprocess
          
          next B;
        } elsif ($token->{tag_name} eq 'style') {
          
          ## NOTE: This is an "as if in head" code clone.
          $parse_rcdata->($self, $insert, $open_tables, 0); # RAWTEXT
          $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
          next B;
        } elsif ($token->{tag_name} eq 'script') {
          
          ## NOTE: This is a "as if in head" code clone.
          $script_start_tag->($self, $insert, $open_tables);
          $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
          next B;
        } elsif ($token->{tag_name} eq 'input') {
          if ($token->{attributes}->{type}) {

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

      push @{$self->{open_elements}}, [$el, $el_category->{$token->{tag_name}} || 0];
    }
  
          pop @{$self->{open_elements}};
          delete $self->{self_closing};
          $token = $self->_get_next_token;
          next B;
        } elsif ($token->{tag_name} eq 'noframes') {
          
          ## NOTE: As if in head.
          $parse_rcdata->($self, $insert, $open_tables, 0); # RAWTEXT
          next B;

          ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
          ## has no parse error.
        } else {
          if ($self->{insertion_mode} == IN_FRAMESET_IM) {
            
            $self->{parse_error}->(level => $self->{level}->{must}, type => 'in frameset',
                            text => $token->{tag_name}, token => $token);
          } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

    ## "in body" insertion mode
    if ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'script') {
        
        ## NOTE: This is an "as if in head" code clone
        $script_start_tag->($self, $insert, $open_tables);
        next B;
      } elsif ($token->{tag_name} eq 'style') {
        
        ## NOTE: This is an "as if in head" code clone
        $parse_rcdata->($self, $insert, $open_tables, 0); # RAWTEXT
        next B;
      } elsif ({
         base => 1, command => 1, link => 1, basefont => 1, bgsound => 1,
       }->{$token->{tag_name}}) {
        
        ## NOTE: This is an "as if in head" code clone, only "-t" differs
        
    {
      my $el;
      

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

					manakai_has_reference => $token->{attributes}->{content}->{has_reference});
          }
        }

        delete $self->{self_closing};
        $token = $self->_get_next_token;
        next B;
      } elsif ($token->{tag_name} eq 'title') {
        
        ## NOTE: This is an "as if in head" code clone
        $parse_rcdata->($self, $insert, $open_tables, 1); # RCDATA
        next B;
        
      } elsif ($token->{tag_name} eq 'body') {
        ## "In body" insertion mode, "body" start tag token.
        $self->{parse_error}->(level => $self->{level}->{must}, type => 'in body', text => 'body', token => $token);
              
        if (@{$self->{open_elements}} == 1 or
            not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
          
          ## Ignore the token

lib/HTML/HTML5/Parser/TagSoupParser.pm  view on Meta::CPAN

            ->($self, $insert, $active_formatting_elements, $open_tables);

          delete $self->{frameset_ok};
        } elsif ($token->{tag_name} eq 'iframe') {
          
          delete $self->{frameset_ok};
        } else {
          
        }
        ## NOTE: There is an "as if in body" code clone.
        $parse_rcdata->($self, $insert, $open_tables, 0); # RAWTEXT
        next B;
      } elsif ($token->{tag_name} eq 'isindex') {
        $self->{parse_error}->(level => $self->{level}->{must}, type => 'isindex', token => $token);
        
        if (defined $self->{form_element}) {
          
          ## Ignore the token
           ## NOTE: Not acknowledged.
          $token = $self->_get_next_token;
          next B;

lib/HTML/HTML5/Parser/Tokenizer.pm  view on Meta::CPAN

    name => 'data ]',
    state => DATA_MSE1_STATE,
    emit => CHARACTER_TOKEN,
  };
  ## $XMLAction entry for DATA_STATE, keyed by KEY_ELSE_CHAR (by its name,
  ## the fallback used when no more specific character key applies).
  ## The entry asks for a CHARACTER_TOKEN to be emitted.
  ## NOTE(review): |emit_data_read_until| presumably lists the characters
  ## at which the bulk read of character data stops; here the string is
  ## NUL, "<", "&" and "]" (|\]| inside qq{} is just "]").  Confirm
  ## against the code that interprets the action tables.
  $XMLAction->[DATA_STATE]->[KEY_ELSE_CHAR] = {
    name => 'data else xml',
    emit => CHARACTER_TOKEN,
    emit_data_read_until => qq{\x00<&\]},
  };
## Action table entries for RCDATA_STATE and RAWTEXT_STATE.  The first
## index of |$Action| is a tokenizer state constant; the second is either
## a code point or one of the KEY_* constants.
## NOTE(review): the meaning of the individual hash keys (|state|,
## |state_set|, |emit|, |emit_data|, |error|, ...) is defined by the code
## that interprets these tables, which is not visible here; the remarks
## below read them by name only -- confirm against that code.

## U+0026 (&) in RCDATA: hand over to ENTITY_STATE.  |state_set| records
## RCDATA_STATE as |prev_state|, presumably so that the tokenizer can
## come back here once the character reference has been handled.
$Action->[RCDATA_STATE]->[0x0026] = {
  name => 'rcdata &',
  state => ENTITY_STATE, # "entity data state" + "consume a character reference"
  state_set => {entity_add => -1, prev_state => RCDATA_STATE},
};
## U+003C (<) in RCDATA: go to RCDATA_LT_STATE.
$Action->[RCDATA_STATE]->[0x003C] = {
  name => 'rcdata <',
  state => RCDATA_LT_STATE,
};
## EOF in RCDATA: reuse whatever is stored for EOF in DATA_STATE.  This
## copies the stored value (a reference, if the entry is a hash), so both
## slots point at the same entry rather than at separate copies.
$Action->[RCDATA_STATE]->[KEY_EOF_CHAR] = $Action->[DATA_STATE]->[KEY_EOF_CHAR];
## U+0000 (NUL) in RCDATA: the entry carries U+FFFD as the data of a
## CHARACTER_TOKEN together with the error type 'NULL'.
$Action->[RCDATA_STATE]->[0x0000] = {
  name => 'rcdata null',
  emit => CHARACTER_TOKEN,
  emit_data => "\x{FFFD}",
  error => 'NULL',
};
## Any other character in RCDATA: emit a CHARACTER_TOKEN.
## NOTE(review): |emit_data_read_until| presumably makes the tokenizer
## read ahead up to the next NUL, "<" or "&" -- the three characters
## that have their own entries above.  Confirm.
$Action->[RCDATA_STATE]->[KEY_ELSE_CHAR] = {
  name => 'rcdata else',
  emit => CHARACTER_TOKEN,
  emit_data_read_until => qq{\x00<&},
};
## U+003C (<) in RAWTEXT: go to RAWTEXT_LT_STATE.
$Action->[RAWTEXT_STATE]->[0x003C] = {
  name => 'rawtext <',
  state => RAWTEXT_LT_STATE,
};
## EOF and NUL in RAWTEXT reuse the DATA_STATE EOF entry and the
## RCDATA_STATE NUL entry respectively.  Because the very same hash is
## shared, the NUL entry keeps its |name| of 'rcdata null' even when it
## is reached from RAWTEXT_STATE.
$Action->[RAWTEXT_STATE]->[KEY_EOF_CHAR] = $Action->[DATA_STATE]->[KEY_EOF_CHAR];
$Action->[RAWTEXT_STATE]->[0x0000] = $Action->[RCDATA_STATE]->[0x0000];
$Action->[RAWTEXT_STATE]->[KEY_ELSE_CHAR] = {

lib/HTML/HTML5/Parser/Tokenizer.pm  view on Meta::CPAN

  ## $XMLAction entry for TAG_OPEN_STATE, keyed by KEY_ELSE_CHAR (by its
  ## name, the fallback for characters without a more specific entry).
  ## The entry describes a START_TAG_TOKEN under |ct| and names
  ## TAG_NAME_STATE as the following state.
  ## NOTE(review): |ct| presumably stands for the "current token" that
  ## the tokenizer is building; what |delta => 1| and
  ## |append_tag_name => 0x0000| mean is decided by the code that
  ## interprets this table and cannot be told from here -- confirm.
  $XMLAction->[TAG_OPEN_STATE]->[KEY_ELSE_CHAR] = {
    name => 'tag open else xml',
    ct => {
      type => START_TAG_TOKEN,
      delta => 1,
      append_tag_name => 0x0000,
    },
    state => TAG_NAME_STATE,
  };
## U+002F (/) seen in RCDATA_LT_STATE (reached from the 'rcdata <'
## action): go to RCDATA_END_TAG_OPEN_STATE.
## NOTE(review): |buffer => {clear => 1}| presumably empties the
## tokenizer's temporary buffer before the end tag name is collected --
## confirm against the code that interprets the action tables.
$Action->[RCDATA_LT_STATE]->[0x002F] = {
  name => 'rcdata lt /',
  state => RCDATA_END_TAG_OPEN_STATE,
  buffer => {clear => 1},
};
## Same shape for RAWTEXT: U+002F (/) in RAWTEXT_LT_STATE leads to
## RAWTEXT_END_TAG_OPEN_STATE, with the same |buffer| request.
$Action->[RAWTEXT_LT_STATE]->[0x002F] = {
  name => 'rawtext lt /',
  state => RAWTEXT_END_TAG_OPEN_STATE,
  buffer => {clear => 1},
};
$Action->[SCRIPT_DATA_LT_STATE]->[0x002F] = {
  name => 'script data lt /',

lib/HTML/HTML5/Parser/Tokenizer.pm  view on Meta::CPAN

};
## SCRIPT_DATA_ESCAPED_LT_STATE entry keyed by KEY_LLATIN_CHAR (going by
## the key and the 'lc' in |name|, a lowercase Latin letter): the entry
## carries "<" as the data of a CHARACTER_TOKEN and names
## SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE as the following state.
## NOTE(review): |emit_data_append => 1| and
## |buffer => {clear => 1, append => 0x0000}| are interpreted by code not
## visible here; presumably the current character is added to the emitted
## data and to a freshly cleared temporary buffer -- confirm.
$Action->[SCRIPT_DATA_ESCAPED_LT_STATE]->[KEY_LLATIN_CHAR] = {
  name => 'script data escaped lt lc',
  emit => CHARACTER_TOKEN,
  emit_data => '<',
  emit_data_append => 1,
  buffer => {clear => 1, append => 0x0000},
  state => SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE,
};
## RCDATA_LT_STATE entry keyed by KEY_ELSE_CHAR (by its name, the
## fallback for characters without a more specific entry, such as the
## U+002F one): the entry carries "<" as the data of a CHARACTER_TOKEN
## and names RCDATA_STATE as the following state.
## NOTE(review): |reconsume => 1| presumably means the current character
## is processed again in RCDATA_STATE instead of being consumed here --
## confirm against the code that interprets the action tables.
$Action->[RCDATA_LT_STATE]->[KEY_ELSE_CHAR] = {
  name => 'rcdata lt else',
  state => RCDATA_STATE,
  reconsume => 1,
  emit => CHARACTER_TOKEN,
  emit_data => '<',
};
$Action->[RAWTEXT_LT_STATE]->[KEY_ELSE_CHAR] = {
  name => 'rawtext lt else',
  state => RAWTEXT_STATE,
  reconsume => 1,
  emit => CHARACTER_TOKEN,

lib/HTML/HTML5/Parser/Tokenizer.pm  view on Meta::CPAN

      $self->{set_nc}->($self);
    }
  
        redo A;
      } elsif ($self->{kwd} eq '[CDATA' and
               $nc == 0x005B) { # [
        if ($self->{is_xml} and 
            not $self->{tainted} and
            @{$self->{open_elements} or []} == 0) {
          
          $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
                          line => $self->{line_prev},
                          column => $self->{column_prev} - 7);
          $self->{tainted} = 1;
        } else {
          
        }

        $self->{ct} = {type => CHARACTER_TOKEN,
                                  data => '',
                                  line => $self->{line_prev},



( run in 0.624 second using v1.01-cache-2.11-cpan-454fe037f31 )