PPI-XS-Tokenizer

 view release on metacpan or  search on metacpan

src/whitespace.cpp  view on Meta::CPAN

			 ( t0->type->isa( Token_Structure ) && ( !strcmp(t0->text, "]" ) ) ) ) { 
			t->_new_token(Token_Operator);
			return my_char;
		}
		if ( t0->type->isa( Token_Structure ) && 
			( ( !strcmp(t0->text, "(") ) || ( !strcmp(t0->text, "{") ) || ( !strcmp(t0->text, ";") ) ) ) {
			t->_new_token(Token_Regexp_Match_Bare);
			return my_char;
		}
		if ( t0->type->isa( Token_Word ) && 
			 ( ( !strcmp(t0->text, "split") ) || 
			   ( !strcmp(t0->text, "if") ) || 
			   ( !strcmp(t0->text, "unless") ) || 
			   ( !strcmp(t0->text, "grep") ) ) ) {
			t->_new_token(Token_Regexp_Match_Bare);
			return my_char;
		}

		unsigned char n_char = t->c_line[ t->line_pos + 1 ];
		if ( ( n_char == '^' ) || ( n_char == '[' ) || ( n_char == '\\' ) ) {
			t->_new_token(Token_Regexp_Match);
			return my_char;
		}

		t->_new_token(Token_Operator);
		return my_char;
	}

	if ( c_char == 'x' ) {
		unsigned char n_char = t->c_line[ t->line_pos + 1 ];
		Token *t0 = t->_last_significant_token(1);
		if ( ( t0 != NULL ) && ( n_char >= '0' ) && ( n_char <= 9 ) ) {
			TokenTypeNames p_type = t0->type->type;
			if ( ( p_type == Token_Quote_Single ) || ( p_type == Token_Quote_Double ) ) { // FIXME
				t->_new_token(Token_Operator);
				return my_char;
			}
		}
		return t->TokenTypeNames_pool[Token_Word]->commit( t );
	}

	if ( c_char == '-' ) {
		if ( t->_opcontext() == ooc_Operator ) {
			t->_new_token(Token_Operator);
			return my_char;
		} else {
			t->_new_token(Token_Unknown);
			return my_char;
		}
	}

	// FIXME: Add the c_char > 127 part?

	sprintf(t->ErrorMsg, "Error: charecter rejected: %d at pos %d", c_char, t->line_pos);
    return error_fail;
}

// The four-character literal "=cut". PodToken::tokenize matches it through
// PredicateLiteral< 4, end_pod > to decide when to finalize the current token.
// NOTE(review): presumably declared `extern` so the array has external linkage
// and can be used as a non-type template argument - confirm before changing.
extern const char end_pod[] = "=cut";
/**
 * Line handler for PodToken.
 *
 * The rest of the current line (from t->line_pos up to t->line_length) is
 * always appended to the token. Afterwards the line is tested, starting at
 * the offset it was entered at, for the literal "=cut" followed either by
 * the end of the line or by a whitespace character. On a match the token is
 * finalized and a new token of the type returned by _finalize_token() is
 * started.
 *
 * @param t       tokenizer that owns the current line and the read position
 * @param token   token that receives the bytes of the line
 * @param c_char  not referenced by this handler
 * @return always done_it_myself
 */
CharTokenizeResults PodToken::tokenize(Tokenizer *t, Token *token, unsigned char c_char) {
	// We get here at the beginning of a line, but not necessarily at byte 0:
	// a BOM may sit in front of the text. Remember where the text starts.
	PredicateLiteral< 4, end_pod > cut_marker;
	unsigned long cursor = t->line_pos;

	// The whole line belongs to this token, whatever it contains.
	while ( t->line_pos < t->line_length ) {
		token->text[ token->length ] = t->c_line[ t->line_pos ];
		token->length++;
		t->line_pos++;
	}

	// Not a "=cut" line: nothing more to do.
	if ( !cut_marker.test( t->c_line, &cursor, t->line_length ) ) {
		return done_it_myself;
	}
	// "=cut" must be a word of its own: end of line or whitespace must follow.
	if ( ( cursor < t->line_length ) && !is_whitespace( t->c_line[ cursor ] ) ) {
		return done_it_myself;
	}

	// Close this token and continue with the token type reported back.
	TokenTypeNames next_type = t->_finalize_token();
	t->_new_token(next_type);
	return done_it_myself;
}

/**
 * Line handler for EndToken.
 *
 * The line is tested from offset 0 with the combination of "is the character
 * '='" and is_word (presumably: '=' followed by a word character - confirm
 * against PredicateAnd). On a match the current token is finalized and a
 * Token_Pod token is started; the line itself is not copied here. Otherwise
 * the rest of the line, from t->line_pos on, is appended to the token.
 *
 * @param t       tokenizer that owns the current line and the read position
 * @param token   token that receives the bytes of a non-matching line
 * @param c_char  not referenced by this handler
 * @return always done_it_myself
 */
CharTokenizeResults EndToken::tokenize(Tokenizer *t, Token *token, unsigned char c_char) {
	// This handler is always entered on a fresh line.
	typedef PredicateAnd<
		PredicateIsChar< '=' >,
		PredicateFunc< is_word >
	> PodStart;
	PodStart pod_start;

	unsigned long cursor = 0;
	if ( !pod_start.test( t->c_line, &cursor, t->line_length ) ) {
		// No match - the whole line simply becomes part of this token.
		for ( ; t->line_pos < t->line_length; t->line_pos++ ) {
			token->text[ token->length++ ] = t->c_line[ t->line_pos ];
		}
		return done_it_myself;
	}

	// Matched: close this token and switch to a Token_Pod token.
	t->_finalize_token();
	t->_new_token( Token_Pod );
	return done_it_myself;
}

/**
 * Line handler for DataToken: appends everything from t->line_pos up to
 * t->line_length to the token, without inspecting the content.
 *
 * @param t       tokenizer that owns the current line and the read position
 * @param token   token that receives the bytes of the line
 * @param c_char  not referenced by this handler
 * @return always done_it_myself
 */
CharTokenizeResults DataToken::tokenize(Tokenizer *t, Token *token, unsigned char c_char) {
	for ( ; t->line_pos < t->line_length; ++t->line_pos ) {
		token->text[ token->length ] = t->c_line[ t->line_pos ];
		++token->length;
	}
	return done_it_myself;
}

CharTokenizeResults CommentToken::commit(Tokenizer *t) {
	if (( t->c_token != NULL ) && 
		( t->c_token->type->type == Token_Whitespace ) &&
		( t->c_token->length == t->line_pos ) ) {
		// This is a whole-line comment, that should own the whitespace before
		// and the newline after it.
		t->changeTokenType(Token_Comment);
		Token *c_token = t->c_token;
	    
		while ( ( t->line_pos < t->line_length ) ) {
			c_token->text[c_token->length++] = t->c_line[t->line_pos++];
		}
	} else {
		// This is an inline comment - it does not contain the newline
		t->_new_token(Token_Comment);
		Token *c_token = t->c_token;
	    
		while ( ( t->line_pos < t->line_length ) && ( t->c_line[t->line_pos] != t->local_newline ) ) {
			c_token->text[c_token->length++] = t->c_line[t->line_pos++];



( run in 0.549 second using v1.01-cache-2.11-cpan-97f6503c9c8 )