PPI-XS-Tokenizer
view release on metacpan or search on metacpan
src/tokenizer.cpp view on Meta::CPAN
for ( unsigned long ix = 0; ix < MAGIC_COUNT; ix++ )
mmap.insert( uPair ( m_list[ix], 1 ) );
}
/**
 * Build a tokenizer in its initial state.
 *
 * All cursor and queue members start out empty, the scanning zone starts as
 * Token_Whitespace, a fresh TokensCacheMany is allocated, and
 * TokenTypeNames_pool receives one handler object for every token type
 * listed below. Finally the operator and magic lookup maps are populated
 * through fill_maps().
 */
Tokenizer::Tokenizer()
    :
    c_token(NULL),
    c_line(NULL),
    line_pos(0),
    line_length(0),
    local_newline('\n'),
    tokens_found_head(NULL),
    tokens_found_tail(NULL),
    tokens_posponded_head(NULL),
    tokens_posponded_tail(NULL),
    zone(Token_Whitespace),
    m_nLastSignificantPos(0)
{
    m_TokensCache = new TokensCacheMany();

    // Clear the whole handler table first, so that any type without a
    // handler below is left as NULL.
    for (int type_ix = 0; type_ix < Token_LastTokenType; ++type_ix) {
        TokenTypeNames_pool[type_ix] = NULL;
    }
    TokenTypeNames_pool[Token_NoType] = NULL;

    // --- basic token types ---
    TokenTypeNames_pool[Token_Whitespace] = new WhiteSpaceToken;
    TokenTypeNames_pool[Token_Comment] = new CommentToken;
    TokenTypeNames_pool[Token_Structure] = new StructureToken;
    TokenTypeNames_pool[Token_Magic] = new MagicToken;
    TokenTypeNames_pool[Token_Operator] = new OperatorToken;
    TokenTypeNames_pool[Token_Unknown] = new UnknownToken;
    TokenTypeNames_pool[Token_Symbol] = new SymbolToken;
    TokenTypeNames_pool[Token_Operator_Attribute] = new AttributeOperatorToken;

    // --- simple quotes, each constructed with its delimiter character ---
    TokenTypeNames_pool[Token_Quote_Double] = new AbstractSimpleQuote( Token_Quote_Double, true, '"' );
    TokenTypeNames_pool[Token_Quote_Single] = new AbstractSimpleQuote( Token_Quote_Single, true, '\'' );
    TokenTypeNames_pool[Token_QuoteLike_Backtick] = new AbstractSimpleQuote( Token_QuoteLike_Backtick, true, '`' );

    TokenTypeNames_pool[Token_Word] = new WordToken;

    // --- quote-like and regexp types ---
    TokenTypeNames_pool[Token_Quote_Literal] = new AbstractQuoteTokenType( Token_Quote_Literal, true, 1, false );
    TokenTypeNames_pool[Token_Quote_Interpolate] = new AbstractQuoteTokenType( Token_Quote_Interpolate, true, 1, false );
    TokenTypeNames_pool[Token_QuoteLike_Words] = new AbstractQuoteTokenType( Token_QuoteLike_Words, true, 1, false );
    TokenTypeNames_pool[Token_QuoteLike_Command] = new AbstractQuoteTokenType( Token_QuoteLike_Command, true, 1, false );
    TokenTypeNames_pool[Token_QuoteLike_Readline] = new AbstractBareQuoteTokenType( Token_QuoteLike_Readline, true, 1, false );
    TokenTypeNames_pool[Token_Regexp_Match] = new AbstractQuoteTokenType( Token_Regexp_Match, true, 1, true );
    TokenTypeNames_pool[Token_Regexp_Match_Bare] = new AbstractBareQuoteTokenType( Token_Regexp_Match_Bare, true, 1, true );
    TokenTypeNames_pool[Token_QuoteLike_Regexp] = new AbstractQuoteTokenType( Token_QuoteLike_Regexp, true, 1, true );
    TokenTypeNames_pool[Token_Regexp_Substitute] = new AbstractQuoteTokenType( Token_Regexp_Substitute, true, 2, true );
    TokenTypeNames_pool[Token_Regexp_Transliterate] = new AbstractQuoteTokenType( Token_Regexp_Transliterate, true, 2, true );

    // --- numeric types ---
    TokenTypeNames_pool[Token_Number] = new NumberToken;
    TokenTypeNames_pool[Token_Number_Float] = new FloatNumberToken;
    TokenTypeNames_pool[Token_Number_Hex] = new HexNumberToken;
    TokenTypeNames_pool[Token_Number_Binary] = new BinaryNumberToken;
    TokenTypeNames_pool[Token_Number_Octal] = new OctalNumberToken;
    TokenTypeNames_pool[Token_Number_Exp] = new ExpNumberToken;

    // --- remaining types ---
    TokenTypeNames_pool[Token_ArrayIndex] = new ArrayIndexToken;
    TokenTypeNames_pool[Token_Label] = new LabelToken;
    TokenTypeNames_pool[Token_Attribute] = new AttributeToken;
    TokenTypeNames_pool[Token_Attribute_Parameterized] = new ParameterizedAttributeToken;
    TokenTypeNames_pool[Token_Pod] = new PodToken;
    TokenTypeNames_pool[Token_Cast] = new CastToken;
    TokenTypeNames_pool[Token_Prototype] = new PrototypeToken;
    TokenTypeNames_pool[Token_DashedWord] = new DashedWordToken;
    TokenTypeNames_pool[Token_Number_Version] = new VersionNumberToken;
    TokenTypeNames_pool[Token_BOM] = new BOMToken;
    TokenTypeNames_pool[Token_Separator] = new SeparatorToken;
    TokenTypeNames_pool[Token_End] = new EndToken;
    TokenTypeNames_pool[Token_Data] = new DataToken;
    TokenTypeNames_pool[Token_HereDoc] = new HereDocToken;
    //TokenTypeNames_pool[Token_HereDoc_Body] = new HereDocBodyToken;

    // No significant token has been remembered yet.
    for (int slot = 0; slot < NUM_SIGNIFICANT_KEPT; ++slot) {
        m_LastSignificant[slot] = NULL;
    }

    fill_maps( operators, magics );
}
/**
 * Tear the tokenizer down.
 *
 * Reset() runs first, while the handler table is still populated. After
 * that every non-NULL entry of TokenTypeNames_pool is deleted and cleared,
 * and finally the token cache allocated by the constructor is deleted.
 */
Tokenizer::~Tokenizer() {
    Reset();

    for (int type_ix = 0; type_ix < Token_LastTokenType; ++type_ix) {
        if ( NULL == TokenTypeNames_pool[type_ix] ) {
            continue;   // no handler was registered for this type
        }
        delete(TokenTypeNames_pool[type_ix]);
        TokenTypeNames_pool[type_ix] = NULL;
    }

    delete m_TokensCache;
}
void Tokenizer::Reset() {
Token *t;
EndOfDocument();
while ( ( t = pop_one_token() ) != NULL ) {
freeToken( t );
}
for (int ix = 0; ix < NUM_SIGNIFICANT_KEPT; ix++) {
if (m_LastSignificant[ix] != NULL) {
freeToken(m_LastSignificant[ix]);
m_LastSignificant[ix] = NULL;
}
}
c_token = NULL;
c_line = NULL;
line_pos = 0;
line_length = 0;
zone = Token_Whitespace;
m_nLastSignificantPos = 0;
}
/**
 * Count the tokens in a singly linked chain.
 *
 * @param head  first token of the chain, or NULL for an empty chain
 * @return      number of tokens reachable from head by following `next`;
 *              0 when head is NULL
 */
unsigned int count_waiting_tokens(Token *head) {
    unsigned int total = 0;
    for ( Token *node = head; node != NULL; node = node->next ) {
        ++total;
    }
    return total;
}
void Tokenizer::EndOfDocument() {
if ( c_token != NULL )
_finalize_token();
while ( NULL != tokens_posponded_head ) {
src/tokenizer.cpp view on Meta::CPAN
/**
 * Look up one of the remembered significant tokens.
 *
 * @param n  1-based distance into the history; valid values are
 *           1 .. NUM_SIGNIFICANT_KEPT
 * @return   the entry of m_LastSignificant selected by n relative to
 *           m_nLastSignificantPos (n == 1 selects the slot at
 *           m_nLastSignificantPos itself), or NULL when n is out of range.
 *           The selected slot may itself hold NULL.
 */
Token *Tokenizer::_last_significant_token(unsigned int n) {
    const bool in_range = ( n >= 1 ) && ( n <= NUM_SIGNIFICANT_KEPT );
    if ( !in_range ) {
        return NULL;
    }

    // Walk backwards through the circular buffer; NUM_SIGNIFICANT_KEPT is
    // added before subtracting n so the intermediate value stays
    // non-negative.
    unsigned int slot = ( m_nLastSignificantPos + NUM_SIGNIFICANT_KEPT - n + 1 ) % NUM_SIGNIFICANT_KEPT;
    return m_LastSignificant[slot];
}
/**
 * Classify the current position based on the most recent significant token.
 *
 * @return
 *   - ooc_Operand  when there is no previous significant token, when it is
 *                  an operator, or when it is the single character
 *                  '(', '{', '[' or ';';
 *   - ooc_Operator when it is a symbol, a number, a quote / quote-like
 *                  token, an array index, or the single character '}';
 *   - ooc_Unknown  in every other case.
 */
OperatorOperandContext Tokenizer::_opcontext() {
    Token *prev = _last_significant_token(1);
    if ( NULL == prev ) {
        return ooc_Operand;
    }

    if ( prev->type->isa( Token_Symbol ) ) {
        return ooc_Operator;
    }
    if ( prev->type->isa( Token_Number ) ) {
        return ooc_Operator;
    }
    if ( prev->type->isa( isToken_QuoteOrQuotaLike ) ) {
        return ooc_Operator;
    }
    if ( prev->type->type == Token_ArrayIndex ) {
        return ooc_Operator;
    }

    if ( prev->type->isa( Token_Operator ) ) {
        return ooc_Operand;
    }

    // FIXME: Are we searching for Structure tokens?
    // Only single-character tokens are inspected from here on.
    if ( prev->length != 1 ) {
        return ooc_Unknown;
    }

    unsigned char only_char = prev->text[0];
    switch ( only_char ) {
    case '(':
    case '{':
    case '[':
    case ';':
        return ooc_Operand;
    case '}':
        return ooc_Operator;
    default:
        return ooc_Unknown;
    }
}
//=====================================
/**
 * Feed the not-yet-consumed characters of the current line to the current
 * token's type handler, one character at a time.
 *
 * Before scanning, the current token's buffer is checked against
 * line_length via VerifySufficientBufferLength(). For every character the
 * handler's answer decides what happens here:
 *   - my_char:        the character is appended to c_token and line_pos
 *                     advances by one;
 *   - done_it_myself: nothing is done here. NOTE(review): presumably the
 *                     handler has already advanced line_pos and/or switched
 *                     c_token itself; otherwise this loop would not
 *                     progress. Confirm against the handlers.
 *   - error_fail:     scanning stops at once.
 *
 * c_token is re-read on every iteration and is dereferenced without a NULL
 * check.
 *
 * @return tokenizing_fail when a handler reports error_fail, reached_eol
 *         once line_pos has reached line_length.
 */
LineTokenizeResults Tokenizer::_tokenize_the_rest_of_the_line() {
    AbstractTokenType::VerifySufficientBufferLength(c_token, line_length);

    while ( line_pos < line_length ) {
        CharTokenizeResults verdict = c_token->type->tokenize(this, c_token, c_line[line_pos]);

        if ( verdict == error_fail ) {
            return tokenizing_fail;
        }
        if ( verdict == my_char ) {
            // The handler accepted the character: append it and move on.
            c_token->text[c_token->length] = c_line[line_pos];
            c_token->length++;
            line_pos++;
        }
        // done_it_myself: nothing to do at this level.
    }

    // The original code had an empty `if` on a trailing whitespace token
    // here; it had no effect and has been removed.
    return reached_eol;
}
/**
 * Tokenize one line of input.
 *
 * The line becomes the current scan buffer and, if no token is in progress,
 * a Token_BOM token is started. Postponed tokens are handled before any
 * scanning takes place:
 *   - a postponed token that is not a here-doc is moved to the found-tokens
 *     chain;
 *   - when the head of the postponed chain is a here-doc, the whole line is
 *     handed to its Unpospone() and this call returns reached_eol without
 *     scanning the line. If Unpospone() reports heredocbody_ended, the
 *     here-doc and the tokens queued behind it (up to, but not including,
 *     the next here-doc) are moved to the found-tokens chain first.
 * Only when the postponed chain is empty is the line actually scanned.
 *
 * @param line         characters of the line
 * @param line_length  number of characters in `line`
 * @return reached_eol when a here-doc took the line, otherwise the result
 *         of _tokenize_the_rest_of_the_line()
 */
LineTokenizeResults Tokenizer::tokenizeLine(char *line, unsigned long line_length) {
    c_line = line;
    line_pos = 0;
    this->line_length = line_length;

    if ( NULL == c_token ) {
        _new_token(Token_BOM);
    }

    while ( tokens_posponded_head != NULL ) {
        if ( ! tokens_posponded_head->type->isa( Token_HereDoc ) ) {
            // Ordinary postponed token: hand it over to the found chain.
            Token *ready = tokens_posponded_head;
            tokens_posponded_head = ready->next;
            chain_token(ready, tokens_found_head, tokens_found_tail);
            continue;
        }

        // The head is a here-doc waiting for its body: it takes this line.
        ExtendedToken *heredoc = (ExtendedToken *)tokens_posponded_head;
        AbstractTokenType::VerifySufficientBufferLength(heredoc, line_length);
        HereDocToken *heredoc_type = (HereDocToken*)(tokens_posponded_head->type);
        if ( heredocbody_ended != heredoc_type->Unpospone( this, heredoc, line, line_length ) ) {
            return reached_eol;
        }

        // Body complete: release the here-doc itself, then everything
        // queued behind it until another here-doc is reached.
        do {
            Token *released = tokens_posponded_head;
            tokens_posponded_head = released->next;
            chain_token(released, tokens_found_head, tokens_found_tail);
        } while ( ( NULL != tokens_posponded_head ) && ( ! tokens_posponded_head->type->isa( Token_HereDoc ) ) );

        if ( NULL == tokens_posponded_head ) {
            tokens_posponded_tail = NULL;
        }
        return reached_eol;
    }

    // The postponed chain is empty at this point.
    tokens_posponded_tail = NULL;
    return _tokenize_the_rest_of_the_line();
}
/**
 * Change the type of the token currently being built.
 *
 * When the old and new types agree on isa(isToken_Extended), only the type
 * pointer of c_token is replaced. Otherwise a new token is obtained from
 * the new type's GetNewToken(), the text buffer and its allocated size are
 * exchanged between the two tokens, the accumulated length is carried over,
 * the old token is freed and the new one becomes c_token.
 *
 * @param new_type  index into TokenTypeNames_pool of the type to switch to;
 *                  the pool entry is dereferenced without a NULL check
 */
void Tokenizer::changeTokenType(TokenTypeNames new_type) {
    AbstractTokenType *current = c_token->type;
    AbstractTokenType *target = TokenTypeNames_pool[new_type];

    if ( current->isa(isToken_Extended) == target->isa(isToken_Extended) ) {
        // No replacement token is needed; just retag the current one.
        c_token->type = target;
        return;
    }

    Token *replacement = target->GetNewToken( this, m_TokensCache, line_pos + 1 );

    // Exchange the text buffers so the characters collected so far travel
    // with the replacement token.
    char *carried_text = c_token->text;
    c_token->text = replacement->text;
    replacement->text = carried_text;

    // Each buffer keeps its own capacity figure.
    unsigned long carried_capacity = c_token->allocated_size;
    c_token->allocated_size = replacement->allocated_size;
    replacement->allocated_size = carried_capacity;

    replacement->length = c_token->length;
    c_token->length = 0;

    freeToken( c_token );
    c_token = replacement;
    c_token->type = target;
}
( run in 1.202 second using v1.01-cache-2.11-cpan-97f6503c9c8 )