Language-Haskell

 view release on metacpan or  search on metacpan

hugs98-Nov2003/fptools/libraries/base/Text/Read/Lex.hs  view on Meta::CPAN


-- -----------------------------------------------------------------------------
-- Lexing

lex :: ReadP Lexeme
lex = skipSpaces >> lexToken

hsLex :: ReadP String
-- ^ Haskell lexer: returns the lexed string, rather than the lexeme
hsLex = do skipSpaces 
	   (s,_) <- gather lexToken
	   return s

lexToken :: ReadP Lexeme
lexToken = lexEOF     +++
      	   lexLitChar +++ 
      	   lexString  +++ 
      	   lexPunc    +++ 
      	   lexSymbol  +++ 
      	   lexId      +++ 
      	   lexNumber


-- ----------------------------------------------------------------------
-- End of file
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
	    guard (null s)
	    return EOF

-- ---------------------------------------------------------------------------
-- Single character lexemes

lexPunc :: ReadP Lexeme
lexPunc =
  do c <- satisfy isPuncChar
     return (Punc [c])
 where
  isPuncChar c = c `elem` ",;()[]{}`"

-- ----------------------------------------------------------------------
-- Symbols

lexSymbol :: ReadP Lexeme
lexSymbol =
  do s <- munch1 isSymbolChar
     if s `elem` reserved_ops then 
	return (Punc s)		-- Reserved-ops count as punctuation
      else
	return (Symbol s)
 where
  isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
  reserved_ops   = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]

-- ----------------------------------------------------------------------
-- identifiers

lexId :: ReadP Lexeme
lexId = lex_nan <++ lex_id
  where
	-- NaN and Infinity look like identifiers, so
	-- we parse them first.  
    lex_nan = (string "NaN"      >> return (Rat notANumber)) +++
  	      (string "Infinity" >> return (Rat infinity))
  
    lex_id = do c <- satisfy isIdsChar
  		s <- munch isIdfChar
  		return (Ident (c:s))

  	  -- Identifiers can start with a '_'
    isIdsChar c = isAlpha c || c == '_'
    isIdfChar c = isAlphaNum c || c `elem` "_'"

#ifndef __GLASGOW_HASKELL__
infinity, notANumber :: Rational
infinity   = 1 :% 0
notANumber = 0 :% 0
#endif

-- ---------------------------------------------------------------------------
-- Lexing character literals

lexLitChar :: ReadP Lexeme
lexLitChar =
  do char '\''
     (c,esc) <- lexCharE
     guard (esc || c /= '\'')	-- Eliminate '' possibility
     char '\''
     return (Char c)

lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; return c }

lexCharE :: ReadP (Char, Bool)  -- "escaped or not"?
lexCharE =
  do c <- get
     if c == '\\'
       then do c <- lexEsc; return (c, True)
       else do return (c, False)
 where 
  lexEsc =
    lexEscChar
      +++ lexNumeric
        +++ lexCntrlChar
          +++ lexAscii
  
  lexEscChar =
    do c <- get
       case c of
         'a'  -> return '\a'
         'b'  -> return '\b'
         'f'  -> return '\f'
         'n'  -> return '\n'
         'r'  -> return '\r'
         't'  -> return '\t'
         'v'  -> return '\v'
         '\\' -> return '\\'
         '\"' -> return '\"'
         '\'' -> return '\''
         _    -> pfail
  
  lexNumeric =
    do base <- lexBaseChar <++ return 10



( run in 1.526 second using v1.01-cache-2.11-cpan-39bf76dae61 )