Language-Haskell
view release on metacpan or search on metacpan
hugs98-Nov2003/fptools/libraries/base/Text/Read/Lex.hs view on Meta::CPAN
-- -----------------------------------------------------------------------------
-- Lexing
lex :: ReadP Lexeme
lex = skipSpaces >> lexToken
hsLex :: ReadP String
-- ^ Haskell lexer: returns the lexed string, rather than the lexeme
hsLex = do skipSpaces
(s,_) <- gather lexToken
return s
lexToken :: ReadP Lexeme
lexToken = lexEOF +++
lexLitChar +++
lexString +++
lexPunc +++
lexSymbol +++
lexId +++
lexNumber
-- ----------------------------------------------------------------------
-- End of file
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
guard (null s)
return EOF
-- ---------------------------------------------------------------------------
-- Single character lexemes
lexPunc :: ReadP Lexeme
lexPunc =
do c <- satisfy isPuncChar
return (Punc [c])
where
isPuncChar c = c `elem` ",;()[]{}`"
-- ----------------------------------------------------------------------
-- Symbols
lexSymbol :: ReadP Lexeme
lexSymbol =
do s <- munch1 isSymbolChar
if s `elem` reserved_ops then
return (Punc s) -- Reserved-ops count as punctuation
else
return (Symbol s)
where
isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
-- ----------------------------------------------------------------------
-- identifiers
lexId :: ReadP Lexeme
lexId = lex_nan <++ lex_id
where
-- NaN and Infinity look like identifiers, so
-- we parse them first.
lex_nan = (string "NaN" >> return (Rat notANumber)) +++
(string "Infinity" >> return (Rat infinity))
lex_id = do c <- satisfy isIdsChar
s <- munch isIdfChar
return (Ident (c:s))
-- Identifiers can start with a '_'
isIdsChar c = isAlpha c || c == '_'
isIdfChar c = isAlphaNum c || c `elem` "_'"
#ifndef __GLASGOW_HASKELL__
infinity, notANumber :: Rational
infinity = 1 :% 0
notANumber = 0 :% 0
#endif
-- ---------------------------------------------------------------------------
-- Lexing character literals
lexLitChar :: ReadP Lexeme
lexLitChar =
do char '\''
(c,esc) <- lexCharE
guard (esc || c /= '\'') -- Eliminate '' possibility
char '\''
return (Char c)
lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; return c }
lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
lexCharE =
do c <- get
if c == '\\'
then do c <- lexEsc; return (c, True)
else do return (c, False)
where
lexEsc =
lexEscChar
+++ lexNumeric
+++ lexCntrlChar
+++ lexAscii
lexEscChar =
do c <- get
case c of
'a' -> return '\a'
'b' -> return '\b'
'f' -> return '\f'
'n' -> return '\n'
'r' -> return '\r'
't' -> return '\t'
'v' -> return '\v'
'\\' -> return '\\'
'\"' -> return '\"'
'\'' -> return '\''
_ -> pfail
lexNumeric =
do base <- lexBaseChar <++ return 10
( run in 1.526 second using v1.01-cache-2.11-cpan-39bf76dae61 )