{-# OPTIONS #-}
module Language.Python.Common.LexerUtils where
import Control.Monad (liftM)
import Control.Monad.Error.Class (throwError)
import Data.List (foldl')
import Data.Word (Word8)
import Language.Python.Common.Token as Token
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation
import Codec.Binary.UTF8.String as UTF8 (encode)
type Byte = Word8
data BO = BOF | BOL
type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token
lineJoin :: Action
lineJoin :: Action
lineJoin span :: SrcSpan
span _len :: Int
_len _str :: String
_str =
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> Token
LineJoinToken (SrcSpan -> Token) -> SrcSpan -> Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> SrcSpan
spanStartPoint SrcSpan
span
endOfLine :: P Token -> Action
endOfLine :: StateT ParseState (Either ParseError) Token -> Action
endOfLine lexToken :: StateT ParseState (Either ParseError) Token
lexToken span :: SrcSpan
span _len :: Int
_len _str :: String
_str = do
SrcSpan -> P ()
setLastEOL (SrcSpan -> P ()) -> SrcSpan -> P ()
forall a b. (a -> b) -> a -> b
$ SrcSpan -> SrcSpan
spanStartPoint SrcSpan
span
StateT ParseState (Either ParseError) Token
lexToken
bolEndOfLine :: P Token -> Int -> Action
bolEndOfLine :: StateT ParseState (Either ParseError) Token -> Int -> Action
bolEndOfLine lexToken :: StateT ParseState (Either ParseError) Token
lexToken bol :: Int
bol span :: SrcSpan
span len :: Int
len inp :: String
inp = do
Int -> P ()
pushStartCode Int
bol
StateT ParseState (Either ParseError) Token -> Action
endOfLine StateT ParseState (Either ParseError) Token
lexToken SrcSpan
span Int
len String
inp
dedentation :: P Token -> Action
dedentation :: StateT ParseState (Either ParseError) Token -> Action
dedentation lexToken :: StateT ParseState (Either ParseError) Token
lexToken span :: SrcSpan
span _len :: Int
_len _str :: String
_str = do
Int
topIndent <- P Int
getIndent
case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (SrcSpan -> Int
startCol SrcSpan
span) Int
topIndent of
EQ -> do P ()
popStartCode
StateT ParseState (Either ParseError) Token
lexToken
LT -> do P ()
popIndent
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
dedentToken
GT -> SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
span "indentation error"
indentation :: P Token -> Int -> BO -> Action
indentation :: StateT ParseState (Either ParseError) Token -> Int -> BO -> Action
indentation lexToken :: StateT ParseState (Either ParseError) Token
lexToken _dedentCode :: Int
_dedentCode bo :: BO
bo _loc :: SrcSpan
_loc _len :: Int
_len [] = do
P ()
popStartCode
case BO
bo of
BOF -> StateT ParseState (Either ParseError) Token
lexToken
BOL -> StateT ParseState (Either ParseError) Token
newlineToken
indentation lexToken :: StateT ParseState (Either ParseError) Token
lexToken dedentCode :: Int
dedentCode bo :: BO
bo span :: SrcSpan
span _len :: Int
_len _str :: String
_str = do
P ()
popStartCode
Int
parenDepth <- P Int
getParenStackDepth
if Int
parenDepth Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
then StateT ParseState (Either ParseError) Token
lexToken
else do
Int
topIndent <- P Int
getIndent
case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (SrcSpan -> Int
startCol SrcSpan
span) Int
topIndent of
EQ -> case BO
bo of
BOF -> StateT ParseState (Either ParseError) Token
lexToken
BOL -> StateT ParseState (Either ParseError) Token
newlineToken
LT -> do Int -> P ()
pushStartCode Int
dedentCode
StateT ParseState (Either ParseError) Token
newlineToken
GT -> do Int -> P ()
pushIndent (SrcSpan -> Int
startCol SrcSpan
span)
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
indentToken
where
indentToken :: Token
indentToken = SrcSpan -> Token
IndentToken SrcSpan
span
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken mkToken :: SrcSpan -> Token
mkToken location :: SrcSpan
location _ _ = Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (SrcSpan -> Token
mkToken SrcSpan
location)
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token mkToken :: SrcSpan -> String -> a -> Token
mkToken read :: String -> a
read location :: SrcSpan
location len :: Int
len str :: String
str
= Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> String -> a -> Token
mkToken SrcSpan
location String
literal (String -> a
read String
literal)
where
literal :: String
literal = Int -> String -> String
forall a. Int -> [a] -> [a]
take Int
len String
str
endOfFileToken :: Token
endOfFileToken :: Token
endOfFileToken = SrcSpan -> Token
EOFToken SrcSpan
SpanEmpty
dedentToken :: Token
dedentToken = SrcSpan -> Token
DedentToken SrcSpan
SpanEmpty
newlineToken :: P Token
newlineToken :: StateT ParseState (Either ParseError) Token
newlineToken = do
SrcSpan
loc <- P SrcSpan
getLastEOL
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> Token
NewlineToken SrcSpan
loc
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user :: a
_user _inputBeforeToken :: AlexInput
_inputBeforeToken _tokenLength :: Int
_tokenLength (_loc :: SrcLocation
_loc, _bs :: [Byte]
_bs, inputAfterToken :: String
inputAfterToken)
= String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
inputAfterToken Bool -> Bool -> Bool
|| Char
nextChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\n' Bool -> Bool -> Bool
|| Char
nextChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\r'
where
nextChar :: Char
nextChar = String -> Char
forall a. [a] -> a
head String
inputAfterToken
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user :: a
_user _inputBeforeToken :: AlexInput
_inputBeforeToken _tokenLength :: Int
_tokenLength (_loc :: SrcLocation
_loc, _bs :: [Byte]
_bs, inputAfterToken :: String
inputAfterToken)
= Bool -> Bool
not (String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
inputAfterToken)
readBinary :: String -> Integer
readBinary :: String -> Integer
readBinary
= String -> Integer
toBinary (String -> Integer) -> (String -> String) -> String -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> String
forall a. Int -> [a] -> [a]
drop 2
where
toBinary :: String -> Integer
toBinary = (Integer -> Char -> Integer) -> Integer -> String -> Integer
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Integer -> Char -> Integer
forall a. Num a => a -> Char -> a
acc 0
acc :: a -> Char -> a
acc b :: a
b '0' = 2 a -> a -> a
forall a. Num a => a -> a -> a
* a
b
acc b :: a
b '1' = 2 a -> a -> a
forall a. Num a => a -> a -> a
* a
b a -> a -> a
forall a. Num a => a -> a -> a
+ 1
readFloat :: String -> Double
readFloat :: String -> Double
readFloat str :: String
str@('.':cs :: String
cs) = String -> Double
forall a. Read a => String -> a
read ('0'Char -> String -> String
forall a. a -> [a] -> [a]
:String -> String
readFloatRest String
str)
readFloat str :: String
str = String -> Double
forall a. Read a => String -> a
read (String -> String
readFloatRest String
str)
readFloatRest :: String -> String
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest (c :: Char
c:cs :: String
cs) = Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
readFloatRest String
cs
mkString :: (SrcSpan -> String -> Token) -> Action
mkString :: (SrcSpan -> String -> Token) -> Action
mkString toToken :: SrcSpan -> String -> Token
toToken loc :: SrcSpan
loc len :: Int
len str :: String
str = do
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> String -> Token
toToken SrcSpan
loc (Int -> String -> String
forall a. Int -> [a] -> [a]
take Int
len String
str)
stringToken :: SrcSpan -> String -> Token
stringToken :: SrcSpan -> String -> Token
stringToken = SrcSpan -> String -> Token
StringToken
rawStringToken :: SrcSpan -> String -> Token
rawStringToken :: SrcSpan -> String -> Token
rawStringToken = SrcSpan -> String -> Token
StringToken
byteStringToken :: SrcSpan -> String -> Token
byteStringToken :: SrcSpan -> String -> Token
byteStringToken = SrcSpan -> String -> Token
ByteStringToken
formatStringToken :: SrcSpan -> String -> Token
formatStringToken :: SrcSpan -> String -> Token
formatStringToken = SrcSpan -> String -> Token
StringToken
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken = SrcSpan -> String -> Token
StringToken
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = SrcSpan -> String -> Token
UnicodeStringToken
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = SrcSpan -> String -> Token
ByteStringToken
openParen :: (SrcSpan -> Token) -> Action
openParen :: (SrcSpan -> Token) -> Action
openParen mkToken :: SrcSpan -> Token
mkToken loc :: SrcSpan
loc _len :: Int
_len _str :: String
_str = do
let token :: Token
token = SrcSpan -> Token
mkToken SrcSpan
loc
Token -> P ()
pushParen Token
token
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
token
closeParen :: (SrcSpan -> Token) -> Action
closeParen :: (SrcSpan -> Token) -> Action
closeParen mkToken :: SrcSpan -> Token
mkToken loc :: SrcSpan
loc _len :: Int
_len _str :: String
_str = do
let token :: Token
token = SrcSpan -> Token
mkToken SrcSpan
loc
Maybe Token
topParen <- P (Maybe Token)
getParen
case Maybe Token
topParen of
Nothing -> SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
loc String
err1
Just open :: Token
open -> if Token -> Token -> Bool
matchParen Token
open Token
token
then P ()
popParen P ()
-> StateT ParseState (Either ParseError) Token
-> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
token
else SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
loc String
err2
where
err1 :: String
err1 = "Lexical error ! unmatched closing paren"
err2 :: String
err2 = "Lexical error ! unmatched closing paren"
matchParen :: Token -> Token -> Bool
matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = Bool
True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = Bool
True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = Bool
True
matchParen _ _ = Bool
False
type AlexInput = (SrcLocation,
[Byte],
String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = String -> Char
forall a. HasCallStack => String -> a
error "alexInputPrevChar not used"
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc :: SrcLocation
loc, [], input :: String
input)
| String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
input = Maybe (Char, AlexInput)
forall a. Maybe a
Nothing
| Bool
otherwise = SrcLocation -> Maybe (Char, AlexInput) -> Maybe (Char, AlexInput)
forall a b. a -> b -> b
seq SrcLocation
nextLoc ((Char, AlexInput) -> Maybe (Char, AlexInput)
forall a. a -> Maybe a
Just (Char
nextChar, (SrcLocation
nextLoc, [], String
rest)))
where
nextChar :: Char
nextChar = String -> Char
forall a. [a] -> a
head String
input
rest :: String
rest = String -> String
forall a. [a] -> [a]
tail String
input
nextLoc :: SrcLocation
nextLoc = Char -> SrcLocation -> SrcLocation
moveChar Char
nextChar SrcLocation
loc
alexGetChar (loc :: SrcLocation
loc, _:_, _) = String -> Maybe (Char, AlexInput)
forall a. HasCallStack => String -> a
error "alexGetChar called with non-empty byte buffer"
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (loc :: SrcLocation
loc, b :: Byte
b:bs :: [Byte]
bs, input :: String
input) = (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
b, (SrcLocation
loc, [Byte]
bs, String
input))
alexGetByte (loc :: SrcLocation
loc, [], []) = Maybe (Byte, AlexInput)
forall a. Maybe a
Nothing
alexGetByte (loc :: SrcLocation
loc, [], nextChar :: Char
nextChar:rest :: String
rest) =
SrcLocation -> Maybe (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a b. a -> b -> b
seq SrcLocation
nextLoc ((Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
byte, (SrcLocation
nextLoc, [Byte]
restBytes, String
rest)))
where
nextLoc :: SrcLocation
nextLoc = Char -> SrcLocation -> SrcLocation
moveChar Char
nextChar SrcLocation
loc
byte :: Byte
byte:restBytes :: [Byte]
restBytes = String -> [Byte]
UTF8.encode [Char
nextChar]
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar '\n' = Int -> SrcLocation -> SrcLocation
incLine 1
moveChar '\t' = SrcLocation -> SrcLocation
incTab
moveChar '\r' = SrcLocation -> SrcLocation
forall a. a -> a
id
moveChar _ = Int -> SrcLocation -> SrcLocation
incColumn 1
lexicalError :: P a
lexicalError :: P a
lexicalError = do
SrcLocation
location <- P SrcLocation
getLocation
Char
c <- (String -> Char)
-> StateT ParseState (Either ParseError) String
-> StateT ParseState (Either ParseError) Char
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM String -> Char
forall a. [a] -> a
head StateT ParseState (Either ParseError) String
getInput
ErrorType (StateT ParseState (Either ParseError)) -> P a
forall (m :: * -> *) a. MonadError m => ErrorType m -> m a
throwError (ErrorType (StateT ParseState (Either ParseError)) -> P a)
-> ErrorType (StateT ParseState (Either ParseError)) -> P a
forall a b. (a -> b) -> a -> b
$ Char -> SrcLocation -> ParseError
UnexpectedChar Char
c SrcLocation
location
readOctNoO :: String -> Integer
readOctNoO :: String -> Integer
readOctNoO (zero :: Char
zero:rest :: String
rest) = String -> Integer
forall a. Read a => String -> a
read (Char
zeroChar -> String -> String
forall a. a -> [a] -> [a]
:'O'Char -> String -> String
forall a. a -> [a] -> [a]
:String
rest)