{-# LANGUAGE BangPatterns #-}
module Config.LexerUtils
(
AlexInput
, alexGetByte
, LexerMode(..)
, startString
, nestMode
, endMode
, token
, token_
, section
, number
, untermString
, eofAction
, errorAction
) where
import Control.Applicative
import Data.Char (GeneralCategory(..), generalCategory, digitToInt,
isAscii, isSpace, ord, isDigit, isHexDigit)
import Data.Text (Text)
import Data.Word (Word8)
import Numeric (readInt, readHex)
import qualified Data.Text as Text
import Config.Tokens
import Config.Number
import qualified Config.NumberParser
type AlexInput = Located Text
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (Located p :: Position
p cs :: Text
cs)
= do (c :: Char
c,cs' :: Text
cs') <- Text -> Maybe (Char, Text)
Text.uncons Text
cs
let !b :: Word8
b = Char -> Word8
byteForChar Char
c
!inp :: AlexInput
inp = Position -> Text -> AlexInput
forall a. Position -> a -> Located a
Located (Position -> Char -> Position
move Position
p Char
c) Text
cs'
(Word8, AlexInput) -> Maybe (Word8, AlexInput)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8
b, AlexInput
inp)
move :: Position -> Char -> Position
move :: Position -> Char -> Position
move (Position ix :: Int
ix line :: Int
line column :: Int
column) c :: Char
c =
case Char
c of
'\t' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Int
line (((Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 8) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
'\n' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) 1
_ -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Int
line (Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
eofAction :: Position -> LexerMode -> [Located Token]
eofAction :: Position -> LexerMode -> [Located Token]
eofAction eofPosn :: Position
eofPosn st :: LexerMode
st =
case LexerMode
st of
InComment posn :: Position
posn _ -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
InCommentString posn :: Position
posn _ -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
InString posn :: Position
posn _ -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)]
InNormal -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located (Position -> Position
park Position
eofPosn) Token
EOF]
park :: Position -> Position
park :: Position -> Position
park pos :: Position
pos
| Position -> Int
posColumn Position
pos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1 = Position
pos { posColumn :: Int
posColumn = 0 }
| Bool
otherwise = Position
pos { posColumn :: Int
posColumn = 0, posLine :: Int
posLine = Position -> Int
posLine Position
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 }
errorAction :: AlexInput -> [Located Token]
errorAction :: AlexInput -> [Located Token]
errorAction inp :: AlexInput
inp = [(Text -> Token) -> AlexInput -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Error -> Token
Error (Error -> Token) -> (Text -> Error) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Error
NoMatch (Char -> Error) -> (Text -> Char) -> Text -> Error
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Char
Text.head) AlexInput
inp]
data LexerMode
= InNormal
| !Position !LexerMode
| !Position !LexerMode
| InString !Position !Text
type Action =
Int ->
Located Text ->
LexerMode ->
(LexerMode, [Located Token])
token :: (Text -> Token) -> Action
token :: (Text -> Token) -> Action
token f :: Text -> Token
f len :: Int
len match :: AlexInput
match st :: LexerMode
st = (LexerMode
st, [(Text -> Token) -> AlexInput -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Token
f (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Text.take Int
len) AlexInput
match])
token_ :: Token -> Action
token_ :: Token -> Action
token_ = (Text -> Token) -> Action
token ((Text -> Token) -> Action)
-> (Token -> Text -> Token) -> Token -> Action
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Token -> Text -> Token
forall a b. a -> b -> a
const
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode f :: Position -> LexerMode -> LexerMode
f _ match :: AlexInput
match st :: LexerMode
st = (Position -> LexerMode -> LexerMode
f (AlexInput -> Position
forall a. Located a -> Position
locPosition AlexInput
match) LexerMode
st, [])
startString :: Action
startString :: Action
startString _ (Located posn :: Position
posn text :: Text
text) _ = (Position -> Text -> LexerMode
InString Position
posn Text
text, [])
endMode :: Action
endMode :: Action
endMode len :: Int
len (Located endPosn :: Position
endPosn _) mode :: LexerMode
mode =
case LexerMode
mode of
InNormal -> (LexerMode
InNormal, [])
InCommentString _ st :: LexerMode
st -> (LexerMode
st, [])
InComment _ st :: LexerMode
st -> (LexerMode
st, [])
InString startPosn :: Position
startPosn input :: Text
input ->
let n :: Int
n = Position -> Int
posIndex Position
endPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
- Position -> Int
posIndex Position
startPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
badEscape :: Error
badEscape = Text -> Error
BadEscape (String -> Text
Text.pack "out of range")
in case ReadS String
forall a. Read a => ReadS a
reads (Text -> String
Text.unpack (Int -> Text -> Text
Text.take Int
n Text
input)) of
[(s :: String
s,"")] -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Text -> Token
String (String -> Text
Text.pack String
s))])
_ -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Error -> Token
Error Error
badEscape)])
untermString :: Action
untermString :: Action
untermString _ _ = \(InString posn :: Position
posn _) ->
(LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)])
number ::
Text ->
Token
number :: Text -> Token
number = Number -> Token
Number (Number -> Token) -> (Text -> Number) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Number
Config.NumberParser.number (String -> Number) -> (Text -> String) -> Text -> Number
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack (Text -> String) -> (Text -> Text) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.toUpper
section :: Text -> Token
section :: Text -> Token
section = Text -> Token
Section (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.dropWhileEnd Char -> Bool
isSpace (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.init
byteForChar :: Char -> Word8
byteForChar :: Char -> Word8
byteForChar c :: Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\6' = Word8
non_graphic
| Char -> Bool
isAscii Char
c = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c)
| Bool
otherwise = case Char -> GeneralCategory
generalCategory Char
c of
LowercaseLetter -> Word8
lower
OtherLetter -> Word8
lower
UppercaseLetter -> Word8
upper
TitlecaseLetter -> Word8
upper
DecimalNumber -> Word8
digit
OtherNumber -> Word8
digit
ConnectorPunctuation -> Word8
symbol
DashPunctuation -> Word8
symbol
OtherPunctuation -> Word8
symbol
MathSymbol -> Word8
symbol
CurrencySymbol -> Word8
symbol
ModifierSymbol -> Word8
symbol
OtherSymbol -> Word8
symbol
Space -> Word8
space
ModifierLetter -> Word8
other
NonSpacingMark -> Word8
other
SpacingCombiningMark -> Word8
other
EnclosingMark -> Word8
other
LetterNumber -> Word8
other
OpenPunctuation -> Word8
other
ClosePunctuation -> Word8
other
InitialQuote -> Word8
other
FinalQuote -> Word8
other
_ -> Word8
non_graphic
where
non_graphic :: Word8
non_graphic = 0
upper :: Word8
upper = 1
lower :: Word8
lower = 2
digit :: Word8
digit = 3
symbol :: Word8
symbol = 4
space :: Word8
space = 5
other :: Word8
other = 6