-- |
-- Module      :  Data.Binary.Parser.Char8
-- Copyright   :  Bryan O'Sullivan 2007-2015, Winterland 2016
-- License     :  BSD3
--
-- Maintainer  :  drkoster@qq.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15.  It /does not/ make any attempt to deal with character
-- encodings, multibyte characters, or wide characters.  In
-- particular, all attempts to use characters above code point U+00FF
-- will give wrong answers.
--
-- Code points below U+0100 are simply translated to and from their
-- numeric values, so e.g. the code point U+00A4 becomes the byte
-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.

module Data.Binary.Parser.Char8 where

import           Control.Applicative
import qualified Data.Binary.Get          as BG
import           Data.Binary.Get.Internal
import qualified Data.Binary.Parser.Word8 as W
import           Data.ByteString          (ByteString)
import qualified Data.ByteString          as B
import           Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Unsafe   as B
import           Prelude                  hiding (takeWhile)

--------------------------------------------------------------------------------

-- | Match any char, to perform lookahead. Returns 'Nothing' if end of
-- input has been reached. Does not consume any input.
--
peekMaybe :: Get (Maybe Char)
peekMaybe :: Get (Maybe Char)
peekMaybe = (Word8 -> Char) -> Maybe Word8 -> Maybe Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word8 -> Char
w2c (Maybe Word8 -> Maybe Char)
-> Get (Maybe Word8) -> Get (Maybe Char)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get (Maybe Word8)
W.peekMaybe
{-# INLINE peekMaybe #-}

-- | Match any char, to perform lookahead.  Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Get Char
peek :: Get Char
peek = Word8 -> Char
w2c (Word8 -> Char) -> Get Word8 -> Get Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get Word8
W.peek
{-# INLINE peek #-}

-- | The parser @satisfy p@ succeeds for any char for which the
-- predicate @p@ returns 'True'. Returns the char that is actually
-- parsed.
--
satisfy :: (Char -> Bool) -> Get Char
satisfy :: (Char -> Bool) -> Get Char
satisfy p :: Char -> Bool
p = Word8 -> Char
w2c (Word8 -> Char) -> Get Word8 -> Get Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Word8 -> Bool) -> Get Word8
W.satisfy (Char -> Bool
p (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE satisfy #-}

-- | The parser @satisfyWith f p@ transforms a char, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed char that was parsed.
--
satisfyWith :: (Char -> a) -> (a -> Bool) -> Get a
satisfyWith :: (Char -> a) -> (a -> Bool) -> Get a
satisfyWith f :: Char -> a
f = (Word8 -> a) -> (a -> Bool) -> Get a
forall a. (Word8 -> a) -> (a -> Bool) -> Get a
W.satisfyWith (Char -> a
f (Char -> a) -> (Word8 -> Char) -> Word8 -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE satisfyWith #-}

-- | Match a specific character.
--
char :: Char -> Get ()
char :: Char -> Get ()
char c :: Char
c = Word8 -> Get ()
W.word8 (Char -> Word8
c2w Char
c)
{-# INLINE char #-}

-- | Match any character.
--
anyChar :: Get Char
anyChar :: Get Char
anyChar = Word8 -> Char
w2c (Word8 -> Char) -> Get Word8 -> Get Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get Word8
BG.getWord8
{-# INLINE anyChar #-}

-- | The parser @skipChar p@ succeeds for any char for which the predicate @p@ returns 'True'.
--
skipChar :: (Char -> Bool) -> Get ()
skipChar :: (Char -> Bool) -> Get ()
skipChar p :: Char -> Bool
p = (Word8 -> Bool) -> Get ()
W.skipWord8 (Char -> Bool
p (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE skipChar #-}

--------------------------------------------------------------------------------

-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Char -> Bool) -> Get ByteString
takeTill :: (Char -> Bool) -> Get ByteString
takeTill p :: Char -> Bool
p = (Word8 -> Bool) -> Get ByteString
W.takeTill (Char -> Bool
p (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile p :: Char -> Bool
p = (Word8 -> Bool) -> Get ByteString
W.takeWhile (Char -> Bool
p (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE takeWhile #-}

-- Similar to 'takeWhile', but requires the predicate to succeed on at least one char
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
--
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 p :: Char -> Bool
p = (Word8 -> Bool) -> Get ByteString
W.takeWhile1 (Char -> Bool
p (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE takeWhile1 #-}

-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Char -> Bool) -> Get ()
skipWhile :: (Char -> Bool) -> Get ()
skipWhile p :: Char -> Bool
p = (Word8 -> Bool) -> Get ()
W.skipWhile (Char -> Bool
p (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE skipWhile #-}

-- | Satisfy a literal string but ignoring case.
--
stringCI :: ByteString -> Get ByteString
stringCI :: ByteString -> Get ByteString
stringCI bs :: ByteString
bs = do
    let l :: Int
l = ByteString -> Int
B.length ByteString
bs
    Int -> Get ()
ensureN Int
l
    ByteString
bs' <- Int -> ByteString -> ByteString
B.unsafeTake Int
l (ByteString -> ByteString) -> Get ByteString -> Get ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get ByteString
get
    if (Word8 -> Word8) -> ByteString -> ByteString
B.map Word8 -> Word8
forall p. (Ord p, Num p) => p -> p
toLower ByteString
bs' ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== (Word8 -> Word8) -> ByteString -> ByteString
B.map Word8 -> Word8
forall p. (Ord p, Num p) => p -> p
toLower ByteString
bs
    then ByteString -> Get ()
put (Int -> ByteString -> ByteString
B.unsafeDrop Int
l ByteString
bs') Get () -> Get ByteString -> Get ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString -> Get ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
bs'
    else String -> Get ByteString
forall (m :: * -> *) a. MonadFail m => String -> m a
fail "stringCI"
  where
    toLower :: p -> p
toLower w :: p
w | p
w p -> p -> Bool
forall a. Ord a => a -> a -> Bool
>= 65 Bool -> Bool -> Bool
&& p
w p -> p -> Bool
forall a. Ord a => a -> a -> Bool
<= 90 = p
w p -> p -> p
forall a. Num a => a -> a -> a
+ 32
              | Bool
otherwise          = p
w
{-# INLINE stringCI #-}

--------------------------------------------------------------------------------

-- | Fast predicate for matching ASCII space characters.
--
-- /Note/: This predicate only gives correct answers for the ASCII
-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
-- space) as a space character, even though it is a valid ISO-8859-15
-- byte. For a Unicode-aware and only slightly slower predicate,
-- use 'Data.Char.isSpace'
--
isSpace :: Char -> Bool
isSpace :: Char -> Bool
isSpace c :: Char
c = (Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ' ') Bool -> Bool -> Bool
|| ('\t' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\r')
{-# INLINE isSpace #-}

-- | Decimal digit predicate.
--
isDigit :: Char -> Bool
isDigit :: Char -> Bool
isDigit c :: Char
c = Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9'
{-# INLINE isDigit #-}

-- | Hex digit predicate.
--
isHexDigit :: Char -> Bool
isHexDigit :: Char -> Bool
isHexDigit c :: Char
c = (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9') Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'f') Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'F')
{-# INLINE isHexDigit #-}

-- | A predicate that matches either a space @\' \'@ or horizontal tab
-- @\'\\t\'@ character.
--
isHorizontalSpace :: Char -> Bool
isHorizontalSpace :: Char -> Bool
isHorizontalSpace c :: Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ' ' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\t'
{-# INLINE isHorizontalSpace #-}

-- | A predicate that matches either a carriage return @\'\\r\'@ or
-- newline @\'\\n\'@ character.
--
isEndOfLine :: Char -> Bool
isEndOfLine :: Char -> Bool
isEndOfLine c :: Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\r' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\n'
{-# INLINE isEndOfLine #-}