{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeFamilies #-}
module Text.HTML.TagStream.ByteString where

import qualified Blaze.ByteString.Builder as B
import           Control.Applicative
import           Control.Monad (unless)
import qualified Control.Monad.Fail as Fail
import           Data.Attoparsec.ByteString.Char8
import           Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import           Data.Conduit
import qualified Data.Conduit.List as CL
import           Data.Default
import           Data.Functor.Identity (runIdentity)
import           Data.Monoid
import           Data.Text.Encoding
import qualified Text.XML.Stream.Parse as XML

import           Text.HTML.TagStream.Entities
import           Text.HTML.TagStream.Types
import           Text.HTML.TagStream.Utils (splitAccum)

type Token = Token' ByteString
type Attr = Attr' ByteString

{--
 - match quoted string, can fail.
 -}
quoted :: Char -> Parser ByteString
quoted :: Char -> Parser ByteString
quoted q :: Char
q = ByteString -> ByteString -> ByteString
S.append (ByteString -> ByteString -> ByteString)
-> Parser ByteString
-> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 ('\\',Char
q))
                    Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q Parser Char -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ByteString -> Parser ByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure ""
                      Parser ByteString -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char '\\' Parser Char -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser ByteString -> Parser ByteString
atLeast 1 (Char -> Parser ByteString
quoted Char
q) )

quotedOr :: Parser ByteString -> Parser ByteString
quotedOr :: Parser ByteString -> Parser ByteString
quotedOr p :: Parser ByteString
p = Parser Char -> Parser (Maybe Char)
forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 ('"','\''))) Parser (Maybe Char)
-> (Maybe Char -> Parser ByteString) -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
             Parser ByteString
-> (Char -> Parser ByteString) -> Maybe Char -> Parser ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser ByteString
p Char -> Parser ByteString
quoted

{--
 - attribute value, can't fail.
 -}
attrValue :: Parser ByteString
attrValue :: Parser ByteString
attrValue = Parser ByteString -> Parser ByteString
quotedOr (Parser ByteString -> Parser ByteString)
-> Parser ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser ByteString
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)

{--
 - attribute name, at least one char, can fail when meet tag end.
 - might match self-close tag end "/>" , make sure match `tagEnd' first.
 -}
attrName :: Parser ByteString
attrName :: Parser ByteString
attrName = Parser ByteString -> Parser ByteString
quotedOr (Parser ByteString -> Parser ByteString)
-> Parser ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$
             Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='>')
                    Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 ('/','>','=') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)

{--
 - tag end, return self-close or not, can fail.
 -}
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char '>' Parser Char -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
     Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string "/>" Parser ByteString -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True

{--
 - attribute pair or tag end, can fail if tag end met.
 -}
attr :: Parser Attr
attr :: Parser Attr
attr = (,) (ByteString -> ByteString -> Attr)
-> Parser ByteString -> Parser ByteString (ByteString -> Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
attrName Parser ByteString (ByteString -> Attr)
-> Parser ByteString () -> Parser ByteString (ByteString -> Attr)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
skipSpace
           Parser ByteString (ByteString -> Attr)
-> Parser ByteString -> Parser Attr
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Parser Char -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char '=') Parser Bool -> (Bool -> Parser ByteString) -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                 Parser ByteString -> Parser ByteString -> Bool -> Parser ByteString
forall a. a -> a -> Bool -> a
cond (Parser ByteString ()
skipSpace Parser ByteString () -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString
attrValue)
                      (ByteString -> Parser ByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure "")
               )

{--
 - all attributes before tag end. can't fail.
 -}
attrs :: Parser ([Attr], Bool)
attrs :: Parser ([Attr], Bool)
attrs = [Attr] -> Parser ([Attr], Bool)
loop []
  where
    loop :: [Attr] -> Parser ([Attr], Bool)
loop acc :: [Attr]
acc = Parser ByteString ()
skipSpace Parser ByteString ()
-> Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Bool -> Either Bool Attr
forall a b. a -> Either a b
Left (Bool -> Either Bool Attr)
-> Parser Bool -> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Attr -> Either Bool Attr
forall a b. b -> Either a b
Right (Attr -> Either Bool Attr)
-> Parser Attr -> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Attr
attr) Parser ByteString (Either Bool Attr)
-> (Either Bool Attr -> Parser ([Attr], Bool))
-> Parser ([Attr], Bool)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
               (Bool -> Parser ([Attr], Bool))
-> (Attr -> Parser ([Attr], Bool))
-> Either Bool Attr
-> Parser ([Attr], Bool)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
                 (([Attr], Bool) -> Parser ([Attr], Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (([Attr], Bool) -> Parser ([Attr], Bool))
-> (Bool -> ([Attr], Bool)) -> Bool -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Attr] -> [Attr]
forall a. [a] -> [a]
reverse [Attr]
acc,))
                 ([Attr] -> Parser ([Attr], Bool)
loop ([Attr] -> Parser ([Attr], Bool))
-> (Attr -> [Attr]) -> Attr -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
:[Attr]
acc))

{--
 - comment tag without prefix.
 -}
comment :: Parser Token
comment :: Parser Token
comment = ByteString -> Token
forall s. s -> Token' s
Comment (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
comment'
  where comment' :: Parser ByteString
comment' = ByteString -> ByteString -> ByteString
S.append (ByteString -> ByteString -> ByteString)
-> Parser ByteString
-> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='-')
                            Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( ByteString -> Parser ByteString
string "-->" Parser ByteString -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ByteString -> Parser ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ""
                              Parser ByteString -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser ByteString -> Parser ByteString
atLeast 1 Parser ByteString
comment' )

{--
 - tags begine with <! , e.g. <!DOCTYPE ...>
 -}
special :: Parser Token
special :: Parser Token
special = ByteString -> ByteString -> Token
forall s. s -> s -> Token' s
Special
          (ByteString -> ByteString -> Token)
-> Parser ByteString -> Parser ByteString (ByteString -> Token)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='-') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
                       Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
                       Parser ByteString -> Parser ByteString () -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
skipSpace )
          Parser ByteString (ByteString -> Token)
-> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') Parser Token -> Parser Char -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char '>'

{--
 - parse a tag, can fail.
 -}
tag :: Parser Token
tag :: Parser Token
tag = do
    TagType
t <-    ByteString -> Parser ByteString
string "/"     Parser ByteString
-> Parser ByteString TagType -> Parser ByteString TagType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser ByteString TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
        Parser ByteString TagType
-> Parser ByteString TagType -> Parser ByteString TagType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string "!" Parser ByteString
-> Parser ByteString TagType -> Parser ByteString TagType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser ByteString TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
        Parser ByteString TagType
-> Parser ByteString TagType -> Parser ByteString TagType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TagType -> Parser ByteString TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
    case TagType
t of
        TagTypeClose ->
            ByteString -> Token
forall s. s -> Token' s
TagClose (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>')
            Parser Token -> Parser Char -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char '>'
        TagTypeSpecial -> Parser ByteString -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (ByteString -> Parser ByteString
string "--") Parser Bool -> (Bool -> Parser Token) -> Parser Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                          Parser Token -> Parser Token -> Bool -> Parser Token
forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
        TagTypeNormal -> do
            ByteString
name <- (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 ('<','>','/') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
            (as :: [Attr]
as, close :: Bool
close) <- Parser ([Attr], Bool)
attrs
            Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ ByteString -> [Attr] -> Bool -> Token
forall s. s -> [Attr' s] -> Bool -> Token' s
TagOpen ByteString
name [Attr]
as Bool
close

{--
 - record incomplete tag for streamline processing.
 -}
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = ByteString -> Token
forall s. s -> Token' s
Incomplete (ByteString -> Token)
-> (ByteString -> ByteString) -> ByteString -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString -> ByteString
S.cons '<' (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString

{--
 - parse text node. consume at least one char, to make sure progress.
 -}
text :: Parser Token
text :: Parser Token
text = ByteString -> Token
forall s. s -> Token' s
Text (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser ByteString -> Parser ByteString
atLeast 1 ((Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='<'))

-- | Decode the HTML entities e.g. @&amp;@ in some text into @&@.
decodeEntitiesBS :: Monad m => Conduit Token m Token
decodeEntitiesBS :: Conduit Token m Token
decodeEntitiesBS =
  Dec Builder ByteString -> Conduit Token m Token
forall (m :: * -> *) builder string.
(Monad m, Monoid builder, Monoid string, IsString string,
 Eq string) =>
Dec builder string -> Conduit (Token' string) m (Token' string)
decodeEntities
    Dec :: forall builder string.
(builder -> string)
-> ((Char -> Bool) -> string -> (string, string))
-> (string -> builder)
-> (Int -> string -> string)
-> (string -> Maybe string)
-> (string -> Maybe (Char, string))
-> Dec builder string
Dec { decToS :: Builder -> ByteString
decToS     = Builder -> ByteString
B.toByteString
        , decBreak :: (Char -> Bool) -> ByteString -> Attr
decBreak   = (Char -> Bool) -> ByteString -> Attr
S.break
        , decBuilder :: ByteString -> Builder
decBuilder = ByteString -> Builder
B.fromByteString
        , decDrop :: Int -> ByteString -> ByteString
decDrop    = Int -> ByteString -> ByteString
S.drop
        , decEntity :: ByteString -> Maybe ByteString
decEntity  = ByteString -> Maybe ByteString
forall (f :: * -> *). MonadThrow f => ByteString -> f ByteString
decodeEntity
        , decUncons :: ByteString -> Maybe (Char, ByteString)
decUncons  = ByteString -> Maybe (Char, ByteString)
S.uncons }
  where decodeEntity :: ByteString -> f ByteString
decodeEntity entity :: ByteString
entity =
          (Text -> ByteString) -> f Text -> f ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> ByteString
encodeUtf8
          (f Text -> f ByteString) -> f Text -> f ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ConduitT () ByteString f ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList ["&",ByteString
entity,";"]
          ConduitT () ByteString f ()
-> ConduitT ByteString Event f () -> ConduitT () Event f ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
$= ParseSettings -> ConduitT ByteString Event f ()
forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString Event m ()
XML.parseBytes ParseSettings
forall a. Default a => a
def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
          ConduitT () Event f () -> Sink Event f Text -> f Text
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Sink Event f Text
forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content

token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char '<' Parser Char -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag Parser Token -> Parser Token -> Parser Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
    Parser Token -> Parser Token -> Parser Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text

{--
 - treat script tag specially, can't fail.
 -}
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd t :: Token
t = [Token] -> [Token]
forall a. [a] -> [a]
reverse ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Token] -> Parser [Token]
loop [Token
t]
              Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[]) (Token -> [Token])
-> (ByteString -> Token) -> ByteString -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Token
forall s. s -> Token' s
Incomplete (ByteString -> Token)
-> (ByteString -> ByteString) -> ByteString -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString -> ByteString
S.append ByteString
script (ByteString -> [Token]) -> Parser ByteString -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString
  where
    script :: ByteString
script = Builder -> ByteString
B.toByteString (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$ (ByteString -> ByteString) -> Token -> Builder
showToken ByteString -> ByteString
forall a. a -> a
id Token
t
    loop :: [Token] -> Parser [Token]
loop acc :: [Token]
acc = (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc) (Token -> [Token]) -> Parser Token -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Token
scriptEnd
           Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Token
text Parser Token -> (Token -> Parser [Token]) -> Parser [Token]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Token] -> Parser [Token]
loop ([Token] -> Parser [Token])
-> (Token -> [Token]) -> Token -> Parser [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc))
    scriptEnd :: Parser Token
scriptEnd = ByteString -> Parser ByteString
string "</script>" Parser ByteString -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Token
forall s. s -> Token' s
TagClose "script")

html :: Parser [Token]
html :: Parser [Token]
html = Parser [Token]
tokens Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Token] -> Parser [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
  where
    tokens :: Parser [Token]
    tokens :: Parser [Token]
tokens = do
        Token
t <- Parser Token
token
        case Token
t of
            (TagOpen name :: ByteString
name _ close :: Bool
close)
              | Bool -> Bool
not Bool
close Bool -> Bool -> Bool
&& ByteString
nameByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
=="script"
                -> [Token] -> [Token] -> [Token]
forall a. [a] -> [a] -> [a]
(++) ([Token] -> [Token] -> [Token])
-> Parser [Token] -> Parser ByteString ([Token] -> [Token])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Token -> Parser [Token]
tillScriptEnd Token
t Parser ByteString ([Token] -> [Token])
-> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser [Token]
html
            _ -> (Token
tToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:) ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser [Token]
html

decode :: ByteString -> Either String [Token]
decode :: ByteString -> Either String [Token]
decode = ([Token] -> [Token])
-> Either String [Token] -> Either String [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Token] -> [Token]
forall (t :: * -> *). Foldable t => t Token -> [Token]
decodeEntitiesBS' (Either String [Token] -> Either String [Token])
-> (ByteString -> Either String [Token])
-> ByteString
-> Either String [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser [Token] -> ByteString -> Either String [Token]
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Token]
html
  where
    decodeEntitiesBS' :: t Token -> [Token]
decodeEntitiesBS' tokens :: t Token
tokens = Identity [Token] -> [Token]
forall a. Identity a -> a
runIdentity (Identity [Token] -> [Token]) -> Identity [Token] -> [Token]
forall a b. (a -> b) -> a -> b
$ (Token -> ConduitT () Token Identity ())
-> t Token -> ConduitT () Token Identity ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT () Token Identity ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield t Token
tokens ConduitT () Token Identity ()
-> Sink Token Identity [Token] -> Identity [Token]
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Conduit Token Identity Token
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS Conduit Token Identity Token
-> Sink Token Identity [Token] -> Sink Token Identity [Token]
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$ Sink Token Identity [Token]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
CL.consume


{--
 - Utils {{{
 -}

atLeast :: Int -> Parser ByteString -> Parser ByteString
atLeast :: Int -> Parser ByteString -> Parser ByteString
atLeast 0 p :: Parser ByteString
p = Parser ByteString
p
atLeast n :: Int
n p :: Parser ByteString
p = Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser ByteString -> Parser ByteString
atLeast (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Parser ByteString
p

cond :: a -> a -> Bool -> a
cond :: a -> a -> Bool -> a
cond a1 :: a
a1 a2 :: a
a2 b :: Bool
b = if Bool
b then a
a1 else a
a2

(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: f Bool -> f Bool -> f Bool
(||.) = (Bool -> Bool -> Bool) -> f Bool -> f Bool -> f Bool
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)

in2 :: Eq a => (a,a) -> a -> Bool
in2 :: (a, a) -> a -> Bool
in2 (a1 :: a
a1,a2 :: a
a2) a :: a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2

in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: (a, a, a) -> a -> Bool
in3 (a1 :: a
a1,a2 :: a
a2,a3 :: a
a3) a :: a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a3

boolP :: Parser a -> Parser Bool
boolP :: Parser a -> Parser Bool
boolP p :: Parser a
p = Parser a
p Parser a -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False

maybeP :: Parser a -> Parser (Maybe a)
maybeP :: Parser a -> Parser (Maybe a)
maybeP p :: Parser a
p = a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> Parser a -> Parser (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p Parser (Maybe a) -> Parser (Maybe a) -> Parser (Maybe a)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe a -> Parser (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
-- }}}

-- {{{ encode tokens
cc :: [ByteString] -> B.Builder
cc :: [ByteString] -> Builder
cc = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> ([ByteString] -> [Builder]) -> [ByteString] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Builder) -> [ByteString] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Builder
B.fromByteString

showToken :: (ByteString -> ByteString) -> Token -> B.Builder
showToken :: (ByteString -> ByteString) -> Token -> Builder
showToken hl :: ByteString -> ByteString
hl (TagOpen name :: ByteString
name as :: [Attr]
as close :: Bool
close) =
    [ByteString] -> Builder
cc ([ByteString] -> Builder) -> [ByteString] -> Builder
forall a b. (a -> b) -> a -> b
$ [ByteString -> ByteString
hl "<", ByteString
name]
      [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ (Attr -> ByteString) -> [Attr] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Attr -> ByteString
showAttr [Attr]
as
      [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> ByteString
hl (if Bool
close then "/>" else ">")]
  where
    showAttr :: Attr -> ByteString
    showAttr :: Attr -> ByteString
showAttr (key :: ByteString
key, value :: ByteString
value) = [ByteString] -> ByteString
S.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [" ", ByteString
key, ByteString -> ByteString
hl "=\""] [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ (Char -> ByteString) -> String -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Char -> ByteString
escape (ByteString -> String
S.unpack ByteString
value) [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> ByteString
hl "\""]
    escape :: Char -> ByteString
escape '"' = "\\\""
    escape '\\' = "\\\\"
    escape c :: Char
c = Char -> ByteString
S.singleton Char
c
showToken hl :: ByteString -> ByteString
hl (TagClose name :: ByteString
name) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl "</", ByteString
name, ByteString -> ByteString
hl ">"]
showToken _ (Text s :: ByteString
s) = ByteString -> Builder
B.fromByteString ByteString
s
showToken hl :: ByteString -> ByteString
hl (Comment s :: ByteString
s) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl "<!--", ByteString
s, ByteString -> ByteString
hl "-->"]
showToken hl :: ByteString -> ByteString
hl (Special name :: ByteString
name s :: ByteString
s) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl "<!", ByteString
name, " ", ByteString
s, ByteString -> ByteString
hl ">"]
showToken _ (Incomplete s :: ByteString
s) = ByteString -> Builder
B.fromByteString ByteString
s
-- }}}

-- {{{ Stream
tokenStream :: Fail.MonadFail m
#if MIN_VERSION_conduit(1, 0, 0)
            => Conduit ByteString m Token
#else
            => GInfConduit ByteString m Token
#endif
tokenStream :: Conduit ByteString m Token
tokenStream =
    ByteString -> Conduit ByteString m Token
forall (m :: * -> *).
MonadFail m =>
ByteString -> ConduitT ByteString Token m ()
loop ByteString
S.empty Conduit ByteString m Token
-> ConduitT Token Token m () -> Conduit ByteString m Token
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= ConduitT Token Token m ()
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS
  where
#if MIN_VERSION_conduit(1, 0, 0)
    loop :: ByteString -> ConduitT ByteString Token m ()
loop accum :: ByteString
accum = ConduitT ByteString Token m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Token m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Token m ())
-> ConduitT ByteString Token m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Token m ()
-> (ByteString -> ConduitT ByteString Token m ())
-> Maybe ByteString
-> ConduitT ByteString Token m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ByteString -> () -> ConduitT ByteString Token m ()
forall (m :: * -> *) b i.
Monad m =>
ByteString -> b -> ConduitT i Token m b
close ByteString
accum ()) (ByteString -> ByteString -> ConduitT ByteString Token m ()
push ByteString
accum)
#else
    loop accum = awaitE >>= either (close accum) (push accum)
#endif

    push :: ByteString -> ByteString -> ConduitT ByteString Token m ()
push accum :: ByteString
accum input :: ByteString
input =
        case Parser [Token] -> ByteString -> Either String [Token]
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Token]
html (ByteString
accum ByteString -> ByteString -> ByteString
`S.append` ByteString
input) of
            Right ([Token] -> (ByteString, [Token])
forall s. Monoid s => [Token' s] -> (s, [Token' s])
splitAccum -> (accum' :: ByteString
accum', tokens :: [Token]
tokens)) -> (Token -> ConduitT ByteString Token m ())
-> [Token] -> ConduitT ByteString Token m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT ByteString Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield [Token]
tokens ConduitT ByteString Token m ()
-> ConduitT ByteString Token m () -> ConduitT ByteString Token m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString -> ConduitT ByteString Token m ()
loop ByteString
accum'
            Left err :: String
err -> String -> ConduitT ByteString Token m ()
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err

    close :: ByteString -> b -> ConduitT i Token m b
close s :: ByteString
s r :: b
r = do
        Bool -> ConduitT i Token m () -> ConduitT i Token m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
s) (ConduitT i Token m () -> ConduitT i Token m ())
-> ConduitT i Token m () -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ Token -> ConduitT i Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Token -> ConduitT i Token m ()) -> Token -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Token
forall s. s -> Token' s
Text ByteString
s
        b -> ConduitT i Token m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r

-- }}}