module Ondim.Targets.HTML.Parser (parseT, parseLBS) where
import Conduit (ConduitT, MonadThrow, awaitForever, mapOutput, runConduit, yield, (.|))
import Data.ByteString.Lazy qualified as LBS
import Data.Conduit.List qualified as CL
import Data.XML.Types qualified as XT
import Text.HTML.DOM qualified as HTML
import Text.XML qualified as X
parseT :: Text -> X.Document
parseT :: Text -> Document
parseT Text
tss =
case ConduitT () Void (Either SomeException) Document
-> Either SomeException Document
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void (Either SomeException) Document
-> Either SomeException Document)
-> ConduitT () Void (Either SomeException) Document
-> Either SomeException Document
forall a b. (a -> b) -> a -> b
$ Text -> ConduitT () Text (Either SomeException) ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
tss ConduitT () Text (Either SomeException) ()
-> ConduitM Text Void (Either SomeException) Document
-> ConduitT () Void (Either SomeException) Document
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM Text Void (Either SomeException) Document
forall (m :: * -> *) o. MonadThrow m => ConduitT Text o m Document
sinkDocText of
Left SomeException
e -> Text -> Document
forall a t. (HasCallStack, IsText t) => t -> a
error (Text -> Document) -> Text -> Document
forall a b. (a -> b) -> a -> b
$ Text
"Unexpected exception in parseSTChunks: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> SomeException -> Text
forall b a. (Show a, IsString b) => a -> b
show SomeException
e
Right Document
x -> Document
x
parseLBS :: LByteString -> X.Document
parseLBS :: LByteString -> Document
parseLBS = [ByteString] -> Document
parseBSChunks ([ByteString] -> Document)
-> (LByteString -> [ByteString]) -> LByteString -> Document
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LByteString -> [ByteString]
LBS.toChunks
parseBSChunks :: [ByteString] -> X.Document
parseBSChunks :: [ByteString] -> Document
parseBSChunks [ByteString]
tss =
case ConduitT () Void (Either SomeException) Document
-> Either SomeException Document
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void (Either SomeException) Document
-> Either SomeException Document)
-> ConduitT () Void (Either SomeException) Document
-> Either SomeException Document
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ConduitT () ByteString (Either SomeException) ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList [ByteString]
tss ConduitT () ByteString (Either SomeException) ()
-> ConduitM ByteString Void (Either SomeException) Document
-> ConduitT () Void (Either SomeException) Document
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString Void (Either SomeException) Document
forall (m :: * -> *) o.
MonadThrow m =>
ConduitT ByteString o m Document
sinkDoc of
Left SomeException
e -> Text -> Document
forall a t. (HasCallStack, IsText t) => t -> a
error (Text -> Document) -> Text -> Document
forall a b. (a -> b) -> a -> b
$ Text
"Unexpected exception in parseBSChunks: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> SomeException -> Text
forall b a. (Show a, IsString b) => a -> b
show SomeException
e
Right Document
x -> Document
x
sinkDoc :: MonadThrow m => ConduitT ByteString o m X.Document
sinkDoc :: forall (m :: * -> *) o.
MonadThrow m =>
ConduitT ByteString o m Document
sinkDoc = ConduitT ByteString Event m () -> ConduitT ByteString o m Document
forall (m :: * -> *) a o.
MonadThrow m =>
ConduitT a Event m () -> ConduitT a o m Document
sinkDoc' ConduitT ByteString Event m ()
forall (m :: * -> *). Monad m => ConduitT ByteString Event m ()
HTML.eventConduit
sinkDocText :: MonadThrow m => ConduitT Text o m X.Document
sinkDocText :: forall (m :: * -> *) o. MonadThrow m => ConduitT Text o m Document
sinkDocText = ConduitT Text Event m () -> ConduitT Text o m Document
forall (m :: * -> *) a o.
MonadThrow m =>
ConduitT a Event m () -> ConduitT a o m Document
sinkDoc' ConduitT Text Event m ()
forall (m :: * -> *). Monad m => ConduitT Text Event m ()
HTML.eventConduitText
sinkDoc' ::
MonadThrow m =>
ConduitT a XT.Event m () ->
ConduitT a o m X.Document
sinkDoc' :: forall (m :: * -> *) a o.
MonadThrow m =>
ConduitT a Event m () -> ConduitT a o m Document
sinkDoc' ConduitT a Event m ()
f =
(Document -> Document)
-> ConduitT a o m Document -> ConduitT a o m Document
forall a b. (a -> b) -> ConduitT a o m a -> ConduitT a o m b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Document -> Document
stripDummy (ConduitT a o m Document -> ConduitT a o m Document)
-> ConduitT a o m Document -> ConduitT a o m Document
forall a b. (a -> b) -> a -> b
$ (Event -> (Maybe PositionRange, Event))
-> ConduitT a Event m ()
-> ConduitT a (Maybe PositionRange, Event) m ()
forall (m :: * -> *) o1 o2 i r.
Monad m =>
(o1 -> o2) -> ConduitT i o1 m r -> ConduitT i o2 m r
mapOutput (Maybe PositionRange
forall a. Maybe a
Nothing,) ConduitT a Event m ()
f ConduitT a (Maybe PositionRange, Event) m ()
-> ConduitM (Maybe PositionRange, Event) o m Document
-> ConduitT a o m Document
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT
(Maybe PositionRange, Event) (Maybe PositionRange, Event) m ()
forall {a}. ConduitT (Maybe a, Event) (Maybe a, Event) m ()
addDummyWrapper ConduitT
(Maybe PositionRange, Event) (Maybe PositionRange, Event) m ()
-> ConduitM (Maybe PositionRange, Event) o m Document
-> ConduitM (Maybe PositionRange, Event) o m Document
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe PositionRange, Event) o m Document
forall (m :: * -> *) o.
MonadThrow m =>
ConduitT (Maybe PositionRange, Event) o m Document
X.fromEvents
where
addDummyWrapper :: ConduitT (Maybe a, Event) (Maybe a, Event) m ()
addDummyWrapper = do
(Maybe a, Event) -> ConduitT (Maybe a, Event) (Maybe a, Event) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Maybe a
forall a. Maybe a
Nothing, Name -> [(Name, [Content])] -> Event
XT.EventBeginElement Name
"html" [])
((Maybe a, Event)
-> ConduitT (Maybe a, Event) (Maybe a, Event) m ())
-> ConduitT (Maybe a, Event) (Maybe a, Event) m ()
forall (m :: * -> *) i o r.
Monad m =>
(i -> ConduitT i o m r) -> ConduitT i o m ()
awaitForever (Maybe a, Event) -> ConduitT (Maybe a, Event) (Maybe a, Event) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield
(Maybe a, Event) -> ConduitT (Maybe a, Event) (Maybe a, Event) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Maybe a
forall a. Maybe a
Nothing, Name -> Event
XT.EventEndElement Name
"html")
stripDummy :: Document -> Document
stripDummy doc :: Document
doc@(X.Document Prologue
pro (X.Element Name
_ Map Name Text
_ [Node]
nodes) [Miscellaneous]
epi) =
case [Node]
nodes of
[X.NodeElement root :: Element
root@X.Element {elementName :: Element -> Name
X.elementName = Name
"html"}] -> Prologue -> Element -> [Miscellaneous] -> Document
X.Document Prologue
pro Element
root [Miscellaneous]
epi
[Node]
_ -> Document
doc