module Ondim.Targets.LaTeX.Parser where
import Data.Char (isAsciiLower, isAsciiUpper, isSpace, isUpper, toLower, isNumber)
import Data.Sequence (Seq (..), (|>))
import Data.Text qualified as T
import Ondim.Targets.LaTeX.Instances (LaTeXNode (..), LaTeXDoc (..))
import Text.Megaparsec
import Text.Megaparsec.Char
import Prelude hiding (many, some)
parseLaTeX :: String -> Text -> Either String LaTeXDoc
parseLaTeX :: String -> Text -> Either String LaTeXDoc
parseLaTeX String
fp =
(ParseErrorBundle Text Void -> String)
-> Either (ParseErrorBundle Text Void) LaTeXDoc
-> Either String LaTeXDoc
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ParseErrorBundle Text Void -> String
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> String
errorBundlePretty
(Either (ParseErrorBundle Text Void) LaTeXDoc
-> Either String LaTeXDoc)
-> (Text -> Either (ParseErrorBundle Text Void) LaTeXDoc)
-> Text
-> Either String LaTeXDoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parsec Void Text LaTeXDoc
-> String -> Text -> Either (ParseErrorBundle Text Void) LaTeXDoc
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
parse (ReaderT ParserState (Parsec Void Text) LaTeXDoc
-> ParserState -> Parsec Void Text LaTeXDoc
forall r (m :: * -> *) a. ReaderT r m a -> r -> m a
runReaderT ReaderT ParserState (Parsec Void Text) LaTeXDoc
document ParserState
initialParserstate) String
fp
document :: Parser LaTeXDoc
document :: ReaderT ParserState (Parsec Void Text) LaTeXDoc
document = do
Nodes nodes <- Parser Nodes
manyNodes
eof
return $ LaTeXDoc (toList nodes)
newtype Nodes = Nodes {Nodes -> Seq LaTeXNode
unNodes :: Seq LaTeXNode}
instance Semigroup Nodes where
(Nodes (Seq LaTeXNode
xs :|> Text Text
t1)) <> :: Nodes -> Nodes -> Nodes
<> (Nodes (Text Text
t2 :<| Seq LaTeXNode
ys)) =
Seq LaTeXNode -> Nodes
Nodes ((Seq LaTeXNode
xs Seq LaTeXNode -> LaTeXNode -> Seq LaTeXNode
forall a. Seq a -> a -> Seq a
|> Text -> LaTeXNode
Text (Text
t1 Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t2)) Seq LaTeXNode -> Seq LaTeXNode -> Seq LaTeXNode
forall a. Semigroup a => a -> a -> a
<> Seq LaTeXNode
ys)
(Nodes Seq LaTeXNode
xs) <> (Nodes Seq LaTeXNode
ys) =
Seq LaTeXNode -> Nodes
Nodes (Seq LaTeXNode
xs Seq LaTeXNode -> Seq LaTeXNode -> Seq LaTeXNode
forall a. Semigroup a => a -> a -> a
<> Seq LaTeXNode
ys)
instance Monoid Nodes where
mempty :: Nodes
mempty = Seq LaTeXNode -> Nodes
Nodes Seq LaTeXNode
forall a. Monoid a => a
mempty
instance One Nodes where
type OneItem Nodes = LaTeXNode
one :: OneItem Nodes -> Nodes
one = Seq LaTeXNode -> Nodes
Nodes (Seq LaTeXNode -> Nodes)
-> (LaTeXNode -> Seq LaTeXNode) -> LaTeXNode -> Nodes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. OneItem (Seq LaTeXNode) -> Seq LaTeXNode
LaTeXNode -> Seq LaTeXNode
forall x. One x => OneItem x -> x
one
newtype ParserState = ParserState
{ ParserState -> Int
level :: Int
}
initialParserstate :: ParserState
initialParserstate :: ParserState
initialParserstate = Int -> ParserState
ParserState Int
0
type Parser = ReaderT ParserState (Parsec Void Text)
isHSpace :: Char -> Bool
isHSpace :: Char -> Bool
isHSpace Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n'
manyNodes :: Parser Nodes
manyNodes :: Parser Nodes
manyNodes =
[Nodes] -> Nodes
forall a. Monoid a => [a] -> a
mconcat
([Nodes] -> Nodes)
-> ReaderT ParserState (Parsec Void Text) [Nodes] -> Parser Nodes
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Nodes -> ReaderT ParserState (Parsec Void Text) [Nodes]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many
( Parser Nodes
braces
Parser Nodes -> Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Nodes
lineEnd
Parser Nodes -> Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Nodes
command
Parser Nodes -> Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Nodes
escape
Parser Nodes -> Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Nodes
spaceEater
Parser Nodes -> Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Nodes
comment
Parser Nodes -> Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Nodes
prose
)
command :: Parser Nodes
command :: Parser Nodes
command = do
_ <- ReaderT ParserState (Parsec Void Text) (Tokens Text)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ReaderT ParserState (Parsec Void Text) (Tokens Text)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text))
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall a b. (a -> b) -> a -> b
$ Tokens Text -> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"\\@"
name <- takeWhileP Nothing isAllowedName
attrs <- option [] do
_ <- try do
space
char '['
space
pair `sepBy` char ',' <* char ']'
arg <- option mempty do
_ <- try do
space
char '{'
inner <* char '}'
return $ one (Command (unCamel name) attrs arg)
where
unCamel :: Text -> Text
unCamel = (Char -> Text) -> Text -> Text
T.concatMap \Char
c ->
if Char -> Bool
isUpper Char
c
then String -> Text
T.pack [Char
'-', Char -> Char
toLower Char
c]
else OneItem Text -> Text
forall x. One x => OneItem x -> x
one Char
OneItem Text
c
inner :: ReaderT ParserState (Parsec Void Text) [LaTeXNode]
inner =
(ParserState -> ParserState)
-> ReaderT ParserState (Parsec Void Text) [LaTeXNode]
-> ReaderT ParserState (Parsec Void Text) [LaTeXNode]
forall a.
(ParserState -> ParserState)
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall r (m :: * -> *) a. MonadReader r m => (r -> r) -> m a -> m a
local
(\ParserState
s -> ParserState
s {level = 1 + level s})
(Seq LaTeXNode -> [LaTeXNode]
forall a. Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList (Seq LaTeXNode -> [LaTeXNode])
-> (Nodes -> Seq LaTeXNode) -> Nodes -> [LaTeXNode]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Nodes -> Seq LaTeXNode
unNodes (Nodes -> [LaTeXNode])
-> Parser Nodes
-> ReaderT ParserState (Parsec Void Text) [LaTeXNode]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Nodes
manyNodes)
isAllowedKey :: Char -> Bool
isAllowedKey Char
c = Char -> Bool
isAllowedName Char
c Bool -> Bool -> Bool
|| Char -> Bool
isNumber Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':'
gpVal :: ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
gpVal = Token Text -> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'{' ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
forall a b.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) b
-> ReaderT ParserState (Parsec Void Text) b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Nodes -> Seq LaTeXNode
unNodes (Nodes -> Seq LaTeXNode)
-> Parser Nodes
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Nodes
manyNodes) ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
-> ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
forall a b.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) b
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Token Text -> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'}'
nVal :: ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
nVal = OneItem (Seq LaTeXNode) -> Seq LaTeXNode
LaTeXNode -> Seq LaTeXNode
forall x. One x => OneItem x -> x
one (LaTeXNode -> Seq LaTeXNode)
-> (Text -> LaTeXNode) -> Text -> Seq LaTeXNode
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> LaTeXNode
Text (Text -> Seq LaTeXNode)
-> ReaderT ParserState (Parsec Void Text) Text
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe String
-> (Token Text -> Bool)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P Maybe String
forall a. Maybe a
Nothing Char -> Bool
Token Text -> Bool
isAllowedKey
pair :: ReaderT ParserState (Parsec Void Text) ([LaTeXNode], [LaTeXNode])
pair = do
ReaderT ParserState (Parsec Void Text) ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
k <- ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
gpVal ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
-> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ReaderT ParserState (Parsec Void Text) (Seq LaTeXNode)
nVal
space
v <- option mempty $ do
char '=' *> space
(try gpVal <|> nVal)
<* space
return (toList k, toList v)
lineEnd :: Parser Nodes
lineEnd :: Parser Nodes
lineEnd = do
_ <- Token Text -> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\n'
n <- asks level
s <- takeWhileP Nothing isHSpace
return $ one $ Text $ T.cons '\n' $ T.drop (2 * n) s
escape :: Parser Nodes
escape :: Parser Nodes
escape = Parser Nodes -> Parser Nodes
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try do
_ <- Token Text -> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\\'
s <- satisfy \Token Text
c -> Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char -> Bool
isAsciiLower Char
Token Text
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
Token Text
c
return $ one $ Text $ T.pack [s]
prose :: Parser Nodes
prose :: Parser Nodes
prose = do
s <- (Token Text -> Bool)
-> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy \Token Text
c -> Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'}' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'%'
t <-
takeWhileP
Nothing
(\Token Text
c -> Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\\' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'{' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'}' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'%')
return $ Nodes (one $ Text $ T.cons s t)
comment :: Parser Nodes
= do
_ <- ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) Char
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) Char)
-> ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) Char
forall a b. (a -> b) -> a -> b
$ Token Text -> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'%'
c <- takeWhileP Nothing (/= '\n')
return $ Nodes (one $ Comment c)
braces :: Parser Nodes
braces :: Parser Nodes
braces = do
_ <- ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) Char
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) Char)
-> ReaderT ParserState (Parsec Void Text) Char
-> ReaderT ParserState (Parsec Void Text) Char
forall a b. (a -> b) -> a -> b
$ Token Text -> ReaderT ParserState (Parsec Void Text) (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'{'
x <- manyNodes
_ <- char '}'
return $ one (Text "{") <> x <> one (Text "}")
spaceEater :: Parser Nodes
spaceEater :: Parser Nodes
spaceEater = do
_ <- ReaderT ParserState (Parsec Void Text) (Tokens Text)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall a.
ReaderT ParserState (Parsec Void Text) a
-> ReaderT ParserState (Parsec Void Text) a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ReaderT ParserState (Parsec Void Text) (Tokens Text)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text))
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
-> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall a b. (a -> b) -> a -> b
$ Tokens Text -> ReaderT ParserState (Parsec Void Text) (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"%!\n"
space
return mempty
isAllowedName :: Char -> Bool
isAllowedName :: Char -> Bool
isAllowedName Char
c = Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'@'