From 0c100a2e6f8a856bdb0a629f8f5f2caceefccbe6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 3 Jul 2017 17:55:19 +0200 Subject: [PATCH] Rewrote LaTeX reader with proper tokenization. This rewrite is primarily motivated by the need to get macros working properly (#982, #934, #3779, #3236, #1390, #2888, #2118). We now tokenize the input text, then parse the token stream. Macros modify the token stream, so they should now be effective in any context, including math. (Thus, we no longer need the clunky macro processing capacities of texmath.) A custom state LaTeXState is used instead of ParserState. This, plus the tokenization, will require some rewriting of the exported functions rawLaTeXInline, inlineCommand, rawLaTeXBlock. --- src/Text/Pandoc/Error.hs | 3 + src/Text/Pandoc/Readers/LaTeX.hs | 1732 ++++++++++++++++-------------- test/Tests/Readers/LaTeX.hs | 3 +- test/latex-reader.latex | 1 - test/latex-reader.native | 2 +- 5 files changed, 961 insertions(+), 780 deletions(-) diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 60bc699abd78..24186720cfcd 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -64,6 +64,7 @@ data PandocError = PandocIOError String IOError | PandocTemplateError String | PandocAppError String | PandocEpubSubdirectoryError String + | PandocMacroLoop String deriving (Show, Typeable, Generic) instance Exception PandocError @@ -107,6 +108,8 @@ handleError (Left e) = PandocAppError s -> err 1 s PandocEpubSubdirectoryError s -> err 31 $ "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + PandocMacroLoop s -> err 91 $ + "Loop encountered in expanding macro " ++ s err :: Int -> String -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 753a628aa4a1..ea5feca48895 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -30,6 +31,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Portability : portable Conversion of LaTeX to 'Pandoc' document. + +TODO: +[ ] exported rawLaTeXInline, rawLaTeXBlock, inlineCommand -} module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, @@ -42,7 +46,7 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit) import Data.Default -import Data.Text (Text, unpack) +import Data.Text (Text) import qualified Data.Text as T import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M @@ -59,11 +63,11 @@ import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (many, optional, withRaw, - mathInline, mathDisplay, + mathInline, mathDisplay, macro, space, (<|>), spaces, blankline) import Text.Pandoc.Shared import Text.Pandoc.Walk -import Text.Pandoc.Error (PandocError(PandocParsecError)) +import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop)) import Text.Pandoc.Extensions (getDefaultExtensions) import Debug.Trace (traceShowId) @@ -103,32 +107,44 @@ testParser :: LP PandocIO a -> Text -> IO a testParser p t = do res <- runIOorExplode (runParserT p defaultLaTeXState{ sOptions = def{ readerExtensions = - getDefaultExtensions "latex" }} "name" (tokenize t)) + enableExtension Ext_raw_tex $ + getDefaultExtensions "latex" }} "source" (tokenize t)) case res of Left e -> error (show e) Right r -> return r -data Macro = CommandMacro Text ([Tok] -> [Tok]) - | EnvironmentMacro Text ([Tok] -> [Tok]) +data Macro = Macro Int (Maybe [Tok]) [Tok] + deriving Show data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext - , sMacros :: [Macro] + , sMacros :: M.Map Text Macro , sContainers :: [String] , sHeaders :: M.Map Inlines String , sLogMessages :: [LogMessage] - , sIdentifiers :: Set.Set String } + , sIdentifiers :: Set.Set String + , sVerbatimMode :: Bool + , sCaption :: Maybe Inlines + , sInListItem :: Bool + , sInTableCell :: Bool + } + deriving Show defaultLaTeXState :: LaTeXState defaultLaTeXState = LaTeXState{ sOptions = def , sMeta = nullMeta , sQuoteContext = NoQuote - , sMacros = [] + , sMacros = M.empty , sContainers = [] , sHeaders = M.empty , sLogMessages = [] - , sIdentifiers = Set.empty } + , sIdentifiers = Set.empty + , sVerbatimMode = False + , sCaption = Nothing + , sInListItem = False + , sInTableCell = False + } instance PandocMonad m => HasQuoteContext LaTeXState m where getQuoteContext = sQuoteContext <$> getState @@ -172,17 +188,48 @@ instance Default LaTeXState where type LP m = ParserT [Tok] LaTeXState m -rawLaTeXBlock :: PandocMonad m => ParserT String LaTeXState m String +withVerbatimMode :: PandocMonad m => LP m a -> LP m a +withVerbatimMode parser = do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result + +-- TODO: export our local macro parsing code +-- use that in markdown reader and change ParserState +-- TODO: remove use of texmath's applyMacros in Markdown reader +-- (and elsewhere?). Instead, use an exported +-- applyMacros that uses our Macro type. or perhaps +-- just run the latex reader on the math (adding delims +-- $$). + +-- TODO: replace ParserState with variable and constraints. +-- can we use 'tokens' from Parsec? +-- feed ParserState -> LaTeXState and then back again +rawLaTeXBlock :: PandocMonad m => ParserT String ParserState m String rawLaTeXBlock = mzero +-- rawLaTeXBlock :: PandocMonad m => ParserT String LaTeXState String +-- rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) -rawLaTeXInline :: PandocMonad m => ParserT String LaTeXState m Inline +rawLaTeXInline :: PandocMonad m => ParserT String ParserState m Inline rawLaTeXInline = mzero +-- rawLaTeXInline :: PandocMonad m => ParserT String LaTeXState Inline +-- rawLaTeXInline = do +-- raw <- (snd <$> withRaw inlineCommand) +-- <|> (snd <$> withRaw inlineEnvironment) +-- <|> (snd <$> withRaw blockCommand) +-- RawInline "latex" <$> applyMacros' raw -inlineCommand :: PandocMonad m => ParserT String LaTeXState m Inlines +-- TODO this is only used in org reader. +-- I think the reason is that tarleb wanted to avoid +-- the blockCommand alternative in rawLaTeXInline. +-- I don't know whyt that's there and I think it should be +-- removed. +inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines inlineCommand = mzero -data TokType = CtrlSeq | Spaces | Newline | Symbol | Word | Comment | - Esc1 | Esc2 +data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | + Esc1 | Esc2 | Arg Int deriving (Eq, Ord, Show) data Tok = Tok (Line, Column) TokType Text @@ -197,7 +244,7 @@ totoks (lin,col) t = Nothing -> [] Just (c, rest) | c == '\n' -> - Tok (lin, col) Newline (T.singleton '\n') + Tok (lin, col) Newline "\n" : totoks (lin + 1,1) rest | isSpaceOrTab c -> let (sps, rest') = T.span isSpaceOrTab t @@ -209,7 +256,7 @@ totoks (lin,col) t = : totoks (lin, col + T.length ws) rest' | c == '%' -> let (cs, rest') = T.break (== '\n') rest - in Tok (lin, col) Comment (T.singleton '%' <> cs) + in Tok (lin, col) Comment ("%" <> cs) : totoks (lin, col + 1 + T.length cs) rest' | c == '\\' -> case T.uncons rest of @@ -217,14 +264,25 @@ totoks (lin,col) t = Just (d, rest') | isLetter d -> let (ws, rest'') = T.span isLetter rest - in Tok (lin, col) CtrlSeq (T.singleton '\\' <> ws) - : totoks (lin, col + 1 + T.length ws) rest'' + (ss, rest''') = T.span isSpaceOrTab rest'' + in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss) + : totoks (lin, + col + 1 + T.length ws + T.length ss) rest''' | d == '\t' || d == '\n' -> - Tok (lin, col) Symbol (T.singleton '\\') + Tok (lin, col) Symbol ("\\") : totoks (lin, col + 1) rest | otherwise -> - Tok (lin, col) CtrlSeq (T.pack [c,d]) + Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d]) : totoks (lin, col + 2) rest' + | c == '#' -> + let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest + in case safeRead (T.unpack t1) of + Just i -> + Tok (lin, col) (Arg i) ("#" <> t1) + : totoks (lin, col + 1 + T.length t1) t2 + Nothing -> + Tok (lin, col) Symbol ("#") + : totoks (lin, col + 1) t2 | c == '^' -> case T.uncons rest of Just ('^', rest') -> @@ -241,9 +299,9 @@ totoks (lin,col) t = | d < '\128' -> Tok (lin, col) Esc1 (T.pack ['^','^',d]) : totoks (lin, col + 3) rest'' - _ -> [Tok (lin, col) Symbol (T.singleton '^'), - Tok (lin, col + 1) Symbol (T.singleton '^')] - _ -> Tok (lin, col) Symbol (T.singleton '^') + _ -> [Tok (lin, col) Symbol ("^"), + Tok (lin, col + 1) Symbol ("^")] + _ -> Tok (lin, col) Symbol ("^") : totoks (lin, col + 1) rest | otherwise -> Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest @@ -261,10 +319,12 @@ untokenize = mconcat . map untoken untoken :: Tok -> Text untoken (Tok _ _ t) = t --- we put withMacros in front of satisfyTok, so it works on everything satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok satisfyTok f = - doMacros *> tokenPrim (T.unpack . untoken) updatePos matcher + try $ do + res <- tokenPrim (T.unpack . untoken) updatePos matcher + doMacros 0 -- apply macros on remaining input stream + return res where matcher t | f t = Just t | otherwise = Nothing updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos @@ -272,26 +332,51 @@ satisfyTok f = setSourceColumn (setSourceLine spos lin) col updatePos spos _ [] = spos --- TODO use something other than LaTeXState -doMacros :: PandocMonad m => LP m () -doMacros = do - inp <- getInput - macros <- sMacros <$> getState - case inp of - t@(Tok spos CtrlSeq txt) : ts - | txt == "\\macro" -> do -- TODO actually check macros - setInput $ map (setpos spos) (tokenize "Hi there") ++ ts - -- TODO loop detection - doMacros - _ -> return () +doMacros :: PandocMonad m => Int -> LP m () +doMacros n = do + applyMacros <- readerApplyMacros . sOptions <$> getState + verbatimMode <- sVerbatimMode <$> getState + when (applyMacros && not verbatimMode) $ do + inp <- getInput + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos ("end" <> name) ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros spos name ts + _ -> return () + where handleMacros spos name ts = do + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> return () + Just (Macro numargs optarg newtoks) -> do + setInput ts + let getarg = spaces >> braced + args <- case optarg of + Nothing -> count numargs getarg + Just o -> + (:) <$> option o bracketedToks + <*> count (numargs - 1) getarg + let addTok (Tok _ (Arg i) _) acc | i > 0 + , i <= numargs = + map (setpos spos) (args !! (i - 1)) ++ acc + addTok t acc = setpos spos t : acc + ts' <- getInput + setInput $ foldr addTok ts' newtoks + if n > 20 -- detect macro expansion loops + then throwError $ PandocMacroLoop (T.unpack name) + else doMacros (n + 1) setpos :: (Line, Column) -> Tok -> Tok setpos spos (Tok _ tt txt) = Tok spos tt txt anyControlSeq :: PandocMonad m => LP m Tok -anyControlSeq = satisfyTok isCtrlSeq <* optional sp - where isCtrlSeq (Tok _ CtrlSeq _) = True - isCtrlSeq _ = False +anyControlSeq = satisfyTok isCtrlSeq + where isCtrlSeq (Tok _ (CtrlSeq _) _) = True + isCtrlSeq _ = False anySymbol :: PandocMonad m => LP m Tok anySymbol = satisfyTok isSym @@ -309,7 +394,7 @@ tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes controlSeq :: PandocMonad m => Text -> LP m Tok controlSeq name = satisfyTok isNamed - where isNamed (Tok _ CtrlSeq n) = n == T.singleton '\\' <> name + where isNamed (Tok _ (CtrlSeq n) _) = n == name isNamed _ = False symbol :: PandocMonad m => Char -> LP m Tok @@ -356,15 +441,13 @@ endline = try $ do notFollowedBy blankline blankline :: PandocMonad m => LP m () -blankline = try $ do - skipMany (satisfyTok (tokTypeIn [Spaces, Newline])) - newlineTok +blankline = try $ skipMany whitespace *> newlineTok primEscape :: PandocMonad m => LP m Char primEscape = do Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) case toktype of - Esc1 -> case T.uncons t of + Esc1 -> case T.uncons (T.drop 2 t) of Just (c, _) | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) | otherwise -> return (chr (ord c + 64)) @@ -374,14 +457,13 @@ primEscape = do Nothing -> fail $ "Could not read: " ++ T.unpack t _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen -bgroup :: PandocMonad m => LP m () +bgroup :: PandocMonad m => LP m Tok bgroup = try $ do skipMany sp symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" - return () -egroup :: PandocMonad m => LP m () -egroup = () <$ (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup") +egroup :: PandocMonad m => LP m Tok +egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup") grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a grouped parser = try $ do @@ -391,10 +473,20 @@ grouped parser = try $ do try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) braced :: PandocMonad m => LP m [Tok] -braced = try $ do - bgroup - -- {{a,b}} should be parsed the same as {a,b} - try (braced <* egroup) <|> manyTill anyTok egroup +braced = bgroup *> braced' 1 + where braced' (n :: Int) = + handleEgroup n <|> handleBgroup n <|> handleOther n + handleEgroup n = do + t <- egroup + if n == 1 + then return [] + else (t:) <$> braced' (n - 1) + handleBgroup n = do + t <- bgroup + (t:) <$> braced' (n + 1) + handleOther n = do + t <- anyTok + (t:) <$> braced' n bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a bracketed parser = try $ do @@ -414,8 +506,17 @@ dimenarg = try $ do -- inline elements: -inlineText :: PandocMonad m => LP m Inlines -inlineText = (str . T.unpack . untoken) <$> satisfyTok isWordTok +word :: PandocMonad m => LP m Inlines +word = (str . T.unpack . untoken) <$> satisfyTok isWordTok + +regularSymbol :: PandocMonad m => LP m Inlines +regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol + where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t + isRegularSymbol _ = False + isSpecial c = c `Set.member` specialChars + +specialChars :: Set.Set Char +specialChars = Set.fromList "#$%&~_^\\{}" isWordTok :: Tok -> Bool isWordTok (Tok _ Word _) = True @@ -435,9 +536,44 @@ doLHSverb = (codeWith ("",["haskell"],[]) . T.unpack . untokenize) <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') +mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines +mkImage options src = do + let replaceTextwidth (k,v) = + case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth + $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let attr = ("",[], kvs) + let alt = str "image" + case takeExtension src of + "" -> do + defaultExt <- getOption readerDefaultImageExtension + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt + +-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" +dosiunitx :: PandocMonad m => LP m Inlines +dosiunitx = do + skipopts + value <- tok + valueprefix <- option "" $ bracketed tok + unit <- tok + let emptyOr160 "" = "" + emptyOr160 _ = "\160" + return . mconcat $ [valueprefix, + emptyOr160 valueprefix, + value, + emptyOr160 unit, + unit] + lit :: String -> LP m Inlines lit = pure . str +removeDoubleQuotes :: Text -> Text +removeDoubleQuotes t = + maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" + doubleQuote :: PandocMonad m => LP m Inlines doubleQuote = do quoted' doubleQuoted (try $ count 2 $ symbol '`') @@ -496,7 +632,9 @@ doverb = do marker <- case T.uncons t of Just (c, ts) | T.null ts -> return c _ -> mzero - (code . T.unpack . untokenize) <$> manyTill (verbTok marker) (symbol marker) + withVerbatimMode $ + (code . T.unpack . untokenize) <$> + manyTill (verbTok marker) (symbol marker) verbTok :: PandocMonad m => Char -> LP m Tok verbTok stopchar = do @@ -519,8 +657,9 @@ dolstinline = do Just (c, ts) | T.null ts -> return c _ -> mzero let stopchar = if marker == '{' then '}' else marker - (codeWith ("",classes,[]) . T.unpack . untokenize) <$> - manyTill (verbTok stopchar) (symbol stopchar) + withVerbatimMode $ + (codeWith ("",classes,[]) . T.unpack . untokenize) <$> + manyTill (verbTok stopchar) (symbol stopchar) keyval :: PandocMonad m => LP m (String, String) keyval = try $ do @@ -749,52 +888,129 @@ toksToString = T.unpack . untokenize mathDisplay :: String -> Inlines mathDisplay = displayMath . trim --- TODO we shouldn't need applyMacros --- mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) mathInline :: String -> Inlines mathInline = math . trim --- TODO we shouldn't need applyMacros --- mathInline p = math <$> (try p >>= applyMacros') dollarsMath :: PandocMonad m => LP m Inlines dollarsMath = do symbol '$' display <- option False (True <$ symbol '$') - contents <- many1Till anyTok (symbol '$') - when display (() <$ symbol '$') - let constructor = if display then mathDisplay else mathInline - return $ constructor $ toksToString contents + contents <- trim . toksToString <$> + many (notFollowedBy (symbol '$') >> anyTok) + if display + then do + mathDisplay contents <$ try (symbol '$' >> symbol '$') + <|> (guard (null contents) >> return (mathInline "")) + else mathInline contents <$ (symbol '$') + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: PandocMonad m => LP m [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> (manyTill citationLabel egroup) + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + +citationLabel :: PandocMonad m => LP m String +citationLabel = do + optional sp + toksToString <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* optional sp + <* optional (symbol ',') + <* optional sp) + where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char] + +cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] +cites mode multi = try $ do + cits <- if multi + then many1 simpleCiteArgs + else count 1 simpleCiteArgs + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + +citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation name mode multi = do + (c,raw) <- withRaw $ cites mode multi + return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw)) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines +complexNatbibCitation mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` (symbol ';') + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" ++ toksToString raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do - cs@(Tok _ CtrlSeq t) <- anyControlSeq - let name = T.drop 1 t + Tok _ (CtrlSeq name) cmd <- anyControlSeq guard $ name /= "begin" && name /= "end" - mbstar <- option Nothing (Just <$> symbol '*') - let names = name : maybe [] (const [name <> "*"]) mbstar - optional $ try $ symbol '{' >> symbol '}' + (star, rawstar) <- withRaw $ option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] -- check non-starred as fallback let raw = do guard $ not (isBlockCommand name) (_, rawargs) <- withRaw (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = T.unpack $ untokenize $ - cs : maybe [] (:[]) mbstar ++ rawargs + let rawcommand = T.unpack $ cmd <> untokenize (rawstar ++ rawargs) (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) <|> ignore rawcommand - -- transformed <- applyMacros' rawcommand -- TODO or get rid of - -- exts <- getOption readerExtensions - -- if transformed /= rawcommand - -- then parseFromString' inlines transformed - -- else if extensionEnabled Ext_raw_tex exts - -- then return $ rawInline "latex" rawcommand - -- else ignore rawcommand lookupListDefault raw names inlineCommands tok :: PandocMonad m => LP m Inlines -tok = try $ grouped inline <|> inlineCommand' <|> singleChar - where singleChar = do +tok = grouped inline <|> inlineCommand' <|> singleChar + where singleChar = try $ do Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) + guard $ not $ toktype == Symbol && + T.any (`Set.member` specialChars) t if T.length t > 1 then do let (t1, t2) = (T.take 1 t, T.drop 1 t) @@ -816,14 +1032,6 @@ rawopt = do skipopts :: PandocMonad m => LP m () skipopts = skipMany rawopt --- eat an optional argument and one or more arguments in braces -ignoreInlines :: PandocMonad m => Text -> (Text, LP m Inlines) -ignoreInlines name = (name, p) - where p = do oa <- optargs - let rawCommand = '\\' : T.unpack (name <> oa) - let doraw = guardRaw >> return (rawInline "latex" rawCommand) - doraw <|> ignore rawCommand - guardRaw :: PandocMonad m => LP m () guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex @@ -865,20 +1073,18 @@ unescapeURL (x:xs) = x:unescapeURL xs unescapeURL [] = "" mathEnvWith :: PandocMonad m - => (Inlines -> a) -> Maybe String -> String -> LP m a + => (Inlines -> a) -> Maybe Text -> Text -> LP m a mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ - "\\end{" ++ y ++ "}" + Nothing -> x + Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ + "\\end{" ++ T.unpack y ++ "}" -mathEnv :: PandocMonad m => String -> LP m String +mathEnv :: PandocMonad m => Text -> LP m String mathEnv name = do skipopts optional blankline - let endEnv = try $ controlSeq "end" *> - (T.unpack . untokenize <$> braced) >>= guard . (== name) - res <- manyTill anyTok endEnv + res <- manyTill anyTok (end_ name) return $ stripTrailingNewlines $ T.unpack $ untokenize res inlineEnvironment :: PandocMonad m => LP m Inlines @@ -986,7 +1192,10 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> spaces)) + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional (bracketed inline) + spaces)) , (",", lit "\8198") , ("@", pure mempty) , (" ", lit "\160") @@ -1003,71 +1212,72 @@ inlineCommands = M.fromList $ , ("Verb", doverb) , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url -> pure (link url "" (str url))) --- , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> --- tok >>= \lab -> --- pure (link url "" lab)) --- , ("includegraphics", do options <- option [] keyvals --- src <- unescapeURL . removeDoubleQuotes <$> braced --- mkImage options src) + , ("href", (unescapeURL . toksToString <$> + braced <* optional sp) >>= \url -> + tok >>= \lab -> pure (link url "" lab)) + , ("includegraphics", do options <- option [] keyvals + src <- unescapeURL . T.unpack . + removeDoubleQuotes . untokenize <$> braced + mkImage options src) , ("enquote", enquote) --- , ("cite", citation "cite" NormalCitation False) --- , ("Cite", citation "Cite" NormalCitation False) --- , ("citep", citation "citep" NormalCitation False) --- , ("citep*", citation "citep*" NormalCitation False) --- , ("citeal", citation "citeal" NormalCitation False) --- , ("citealp", citation "citealp" NormalCitation False) --- , ("citealp*", citation "citealp*" NormalCitation False) --- , ("autocite", citation "autocite" NormalCitation False) --- , ("smartcite", citation "smartcite" NormalCitation False) --- , ("footcite", inNote <$> citation "footcite" NormalCitation False) --- , ("parencite", citation "parencite" NormalCitation False) --- , ("supercite", citation "supercite" NormalCitation False) --- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) --- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) --- , ("citeyear", citation "citeyear" SuppressAuthor False) --- , ("autocite*", citation "autocite*" SuppressAuthor False) --- , ("cite*", citation "cite*" SuppressAuthor False) --- , ("parencite*", citation "parencite*" SuppressAuthor False) --- , ("textcite", citation "textcite" AuthorInText False) --- , ("citet", citation "citet" AuthorInText False) --- , ("citet*", citation "citet*" AuthorInText False) --- , ("citealt", citation "citealt" AuthorInText False) --- , ("citealt*", citation "citealt*" AuthorInText False) --- , ("textcites", citation "textcites" AuthorInText True) --- , ("cites", citation "cites" NormalCitation True) --- , ("autocites", citation "autocites" NormalCitation True) --- , ("footcites", inNote <$> citation "footcites" NormalCitation True) --- , ("parencites", citation "parencites" NormalCitation True) --- , ("supercites", citation "supercites" NormalCitation True) --- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) --- , ("Autocite", citation "Autocite" NormalCitation False) --- , ("Smartcite", citation "Smartcite" NormalCitation False) --- , ("Footcite", citation "Footcite" NormalCitation False) --- , ("Parencite", citation "Parencite" NormalCitation False) --- , ("Supercite", citation "Supercite" NormalCitation False) --- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) --- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) --- , ("Citeyear", citation "Citeyear" SuppressAuthor False) --- , ("Autocite*", citation "Autocite*" SuppressAuthor False) --- , ("Cite*", citation "Cite*" SuppressAuthor False) --- , ("Parencite*", citation "Parencite*" SuppressAuthor False) --- , ("Textcite", citation "Textcite" AuthorInText False) --- , ("Textcites", citation "Textcites" AuthorInText True) --- , ("Cites", citation "Cites" NormalCitation True) --- , ("Autocites", citation "Autocites" NormalCitation True) --- , ("Footcites", citation "Footcites" NormalCitation True) --- , ("Parencites", citation "Parencites" NormalCitation True) --- , ("Supercites", citation "Supercites" NormalCitation True) --- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) --- , ("citetext", complexNatbibCitation NormalCitation) --- , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> --- complexNatbibCitation AuthorInText) --- <|> citation "citeauthor" AuthorInText False) --- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= --- addMeta "nocite")) + , ("cite", citation "cite" NormalCitation False) + , ("Cite", citation "Cite" NormalCitation False) + , ("citep", citation "citep" NormalCitation False) + , ("citep*", citation "citep*" NormalCitation False) + , ("citeal", citation "citeal" NormalCitation False) + , ("citealp", citation "citealp" NormalCitation False) + , ("citealp*", citation "citealp*" NormalCitation False) + , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) + , ("footcite", inNote <$> citation "footcite" NormalCitation False) + , ("parencite", citation "parencite" NormalCitation False) + , ("supercite", citation "supercite" NormalCitation False) + , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) + , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) + , ("citeyear", citation "citeyear" SuppressAuthor False) + , ("autocite*", citation "autocite*" SuppressAuthor False) + , ("cite*", citation "cite*" SuppressAuthor False) + , ("parencite*", citation "parencite*" SuppressAuthor False) + , ("textcite", citation "textcite" AuthorInText False) + , ("citet", citation "citet" AuthorInText False) + , ("citet*", citation "citet*" AuthorInText False) + , ("citealt", citation "citealt" AuthorInText False) + , ("citealt*", citation "citealt*" AuthorInText False) + , ("textcites", citation "textcites" AuthorInText True) + , ("cites", citation "cites" NormalCitation True) + , ("autocites", citation "autocites" NormalCitation True) + , ("footcites", inNote <$> citation "footcites" NormalCitation True) + , ("parencites", citation "parencites" NormalCitation True) + , ("supercites", citation "supercites" NormalCitation True) + , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) + , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) + , ("Footcite", citation "Footcite" NormalCitation False) + , ("Parencite", citation "Parencite" NormalCitation False) + , ("Supercite", citation "Supercite" NormalCitation False) + , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) + , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) + , ("Citeyear", citation "Citeyear" SuppressAuthor False) + , ("Autocite*", citation "Autocite*" SuppressAuthor False) + , ("Cite*", citation "Cite*" SuppressAuthor False) + , ("Parencite*", citation "Parencite*" SuppressAuthor False) + , ("Textcite", citation "Textcite" AuthorInText False) + , ("Textcites", citation "Textcites" AuthorInText True) + , ("Cites", citation "Cites" NormalCitation True) + , ("Autocites", citation "Autocites" NormalCitation True) + , ("Footcites", citation "Footcites" NormalCitation True) + , ("Parencites", citation "Parencites" NormalCitation True) + , ("Supercites", citation "Supercites" NormalCitation True) + , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) + , ("citetext", complexNatbibCitation NormalCitation) + , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> + complexNatbibCitation AuthorInText) + <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) , ("hypertarget", braced >> tok) -- -- siuntix --- , ("SI", dosiunitx) + , ("SI", dosiunitx) -- hyphenat , ("bshyp", lit "\\\173") , ("fshyp", lit "/\173") @@ -1080,16 +1290,18 @@ inlineCommands = M.fromList $ -- fontawesome , ("faCheck", lit "\10003") , ("faClose", lit "\10007") - ] ++ map ignoreInlines + ] + -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: - [ "index" - , "hspace" - , "vspace" - , "newpage" - , "clearpage" - , "pagebreak" - ] + -- [ "index" + -- , "hspace" + -- , "vspace" + -- , "newpage" + -- , "clearpage" + -- , "pagebreak" + -- , "noindent" + -- ] ttfamily :: PandocMonad m => LP m Inlines ttfamily = (code . stringify . toList) <$> tok @@ -1102,27 +1314,42 @@ rawInlineOr name' fallback = do else fallback getRawCommand :: PandocMonad m => Text -> LP m String -getRawCommand name' = do +getRawCommand txt = do (_, rawargs) <- withRaw (many (try (optional sp *> opt)) *> option "" (try (optional sp *> dimenarg)) *> many braced) - return $ '\\' : T.unpack (name' <> untokenize rawargs) + return $ T.unpack (txt <> untokenize rawargs) isBlockCommand :: Text -> Bool isBlockCommand s = s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks)) -lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v +isInlineCommand :: Text -> Bool +isInlineCommand s = + s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines)) + || s `Set.member` treatAsRawInline + +treatAsRawInline :: Set.Set Text +treatAsRawInline = Set.fromList + [ "index" + , "hspace" + , "vspace" + , "newpage" + , "clearpage" + , "pagebreak" + , "noindent" + ] + +lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v lookupListDefault d = (fromMaybe d .) . lookupList - where - lookupList l m = msum $ map (`M.lookup` m) l + where lookupList l m = msum $ map (`M.lookup` m) l inline :: PandocMonad m => LP m Inlines inline = (mempty <$ comment) <|> (space <$ whitespace) <|> (softbreak <$ endline) - <|> inlineText + <|> word <|> inlineCommand' <|> inlineEnvironment <|> inlineGroup @@ -1139,7 +1366,8 @@ inline = (mempty <$ comment) <|> dollarsMath <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) <|> (str . (:[]) <$> primEscape) - <|> (do res <- symbolIn "#&~^'`\"[]" + <|> regularSymbol + <|> (do res <- symbolIn "#^'`\"[]" pos <- getPosition let s = T.unpack (untoken res) report $ ParsingUnescaped s pos @@ -1150,6 +1378,37 @@ inlines = mconcat <$> many inline -- block elements: +begin_ :: PandocMonad m => Text -> LP m () +begin_ t = (try $ do + controlSeq "begin" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard (t == txt)) ("\\begin{" ++ T.unpack t ++ "}") + +end_ :: PandocMonad m => Text -> LP m () +end_ t = (try $ do + controlSeq "end" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard $ t == txt) ("\\end{" ++ T.unpack t ++ "}") + +preamble :: PandocMonad m => LP m Blocks +preamble = mempty <$ many preambleBlock + where preambleBlock = spaces1 + <|> void include + <|> void macroDef + <|> void blockCommand + <|> void braced + <|> (notFollowedBy (begin_ "document") >> void anyTok) + paragraph :: PandocMonad m => LP m Blocks paragraph = do x <- trimInlines . mconcat <$> many1 inline @@ -1159,11 +1418,12 @@ paragraph = do include :: PandocMonad m => LP m Blocks include = do - (Tok _ _ name) <- controlSeq "include" <|> controlSeq "input" <|> + (Tok _ (CtrlSeq name) _) <- + controlSeq "include" <|> controlSeq "input" <|> controlSeq "subfile" <|> controlSeq "usepackage" skipMany $ bracketed inline -- skip options fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced - let fs' = if name == "\\usepackage" + let fs' = if name == "usepackage" then map (maybeAddExtension ".sty") fs else map (maybeAddExtension ".tex") fs dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" @@ -1179,7 +1439,7 @@ ignoreBlocks :: PandocMonad m => Text -> (Text, LP m Blocks) ignoreBlocks name = (name, p) where p = do oa <- optargs - let rawCommand = '\\' : T.unpack (name <> oa) + let rawCommand = T.unpack ("\\" <> name <> oa) let doraw = guardRaw >> return (rawBlock "latex" rawCommand) doraw <|> ignore rawCommand @@ -1198,6 +1458,109 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) +macroDef :: PandocMonad m => LP m Blocks +macroDef = mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) + where commandDef = do + (name, macro) <- newcommand + updateState $ \s -> s{ sMacros = M.insert name macro (sMacros s) } + environmentDef = do + (name, macro1, macro2) <- newenvironment + updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" + optional $ symbol '*' + symbol '{' + spaces + Tok _ (CtrlSeq name) txt <- withVerbatimMode anyControlSeq + spaces + symbol '}' + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents <- braced + when (mtype == "newcommand") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos + Nothing -> return () + return (name, Macro numargs optarg contents) + +newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + optional $ symbol '*' + symbol '{' + spaces + Tok _ Word name <- satisfyTok isWordTok + spaces + symbol '}' + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + startcontents <- braced + spaces + endcontents <- braced + when (mtype == "newenvironment") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos + Nothing -> return () + return (name, Macro numargs optarg startcontents, + Macro 0 Nothing endcontents) + +bracketedToks :: PandocMonad m => LP m [Tok] +bracketedToks = do + symbol '[' + manyTill anyTok (symbol ']') + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead (T.unpack ds) of + Just i -> return i + _ -> return 0 + +setCaption :: PandocMonad m => LP m Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("data-label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ sCaption = Just ils' } + return mempty + +looseItem :: PandocMonad m => LP m Blocks +looseItem = do + inListItem <- sInListItem <$> getState + guard $ not inListItem + skipopts + return mempty + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing } + section :: PandocMonad m => Attr -> Int -> LP m Blocks section (ident, classes, kvs) lvl = do skipopts @@ -1210,18 +1573,14 @@ section (ident, classes, kvs) lvl = do blockCommand :: PandocMonad m => LP m Blocks blockCommand = try $ do - Tok _ CtrlSeq name <- anyControlSeq - guard $ name /= "\\begin" && name /= "\\end" + Tok _ (CtrlSeq name) txt <- anyControlSeq + guard $ name /= "begin" && name /= "end" star <- option "" ("*" <$ symbol '*' <* optional sp) let name' = name <> star - let names = name : [name' | name' /= name] - let raw = rawBlock "latex" <$> getRawCommand name' - -- TODO do we still need this? - -- rawcommand <- getRawCommand name' - -- transformed <- applyMacros' rawcommand - -- guard $ transformed /= rawcommand - -- notFollowedBy $ parseFromString' inlines transformed - -- parseFromString' blocks transformed + let names = ordNub [name', name] + let raw = do + guard $ not (isInlineCommand name) + rawBlock "latex" <$> getRawCommand txt lookupListDefault raw names blockCommands closing :: PandocMonad m => LP m Blocks @@ -1243,8 +1602,8 @@ blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) , ("parbox", braced >> grouped blocks) , ("title", mempty <$ (skipopts *> - (grouped inline >>= addMeta "title") - <|> (grouped block >>= addMeta "title"))) + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author @@ -1278,24 +1637,26 @@ blockCommands = M.fromList $ , ("hrule", pure horizontalRule) , ("strut", pure mempty) , ("rule", skipopts *> tok *> tok *> pure horizontalRule) --- , ("item", skipopts *> looseItem) --- , ("documentclass", skipopts *> braced *> preamble) + , ("item", looseItem) + , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) --- , ("caption", skipopts *> setCaption) --- , ("bibliography", mempty <$ (skipopts *> braced >>= --- addMeta "bibliography" . splitBibs)) --- , ("addbibresource", mempty <$ (skipopts *> braced >>= --- addMeta "bibliography" . splitBibs)) --- -- includes --- , ("lstinputlisting", inputListing) --- , ("graphicspath", graphicsPath) --- -- hyperlink - , ("hypertarget", braced >> grouped block) + , ("caption", skipopts *> setCaption) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + -- includes + , ("lstinputlisting", inputListing) + , ("graphicspath", graphicsPath) + -- hyperlink + , ("hypertarget", try $ braced >> grouped block) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks - [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" - -- newcommand, etc. should be parsed by macro, but we need this + [ "newcommand", "renewcommand" + , "newenvironment", "renewenvironment" + , "providecommand", "provideenvironment" + -- newcommand, etc. should be parsed by macroDef, but we need this -- here so these aren't parsed as inline commands to ignore , "special", "pdfannot", "pdfstringdef" , "bibliographystyle" @@ -1315,63 +1676,52 @@ blockCommands = M.fromList $ environments :: PandocMonad m => M.Map Text (LP m Blocks) environments = M.fromList [ ("document", env "document" blocks) --- , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) --- , ("letter", env "letter" letterContents) --- , ("minipage", env "minipage" $ --- skipopts *> spaces' *> optional braced *> spaces' *> blocks) --- , ("figure", env "figure" $ skipopts *> figure) --- , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) --- , ("center", env "center" blocks) --- , ("longtable", env "longtable" $ --- resetCaption *> simpTable "longtable" False >>= addTableCaption) --- , ("table", env "table" $ --- resetCaption *> skipopts *> blocks >>= addTableCaption) --- , ("tabular*", env "tabular" $ simpTable "tabular*" True) --- , ("tabularx", env "tabularx" $ simpTable "tabularx" True) --- , ("tabular", env "tabular" $ simpTable "tabular" False) --- , ("quote", blockQuote <$> env "quote" blocks) --- , ("quotation", blockQuote <$> env "quotation" blocks) --- , ("verse", blockQuote <$> env "verse" blocks) --- , ("itemize", bulletList <$> listenv "itemize" (many item)) --- , ("description", definitionList <$> listenv "description" (many descItem)) --- , ("enumerate", orderedList') --- , ("alltt", alltt =<< verbEnv "alltt") --- , ("code", guardEnabled Ext_literate_haskell *> --- (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> --- verbEnv "code")) --- , ("comment", mempty <$ verbEnv "comment") --- , ("verbatim", codeBlock <$> verbEnv "verbatim") --- , ("Verbatim", fancyverbEnv "Verbatim") --- , ("BVerbatim", fancyverbEnv "BVerbatim") --- , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals --- codeBlockWith attr <$> verbEnv "lstlisting") --- , ("minted", do options <- option [] keyvals --- lang <- grouped (many1 $ satisfy (/='}')) --- let kvs = [ (if k == "firstnumber" --- then "startFrom" --- else k, v) | (k,v) <- options ] --- let classes = [ lang | not (null lang) ] ++ --- [ "numberLines" | --- lookup "linenos" options == Just "true" ] --- let attr = ("",classes,kvs) --- codeBlockWith attr <$> verbEnv "minted") --- , ("obeylines", parseFromString --- (para . trimInlines . mconcat <$> many inline) =<< --- intercalate "\\\\\n" . lines <$> verbEnv "obeylines") --- , ("displaymath", mathEnvWith para Nothing "displaymath") --- , ("equation", mathEnvWith para Nothing "equation") --- , ("equation*", mathEnvWith para Nothing "equation*") --- , ("gather", mathEnvWith para (Just "gathered") "gather") --- , ("gather*", mathEnvWith para (Just "gathered") "gather*") --- , ("multline", mathEnvWith para (Just "gathered") "multline") --- , ("multline*", mathEnvWith para (Just "gathered") "multline*") --- , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") --- , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") --- , ("align", mathEnvWith para (Just "aligned") "align") --- , ("align*", mathEnvWith para (Just "aligned") "align*") --- , ("alignat", mathEnvWith para (Just "aligned") "alignat") --- , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") --- , ("tikzpicture", rawVerbEnv "tikzpicture") + , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) + , ("letter", env "letter" letterContents) + , ("minipage", env "minipage" $ + skipopts *> spaces *> optional braced *> spaces *> blocks) + , ("figure", env "figure" $ skipopts *> figure) + , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) + , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> simpTable "longtable" False >>= addTableCaption) + , ("table", env "table" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) + , ("tabular*", env "tabular" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) + , ("quote", blockQuote <$> env "quote" blocks) + , ("quotation", blockQuote <$> env "quotation" blocks) + , ("verse", blockQuote <$> env "verse" blocks) + , ("itemize", bulletList <$> listenv "itemize" (many item)) + , ("description", definitionList <$> listenv "description" (many descItem)) + , ("enumerate", orderedList') + , ("alltt", alltt <$> env "alltt" blocks) + , ("code", guardEnabled Ext_literate_haskell *> + (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") + , ("verbatim", codeBlock <$> verbEnv "verbatim") + , ("Verbatim", fancyverbEnv "Verbatim") + , ("BVerbatim", fancyverbEnv "BVerbatim") + , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals + codeBlockWith attr <$> verbEnv "lstlisting") + , ("minted", minted) + , ("obeylines", obeylines) + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") ] environment :: PandocMonad m => LP m Blocks @@ -1382,9 +1732,7 @@ environment = do <|> rawEnv name env :: PandocMonad m => Text -> LP m a -> LP m a -env name p = p <* - (try (controlSeq "end" *> braced >>= guard . (== name) . untokenize) - ("\\end{" ++ T.unpack name ++ "}")) +env name p = p <* end_ name rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do @@ -1404,525 +1752,355 @@ rawEnv name = do report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 return bs - -- raw' <- applyMacros' $ beginCommand <> untokenize raw - -- if raw' /= beginCommand ++ raw - -- then parseFromString' blocks raw' - -- else if parseRaw - -- then return $ rawBlock "latex" $ beginCommand ++ raw' - -- else do - -- unless parseRaw $ do - -- report $ SkippedContent beginCommand pos1 - -- pos2 <- getPosition - -- report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 - -- return bs +rawVerbEnv :: PandocMonad m => Text -> LP m Blocks +rawVerbEnv name = do + pos <- getPosition + (_, raw) <- withRaw $ verbEnv name + let raw' = "\\begin{tikzpicture}" ++ toksToString raw + exts <- getOption readerExtensions + let parseRaw = extensionEnabled Ext_raw_tex exts + if parseRaw + then return $ rawBlock "latex" raw' + else do + report $ SkippedContent raw' pos + return mempty + +verbEnv :: PandocMonad m => Text -> LP m String +verbEnv name = withVerbatimMode $ do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ toksToString res + +fancyverbEnv :: PandocMonad m => Text -> LP m Blocks +fancyverbEnv name = do + options <- option [] keyvals + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv name + +obeylines :: PandocMonad m => LP m Blocks +obeylines = do + para . fromList . removeLeadingTrailingBreaks . + walk softBreakToHard . toList <$> env "obeylines" inlines + where softBreakToHard SoftBreak = LineBreak + softBreakToHard x = x + removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak . + reverse . dropWhile isLineBreak + isLineBreak LineBreak = True + isLineBreak _ = False + +minted :: PandocMonad m => LP m Blocks +minted = do + options <- option [] keyvals + lang <- toksToString <$> braced + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ lang | not (null lang) ] ++ + [ "numberLines" | + lookup "linenos" options == Just "true" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv "minted" + +letterContents :: PandocMonad m => LP m Blocks +letterContents = do + bs <- blocks + st <- getState + -- add signature (author) and address (title) + let addr = case lookupMeta "address" (sMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty + return $ addr <> bs -- sig added by \closing + +figure :: PandocMonad m => LP m Blocks +figure = try $ do + resetCaption + blocks >>= addImageCaption + +addImageCaption :: PandocMonad m => Blocks -> LP m Blocks +addImageCaption = walkM go + where go (Image attr alt (src,tit)) + | not ("fig:" `isPrefixOf` tit) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) + Nothing -> Image attr alt (src,tit) + go x = return x + +graphicsPath :: PandocMonad m => LP m Blocks +graphicsPath = do + ps <- map toksToString <$> (bgroup *> manyTill braced egroup) + getResourcePath >>= setResourcePath . (++ ps) + return mempty + +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') + +alltt :: Blocks -> Blocks +alltt = walk strToCode + where strToCode (Str s) = Code nullAttr s + strToCode Space = RawInline (Format "latex") "\\ " + strToCode SoftBreak = LineBreak + strToCode x = x + +parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions options = + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + ++ maybeToList (lookup "language" options + >>= fromListingsLanguage) + in (fromMaybe "" (lookup "label" options), classes, kvs) + +inputListing :: PandocMonad m => LP m Blocks +inputListing = do + pos <- getPosition + options <- option [] keyvals + f <- filter (/='"') . toksToString <$> braced + dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs f + codeLines <- case mbCode of + Just s -> return $ lines s + Nothing -> do + report $ CouldNotLoadIncludeFile f pos + return [] + let (ident,classes,kvs) = parseListingsOptions options + let language = case lookup "language" options >>= fromListingsLanguage of + Just l -> [l] + Nothing -> take 1 $ languagesByExtension (takeExtension f) + let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead + let lastline = fromMaybe (length codeLines) $ + lookup "lastline" options >>= safeRead + let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ + drop (firstline - 1) codeLines + return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents + +-- lists + +item :: PandocMonad m => LP m Blocks +item = void blocks *> controlSeq "item" *> skipopts *> blocks + +descItem :: PandocMonad m => LP m (Inlines, [Blocks]) +descItem = do + blocks -- skip blocks before item + controlSeq "item" + optional sp + ils <- opt + bs <- blocks + return (ils, [bs]) + +listenv :: PandocMonad m => Text -> LP m a -> LP m a +listenv name p = try $ do + oldInListItem <- sInListItem `fmap` getState + updateState $ \st -> st{ sInListItem = True } + res <- env name p + updateState $ \st -> st{ sInListItem = oldInListItem } + return res + +orderedList' :: PandocMonad m => LP m Blocks +orderedList' = try $ do + spaces + let markerSpec = do + symbol '[' + ts <- toksToString <$> manyTill anyTok (symbol ']') + case runParser anyOrderedListMarker def "option" ts of + Right r -> return r + Left _ -> do + pos <- getPosition + report $ SkippedContent ("[" ++ ts ++ "]") pos + return (1, DefaultStyle, DefaultDelim) + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec + spaces + optional $ try $ controlSeq "setlength" + *> grouped (count 1 $ controlSeq "itemindent") + *> braced + spaces + start <- option 1 $ try $ do pos <- getPosition + controlSeq "setcounter" + ctr <- toksToString <$> braced + guard $ "enum" `isPrefixOf` ctr + guard $ all (`elem` ['i','v']) (drop 4 ctr) + optional sp + num <- toksToString <$> braced + case safeRead num of + Just i -> return (i + 1 :: Int) + Nothing -> do + report $ SkippedContent + ("\\setcounter{" ++ ctr ++ + "}{" ++ num ++ "}") pos + return 1 + bs <- listenv "enumerate" (many item) + return $ orderedListWith (start, style, delim) bs + +-- tables + +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional $ bracketed inline + return () + +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces + +amp :: PandocMonad m => LP m Tok +amp = symbol '&' + +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> do + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + _ -> return () + +parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] +parseAligns = try $ do + let maybeBar = skipMany $ + sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced + let colWidth = try $ do + symbol '{' + ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") + spaces + symbol '}' + case safeRead ds of + Just w -> return w + Nothing -> return 0.0 + let alignSpec = try $ do + spaces + pref <- option [] alignPrefix + spaces + al <- alignChar + width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced + pos <- getPosition + report $ SkippedContent s pos + return 0.0) + spaces + suff <- option [] alignSuffix + return (al, width, (pref, suff)) + bgroup + spaces + maybeBar + aligns' <- many (alignSpec <* maybeBar) + spaces + egroup + spaces + return aligns' + +parseTableRow :: PandocMonad m + => Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes + -> LP m [Blocks] +parseTableRow envname prefsufs = do + notFollowedBy (spaces *> end_ envname) + let cols = length prefsufs + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- many (notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + >> anyTok) + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref + ++ contents ++ + map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff + rawcells <- sequence (map celltoks prefsufs) + oldInput <- getInput + cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells + setInput oldInput + spaces + let numcells = length cells + guard $ numcells <= cols && numcells >= 1 + guard $ cells /= [mempty] + -- note: a & b in a three-column table leaves an empty 3rd cell: + return $ cells ++ replicate (cols - numcells) mempty + +parseTableCell :: PandocMonad m => LP m Blocks +parseTableCell = do + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + updateState $ \st -> st{ sInTableCell = True } + cells <- plainify <$> blocks + updateState $ \st -> st{ sInTableCell = False } + return cells + +simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks +simpTable envname hasWidthParameter = try $ do + when hasWidthParameter $ () <$ (spaces >> tok) + skipopts + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + let cols = length colspecs + optional $ controlSeq "caption" *> skipopts *> setCaption + optional lbreak + spaces + skipMany hline + spaces + header' <- option [] $ try (parseTableRow envname prefsufs <* + lbreak <* many1 hline) + spaces + rows <- sepEndBy (parseTableRow envname prefsufs) + (lbreak <* optional (skipMany hline)) + spaces + optional $ controlSeq "caption" *> skipopts *> setCaption + optional lbreak + spaces + let header'' = if null header' + then replicate cols mempty + else header' + lookAhead $ controlSeq "end" -- make sure we're at end + return $ table mempty (zip aligns widths) header'' rows + +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table c als ws hs rs) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs + go x = return x + block :: PandocMonad m => LP m Blocks block = (mempty <$ spaces1) <|> environment <|> include - -- <|> macro - <|> blockCommand + <|> macroDef <|> paragraph + <|> blockCommand <|> grouped block blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block -{- --- graphicsPath :: PandocMonad m => LP m Blocks --- graphicsPath = do --- ps <- bgroup *> (manyTill braced egroup) --- getResourcePath >>= setResourcePath . (++ ps) --- return mempty --- --- addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () --- addMeta field val = updateState $ \st -> --- st{ sMeta = addMetaField field val $ sMeta st } --- --- splitBibs :: String -> [Inlines] --- splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') --- --- setCaption :: PandocMonad m => LP m Blocks --- setCaption = do --- ils <- tok --- mblabel <- option Nothing $ --- try $ spaces' >> controlSeq "label" >> (Just <$> tok) --- let ils' = case mblabel of --- Just lab -> ils <> spanWith --- ("",[],[("data-label", stringify lab)]) mempty --- Nothing -> ils --- updateState $ \st -> st{ stateCaption = Just ils' } --- return mempty --- --- resetCaption :: PandocMonad m => LP m () --- resetCaption = updateState $ \st -> st{ stateCaption = Nothing } --- --- mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines --- mkImage options src = do --- let replaceTextwidth (k,v) = case numUnit v of --- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") --- _ -> (k, v) --- let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options --- let attr = ("",[], kvs) --- let alt = str "image" --- case takeExtension src of --- "" -> do --- defaultExt <- getOption readerDefaultImageExtension --- return $ imageWith attr (addExtension src defaultExt) "" alt --- _ -> return $ imageWith attr src "" alt --- --- inNote :: Inlines -> Inlines --- inNote ils = --- note $ para $ ils <> str "." --- -- --- -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" --- dosiunitx :: PandocMonad m => LP m Inlines --- dosiunitx = do --- skipopts --- value <- tok --- valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']')) --- unit <- tok --- let emptyOr160 "" = "" --- emptyOr160 _ = "\160" --- return . mconcat $ [valueprefix, --- emptyOr160 valueprefix, --- value, --- emptyOr160 unit, --- unit] --- --- skipangles :: PandocMonad m => LP m () --- skipangles = skipMany rawangle --- --- inlineText :: PandocMonad m => LP m Inlines --- inlineText = str <$> many1 inlineChar --- --- inlineChar :: PandocMonad m => LP m Char --- inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" --- --- --- inlineEnvironment :: PandocMonad m => LP m Inlines --- inlineEnvironment = try $ do --- controlSeq "begin" --- name <- braced --- M.findWithDefault mzero name inlineEnvironments --- --- --- rawVerbEnv :: PandocMonad m => String -> LP m Blocks --- rawVerbEnv name = do --- pos <- getPosition --- (_, raw) <- withRaw $ verbEnv name --- let raw' = "\\begin{tikzpicture}" ++ raw --- exts <- getOption readerExtensions --- let parseRaw = extensionEnabled Ext_raw_tex exts --- if parseRaw --- then return $ rawBlock "latex" raw' --- else do --- report $ SkippedContent raw' pos --- return mempty --- --- ---- --- --- alltt :: PandocMonad m => String -> LP m Blocks --- alltt t = walk strToCode <$> parseFromString' blocks --- (substitute " " "\\ " $ substitute "%" "\\%" $ --- intercalate "\\\\\n" $ lines t) --- where strToCode (Str s) = Code nullAttr s --- strToCode x = x --- --- rawLaTeXBlock :: PandocMonad m => ParserT String LaTeXState String --- rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) --- --- rawLaTeXInline :: PandocMonad m => ParserT String LaTeXState Inline --- rawLaTeXInline = do --- raw <- (snd <$> withRaw inlineCommand) --- <|> (snd <$> withRaw inlineEnvironment) --- <|> (snd <$> withRaw blockCommand) --- RawInline "latex" <$> applyMacros' raw --- --- addImageCaption :: PandocMonad m => Blocks -> LP m Blocks --- addImageCaption = walkM go --- where go (Image attr alt (src,tit)) --- | not ("fig:" `isPrefixOf` tit) = do --- mbcapt <- stateCaption <$> getState --- return $ case mbcapt of --- Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) --- Nothing -> Image attr alt (src,tit) --- go x = return x --- --- addTableCaption :: PandocMonad m => Blocks -> LP m Blocks --- addTableCaption = walkM go --- where go (Table c als ws hs rs) = do --- mbcapt <- stateCaption <$> getState --- return $ case mbcapt of --- Just ils -> Table (toList ils) als ws hs rs --- Nothing -> Table c als ws hs rs --- go x = return x --- --- --- figure :: PandocMonad m => LP m Blocks --- figure = try $ do --- resetCaption --- blocks >>= addImageCaption --- --- letterContents :: PandocMonad m => LP m Blocks --- letterContents = do --- bs <- blocks --- st <- getState --- -- add signature (author) and address (title) --- let addr = case lookupMeta "address" (sMeta st) of --- Just (MetaBlocks [Plain xs]) -> --- para $ trimInlines $ fromList xs --- _ -> mempty --- return $ addr <> bs -- sig added by \closing --- --- --- item :: PandocMonad m => LP m Blocks --- item = blocks *> controlSeq "item" *> skipopts *> blocks --- --- looseItem :: PandocMonad m => LP m Blocks --- looseItem = do --- ctx <- stateParserContext `fmap` getState --- if ctx == ListItemState --- then mzero --- else return mempty --- --- descItem :: PandocMonad m => LP m (Inlines, [Blocks]) --- descItem = do --- blocks -- skip blocks before item --- controlSeq "item" --- optional sp --- ils <- opt --- bs <- blocks --- return (ils, [bs]) --- --- --- listenv :: PandocMonad m => String -> LP m a -> LP m a --- listenv name p = try $ do --- oldCtx <- stateParserContext `fmap` getState --- updateState $ \st -> st{ stateParserContext = ListItemState } --- res <- env name p --- updateState $ \st -> st{ stateParserContext = oldCtx } --- return res --- --- mathEnvWith :: PandocMonad m --- => (Inlines -> a) -> Maybe String -> String -> LP m a --- mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name) --- where inner x = case innerEnv of --- Nothing -> x --- Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ --- "\\end{" ++ y ++ "}" --- --- mathEnv :: PandocMonad m => String -> LP m String --- mathEnv name = do --- skipopts --- optional blankline --- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) --- charMuncher = skipMany comment *> --- (many1 (noneOf "\\%") <|> try (string "\\%") --- <|> try (string "\\\\") <|> count 1 anyChar) --- res <- concat <$> manyTill charMuncher endEnv --- return $ stripTrailingNewlines res --- --- verbEnv :: PandocMonad m => String -> LP m String --- verbEnv name = do --- skipopts --- optional blankline --- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) --- charMuncher = anyChar --- res <- manyTill charMuncher endEnv --- return $ stripTrailingNewlines res --- --- fancyverbEnv :: PandocMonad m => String -> LP m Blocks --- fancyverbEnv name = do --- options <- option [] keyvals --- let kvs = [ (if k == "firstnumber" --- then "startFrom" --- else k, v) | (k,v) <- options ] --- let classes = [ "numberLines" | --- lookup "numbers" options == Just "left" ] --- let attr = ("",classes,kvs) --- codeBlockWith attr <$> verbEnv name --- --- orderedList' :: PandocMonad m => LP m Blocks --- orderedList' = try $ do --- optional sp --- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ --- try $ char '[' *> anyOrderedListMarker <* char ']' --- spaces --- optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced --- spaces --- start <- option 1 $ try $ do controlSeq "setcounter" --- grouped (string "enum" *> many1 (oneOf "iv")) --- optional sp --- num <- grouped (many1 digit) --- spaces --- return (read num + 1 :: Int) --- bs <- listenv "enumerate" (many item) --- return $ orderedListWith (start, style, delim) bs --- --- paragraph :: PandocMonad m => LP m Blocks --- paragraph = do --- x <- trimInlines . mconcat <$> many1 inline --- if x == mempty --- then return mempty --- else return $ para x --- --- preamble :: PandocMonad m => LP m Blocks --- preamble = mempty <$> manyTill preambleBlock beginDoc --- where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" --- preambleBlock = void comment --- <|> void sp --- <|> void blanklines --- <|> void include --- <|> void macro --- <|> void blockCommand --- <|> void anyControlSeq --- <|> void braced --- <|> void anyChar --- --- ------- --- --- -- citations --- --- addPrefix :: [Inline] -> [Citation] -> [Citation] --- addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks --- addPrefix _ _ = [] --- --- addSuffix :: [Inline] -> [Citation] -> [Citation] --- addSuffix s ks@(_:_) = --- let k = last ks --- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] --- addSuffix _ _ = [] --- --- simpleCiteArgs :: PandocMonad m => LP m [Citation] --- simpleCiteArgs = try $ do --- first <- optionMaybe $ toList <$> opt --- second <- optionMaybe $ toList <$> opt --- keys <- try $ bgroup *> (manyTill citationLabel egroup) --- let (pre, suf) = case (first , second ) of --- (Just s , Nothing) -> (mempty, s ) --- (Just s , Just t ) -> (s , t ) --- _ -> (mempty, mempty) --- conv k = Citation { citationId = k --- , citationPrefix = [] --- , citationSuffix = [] --- , citationMode = NormalCitation --- , citationHash = 0 --- , citationNoteNum = 0 --- } --- return $ addPrefix pre $ addSuffix suf $ map conv keys --- --- citationLabel :: PandocMonad m => LP m String --- citationLabel = optional sp *> --- (many1 (satisfy isBibtexKeyChar) --- <* optional sp --- <* optional (char ',') --- <* optional sp) --- where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) --- --- cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] --- cites mode multi = try $ do --- cits <- if multi --- then many1 simpleCiteArgs --- else count 1 simpleCiteArgs --- let cs = concat cits --- return $ case mode of --- AuthorInText -> case cs of --- (c:rest) -> c {citationMode = mode} : rest --- [] -> [] --- _ -> map (\a -> a {citationMode = mode}) cs --- --- citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines --- citation name mode multi = do --- (c,raw) <- withRaw $ cites mode multi --- return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) --- --- complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines --- complexNatbibCitation mode = try $ do --- let ils = (toList . trimInlines . mconcat) <$> --- many (notFollowedBy (oneOf "\\};") >> inline) --- let parseOne = try $ do --- skipSpaces --- pref <- ils --- cit' <- inline -- expect a citation --- let citlist = toList cit' --- cits' <- case citlist of --- [Cite cs _] -> return cs --- _ -> mzero --- suff <- ils --- skipSpaces --- optional $ char ';' --- return $ addPrefix pref $ addSuffix suff cits' --- (c:cits, raw) <- withRaw $ grouped parseOne --- return $ cite (c{ citationMode = mode }:cits) --- (rawInline "latex" $ "\\citetext" ++ raw) --- --- -- tables --- --- parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))] --- parseAligns = try $ do --- bgroup --- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) --- maybeBar --- let cAlign = AlignCenter <$ char 'c' --- let lAlign = AlignLeft <$ char 'l' --- let rAlign = AlignRight <$ char 'r' --- let parAlign = AlignLeft <$ char 'p' --- -- algins from tabularx --- let xAlign = AlignLeft <$ char 'X' --- let mAlign = AlignLeft <$ char 'm' --- let bAlign = AlignLeft <$ char 'b' --- let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign --- <|> xAlign <|> mAlign <|> bAlign --- let alignPrefix = char '>' >> braced --- let alignSuffix = char '<' >> braced --- let colWidth = try $ do --- char '{' --- ds <- many1 (oneOf "0123456789.") --- spaces --- string "\\linewidth" --- char '}' --- case safeRead ds of --- Just w -> return w --- Nothing -> return 0.0 --- let alignSpec = do --- spaces --- pref <- option "" alignPrefix --- spaces --- al <- alignChar --- width <- colWidth <|> option 0.0 (do s <- braced --- pos <- getPosition --- report $ SkippedContent s pos --- return 0.0) --- spaces --- suff <- option "" alignSuffix --- return (al, width, (pref, suff)) --- aligns' <- sepEndBy alignSpec maybeBar --- spaces --- egroup --- spaces --- return $ aligns' --- --- hline :: PandocMonad m => LP m () --- hline = try $ do --- spaces' --- controlSeq "hline" <|> --- -- booktabs rules: --- controlSeq "toprule" <|> --- controlSeq "bottomrule" <|> --- controlSeq "midrule" <|> --- controlSeq "endhead" <|> --- controlSeq "endfirsthead" --- spaces' --- optional $ bracketed (many1 (satisfy (/=']'))) --- return () --- --- lbreak :: PandocMonad m => LP m () --- lbreak = () <$ try (spaces' *> --- (controlSeq "\\" <|> controlSeq "tabularnewline") <* --- spaces') --- --- amp :: PandocMonad m => LP m () --- amp = () <$ try (spaces' *> char '&' <* spaces') --- --- parseTableRow :: PandocMonad m --- => String -- ^ table environment name --- -> [(String, String)] -- ^ pref/suffixes --- -> LP m [Blocks] --- parseTableRow envname prefsufs = try $ do --- let cols = length prefsufs --- let tableCellRaw = concat <$> many --- (do notFollowedBy amp --- notFollowedBy lbreak --- notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}")) --- many1 (noneOf "&%\n\r\\") --- <|> try (string "\\&") --- <|> count 1 anyChar) --- let plainify bs = case toList bs of --- [Para ils] -> plain (fromList ils) --- _ -> bs --- rawcells <- sepBy1 tableCellRaw amp --- guard $ length rawcells == cols --- let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs --- let tableCell = plainify <$> blocks --- cells' <- mapM (parseFromString' tableCell) rawcells' --- let numcells = length cells' --- guard $ numcells <= cols && numcells >= 1 --- guard $ cells' /= [mempty] --- -- note: a & b in a three-column table leaves an empty 3rd cell: --- let cells'' = cells' ++ replicate (cols - numcells) mempty --- spaces' --- return cells'' --- --- spaces' :: PandocMonad m => LP m () --- spaces' = spaces *> skipMany (comment *> spaces) --- --- simpTable :: PandocMonad m => String -> Bool -> LP m Blocks --- simpTable envname hasWidthParameter = try $ do --- when hasWidthParameter $ () <$ (spaces' >> tok) --- skipopts --- colspecs <- parseAligns --- let (aligns, widths, prefsufs) = unzip3 colspecs --- let cols = length colspecs --- optional $ controlSeq "caption" *> skipopts *> setCaption --- optional lbreak --- spaces' --- skipMany hline --- spaces' --- header' <- option [] $ try (parseTableRow envname prefsufs <* --- lbreak <* many1 hline) --- spaces' --- rows <- sepEndBy (parseTableRow envname prefsufs) --- (lbreak <* optional (skipMany hline)) --- spaces' --- optional $ controlSeq "caption" *> skipopts *> setCaption --- optional lbreak --- spaces' --- let header'' = if null header' --- then replicate cols mempty --- else header' --- lookAhead $ controlSeq "end" -- make sure we're at end --- return $ table mempty (zip aligns widths) header'' rows --- --- removeDoubleQuotes :: String -> String --- removeDoubleQuotes ('"':xs) = --- case reverse xs of --- '"':ys -> reverse ys --- _ -> '"':xs --- removeDoubleQuotes xs = xs --- --- inputListing :: PandocMonad m => LP m Blocks --- inputListing = do --- pos <- getPosition --- options <- option [] keyvals --- f <- filter (/='"') <$> braced --- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" --- mbCode <- readFileFromDirs dirs f --- codeLines <- case mbCode of --- Just s -> return $ lines s --- Nothing -> do --- report $ CouldNotLoadIncludeFile f pos --- return [] --- let (ident,classes,kvs) = parseListingsOptions options --- let language = case lookup "language" options >>= fromListingsLanguage of --- Just l -> [l] --- Nothing -> take 1 $ languagesByExtension (takeExtension f) --- let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead --- let lastline = fromMaybe (length codeLines) $ --- lookup "lastline" options >>= safeRead --- let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ --- drop (firstline - 1) codeLines --- return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents --- --- parseListingsOptions :: [(String, String)] -> Attr --- parseListingsOptions options = --- let kvs = [ (if k == "firstnumber" --- then "startFrom" --- else k, v) | (k,v) <- options ] --- classes = [ "numberLines" | --- lookup "numbers" options == Just "left" ] --- ++ maybeToList (lookup "language" options --- >>= fromListingsLanguage) --- in (fromMaybe "" (lookup "label" options), classes, kvs) --} diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index afac9e8cbd1a..f2be6de5f1b7 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -58,7 +58,8 @@ tests = [ testGroup "basic" , "blank lines + space + comments" =: "% my comment\n\n \n % another\n\nhi" =?> para "hi" , "comment in paragraph" =: - "hi % this is a comment\nthere\n" =?> para "hi there" + "hi % this is a comment\nthere\n" =?> + para ("hi" <> softbreak <> "there") ] , testGroup "code blocks" diff --git a/test/latex-reader.latex b/test/latex-reader.latex index 2ebdfed9900c..7cbcc96723ea 100644 --- a/test/latex-reader.latex +++ b/test/latex-reader.latex @@ -4,7 +4,6 @@ \setlength{\parindent}{0pt} \setlength{\parskip}{6pt plus 2pt minus 1pt} -\newcommand{\textsubscript}[1]{\ensuremath{_{\scriptsize\textrm{#1}}}} \usepackage[breaklinks=true,unicode=true]{hyperref} \usepackage[normalem]{ulem} % avoid problems with \sout in headers with hyperref: diff --git a/test/latex-reader.native b/test/latex-reader.native index d481a714d96f..04be2538e373 100644 --- a/test/latex-reader.native +++ b/test/latex-reader.native @@ -261,7 +261,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format "latex") "\\doublespacing\n"]] + ,[Para [RawInline (Format "latex") "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]]