From a73201084765a4c2652943c96ea193e847f9ccd8 Mon Sep 17 00:00:00 2001 From: eVITAERC Date: Mon, 16 Sep 2013 00:25:11 -0700 Subject: [PATCH] Refactored all LaTeX Reader's handling of macros to a preprocessor `handleMacros`. This approach results in additional passes through the document in addition to `handleIncludes`, but it's ultimately cleaner and more maintainable. Many uses of macros were not properly handled by the old paradigm, such as when environment begin\end delimiters were defined as macros. The new paradigm preprocesses all macro definitions and recursively apply them to the text until reaching a fixed point, so the LaTeX Reader is free to assume that no macros exist in the input. Closes #982. As a side effect, the `macro` parser now produces String instead of Inline. Note that this commit introduces a regression when `\newcommand` is in the argument of another command. (For example, inside `\pdfstringdefDisableCommands` as in the test files.) Perhaps the most sensible policy for now is to ignore all such occurrences. --- pandoc.hs | 7 +++++- src/Text/Pandoc/Parsing.hs | 12 ++++++++-- src/Text/Pandoc/Readers/LaTeX.hs | 37 ++++++++++++++++++++--------- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index ad01ad2caa8e..89eb0ad9e720 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -33,7 +33,7 @@ module Main where import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.Readers.LaTeX (handleIncludes) +import Text.Pandoc.Readers.LaTeX ( handleIncludes, handleMacros ) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, safeRead, headerShift, normalize, err, warn ) import Text.Pandoc.XML ( toEntities ) @@ -1104,8 +1104,13 @@ main = do then handleIncludes else return + let handleMacros' = if readerName' == "latex" || readerName' == "latex+lhs" + then handleMacros readerOpts + else return + doc <- readSources sources >>= handleIncludes' . convertTabs . intercalate "\n" >>= + handleMacros' >>= reader readerOpts diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 701b2ef84ad2..cb79e800d2b0 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -87,6 +87,7 @@ module Text.Pandoc.Parsing ( (>>~), dash, nested, macro, + macroBlock, applyMacros', Parser, F(..), @@ -1060,7 +1061,7 @@ nested p = do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: Parser [Char] ParserState Blocks +macro :: Parser [Char] ParserState String macro = do apply <- getOption readerApplyMacros inp <- getInput @@ -1072,7 +1073,14 @@ macro = do updateState $ \st -> st { stateMacros = ms ++ stateMacros st } return mempty - else return $ rawBlock "latex" def' + else return def' + +macroBlock :: Parser [Char] ParserState Blocks +macroBlock = do + result <- macro + case result of + "" -> return mempty + x -> return $ rawBlock "latex" x -- | Apply current macros to string. applyMacros' :: String -> Parser [Char] ParserState String diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 70d859e07b95..6f41200599b9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document. module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, + handleMacros, handleIncludes ) where @@ -40,6 +41,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.TeXMath.Macros (applyMacros) import Data.Char ( chr, ord ) import Control.Monad import Text.Pandoc.Builder @@ -152,10 +154,10 @@ bracketed :: Monoid a => LP a -> LP a bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) mathDisplay :: LP String -> LP Inlines -mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) +mathDisplay p = displayMath <$> (liftM trim (try p)) mathInline :: LP String -> LP Inlines -mathInline p = math <$> (try p >>= applyMacros') +mathInline p = math <$> (try p) mathChars :: LP String mathChars = concat <$> @@ -203,7 +205,6 @@ block :: LP Blocks block = (mempty <$ comment) <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) <|> preamble - <|> macro <|> environment <|> blockCommand <|> paragraph @@ -333,12 +334,9 @@ inlineCommand = try $ do let raw = do rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) let rawcommand = '\\' : name ++ star ++ snd rawargs - transformed <- applyMacros' rawcommand - if transformed /= rawcommand - then parseFromString inlines transformed - else if parseRaw - then return $ rawInline "latex" rawcommand - else return mempty + if parseRaw + then return $ rawInline "latex" rawcommand + else return mempty case M.lookup name' inlineCommands of Just p -> p <|> raw Nothing -> case M.lookup name inlineCommands of @@ -763,9 +761,26 @@ rawEnv name = do parseRaw <- getOption readerParseRaw if parseRaw then (rawBlock "latex" . addBegin) <$> - (withRaw (env name blocks) >>= applyMacros' . snd) + (withRaw (env name blocks) >>= return . snd) else env name blocks +-- | Process all LaTeX macro definitions and expand them +handleMacros :: ReaderOptions -> String -> IO String +handleMacros opts input = do + expandedString <- readWith handleMacros' def{ stateOptions = opts } input + return expandedString + +handleMacros' :: LP (IO String) +handleMacros' = do + remainingStrings <- many macroToken + macros <- liftM stateMacros getState + return . return $ applyMacros macros $ concat remainingStrings + where macroToken = try $ do + skipMany comment + choice [ macro + , try $ string "\\%" -- avoid `comment` from consuming literal % + , count 1 anyChar ] + -- | Replace "include" commands with file contents. handleIncludes :: String -> IO String handleIncludes = handleIncludes' [] @@ -865,7 +880,7 @@ rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) rawLaTeXInline :: Parser [Char] ParserState Inline rawLaTeXInline = do raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) - RawInline "latex" <$> applyMacros' raw + RawInline "latex" <$> return raw environments :: M.Map String (LP Blocks) environments = M.fromList diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5456f25b5bdb..357966ab446b 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -442,7 +442,7 @@ block :: MarkdownParser (F Blocks) block = choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> (macro >>= return . return) + , guardEnabled Ext_latex_macros *> (macroBlock >>= return . return) , header , lhsCodeBlock , rawTeXBlock