From d6fa586883d7f7716ec24c73874dd493bf37dd6f Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 11:43:08 +0000 Subject: [PATCH 01/11] Handle Strong element --- src/Main.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Main.hs b/src/Main.hs index af44cea..60ee1c3 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -127,7 +127,10 @@ inlineToComposer = \case (\t -> "" <> t <> "") (fmap mconcat (traverse inlineToComposer inlines)) Underline _ -> return mempty - Strong _ -> return mempty + Strong inlines -> + wrapComposerText + (\t -> "" <> t <> "") + (fmap mconcat (traverse inlineToComposer inlines)) Strikeout _ -> return mempty Superscript inlines -> wrapComposerText (\t -> "" <> t <> "") From 9e95ccd20a77afcfa9ea558e63d876cb5c8de735 Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 11:51:27 +0000 Subject: [PATCH 02/11] Handle Strikeout element --- src/Main.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Main.hs b/src/Main.hs index 60ee1c3..d56e06b 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -131,7 +131,10 @@ inlineToComposer = \case wrapComposerText (\t -> "" <> t <> "") (fmap mconcat (traverse inlineToComposer inlines)) - Strikeout _ -> return mempty + Strikeout inlines -> + wrapComposerText + (\t -> "" <> t <> "") + (fmap mconcat (traverse inlineToComposer inlines)) Superscript inlines -> wrapComposerText (\t -> "" <> t <> "") (fmap mconcat (traverse inlineToComposer inlines)) From d2bd9f804f39acddf50fd3592c4b87913ca90d62 Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 11:51:36 +0000 Subject: [PATCH 03/11] Handle SoftBreak and LineBreak elements I think these should both map to what composer enters for shift-enter, i.e. a `
` tag. --- src/Main.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Main.hs b/src/Main.hs index d56e06b..3e0b81f 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -146,8 +146,8 @@ inlineToComposer = \case Cite _ _ -> return mempty Code _ _ -> return mempty Space -> return (Composer.Elements [Composer.Text " "]) - SoftBreak -> return mempty - LineBreak -> return mempty + SoftBreak -> return (Composer.Elements [Composer.Text "
"]) + LineBreak -> return (Composer.Elements [Composer.Text "
"]) Math _ _ -> return mempty RawInline _ _ -> return mempty Link _ _ _ -> return mempty From 71d4b704ef6faa755954853a4f629be8f836a3be Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 12:09:40 +0000 Subject: [PATCH 04/11] Handle link elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I may have got the title and link text backwards, but I don’t think so: _title here has type Text while linkText has type Inlines, which sounds like it matches the description in the manual: > The link text can contain formatting (such as emphasis), but the title > cannot. --- src/Main.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Main.hs b/src/Main.hs index 3e0b81f..0507d29 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -150,7 +150,10 @@ inlineToComposer = \case LineBreak -> return (Composer.Elements [Composer.Text "
"]) Math _ _ -> return mempty RawInline _ _ -> return mempty - Link _ _ _ -> return mempty + Link _attrs linkText (url, _title) -> + wrapComposerText + (\t -> " url <> "\">" <> t <> "") + (fmap mconcat (traverse inlineToComposer linkText)) Image _ _ _ -> return mempty Note _ -> return mempty Span _ _ -> return mempty From eacda1e548a6e6d30dc455f950aaba3ca621df76 Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 12:27:54 +0000 Subject: [PATCH 05/11] Handle ordered lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I _think_ this should work… --- src/Main.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Main.hs b/src/Main.hs index 0507d29..4065d5e 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -100,7 +100,16 @@ blockToComposer = \case (\t -> "
    " <> t <> "
") (fmap mconcat (sequence items)) in list (fmap listItem blocks) - OrderedList _ _ -> return mempty + OrderedList _attrs blocks -> let + listItem :: PandocMonad m => [Block] -> StateT WriterState m Composer.Elements + listItem bs = wrapComposerText + (\t -> "
  • " <> t <> "
  • ") + (fmap mconcat (traverse blockToComposer bs)) + list :: PandocMonad m => [StateT WriterState m Composer.Elements] -> StateT WriterState m Composer.Elements + list items = wrapComposerText + (\t -> "
      " <> t <> "
    ") + (fmap mconcat (sequence items)) + in list (fmap listItem blocks) DefinitionList _ -> return mempty Header _ _ _ -> return mempty HorizontalRule -> return mempty From 97a257f9a3cfb40b5e57d253180e14ec129d748a Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 12:45:30 +0000 Subject: [PATCH 06/11] Support headings This is somewhat hackish, but hopefully it works ok! --- src/Main.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Main.hs b/src/Main.hs index 4065d5e..1244a79 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -8,6 +8,7 @@ import Control.Monad.Trans.State.Strict import Data.Aeson import Data.ByteString.Lazy (toStrict) import Data.Text (Text) +import Data.Text qualified as Text import Data.Text.Encoding (decodeUtf8) import Network.Wai import Network.Wai.Handler.Warp @@ -111,7 +112,10 @@ blockToComposer = \case (fmap mconcat (sequence items)) in list (fmap listItem blocks) DefinitionList _ -> return mempty - Header _ _ _ -> return mempty + Header level _attrs inlines -> wrapComposerText + (let levelText = Text.pack (show level) + in \t -> " levelText <> ">" <> t <> " levelText <> ">") + (fmap mconcat (traverse inlineToComposer inlines)) HorizontalRule -> return mempty Table _ _ _ _ _ _ -> return mempty Figure _ _ _ -> return mempty From dd93f8fef4e6f1befd1454d7aa996a1784eca22a Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 13:40:38 +0000 Subject: [PATCH 07/11] Represent soft breaks with spaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment things don’t look quite right: markdown paragraphs aren’t getting wrapped into single lines. I think this is why, though I might need to also change the handling of LineBreak in the same way. --- src/Main.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Main.hs b/src/Main.hs index 1244a79..c47e84d 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -159,7 +159,7 @@ inlineToComposer = \case Cite _ _ -> return mempty Code _ _ -> return mempty Space -> return (Composer.Elements [Composer.Text " "]) - SoftBreak -> return (Composer.Elements [Composer.Text "
    "]) + SoftBreak -> return (Composer.Elements [Composer.Text " "]) LineBreak -> return (Composer.Elements [Composer.Text "
    "]) Math _ _ -> return mempty RawInline _ _ -> return mempty From 94816f93f6d06a3cefe11ec3e0e3f5969eaaebde Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 13:46:45 +0000 Subject: [PATCH 08/11] Add endpoint to handle docx files The content type used for submission needs to be application/octet-stream, for servant to accept it. (Otherwise it returns a 415.) --- src/Main.hs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/Main.hs b/src/Main.hs index c47e84d..175f218 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -6,7 +6,7 @@ module Main where import Control.Category ((>>>)) import Control.Monad.Trans.State.Strict import Data.Aeson -import Data.ByteString.Lazy (toStrict) +import Data.ByteString.Lazy (ByteString, toStrict) import Data.Text (Text) import Data.Text qualified as Text import Data.Text.Encoding (decodeUtf8) @@ -32,7 +32,8 @@ converterAPI = Proxy server :: Server ConverterAPI server = return "working, hopefully" :<|> return "working, hopefully" - :<|> exampleConversionHandler + :<|> conversionHandler readMarkdown + :<|> conversionHandler readDocx type ConverterAPI = Get '[PlainText] Text :<|> "healthcheck" :> Get '[PlainText] Text @@ -40,15 +41,18 @@ type ConverterAPI = Get '[PlainText] Text :> ReqBody '[PlainText] Text :> Post '[PlainText] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) -- assume markdown input and composer output for now + :<|> "convert-docx" + :> ReqBody '[OctetStream] ByteString + :> Post '[PlainText] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) -exampleConversionHandler :: Text -> Handler (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) -exampleConversionHandler input = do - result <- liftIO (exampleConversion input) +conversionHandler :: (ReaderOptions -> a -> PandocIO Pandoc) -> a -> Handler (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) +conversionHandler reader input = do + result <- liftIO (conversion reader input) return (addHeader "*" result) -exampleConversion :: Text -> IO Text -exampleConversion input = - mdToComposer input +conversion :: (ReaderOptions -> a -> PandocIO Pandoc) -> a -> IO Text +conversion reader input = + toComposer reader input <&> (id >>> encode >>> toStrict @@ -57,9 +61,12 @@ exampleConversion input = -- toContentEntityRaw :: Composer.Block -> ContentEntityRaw -- toContentEntityRaw = -mdToComposer :: Text -> IO Composer.Block -mdToComposer txt = runIOorExplode $ - readMarkdown readerOptions txt +toComposer :: + (ReaderOptions -> a -> PandocIO Pandoc) -> + a -> + IO Composer.Block +toComposer reader input = runIOorExplode $ + reader readerOptions input >>= writeComposer def where readerOptions :: ReaderOptions From 9f06d10492b82cbc9bff7f54ba8b1e2c0a2e5edb Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 14:32:08 +0000 Subject: [PATCH 09/11] Add servant-options middleware for cors preflight For converting docx, the browser sends cors preflight requests, which fail. This middleware should allow them to be answered correctly. --- pandoc-converter.cabal | 1 + src/Main.hs | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandoc-converter.cabal b/pandoc-converter.cabal index b0c4688..d31d9fc 100644 --- a/pandoc-converter.cabal +++ b/pandoc-converter.cabal @@ -26,6 +26,7 @@ executable PandocConverter megaparsec, pandoc, servant-server, + servant-options, text, time, transformers, diff --git a/src/Main.hs b/src/Main.hs index 175f218..7404aa1 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -12,6 +12,7 @@ import Data.Text qualified as Text import Data.Text.Encoding (decodeUtf8) import Network.Wai import Network.Wai.Handler.Warp +import Network.Wai.Middleware.Servant.Options import Servant hiding (Header) import qualified Servant import Text.Pandoc hiding (trace) @@ -24,7 +25,7 @@ main :: IO () main = run 9482 app app :: Application -app = serve converterAPI server +app = provideOptions converterAPI (serve converterAPI server) converterAPI :: Proxy ConverterAPI converterAPI = Proxy @@ -35,15 +36,18 @@ server = return "working, hopefully" :<|> conversionHandler readMarkdown :<|> conversionHandler readDocx -type ConverterAPI = Get '[PlainText] Text - :<|> "healthcheck" :> Get '[PlainText] Text +type ConverterAPI = Get '[PlainText, JSON] Text + :<|> "healthcheck" :> Get '[PlainText, JSON] Text :<|> "convert" - :> ReqBody '[PlainText] Text - :> Post '[PlainText] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) + :> ReqBody '[PlainText, JSON] Text + :> Post '[PlainText, JSON] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) -- assume markdown input and composer output for now :<|> "convert-docx" - :> ReqBody '[OctetStream] ByteString - :> Post '[PlainText] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) + :> ReqBody '[OctetStream, JSON] ByteString + :> Post '[PlainText, JSON] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) + +instance FromJSON ByteString where + parseJSON = mempty conversionHandler :: (ReaderOptions -> a -> PandocIO Pandoc) -> a -> Handler (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) conversionHandler reader input = do From bed47da25b30e785666a03cd6bfc396cc0ce1851 Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 14:51:14 +0000 Subject: [PATCH 10/11] Add wai-cors to handle cors headers --- pandoc-converter.cabal | 1 + src/Main.hs | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandoc-converter.cabal b/pandoc-converter.cabal index d31d9fc..a06e958 100644 --- a/pandoc-converter.cabal +++ b/pandoc-converter.cabal @@ -31,6 +31,7 @@ executable PandocConverter time, transformers, wai, + wai-cors, warp, hs-source-dirs: src diff --git a/src/Main.hs b/src/Main.hs index 7404aa1..08e0818 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -12,6 +12,7 @@ import Data.Text qualified as Text import Data.Text.Encoding (decodeUtf8) import Network.Wai import Network.Wai.Handler.Warp +import Network.Wai.Middleware.Cors import Network.Wai.Middleware.Servant.Options import Servant hiding (Header) import qualified Servant @@ -25,7 +26,13 @@ main :: IO () main = run 9482 app app :: Application -app = provideOptions converterAPI (serve converterAPI server) +app = + cors (const $ Just policy) + $ provideOptions converterAPI + $ serve converterAPI server + where + policy = simpleCorsResourcePolicy + { corsRequestHeaders = [ "content-type" ] } converterAPI :: Proxy ConverterAPI converterAPI = Proxy From 146b2521f5171ed7f059f8d828da5649d703f7b6 Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Thu, 28 Nov 2024 16:31:09 +0000 Subject: [PATCH 11/11] Remove manual Access-Control-Allow-Origin headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that I have automatic headers from a dependency, mine are unneeded! And causing problems with firefox, so let’s remove them. --- src/Main.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Main.hs b/src/Main.hs index 08e0818..7cdaaae 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -47,19 +47,19 @@ type ConverterAPI = Get '[PlainText, JSON] Text :<|> "healthcheck" :> Get '[PlainText, JSON] Text :<|> "convert" :> ReqBody '[PlainText, JSON] Text - :> Post '[PlainText, JSON] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) + :> Post '[PlainText, JSON] Text -- assume markdown input and composer output for now :<|> "convert-docx" :> ReqBody '[OctetStream, JSON] ByteString - :> Post '[PlainText, JSON] (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) + :> Post '[PlainText, JSON] Text instance FromJSON ByteString where parseJSON = mempty -conversionHandler :: (ReaderOptions -> a -> PandocIO Pandoc) -> a -> Handler (Headers '[Servant.Header "Access-Control-Allow-Origin" Text] Text) +conversionHandler :: (ReaderOptions -> a -> PandocIO Pandoc) -> a -> Handler Text conversionHandler reader input = do result <- liftIO (conversion reader input) - return (addHeader "*" result) + return result conversion :: (ReaderOptions -> a -> PandocIO Pandoc) -> a -> IO Text conversion reader input =