Attach leading comments to the following token #117

Merged · 6 commits · Mar 14, 2024
2 changes: 2 additions & 0 deletions nixfmt.cabal
@@ -88,9 +88,11 @@ library
build-depends:
base >= 4.12.0 && < 4.17
, megaparsec >= 9.0.1 && < 9.3
, mtl
, parser-combinators >= 1.0.3 && < 1.4
, scientific >= 0.3.0 && < 0.4.0
, text >= 1.2.3 && < 1.3
, transformers
default-language: Haskell2010
ghc-options:
-Wall
45 changes: 34 additions & 11 deletions src/Nixfmt/Lexer.hs
@@ -4,22 +4,25 @@
- SPDX-License-Identifier: MPL-2.0
-}

{-# LANGUAGE LambdaCase, OverloadedStrings #-}
{-# LANGUAGE BlockArguments, FlexibleContexts, LambdaCase, OverloadedStrings #-}

module Nixfmt.Lexer (lexeme) where
module Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole) where

import Control.Monad.State (MonadState, evalStateT, get, modify, put)
import Data.Char (isSpace)
import Data.List (dropWhileEnd)
import Data.Maybe (fromMaybe)
import Data.Text as Text
(Text, intercalate, length, lines, null, pack, replace, replicate, strip,
stripEnd, stripPrefix, stripStart, takeWhile)
(Text, length, lines, null, pack, replace, replicate, strip, stripEnd,
stripPrefix, stripStart, takeWhile, unwords)
import Data.Void (Void)
import Text.Megaparsec
(SourcePos(..), anySingle, chunk, getSourcePos, hidden, many, manyTill, some,
try, unPos, (<|>))
(Parsec, SourcePos(..), anySingle, chunk, getSourcePos, hidden, many,
manyTill, some, try, unPos, (<|>))
import Text.Megaparsec.Char (eol)

import Nixfmt.Types (Ann(..), Parser, TrailingComment(..), Trivia, Trivium(..))
import Nixfmt.Types
(Ann(..), Whole(..), Parser, TrailingComment(..), Trivia, Trivium(..))
import Nixfmt.Util (manyP)

data ParseTrivium
@@ -32,7 +35,7 @@ preLexeme :: Parser a -> Parser a
preLexeme p = p <* manyP (\x -> isSpace x && x /= '\n' && x /= '\r')

newlines :: Parser ParseTrivium
newlines = PTNewlines <$> Prelude.length <$> some (preLexeme eol)
newlines = PTNewlines . Prelude.length <$> some (preLexeme eol)

splitLines :: Text -> [Text]
splitLines = dropWhile Text.null . dropWhileEnd Text.null
@@ -65,7 +68,7 @@ convertTrailing = toMaybe . join . map toText
where toText (PTLineComment c) = strip c
toText (PTBlockComment [c]) = strip c
toText _ = ""
join = intercalate " " . filter (/="")
join = Text.unwords . filter (/="")
toMaybe "" = Nothing
toMaybe c = Just $ TrailingComment c

@@ -92,8 +95,28 @@ convertTrivia pts =
trivia :: Parser [ParseTrivium]
trivia = many $ hidden $ lineComment <|> blockComment <|> newlines

-- The following primitives to interact with the state monad that stores trivia
-- are designed to prevent trivia from being dropped or duplicated by accident.

takeTrivia :: MonadState Trivia m => m Trivia
takeTrivia = get <* put []

pushTrivia :: MonadState Trivia m => Trivia -> m ()
pushTrivia t = modify (<>t)

lexeme :: Parser a -> Parser (Ann a)
lexeme p = do
lastLeading <- takeTrivia
token <- preLexeme p
(trailing, leading) <- convertTrivia <$> trivia
return $ Ann token trailing leading
(trailing, nextLeading) <- convertTrivia <$> trivia
pushTrivia nextLeading
return $ Ann lastLeading token trailing
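A minimal standalone sketch (not part of this diff; `Trivia` is simplified to `[String]` and the comment text is made up) of the contract that these primitives and the reworked `lexeme` rely on: `takeTrivia` hands back the buffered trivia and empties the buffer, `pushTrivia` appends to it, so a comment buffered by one `lexeme` is consumed exactly once, as the leading trivia of the next token.

import Control.Monad.State (State, evalState, get, modify, put)

type Trivia = [String]  -- simplified stand-in for Nixfmt's Trivia

takeTrivia :: State Trivia Trivia
takeTrivia = get <* put []

pushTrivia :: Trivia -> State Trivia ()
pushTrivia t = modify (<> t)

-- One lexeme buffers the comment it found after its token; the next take
-- consumes it as leading trivia, and a further take sees an empty buffer,
-- so nothing is dropped or duplicated.
demo :: (Trivia, Trivia)
demo = flip evalState [] $ do
  pushTrivia ["# comment seen after token A"]
  leadingOfB <- takeTrivia
  leftovers  <- takeTrivia
  pure (leadingOfB, leftovers)

main :: IO ()
main = print demo  -- (["# comment seen after token A"],[])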

-- | Tokens normally have only leading trivia and one trailing comment on the same
-- line. A whole x also parses and stores final trivia after the x. A whole also
-- does not interact with the trivia state of its surroundings.
whole :: Parser a -> Parsec Void Text (Whole a)
whole pa = flip evalStateT [] do
preLexeme $ pure ()
pushTrivia . convertLeading =<< trivia
Whole <$> pa <*> takeTrivia
89 changes: 61 additions & 28 deletions src/Nixfmt/Parser.hs
@@ -4,7 +4,7 @@
- SPDX-License-Identifier: MPL-2.0
-}

{-# LANGUAGE LambdaCase, OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances, OverloadedStrings #-}

module Nixfmt.Parser where

@@ -14,22 +14,27 @@ import Control.Monad (guard, liftM2)
import Control.Monad.Combinators (sepBy)
import qualified Control.Monad.Combinators.Expr as MPExpr
(Operator(..), makeExprParser)
import Control.Monad.Trans.Class (lift)
import Data.Char (isAlpha)
import Data.Foldable (toList)
import Data.Functor (($>))
import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
import Data.Text as Text (Text, cons, empty, singleton, split, stripPrefix)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void (Void)
import Text.Megaparsec
(anySingle, chunk, eof, label, lookAhead, many, notFollowedBy, oneOf,
optional, satisfy, some, try, (<|>))
(Parsec, anySingle, chunk, empty, eof, label, lookAhead, many, notFollowedBy,
oneOf, optional, satisfy, some, try, (<|>))
import Text.Megaparsec.Char (char)
import qualified Text.Megaparsec.Char.Lexer as L (decimal)

import Nixfmt.Lexer (lexeme)
import Nixfmt.Types
(Ann, Binder(..), Expression(..), File(..), Fixity(..), Leaf, Operator(..),
ParamAttr(..), Parameter(..), Parser, Path, Selector(..), SimpleSelector(..),
String, StringPart(..), Term(..), Token(..), operators, tokenText)
import Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole)
import Nixfmt.Parser.Float (floatParse)
import Nixfmt.Types
(Ann, Binder(..), Expression(..), File, Fixity(..), Item(..), Items(..), Leaf,
Operator(..), ParamAttr(..), Parameter(..), Parser, Path, Selector(..),
SimpleSelector(..), String, StringPart(..), Term(..), Token(..), Trivium(..),
operators, tokenText)
import Nixfmt.Util
(commonIndentation, identChar, isSpaces, manyP, manyText, pathChar,
schemeChar, someP, someText, uriChar)
@@ -41,7 +46,7 @@ ann f p = try $ lexeme $ f <$> p

-- | parses a token without parsing trivia after it
rawSymbol :: Token -> Parser Token
rawSymbol t = chunk (tokenText t) *> return t
rawSymbol t = chunk (tokenText t) $> t

symbol :: Token -> Parser (Ann Token)
symbol = lexeme . rawSymbol
@@ -72,12 +77,18 @@ identifier :: Parser (Ann Token)
identifier = ann Identifier $ do
ident <- Text.cons <$> satisfy (\x -> isAlpha x || x == '_')
<*> manyP identChar
guard $ not $ ident `elem` reservedNames
guard $ ident `notElem` reservedNames
return ident

slash :: Parser Text
slash = chunk "/" <* notFollowedBy (char '/')

instance Semigroup a => Semigroup (Parser a) where
fx <> fy = do
x <- fx
y <- fy
pure $ x <> y

envPath :: Parser (Ann Token)
envPath = ann EnvPath $ char '<' *>
someP pathChar <> manyText (slash <> someP pathChar)
@@ -101,26 +112,26 @@ uri = fmap (pure . pure . TextPart) $ try $

interpolation :: Parser StringPart
interpolation = Interpolation <$>
symbol TInterOpen <*> expression <*> rawSymbol TInterClose
(rawSymbol TInterOpen *> lift (whole expression) <* rawSymbol TInterClose)

simpleStringPart :: Parser StringPart
simpleStringPart = TextPart <$> someText (
chunk "\\n" *> pure "\n" <|>
chunk "\\r" *> pure "\r" <|>
chunk "\\t" *> pure "\t" <|>
chunk "\\n" $> "\n" <|>
chunk "\\r" $> "\r" <|>
chunk "\\t" $> "\t" <|>
chunk "\\" *> (Text.singleton <$> anySingle) <|>
chunk "$$" <|>
try (chunk "$" <* notFollowedBy (char '{')) <|>
someP (\t -> t /= '"' && t /= '\\' && t /= '$'))

indentedStringPart :: Parser StringPart
indentedStringPart = TextPart <$> someText (
chunk "''\\n" *> pure "\n" <|>
chunk "''\\r" *> pure "\r" <|>
chunk "''\\t" *> pure "\t" <|>
chunk "''\\n" $> "\n" <|>
chunk "''\\r" $> "\r" <|>
chunk "''\\t" $> "\t" <|>
chunk "''\\" *> (Text.singleton <$> anySingle) <|>
chunk "''$" *> pure "$" <|>
chunk "'''" *> pure "''" <|>
chunk "''$" $> "$" <|>
chunk "'''" $> "''" <|>
chunk "$$" <|>
try (chunk "$" <* notFollowedBy (char '{')) <|>
try (chunk "'" <* notFollowedBy (char '\'')) <|>
@@ -151,7 +162,7 @@ lineHead :: [StringPart] -> Maybe Text
lineHead [] = Nothing
lineHead line | isEmptyLine line = Nothing
lineHead (TextPart t : _) = Just t
lineHead (Interpolation _ _ _ : _) = Just ""
lineHead (Interpolation{} : _) = Just ""

stripParts :: Text -> [StringPart] -> [StringPart]
stripParts indentation (TextPart t : xs) =
@@ -170,7 +181,7 @@ splitLines (TextPart t : xs) =

splitLines (x : xs) =
case splitLines xs of
(xs' : xss) -> ((x : xs') : xss)
(xs' : xss) -> (x : xs') : xss
_ -> error "unreachable"

stripIndentation :: [[StringPart]] -> [[StringPart]]
@@ -238,6 +249,28 @@ term = label "term" $ do
return $ case s of [] -> t
_ -> Selection t s

items :: Parser a -> Parser (Items a)
items p = Items <$> many (item p) <> (toList <$> optional lastItem)

item :: Parser a -> Parser (Item a)
item p = detachedComment <|> CommentedItem <$> takeTrivia <*> p

lastItem :: Parser (Item a)
lastItem = do
trivia <- takeTrivia
case trivia of
[] -> empty
_ -> pure $ DetachedComments trivia

detachedComment :: Parser (Item a)
detachedComment = do
trivia <- takeTrivia
case break (== EmptyLine) trivia of
-- Return a set of comments that don't annotate the next item
(detached, EmptyLine : trivia') -> pushTrivia trivia' >> pure (DetachedComments detached)
-- The remaining trivia annotate the next item
_ -> pushTrivia trivia >> empty
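For intuition, a standalone sketch of the split performed by `detachedComment` above (not from the diff; `Trivium` is pared down and `LineComment` carries a `String` here): comments separated from what follows by a blank line become `DetachedComments`, while the trailing run is pushed back so it annotates the next item.

data Trivium = EmptyLine | LineComment String
  deriving (Eq, Show)

-- Everything before the first blank line is detached; whatever follows the
-- blank line is left over to serve as leading trivia of the next item.
splitDetached :: [Trivium] -> ([Trivium], [Trivium])
splitDetached trivia =
  case break (== EmptyLine) trivia of
    (detached, EmptyLine : rest) -> (detached, rest)
    _                            -> ([], trivia)  -- no blank line: nothing is detached

main :: IO ()
main = print $ splitDetached
  [ LineComment "# standalone note"
  , EmptyLine
  , LineComment "# documents the next binding"
  ]
-- ([LineComment "# standalone note"],[LineComment "# documents the next binding"])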

-- ABSTRACTIONS

attrParameter :: Maybe (Parser Leaf) -> Parser ParamAttr
@@ -255,7 +288,7 @@ setParameter = SetParameter <$> bopen <*> attrs <*> bclose
commaAttrs = many $ try $ attrParameter $ Just $ symbol TComma
ellipsis = ParamEllipsis <$> symbol TEllipsis
lastAttr = attrParameter Nothing <|> ellipsis
attrs = commaAttrs <> (toList <$> optional (lastAttr))
attrs = commaAttrs <> (toList <$> optional lastAttr)

contextParameter :: Parser Parameter
contextParameter =
@@ -277,15 +310,15 @@ assignment :: Parser Binder
assignment = Assignment <$>
selectorPath <*> symbol TAssign <*> expression <*> symbol TSemicolon

binders :: Parser [Binder]
binders = many (assignment <|> inherit)
binders :: Parser (Items Binder)
binders = items (assignment <|> inherit)

set :: Parser Term
set = Set <$> optional (reserved KRec <|> reserved KLet) <*>
symbol TBraceOpen <*> binders <*> symbol TBraceClose

list :: Parser Term
list = List <$> symbol TBrackOpen <*> many term <*> symbol TBrackClose
list = List <$> symbol TBrackOpen <*> items term <*> symbol TBrackClose

-- OPERATORS

@@ -341,5 +374,5 @@ expression :: Parser Expression
expression = label "expression" $ try operation <|> abstraction <|>
with <|> letIn <|> ifThenElse <|> assert

file :: Parser File
file = File <$> lexeme (return SOF) <*> expression <* eof
file :: Parsec Void Text File
file = whole (expression <* eof)
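With `file` now a plain `Parsec Void Text File` parser, callers can run it directly with megaparsec's `parse`; the trivia state is confined to `whole`. A hypothetical driver (not part of this PR; `parseNix` is an illustrative name, and `File` is the type imported from `Nixfmt.Types` in the diff above):

import Data.Text (Text)
import Text.Megaparsec (errorBundlePretty, parse)

import Nixfmt.Parser (file)
import Nixfmt.Types (File)

-- Parse a whole Nix source file; leading comments are attached to the token
-- that follows them, and trivia after the last token lands in the Whole wrapper.
parseNix :: FilePath -> Text -> Either String File
parseNix path src =
  case parse file path src of
    Left bundle -> Left (errorBundlePretty bundle)
    Right ast   -> Right ast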
7 changes: 3 additions & 4 deletions src/Nixfmt/Predoc.hs
@@ -4,8 +4,7 @@
- SPDX-License-Identifier: MPL-2.0
-}

{-# LANGUAGE DeriveFoldable, DeriveFunctor, FlexibleInstances,
OverloadedStrings, StandaloneDeriving #-}
{-# LANGUAGE FlexibleInstances, OverloadedStrings #-}

-- | This module implements a layer around the prettyprinter package, making it
-- easier to use.
@@ -33,7 +32,7 @@ module Nixfmt.Predoc
) where

import Data.List (intersperse)
import Data.Text as Text (Text, concat, length, pack, replicate)
import Data.Text as Text (Text, concat, length, pack, replicate, strip)

-- | Sequential Spacings are reduced to a single Spacing by taking the maximum.
-- This means that e.g. a Space followed by an Emptyline results in just an
@@ -193,7 +192,7 @@ moveLinesIn (Node ann xs : ys) =
moveLinesIn (x : xs) = x : moveLinesIn xs

layout :: Pretty a => Int -> a -> Text
layout w = layoutGreedy w . fixup . pretty
layout w = (<>"\n") . Text.strip . layoutGreedy w . fixup . pretty

-- 1. Move and merge Spacings.
-- 2. Convert Softlines to Grouped Lines and Hardspaces to Texts.