Create Grammar package

haskell · Oct 20, 2021 · 00bfba8 · 00bfba8
1 parent 77850fb
commit 00bfba8
Show file tree

Hide file tree

Showing 15 changed files with 227 additions and 173 deletions.
diff --git a/cabal.project b/cabal.project
@@ -1,2 +1,3 @@
 packages:
+  packages/grammar
   ./
diff --git a/happy.cabal b/happy.cabal
@@ -160,7 +160,8 @@ executable happy
   build-depends: base < 5,
                  array,
                  containers >= 0.4.2,
-                 mtl >= 2.2.1
+                 mtl >= 2.2.1,
+                 happy-grammar == 1.21.0
                      -- mtl-2.2.1 added Control.Monad.Except
 
   default-language: Haskell98
@@ -171,10 +172,10 @@ executable happy
         AbsSyn
         First
         GenUtils
-        Grammar
         Info
         LALR
         Lexer
+        Mangler
         ParseMonad
         ParseMonad.Class
         Parser

diff --git a/packages/grammar/LICENSE b/packages/grammar/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
diff --git a/packages/grammar/Setup.hs b/packages/grammar/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/packages/grammar/happy-grammar.cabal b/packages/grammar/happy-grammar.cabal
@@ -0,0 +1,45 @@
+name:            happy-grammar
+version:         1.21.0
+license:         BSD2
+license-file:    LICENSE
+copyright:       (c) Andy Gill, Simon Marlow
+author:          Andy Gill and Simon Marlow
+maintainer:      Simon Marlow <marlowsd@gmail.com>
+bug-reports:     https://github.com/simonmar/happy/issues
+stability:       stable
+homepage:        https://www.haskell.org/happy/
+category:        Development
+cabal-version:   >= 1.10
+build-type:      Simple
+synopsis:        happy's Grammar datatype
+
+Description:
+  Happy is a parser generator for Haskell.
+  Happy-Grammar exposes the cross-package Grammar datatype,
+  which represents a grammar as can be parsed and processed by happy.
+
+tested-with:
+        GHC == 7.0.4
+        GHC == 7.4.2
+        GHC == 7.6.3
+        GHC == 7.8.4
+        GHC == 7.10.3
+        GHC == 8.0.2
+        GHC == 8.2.2
+        GHC == 8.4.4
+        GHC == 8.6.5
+        GHC == 8.8.4
+        GHC == 8.10.4
+        GHC == 9.0.1
+
+library
+  hs-source-dirs:      src
+
+  exposed-modules:     Happy.Grammar
+  build-depends:       base < 5,
+                       array
+
+  default-language:    Haskell98
+  default-extensions:  CPP, MagicHash, FlexibleContexts
+  ghc-options: -Wall
+  other-modules:
diff --git a/packages/grammar/src/Happy/Grammar.lhs b/packages/grammar/src/Happy/Grammar.lhs
@@ -0,0 +1,126 @@
+/-----------------------------------------------------------------------------
+The Grammar data type.
+
+(c) 1993-2001 Andy Gill, Simon Marlow
+-----------------------------------------------------------------------------
+
+> module Happy.Grammar (
+>       Name,
+>
+>       Production(..), Grammar(..), ErrorHandlerType(..),
+>       Priority(..),
+>       Assoc(..),
+>
+>       errorName, errorTok, startName, dummyName, firstStartTok, dummyTok,
+>       eofName, epsilonTok
+>       ) where
+
+> import Data.Array
+
+> type Name = Int
+
+> data ErrorHandlerType
+>   = ErrorHandlerTypeDefault
+>   | ErrorHandlerTypeExpList
+
+> data Production
+>       = Production Name [Name] (String,[Int]) Priority
+>       deriving Show
+
+> data Grammar
+>       = Grammar {
+>               productions       :: [Production],
+>               lookupProdNo      :: Int -> Production,
+>               lookupProdsOfName :: Name -> [Int],
+>               token_specs       :: [(Name,String)],
+>               terminals         :: [Name],
+>               non_terminals     :: [Name],
+>               starts            :: [(String,Name,Name,Bool)],
+>               types             :: Array Int (Maybe String),
+>               token_names       :: Array Int String,
+>               first_nonterm     :: Name,
+>               first_term        :: Name,
+>               eof_term          :: Name,
+>               priorities        :: [(Name,Priority)],
+>               token_type        :: String,
+>               imported_identity :: Bool,
+>               monad             :: (Bool,String,String,String,String),
+>               expect            :: Maybe Int,
+>               attributes        :: [(String,String)],
+>               attributetype     :: String,
+>               lexer             :: Maybe (String,String),
+>               error_handler     :: Maybe String,
+>               error_sig         :: ErrorHandlerType
+>       }
+
+> instance Show Grammar where
+>       showsPrec _ (Grammar
+>               { productions           = p
+>               , token_specs           = t
+>               , terminals             = ts
+>               , non_terminals         = nts
+>               , starts                = sts
+>               , types                 = tys
+>               , token_names           = e
+>               , first_nonterm         = fnt
+>               , first_term            = ft
+>               , eof_term              = eof
+>               })
+>        = showString "productions = "     . shows p
+>        . showString "\ntoken_specs = "   . shows t
+>        . showString "\nterminals = "     . shows ts
+>        . showString "\nnonterminals = "  . shows nts
+>        . showString "\nstarts = "        . shows sts
+>        . showString "\ntypes = "         . shows tys
+>        . showString "\ntoken_names = "   . shows e
+>        . showString "\nfirst_nonterm = " . shows fnt
+>        . showString "\nfirst_term = "    . shows ft
+>        . showString "\neof = "           . shows eof
+>        . showString "\n"
+
+> data Assoc = LeftAssoc | RightAssoc | None
+>       deriving Show
+
+> data Priority = No | Prio Assoc Int | PrioLowest
+>       deriving Show
+
+> instance Eq Priority where
+>   No == No = True
+>   Prio _ i == Prio _ j = i == j
+>   _ == _ = False
+
+-----------------------------------------------------------------------------
+-- Magic name values
+
+All the tokens in the grammar are mapped onto integers, for speed.
+The namespace is broken up as follows:
+
+epsilon         = 0
+error           = 1
+dummy           = 2
+%start          = 3..s
+non-terminals   = s..n
+terminals       = n..m
+%eof            = m
+
+These numbers are deeply magical, change at your own risk.  Several
+other places rely on these being arranged as they are, including
+ProduceCode.lhs and the various HappyTemplates.
+
+Unfortunately this means you can't tell whether a given token is a
+terminal or non-terminal without knowing the boundaries of the
+namespace, which are kept in the Grammar structure.
+
+In hindsight, this was probably a bad idea.
+
+> startName, eofName, errorName, dummyName :: String
+> startName = "%start" -- with a suffix, like %start_1, %start_2 etc.
+> eofName   = "%eof"
+> errorName = "error"
+> dummyName = "%dummy"  -- shouldn't occur in the grammar anywhere
+
+> firstStartTok, dummyTok, errorTok, epsilonTok :: Name
+> firstStartTok   = 3
+> dummyTok        = 2
+> errorTok        = 1
+> epsilonTok      = 0
diff --git a/src/AbsSyn.lhs b/src/AbsSyn.lhs
@@ -7,14 +7,16 @@ Abstract syntax for grammar files.
 Here is the abstract syntax of the language we parse.
 
 > module AbsSyn (
->       AbsSyn(..), Directive(..), ErrorHandlerType(..),
+>       AbsSyn(..), Directive(..),
 >       getTokenType, getTokenSpec, getParserNames, getLexer,
 >       getImportedIdentity, getMonad, getError,
 >       getPrios, getPrioNames, getExpect, getErrorHandlerType,
 >       getAttributes, getAttributetype,
 >       Rule(..), Prod(..), Term(..), Prec(..)
 >  ) where
 
+> import Happy.Grammar (ErrorHandlerType(..))
+
 > data AbsSyn
 >     = AbsSyn
 >         (Maybe String)       -- header
@@ -53,9 +55,6 @@ Parser Generator Directives.
 ToDo: find a consistent way to analyse all the directives together and
 generate some error messages.
 
-> data ErrorHandlerType
->   = ErrorHandlerTypeDefault
->   | ErrorHandlerTypeExpList
 >
 > data Directive a
 >       = TokenType     String                  -- %tokentype

diff --git a/src/First.lhs b/src/First.lhs
@@ -9,7 +9,7 @@ Implementation of FIRST
 > import GenUtils
 > import NameSet ( NameSet )
 > import qualified NameSet as Set
-> import Grammar
+> import Happy.Grammar
 > import Data.IntSet (IntSet)
 
 \subsection{Utilities}

diff --git a/src/Info.lhs b/src/Info.lhs
@@ -7,11 +7,11 @@ Generating info files.
 > module Info (genInfoFile) where
 
 > import Paths_happy            ( version )
-> import LALR                   ( Lr0Item(..) )
+> import LALR                   ( Lr0Item(..), Goto(..), LRAction(..), ActionTable, GotoTable )
 > import GenUtils               ( str, interleave, interleave' )
 > import Data.Set ( Set )
 > import qualified Data.Set as Set hiding ( Set )
-> import Grammar
+> import Happy.Grammar
 
 > import Data.Array
 > import Data.List (nub)

diff --git a/src/LALR.lhs b/src/LALR.lhs
@@ -6,17 +6,18 @@ Generation of LALR parsing tables.
 -----------------------------------------------------------------------------
 
 > module LALR
->       (genActionTable, genGotoTable, genLR0items, precalcClosure0,
+>       (ActionTable, GotoTable,
+>        genActionTable, genGotoTable, genLR0items, precalcClosure0,
 >        propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts,
->        Lr0Item(..), Lr1Item, ItemSetWithGotos)
+>        Lr0Item(..), Lr1Item(..), ItemSetWithGotos, LRAction(..), Goto(..))
 >       where
 
 > import GenUtils
 > import Data.Set ( Set )
 > import qualified Data.Set as Set hiding ( Set )
 > import qualified NameSet
 > import NameSet ( NameSet )
-> import Grammar
+> import Happy.Grammar
 
 > import Control.Monad (guard)
 > import Control.Monad.ST
@@ -32,16 +33,33 @@ Generation of LALR parsing tables.
 > unionNameMap :: (Name -> NameSet) -> NameSet -> NameSet
 > unionNameMap f = NameSet.foldr (NameSet.union . f) NameSet.empty
 
+-----------------------------------------------------------------------------
+
 This means rule $a$, with dot at $b$ (all starting at 0)
 
-> data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int                  -- (rule, dot)
+> data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int          -- (rule, dot)
 >       deriving (Eq,Ord,Show)
 
 > data Lr1Item = Lr1 {-#UNPACK#-}!Int {-#UNPACK#-}!Int NameSet  -- (rule, dot, lookahead)
 >       deriving (Show)
 
 > type RuleList = [Lr0Item]
 
+> type ItemSetWithGotos = (Set Lr0Item, [(Name,Int)])
+
+> data LRAction = LR'Shift Int Priority -- state number and priority
+>               | LR'Reduce Int Priority-- rule no and priority
+>               | LR'Accept             -- :-)
+>               | LR'Fail               -- :-(
+>               | LR'MustFail           -- :-(
+>               | LR'Multiple [LRAction] LRAction       -- conflict
+>       deriving (Eq,Show)
+
+> type ActionTable = Array Int{-state-} (Array Int{-terminal#-} LRAction)
+> type GotoTable = Array Int{-state-} (Array Int{-nonterminal #-} Goto)
+> data Goto = Goto Int | NoGoto
+>       deriving (Eq, Show)
+
 -----------------------------------------------------------------------------
 Generating the closure of a set of LR(0) items
 
@@ -179,8 +197,6 @@ keep iterating until the second set is empty.
 The addItems function is complicated by the fact that we need to keep
 information about which sets were generated by which others.
 
-> type ItemSetWithGotos = (Set Lr0Item, [(Name,Int)])
-
 > genLR0items :: Grammar -> (Name -> RuleList) -> [ItemSetWithGotos]
 > genLR0items g precalcClosures
 >       = fst (mkClosure (\(_,new) _ -> null new)

diff --git a/src/Main.lhs b/src/Main.lhs
@@ -13,11 +13,12 @@ Path settings auto-generated by Cabal:
 
 > import ParseMonad.Class
 > import AbsSyn
-> import Grammar
+> import Happy.Grammar
 > import PrettyGrammar
 > import Parser
 
 > import Tabular
+> import Mangler
 > import ProduceCode (produceParser)
 > import ProduceGLRCode
 > import Info (genInfoFile)