Skip to content

Commit

Permalink
Create Grammar package
Browse files Browse the repository at this point in the history
  • Loading branch information
knothed authored and int-index committed Oct 20, 2021
1 parent 77850fb commit 00bfba8
Show file tree
Hide file tree
Showing 15 changed files with 227 additions and 173 deletions.
1 change: 1 addition & 0 deletions cabal.project
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
packages:
packages/grammar
./
5 changes: 3 additions & 2 deletions happy.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ executable happy
build-depends: base < 5,
array,
containers >= 0.4.2,
mtl >= 2.2.1
mtl >= 2.2.1,
happy-grammar == 1.21.0
-- mtl-2.2.1 added Control.Monad.Except

default-language: Haskell98
Expand All @@ -171,10 +172,10 @@ executable happy
AbsSyn
First
GenUtils
Grammar
Info
LALR
Lexer
Mangler
ParseMonad
ParseMonad.Class
Parser
Expand Down
1 change: 1 addition & 0 deletions packages/grammar/LICENSE
2 changes: 2 additions & 0 deletions packages/grammar/Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
45 changes: 45 additions & 0 deletions packages/grammar/happy-grammar.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
name: happy-grammar
version: 1.21.0
license: BSD2
license-file: LICENSE
copyright: (c) Andy Gill, Simon Marlow
author: Andy Gill and Simon Marlow
maintainer: Simon Marlow <marlowsd@gmail.com>
bug-reports: https://github.com/simonmar/happy/issues
stability: stable
homepage: https://www.haskell.org/happy/
category: Development
cabal-version: >= 1.10
build-type: Simple
synopsis: happy's Grammar datatype

Description:
Happy is a parser generator for Haskell.
Happy-Grammar exposes the cross-package Grammar datatype,
which represents a grammar as can be parsed and processed by happy.

tested-with:
GHC == 7.0.4
GHC == 7.4.2
GHC == 7.6.3
GHC == 7.8.4
GHC == 7.10.3
GHC == 8.0.2
GHC == 8.2.2
GHC == 8.4.4
GHC == 8.6.5
GHC == 8.8.4
GHC == 8.10.4
GHC == 9.0.1

library
hs-source-dirs: src

exposed-modules: Happy.Grammar
build-depends: base < 5,
array

default-language: Haskell98
default-extensions: CPP, MagicHash, FlexibleContexts
ghc-options: -Wall
other-modules:
126 changes: 126 additions & 0 deletions packages/grammar/src/Happy/Grammar.lhs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/-----------------------------------------------------------------------------
The Grammar data type.

(c) 1993-2001 Andy Gill, Simon Marlow
-----------------------------------------------------------------------------

> module Happy.Grammar (
> Name,
>
> Production(..), Grammar(..), ErrorHandlerType(..),
> Priority(..),
> Assoc(..),
>
> errorName, errorTok, startName, dummyName, firstStartTok, dummyTok,
> eofName, epsilonTok
> ) where

> import Data.Array

> type Name = Int

> data ErrorHandlerType
> = ErrorHandlerTypeDefault
> | ErrorHandlerTypeExpList

> data Production
> = Production Name [Name] (String,[Int]) Priority
> deriving Show

> data Grammar
> = Grammar {
> productions :: [Production],
> lookupProdNo :: Int -> Production,
> lookupProdsOfName :: Name -> [Int],
> token_specs :: [(Name,String)],
> terminals :: [Name],
> non_terminals :: [Name],
> starts :: [(String,Name,Name,Bool)],
> types :: Array Int (Maybe String),
> token_names :: Array Int String,
> first_nonterm :: Name,
> first_term :: Name,
> eof_term :: Name,
> priorities :: [(Name,Priority)],
> token_type :: String,
> imported_identity :: Bool,
> monad :: (Bool,String,String,String,String),
> expect :: Maybe Int,
> attributes :: [(String,String)],
> attributetype :: String,
> lexer :: Maybe (String,String),
> error_handler :: Maybe String,
> error_sig :: ErrorHandlerType
> }

> instance Show Grammar where
> showsPrec _ (Grammar
> { productions = p
> , token_specs = t
> , terminals = ts
> , non_terminals = nts
> , starts = sts
> , types = tys
> , token_names = e
> , first_nonterm = fnt
> , first_term = ft
> , eof_term = eof
> })
> = showString "productions = " . shows p
> . showString "\ntoken_specs = " . shows t
> . showString "\nterminals = " . shows ts
> . showString "\nnonterminals = " . shows nts
> . showString "\nstarts = " . shows sts
> . showString "\ntypes = " . shows tys
> . showString "\ntoken_names = " . shows e
> . showString "\nfirst_nonterm = " . shows fnt
> . showString "\nfirst_term = " . shows ft
> . showString "\neof = " . shows eof
> . showString "\n"

> data Assoc = LeftAssoc | RightAssoc | None
> deriving Show

> data Priority = No | Prio Assoc Int | PrioLowest
> deriving Show

> instance Eq Priority where
> No == No = True
> Prio _ i == Prio _ j = i == j
> _ == _ = False

-----------------------------------------------------------------------------
-- Magic name values

All the tokens in the grammar are mapped onto integers, for speed.
The namespace is broken up as follows:

epsilon = 0
error = 1
dummy = 2
%start = 3..s
non-terminals = s..n
terminals = n..m
%eof = m

These numbers are deeply magical, change at your own risk. Several
other places rely on these being arranged as they are, including
ProduceCode.lhs and the various HappyTemplates.

Unfortunately this means you can't tell whether a given token is a
terminal or non-terminal without knowing the boundaries of the
namespace, which are kept in the Grammar structure.

In hindsight, this was probably a bad idea.

> startName, eofName, errorName, dummyName :: String
> startName = "%start" -- with a suffix, like %start_1, %start_2 etc.
> eofName = "%eof"
> errorName = "error"
> dummyName = "%dummy" -- shouldn't occur in the grammar anywhere

> firstStartTok, dummyTok, errorTok, epsilonTok :: Name
> firstStartTok = 3
> dummyTok = 2
> errorTok = 1
> epsilonTok = 0
7 changes: 3 additions & 4 deletions src/AbsSyn.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ Abstract syntax for grammar files.
Here is the abstract syntax of the language we parse.

> module AbsSyn (
> AbsSyn(..), Directive(..), ErrorHandlerType(..),
> AbsSyn(..), Directive(..),
> getTokenType, getTokenSpec, getParserNames, getLexer,
> getImportedIdentity, getMonad, getError,
> getPrios, getPrioNames, getExpect, getErrorHandlerType,
> getAttributes, getAttributetype,
> Rule(..), Prod(..), Term(..), Prec(..)
> ) where

> import Happy.Grammar (ErrorHandlerType(..))

> data AbsSyn
> = AbsSyn
> (Maybe String) -- header
Expand Down Expand Up @@ -53,9 +55,6 @@ Parser Generator Directives.
ToDo: find a consistent way to analyse all the directives together and
generate some error messages.

> data ErrorHandlerType
> = ErrorHandlerTypeDefault
> | ErrorHandlerTypeExpList
>
> data Directive a
> = TokenType String -- %tokentype
Expand Down
2 changes: 1 addition & 1 deletion src/First.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Implementation of FIRST
> import GenUtils
> import NameSet ( NameSet )
> import qualified NameSet as Set
> import Grammar
> import Happy.Grammar
> import Data.IntSet (IntSet)

\subsection{Utilities}
Expand Down
4 changes: 2 additions & 2 deletions src/Info.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ Generating info files.
> module Info (genInfoFile) where

> import Paths_happy ( version )
> import LALR ( Lr0Item(..) )
> import LALR ( Lr0Item(..), Goto(..), LRAction(..), ActionTable, GotoTable )
> import GenUtils ( str, interleave, interleave' )
> import Data.Set ( Set )
> import qualified Data.Set as Set hiding ( Set )
> import Grammar
> import Happy.Grammar

> import Data.Array
> import Data.List (nub)
Expand Down
28 changes: 22 additions & 6 deletions src/LALR.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ Generation of LALR parsing tables.
-----------------------------------------------------------------------------

> module LALR
> (genActionTable, genGotoTable, genLR0items, precalcClosure0,
> (ActionTable, GotoTable,
> genActionTable, genGotoTable, genLR0items, precalcClosure0,
> propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts,
> Lr0Item(..), Lr1Item, ItemSetWithGotos)
> Lr0Item(..), Lr1Item(..), ItemSetWithGotos, LRAction(..), Goto(..))
> where

> import GenUtils
> import Data.Set ( Set )
> import qualified Data.Set as Set hiding ( Set )
> import qualified NameSet
> import NameSet ( NameSet )
> import Grammar
> import Happy.Grammar

> import Control.Monad (guard)
> import Control.Monad.ST
Expand All @@ -32,16 +33,33 @@ Generation of LALR parsing tables.
> unionNameMap :: (Name -> NameSet) -> NameSet -> NameSet
> unionNameMap f = NameSet.foldr (NameSet.union . f) NameSet.empty

-----------------------------------------------------------------------------

This means rule $a$, with dot at $b$ (all starting at 0)

> data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int -- (rule, dot)
> data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int -- (rule, dot)
> deriving (Eq,Ord,Show)

> data Lr1Item = Lr1 {-#UNPACK#-}!Int {-#UNPACK#-}!Int NameSet -- (rule, dot, lookahead)
> deriving (Show)

> type RuleList = [Lr0Item]

> type ItemSetWithGotos = (Set Lr0Item, [(Name,Int)])

> data LRAction = LR'Shift Int Priority -- state number and priority
> | LR'Reduce Int Priority-- rule no and priority
> | LR'Accept -- :-)
> | LR'Fail -- :-(
> | LR'MustFail -- :-(
> | LR'Multiple [LRAction] LRAction -- conflict
> deriving (Eq,Show)

> type ActionTable = Array Int{-state-} (Array Int{-terminal#-} LRAction)
> type GotoTable = Array Int{-state-} (Array Int{-nonterminal #-} Goto)
> data Goto = Goto Int | NoGoto
> deriving (Eq, Show)

-----------------------------------------------------------------------------
Generating the closure of a set of LR(0) items

Expand Down Expand Up @@ -179,8 +197,6 @@ keep iterating until the second set is empty.
The addItems function is complicated by the fact that we need to keep
information about which sets were generated by which others.

> type ItemSetWithGotos = (Set Lr0Item, [(Name,Int)])

> genLR0items :: Grammar -> (Name -> RuleList) -> [ItemSetWithGotos]
> genLR0items g precalcClosures
> = fst (mkClosure (\(_,new) _ -> null new)
Expand Down
3 changes: 2 additions & 1 deletion src/Main.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ Path settings auto-generated by Cabal:

> import ParseMonad.Class
> import AbsSyn
> import Grammar
> import Happy.Grammar
> import PrettyGrammar
> import Parser

> import Tabular
> import Mangler
> import ProduceCode (produceParser)
> import ProduceGLRCode
> import Info (genInfoFile)
Expand Down
Loading

0 comments on commit 00bfba8

Please sign in to comment.