diff --git a/CHANGES.md b/CHANGES.md index 77f2f5a..d7846a1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +## v0.7.0 - 2024=07-03 +- support for EBNF output ([#14](https://github.com/Lelio-Brun/Obelisk/issues/14)) +- rewriting of the printers +- switch to Github Actions for CI + ## v0.6.0 - 2021-02-09 This version adds support for the Menhir standard rules `endrule`, `midrule`, `rev`, `flatten` and `append`. diff --git a/Makefile b/Makefile index 95dcdfe..162fffc 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ PREFIX=my MAIN=main EXE=dune exec $(SRC)/main.exe -- -.PHONY: all latex htmlcss html default reco readme tests clean +.PHONY: all latex htmlcss html default ebnf reco readme tests clean all: @dune build @@ -16,7 +16,7 @@ all: @$(EXE) latex -prefix $(PREFIX) -$* $(PARSER) -o $@ %.pdf: %.tex - @pdflatex -interaction batchmode $< + pdflatex -interaction batchmode $< %.png: %.pdf @convert -quiet -density 150 $< -format png $(MISC)/$@ @@ -38,13 +38,21 @@ default: @printf "\nDefault output on $(PARSER):\n" @$(EXE) $(PARSER) +ebnf: + @printf "\nEBNF output on $(PARSER):\n" + @$(EXE) ebnf $(PARSER) + reco: @printf "Default output on $(RECO):\n" @$(EXE) $(RECO) @printf "Default output on $(RECO) with '-i' switch:\n" @$(EXE) -i $(RECO) + @printf "EBNF output on $(RECO):\n" + @$(EXE) ebnf $(RECO) + @printf "EBNF output on $(RECO) with '-i' switch:\n" + @$(EXE) ebnf -i $(RECO) -readme: latex htmlcss html default reco +readme: latex htmlcss html default ebnf reco tests: @dune test diff --git a/README.md b/README.md index 5125c9f..ba2833f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ +<<<<<<< HEAD # Obelisk ![Build Status](https://github.com/Lelio-Brun/Obelisk/actions/workflows/main.yml/badge.svg?branch=master) [![Mentioned in Awesome OCaml](https://awesome.re/mentioned-badge.svg)](https://github.com/rizo/awesome-ocaml) **Obelisk** is a simple tool that produces pretty-printed output from a [Menhir] parser file (_.mly_). +======= +# Obelisk [![Mentioned in Awesome OCaml](https://awesome.re/mentioned-badge.svg)](https://github.com/rizo/awesome-ocaml) ![workflow badge](https://github.com/Lelio-Brun/Obelisk/actions/workflows/workflow.yml/badge.svg?branch=dev) +**Obelisk** is a simple tool which produces pretty-printed output from a [Menhir] parser file (_.mly_). +>>>>>>> origin/dev It is inspired from [yacc2latex] and is also written in [OCaml], but is aimed at supporting features from Menhir instead of only those of [ocamlyacc]. @@ -11,10 +16,12 @@ It is inspired from [yacc2latex] and is also written in [OCaml], but is aimed at * [Usage](#usage) + [Pattern recognition](#pattern-recognition) + [Multi-format output](#multi-format-output) + - [EBNF](#ebnf) - [LaTeX](#latex) - [HTML](#html) + [Example](#example) - [Default](#default) + - [EBNF](#ebnf-1) - [LaTeX](#latex-1) * [Tabular](#tabular) * [Syntax](#syntax) @@ -61,7 +68,7 @@ dune install [--prefix ] ## Usage ``` -obelisk [latex|html] [options] +obelisk [ebnf|latex|html] [options] ``` If multiple files are specified, **Obelisk** will output a concatenated result without consistency checks. @@ -100,14 +107,14 @@ my_separated_list(X,S): | {} | my_separated_nonempty_list(X,S) {} -my_rule(E,F,S1,S2): +my_rule: | my_option(E, F) {} | my_list(E) {} | my_nonempty_list(F) {} | my_separated_nonempty_list(E,S1) {} | my_separated_list(F,S2) {} ``` -**Obelisk** outputs: +**Obelisk** (`obelisk misc/reco.mly`) outputs: ``` ::= [Y X] @@ -119,26 +126,66 @@ my_rule(E,F,S1,S2): ::= [X (S X)*] - ::= - | - | - | - | + ::= + | + | + | + | ``` -And with the `-i` switch: +And with the `-i` switch (`obelisk -i misc/reco.mly`): ``` - ::= [F E] - | E* - | F+ - | E (S1 E)* - | [F (S2 F)*] + ::= [F E] + | E* + | F+ + | E (S1 E)* + | [F (S2 F)*] ``` ### Multi-format output +<<<<<<< HEAD By default, the output format is a simple text format that is close to the BNF syntax. You can use the subcommands `latex` or `html` to get a LaTeX (resp. HTML) file. In default and HTML mode, the option `-noaliases` avoids printing token aliases in the output. +======= +By default the output format is a simple text format close to the BNF syntax. +You can use the subcommands `ebnf`, `latex` or `html` to get respectively an EBNF text output, LaTeX output or HTML output. + +In default, EBNF and HTML mode, the option `-noaliases` avoid printing token aliases in the output. + +#### EBNF +In EBNF mode, parameterized rules are specialized into dedicated regular rules. +On the example above (`obelisk ebnf misc/reco.mly`): + +``` +my_rule ::= my_option_0 + | my_list_0 + | my_nonempty_list_0 + | my_separated_nonempty_list_0 + | my_separated_list_0 + +my_option_0 ::= (F E)? + +my_nonempty_list_0 ::= F+ + +my_separated_nonempty_list_1 ::= F (S2 F)* + +my_separated_list_0 ::= (F (S2 F)*)? + +my_separated_nonempty_list_0 ::= E (S1 E)* + +my_list_0 ::= E* +``` +And with the `-i` switch (`obelisk ebnf -i misc/reco.mly`): + +``` +my_rule ::= (F E)? + | E* + | F+ + | E (S1 E)* + | (F (S2 F)*)? +``` +>>>>>>> origin/dev #### LaTeX Use the following options to tweak the LaTeX: @@ -252,6 +299,81 @@ Here are the outputs of the different formats obtained by **Obelisk** from its o | QID ``` +#### EBNF + +``` +specification ::= rule* EOF + +rule ::= old_rule + | new_rule + +old_rule ::= flags? ident ATTRIBUTE* parameters_0 COLON optional_bar group + (BAR group)* SEMICOLON* + +flags ::= PUBLIC + | INLINE + | PUBLIC INLINE + | INLINE PUBLIC + +optional_bar ::= BAR? + +group ::= production (BAR production)* ACTION precedence? + +production ::= producer* precedence? + +producer ::= (LID EQ)? actual ATTRIBUTE* SEMICOLON* + +actual ::= generic_actual_0 + +lax_actual ::= generic_actual_0 + | group (BAR group)* + +new_rule ::= PUBLIC? LET LID ATTRIBUTE* parameters_0 binder expression + +binder ::= COLONEQ + | EQEQ + +expression ::= optional_bar seq_expression (BAR seq_expression)* + +seq_expression ::= (pattern EQ)? symbol_expression SEMICOLON seq_expression + | symbol_expression + | action_expression + +symbol_expression ::= ident parameters_2 + | symbol_expression modifier + +action_expression ::= action + | action precedence + | precedence action + +action ::= ACTION + | POINTFREEACTION + +pattern ::= LID + | UNDERSCORE + | TILDE + | LPAR (pattern (COMMA pattern)*)? RPAR + +modifier ::= OPT + | PLUS + | STAR + +precedence ::= PREC ident + +ident ::= UID + | LID + | QID + +generic_actual_0 ::= ident parameters_1 + | actual modifier + +parameters_1 ::= (LPAR (lax_actual (COMMA lax_actual)*)? RPAR)? + +parameters_0 ::= (LPAR (ident (COMMA ident)*)? RPAR)? + +parameters_2 ::= (LPAR (expression (COMMA expression)*)? RPAR)? +``` + #### LaTeX ##### Tabular ![Tabular](misc/tabular.png) diff --git a/dune-project b/dune-project index 8d5bb34..98ea9e0 100644 --- a/dune-project +++ b/dune-project @@ -1,7 +1,7 @@ (lang dune 2.0) (name obelisk) -(version 0.6.0) +(version 0.7.0) (using menhir 2.0) (generate_opam_files true) @@ -11,7 +11,7 @@ (documentation https://github.com/Lelio-Brun/Obelisk/blob/master/README.md) (license MIT) (authors "Lélio Brun") -(maintainers "Lélio Brun ") +(maintainers "Lélio Brun ") (package (name obelisk) diff --git a/misc/backnaur.png b/misc/backnaur.png index 0bcba3d..b7989e2 100644 Binary files a/misc/backnaur.png and b/misc/backnaur.png differ diff --git a/misc/html.png b/misc/html.png index 5c75be0..e6d02e4 100644 Binary files a/misc/html.png and b/misc/html.png differ diff --git a/misc/htmlcss.png b/misc/htmlcss.png index 5c75be0..8319a41 100644 Binary files a/misc/htmlcss.png and b/misc/htmlcss.png differ diff --git a/misc/reco.mly b/misc/reco.mly index 9708ba9..a9c14c2 100644 --- a/misc/reco.mly +++ b/misc/reco.mly @@ -19,7 +19,7 @@ my_separated_list(X,S): | {} | my_separated_nonempty_list(X,S) {} -my_rule(E,F,S1,S2): +my_rule: | my_option(E, F) {} | my_list(E) {} | my_nonempty_list(F) {} diff --git a/misc/syntax.png b/misc/syntax.png index 3f40e74..87bf836 100644 Binary files a/misc/syntax.png and b/misc/syntax.png differ diff --git a/misc/tabular.png b/misc/tabular.png index 2201240..f0a7c2a 100644 Binary files a/misc/tabular.png and b/misc/tabular.png differ diff --git a/obelisk.opam b/obelisk.opam index f17454c..3b025b0 100644 --- a/obelisk.opam +++ b/obelisk.opam @@ -1,11 +1,11 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -version: "0.6.0" +version: "0.7.0" synopsis: "Pretty-printing for Menhir files" description: """ Obelisk is a simple tool which produces pretty-printed output from a Menhir parser file (.mly). It is inspired from yacc2latex and is also written in OCaml, but is aimed at supporting features from Menhir instead of only those of ocamlyacc.""" -maintainer: ["Lélio Brun "] +maintainer: ["Lélio Brun "] authors: ["Lélio Brun"] license: "MIT" homepage: "https://github.com/Lelio-Brun/Obelisk" diff --git a/src/common.ml b/src/common.ml index 9f24835..7e72a04 100644 --- a/src/common.ml +++ b/src/common.ml @@ -1,5 +1,10 @@ (** Some common facilities. *) +(** Find a rule by its left-hand side (name) in the grammar. *) +let find_rule r rules = + let open ExtendedAst in + List.find_opt (fun { name; _ } -> name = r) rules + (** The signature for the set of symbols appearing in a grammar. *) module type SYMBOLS = sig (** The set of symbols. *) @@ -41,6 +46,9 @@ module type SYMBOLS = sig See {!defined}. *) val is_defined: string -> t -> string list option + (** Test if the given symbol actually appears as a symbol. *) + val is_symbol: string -> t -> bool + end (** The actual implementation for the set of symbols, see {!SYMBOLS}. *) @@ -116,4 +124,8 @@ module Symbols : SYMBOLS = struct end with Not_found -> None + (** See {!SYMBOLS.is_symbol}. *) + let is_symbol x m = + M.mem x m + end diff --git a/src/dune b/src/dune index 6568b23..c21b15e 100644 --- a/src/dune +++ b/src/dune @@ -2,7 +2,7 @@ (name main) (public_name obelisk) (package obelisk) - (modules_without_implementation ast extendedAst helper printer) + (modules_without_implementation ast helper printer) (libraries re)) (include_subdirs unqualified) diff --git a/src/extendedAst.mli b/src/extendedAst.ml similarity index 62% rename from src/extendedAst.mli rename to src/extendedAst.ml index 76806a5..c6076d7 100644 --- a/src/extendedAst.mli +++ b/src/extendedAst.ml @@ -42,3 +42,46 @@ and pattern = | NEList of actual (** [nonempty_list(x)] *) | SepList of actual * actual (** [separated_list(x)] *) | SepNEList of actual * actual (** [separated_nonempty_list(sep, x)] *) + +let fold_map_pattern f acc = function + | Option x -> + let acc, x = f acc x in + acc, Option x + | Pair (x, y) -> + let acc, x = f acc x in + let acc, y = f acc y in + acc, Pair (x, y) + | SepPair (x, sep, y) -> + let acc, x = f acc x in + let acc, sep = f acc sep in + let acc, y = f acc y in + acc, SepPair (x, sep, y) + | Preceded (o, x) -> + let acc, o = f acc o in + let acc, x = f acc x in + acc, Preceded (o, x) + | Terminated (x, c) -> + let acc, x = f acc x in + let acc, c = f acc c in + acc, Terminated (x, c) + | Delimited (o, x, c) -> + let acc, o = f acc o in + let acc, x = f acc x in + let acc, c = f acc c in + acc, Delimited (o, x, c) + | List x -> + let acc, x = f acc x in + acc, List x + | NEList x -> + let acc, x = f acc x in + acc, NEList x + | SepList (sep, x) -> + let acc, sep = f acc sep in + let acc, x = f acc x in + acc, SepList (sep, x) + | SepNEList (sep, x) -> + let acc, sep = f acc sep in + let acc, x = f acc x in + acc, SepNEList (sep, x) + +let map_pattern f p = fold_map_pattern (fun o p -> o, f p) None p |> snd diff --git a/src/genericPrinter.ml b/src/genericPrinter.ml index a017241..7adaad5 100644 --- a/src/genericPrinter.ml +++ b/src/genericPrinter.ml @@ -1,6 +1,7 @@ (** Provide a generic functor to build a specific printer. *) open ExtendedAst +open Format (** Alias for the {!Helper} signature. *) module type HELPER = module type of Helper @@ -15,28 +16,23 @@ module type PRINTER = module type of Printer module Make (H : HELPER) : PRINTER = struct (** Print a {!val:Helper.space} space. *) - let print_space () = H.print_string (H.space ()) + let print_space fmt = + fprintf fmt "%t" H.space - (** [print_sep_encl print sep op cl xs] prints the elements of [xs] with + (** [print_sep_encl fmt print sep op cl xs] prints the elements of [xs] with the printer [print], separated by [sep] end enclosed by [op] and [cl] *) - let print_sep_encl print sep op cl = - let rec aux = function - | [] -> () - | [x] -> print x - | x :: xs -> - print x; H.print_string sep; aux xs - in - function + let print_sep_encl print sep op cl fmt = function | [] -> () | xs -> - H.print_string op; - aux xs; - H.print_string cl + fprintf fmt "%t%a%t" + op + (pp_print_list ~pp_sep:(fun fmt () -> sep fmt) print) xs + cl - (** [print_sep print sep xs] prints the elements of [xs] with + (** [print_sep print sep fmt xs] prints the elements of [xs] with the printer [print], separated by [sep]. *) - let print_sep print sep = - print_sep_encl print sep "" "" + let print_sep print sep fmt = + print_sep_encl print sep (fun _ -> ()) (fun _ -> ()) fmt (** [is_not_atom a] decides if the actual [a] has to be parenthesized. *) let rec is_not_atom = function @@ -51,27 +47,28 @@ module Make (H : HELPER) : PRINTER = struct (** Print a production by first calling {!val:Helper.production_begin} then printing the actuals and finally calling {!val:Helper.production_end}. *) - let rec print_production symbols actuals = - H.production_begin (); - print_actuals symbols actuals; - H.production_end () + let rec print_production symbols fmt actuals = + fprintf fmt "%t%a%t" + H.production_begin + (print_actuals symbols) actuals + H.production_end (** Print a list of actuals. If the list is [nil], then the empty word {!val:Helper.eps} is printed. *) - and print_actuals symbols = function - | [] -> H.print_string (H.eps ()) - | xs -> print_sep (print_actual symbols) (H.space ()) xs + and print_actuals symbols fmt = function + | [] -> fprintf fmt "%t" H.eps + | xs -> print_sep (print_actual symbols) H.space fmt xs (** Print an actual. *) - and print_actual symbols = function + and print_actual symbols fmt = function | Symbol (x, ps) -> - print_symbol symbols x ps + print_symbol symbols fmt x ps | Pattern p -> - print_pattern symbols p + print_pattern symbols fmt p | Modifier (a, m) -> - print_modifier m (is_not_atom a) (fun () -> print_actual symbols a) + print_modifier m (is_not_atom a) (fun fmt -> print_actual symbols fmt a) fmt | Anonymous ps -> - print_sep (print_actuals symbols) (H.bar ()) ps + print_sep (print_actuals symbols) H.bar fmt ps (** Print a "modified" actual. Modular: see {!val:Helper.opt}, {!val:Helper.plus} @@ -93,39 +90,43 @@ module Make (H : HELPER) : PRINTER = struct ([x+]) - [separated_list(sep, x)] and [separated_nonempty_list(sep, x)] are modularly printed, see {!val:Helper.print_sep_list} *) - and print_pattern symbols = - let print' = print_actual symbols in - let print'' x () = print' x in + and print_pattern symbols fmt = + let print = print_actual symbols in + let print' x fmt = print fmt x in function | Option x -> - H.opt (is_not_atom x) (print'' x) + H.opt (is_not_atom x) (print' x) fmt | Pair (x, y) -> - print' x; print_space (); print' y + fprintf fmt "%a%t%a" print x print_space print y | SepPair (x, sep, y) -> - print' x; print_space (); print' sep; print_space (); print' y + fprintf fmt "%a%t%a%t%a" print x print_space print sep print_space print y | Preceded (o, x) -> - print' o; print_space (); print' x + fprintf fmt "%a%t%a" print o print_space print x | Terminated (x, c) -> - print' x; print_space (); print' c + fprintf fmt "%a%t%a" print x print_space print c | Delimited (o, x, c) -> - print' o; print_space (); print' x; print_space (); print' c + fprintf fmt "%a%t%a%t%a" print o print_space print x print_space print c | List x -> - H.star (is_not_atom x) (print'' x) + H.star (is_not_atom x) (print' x) fmt | NEList x -> - H.plus (is_not_atom x) (print'' x) + H.plus (is_not_atom x) (print' x) fmt | SepList (sep, x) -> - H.print_sep_list (is_not_atom x) false (print'' sep) (print'' x) + H.print_sep_list (is_not_atom x) false (print' sep) (print' x) fmt | SepNEList (sep, x) -> - H.print_sep_list (is_not_atom x) true (print'' sep) (print'' x) + H.print_sep_list (is_not_atom x) true (print' sep) (print' x) fmt - (** [print_symbols symbols s xs] modularly prints the symbol [s] and its + (** [print_symbols symbols fmt s xs] modularly prints the symbol [s] and its parameters [xs]. See {!val:Helper.print_symbol}. *) - and print_symbol symbols s xs = - H.print_symbol symbols s - (fun () -> + and print_symbol symbols fmt s xs = + H.print_symbol symbols + (fun fmt -> print_sep_encl (print_actual symbols) - ("," ^ (H.space ())) "(" ")" xs) + (fun fmt -> fprintf fmt ",%t" H.space) + (fun fmt -> H.print_string fmt "(") + (fun fmt -> H.print_string fmt ")") + fmt xs) + fmt s (** Print a rule: + calls {!val:Helper.rule_begin} @@ -135,26 +136,33 @@ module Make (H : HELPER) : PRINTER = struct + prints the productions, separated by a line break {!val:Helper.break} and a bar {!val:Helper.prod_bar} + calls {!val:Helper.rule_end} *) - let print_rule symbols {name; params; prods} = - H.rule_begin (); - let print_params = - if params <> [] - then Some (fun () -> print_sep_encl H.print_param ", " "(" ")" params) - else None + let print_rule symbols fmt { name; params; prods } = + let print_params fmt = + print_sep_encl H.print_param + (fun fmt -> H.print_string fmt ", ") + (fun fmt -> H.print_string fmt "(") + (fun fmt -> H.print_string fmt ")") + fmt params + in + let print_prods fmt = + print_sep (print_production symbols) + (fun fmt -> fprintf fmt "%t%t" H.break H.prod_bar) + fmt in - H.print_rule_name name print_params; - H.print_string (H.def ()); - print_sep (print_production symbols) (H.break () ^ H.prod_bar ()) prods; - H.rule_end () + fprintf fmt "%t%a%t%a%t" + H.rule_begin + (H.print_rule_name print_params) name + H.def + print_prods prods + H.rule_end (** Print the grammar by first calling {!val:Helper.print_header}, then printing the rules and finally calling {!val:Helper.print_footer}. *) - let print_spec o symbols s = - H.p := o; - H.print_header symbols; - H.print_string "@["; - List.iter (print_rule symbols) s; - H.print_string "@]"; - H.print_footer () + let print_spec symbols fmt s = + fprintf fmt "%t@[%a@]%s%t" + (H.print_header symbols) + (pp_print_list (print_rule symbols)) s + (if s = [] then "" else "\n") + H.print_footer end diff --git a/src/helper.mli b/src/helper.mli index 292d5c1..d30e4a4 100644 --- a/src/helper.mli +++ b/src/helper.mli @@ -1,77 +1,75 @@ (** The generic signature for the printer helpers. *) open Common - -(** A reference to the used formatter. *) -val p: Format.formatter ref +open Format (** Printed at the beginning of the output. *) -val print_header: Symbols.t -> unit +val print_header: Symbols.t -> formatter -> unit (** Printed at the end of the output. *) -val print_footer: unit -> unit +val print_footer: formatter -> unit (** Print an escaped string. *) -val print_string: string -> unit +val print_string: formatter -> string -> unit -(** [print_rule_name x print_params] prints the left-hand side of a rule where +(** [print_rule_name print_params fmt x] prints the left-hand side of a rule where [x] is the name of the defined non terminal and [print_params] the function to print the optional parameters. *) -val print_rule_name: string -> (unit -> unit) option -> unit +val print_rule_name: (formatter -> unit) -> formatter -> string -> unit (** Print a rule parmater. *) -val print_param: string -> unit +val print_param: formatter -> string -> unit (** Printed at the beginning of each rule. *) -val rule_begin: unit -> unit +val rule_begin: formatter -> unit (** Printed at the end of each rule. *) -val rule_end: unit -> unit +val rule_end: formatter -> unit (** Printed at the beginning of each production. *) -val production_begin: unit -> unit +val production_begin: formatter -> unit (** Printed at the end of each production. *) -val production_end: unit -> unit +val production_end: formatter -> unit -(** [print_symbol symbols s print_params] prints the symbol [s] and +(** [print_symbol symbols print_params fmt s] prints the symbol [s] and its parameters thanks to [print_params]. A different formatting is possible accordingly to [s] being a terminal, a non terminal or a functional non terminal. *) -val print_symbol: Symbols.t -> string -> (unit -> unit) -> unit +val print_symbol: Symbols.t -> (formatter -> unit) -> formatter -> string -> unit -(** [print_sep_list e nonempty print_sep print_x] prints the possibly non empty +(** [print_sep_list e nonempty print_sep print_x fmt] prints the possibly non empty separated list [separated[_nonempty]_list(sep, x)] where [sep] and [x] are respectively printed by [print_sep] and [print_x]. If [e] is [true] then the result is parenthesized. *) -val print_sep_list: bool -> bool -> (unit -> unit) -> (unit -> unit) -> unit +val print_sep_list: bool -> bool -> (formatter -> unit) -> (formatter -> unit) -> formatter -> unit (** To print a possibly parenthesized optional. *) -val opt: bool -> (unit -> unit) -> unit +val opt: bool -> (formatter -> unit) -> formatter -> unit (** To print a possibly parenthesized non empty list. *) -val plus: bool -> (unit -> unit) -> unit +val plus: bool -> (formatter -> unit) -> formatter -> unit (** To print a possibly parenthesized list. *) -val star: bool -> (unit -> unit) -> unit +val star: bool -> (formatter -> unit) -> formatter -> unit (** The rule definition symbol. *) -val def: unit -> string +val def: formatter -> unit (** The bar at the start of each alternative production. *) -val prod_bar: unit -> string +val prod_bar: formatter -> unit (** The bar for the anonymous rules. *) -val bar: unit -> string +val bar: formatter -> unit (* (\** The optionally parenthesizing function. *\) * val par: bool -> (unit -> unit) -> unit *) (** The space. *) -val space: unit -> string +val space: formatter -> unit (** The line break. *) -val break: unit -> string +val break: formatter -> unit (** The empty word epsilon. *) -val eps: unit -> string +val eps: formatter -> unit diff --git a/src/helpers/default.ml b/src/helpers/default.ml index bbf2059..22e47ab 100644 --- a/src/helpers/default.ml +++ b/src/helpers/default.ml @@ -1,32 +1,35 @@ +open Format + include MiniHelper -let print_header _ = () -let print_footer () = print_string "@." +let print_header _ _ = () +let print_footer fmt = fprintf fmt "@." -let def () = " ::= @[" -let prod_bar () = "| " -let bar () = "@ |@ " -let space () = "@ " -let break () = "@;" -let eps () = "epsilon" +let def fmt = fprintf fmt " ::= @[" +let prod_bar fmt = pp_print_string fmt "| " +let bar fmt = fprintf fmt "@ |@ " +let space fmt = fprintf fmt "@ " +let break fmt = fprintf fmt "@;" +let eps fmt = pp_print_string fmt "epsilon" let print_rule_name = - print_rule_name_with "<" ">" + print_rule_name_with (print_string' "<") (print_string' ">") -let rule_begin () = () -let rule_end () = - print_string "@]@;@;" +let rule_begin _ = () +let rule_end fmt = + fprintf fmt "@]@;" let print_symbol symbols = - print_symbol_aux "<" ">" symbols + print_symbol_aux symbols (print_string' "<") (print_string' ">") -let opt _ print = enclose print "[" "]" -let plus e print = par e print; print_string "+" -let star e print = par e print; print_string "*" +let opt _ print = enclose print (print_string' "[") (print_string' "]") +let plus e print fmt = fprintf fmt "%t%a" (par e print) print_string "+" +let star e print fmt = fprintf fmt "%t%a" (par e print) print_string "*" -let print_sep_list e nonempty print_sep print_x = - let print () = - print_x (); print_string (space ()); - star true (fun () -> print_sep (); print_string (space ()); print_x ()) +let print_sep_list e nonempty print_sep print_x fmt = + let print fmt = + fprintf fmt "%t%t%t" + print_x space + (star true (fun fmt -> fprintf fmt "%t%t%t" print_sep space print_x)) in - if nonempty then par e print else enclose print "[" "]" + (if nonempty then par e else opt false) print fmt diff --git a/src/helpers/ebnf.ml b/src/helpers/ebnf.ml new file mode 100644 index 0000000..d3f3d98 --- /dev/null +++ b/src/helpers/ebnf.ml @@ -0,0 +1,35 @@ +open Format + +include MiniHelper + +let print_header _ _ = () +let print_footer fmt = fprintf fmt "@." + +let def fmt = fprintf fmt " ::= @[" +let prod_bar fmt = pp_print_string fmt "| " +let bar fmt = fprintf fmt "@ |@ " +let space fmt = fprintf fmt "@ " +let break fmt = fprintf fmt "@;" +let eps fmt = pp_print_string fmt "" + +let print_rule_name = + print_rule_name_with (fun _ -> ()) (fun _ -> ()) + +let rule_begin _ = () +let rule_end fmt = + fprintf fmt "@]@;" + +let print_symbol symbols = + print_symbol_aux symbols (fun _ -> ()) (fun _ -> ()) + +let opt e print fmt = fprintf fmt "%t%a" (par e print) print_string "?" +let plus e print fmt = fprintf fmt "%t%a" (par e print) print_string "+" +let star e print fmt = fprintf fmt "%t%a" (par e print) print_string "*" + +let print_sep_list e nonempty print_sep print_x fmt = + let print fmt = + fprintf fmt "%t%t%t" + print_x space + (star true (fun fmt -> fprintf fmt "%t%t%t" print_sep space print_x)) + in + (if nonempty then par e else opt true) print fmt diff --git a/src/helpers/html.ml b/src/helpers/html.ml index c3e23fc..3ab9f3d 100644 --- a/src/helpers/html.ml +++ b/src/helpers/html.ml @@ -1,7 +1,9 @@ +open Format + include MiniHtml -let print_header _ = - print_string +let print_header _ fmt = + fprintf fmt "@[@;\ @[@;\ @[@;\ @@ -47,37 +49,41 @@ let list_after = "*" let ne_list_after = "+" let rule_def = "::=" -let def () = Format.sprintf " %s @;" rule_def -let prod_bar () = "@[@;| @;" +let def fmt = fprintf fmt " %s @;" rule_def +let prod_bar fmt = + fprintf fmt "@[@;| @;" let print_rule_name = print_rule_name_with - (Format.sprintf "%s" nonterminal_before) - (Format.sprintf "%s" nonterminal_after) + (fun fmt -> fprintf fmt "%s" nonterminal_before) + (fun fmt -> fprintf fmt "%s" nonterminal_after) let print_symbol symbols = - print_symbol_aux - (Format.sprintf "%s" nonterminal_before) - (Format.sprintf "%s" nonterminal_after) symbols + print_symbol_aux symbols + (fun fmt -> fprintf fmt "%s" nonterminal_before) + (fun fmt -> fprintf fmt "%s" nonterminal_after) -let opt _ print = - print_string option_before; - print_string ""; - print (); - print_string ""; - print_string option_after +let opt _ print fmt = + fprintf fmt "%a%a%t%a%a" + print_string option_before + print_string "" + print + print_string "" + print_string option_after -let plus e print = - print_string ""; - par e print; - print_string ""; - print_string ""; - print_string ne_list_after; - print_string "" -let star e print = - print_string ""; - par e print; - print_string ""; - print_string ""; - print_string list_after; - print_string "" +let plus e print fmt = + fprintf fmt "%a%t%a%a%a%a" + print_string "" + (par e print) + print_string "" + print_string "" + print_string ne_list_after + print_string "" +let star e print fmt = + fprintf fmt "%a%t%a%a%a%a" + print_string "" + (par e print) + print_string "" + print_string "" + print_string list_after + print_string "" diff --git a/src/helpers/htmlCss.ml b/src/helpers/htmlCss.ml index f2ceda3..3927164 100644 --- a/src/helpers/htmlCss.ml +++ b/src/helpers/htmlCss.ml @@ -1,7 +1,9 @@ +open Format + include MiniHtml -let print_header _ = - print_string +let print_header _ fmt = + fprintf fmt "@[@;\ @[@;\ @[@;\ @@ -59,26 +61,32 @@ let print_header _ = @[@;@;\ @[@;@;" -let def () = "@;@;@;@;@;@;" -let rule_end () = - print_string "@;@;" +let rule_begin fmt = + fprintf fmt "@[@;" +let rule_end fmt = + fprintf fmt "@;@;" -let production_begin () = () -let production_end () = - print_string "@]@;" +let production_begin _ = () +let production_end fmt = + fprintf fmt "@]@;" -let print_sep_list e nonempty print_sep print_x = - par e (fun () -> - print_x (); - print_string (if nonempty then "+" else "*"); - print_string ""; print_sep (); print_string "") +let print_sep_list e nonempty print_sep print_x fmt = + par e (fun fmt -> + fprintf fmt "%t%a%a%t%a" + print_x + print_string (if nonempty then "+" else "*") + print_string "" + print_sep + print_string "") + fmt diff --git a/src/helpers/miniLatex.ml b/src/helpers/miniLatex.ml index 07ab6ae..c791908 100644 --- a/src/helpers/miniLatex.ml +++ b/src/helpers/miniLatex.ml @@ -1,4 +1,5 @@ open Options +open Format include MiniHelper @@ -9,23 +10,19 @@ let alias s = let use () = !pfile <> "" -let print_string_package s = - Format.fprintf !formatter_package (Scanf.format_from_string s "") -let print_fmt_package s = - Format.fprintf !formatter_package s - -let usepackage opts s = - Format.sprintf "\\%s%s{%s}@;" +let usepackage fmt (opts, s) = + fprintf fmt "\\%s%s{%s}@;" (if use () then "RequirePackage" else "usepackage") opts s let documentclass header = - print_string_package "@["; - if use () - then print_fmt_package - "\\NeedsTeXFormat{LaTeX2e}@;\\ProvidesPackage{%s}@;@;" - !pfile - else print_string "\\documentclass[preview]{standalone}@;@;\\usepackage[T1]{fontenc}@;@;"; - print_string_package (usepackage "" "suffix" ^ header) + fprintf !formatter_package "@[%t%a%t" + (fun fmt -> + if use () then + fprintf fmt "\\NeedsTeXFormat{LaTeX2e}@;\\ProvidesPackage{%s}@;@;" !pfile + else + fprintf fmt "\\documentclass[preview]{standalone}@;@;\\usepackage[T1]{fontenc}@;@;") + usepackage ("", "suffix") + header let forall_str p s = let exception Exit in @@ -82,7 +79,7 @@ let pre () = valid !prefix let grammarname = command "obeliskgrammar" -let begin_document misc symbols = +let begin_document misc fmt symbols = let commands symbols = let replace r by = Re.Str.global_replace (Re.Str.regexp r) by in let escape s = @@ -96,30 +93,29 @@ let begin_document misc symbols = |> replace "}" "\\}" |> replace "\\$" "\\$" in - List.iter (fun x -> - print_fmt_package "\\newcommand\\%s{%s}@;" - (command (alias x)) (escape x)) - (Common.Symbols.terminals symbols @ Common.Symbols.defined symbols); - List.iter (fun x -> - let cx = command (alias x) in - print_fmt_package "\\WithSuffix\\newcommand\\%s*{\\%s{\\%s}}@;" - cx (command "gramterm") cx) - (Common.Symbols.terminals symbols); - List.iter (fun x -> - let cx = command x in - print_fmt_package "\\WithSuffix\\newcommand\\%s*{\\%s{\\%s}}@;" - cx (command "gramnonterm") cx) - (Common.Symbols.non_terminals symbols); - List.iter (fun x -> - let cx = command x in - print_fmt_package "\\WithSuffix\\newcommand\\%s*{\\%s{\\%s}}@;" - cx (command "gramfunc") cx) - (Common.Symbols.functionals symbols); - print_string_package "@;" + fprintf !formatter_package "%a%a%a%a@;" + (pp_print_list ~pp_sep:(fun _ () -> ()) (fun fmt x -> + fprintf fmt "\\newcommand\\%s{%s}@;" (command (alias x)) (escape x))) + (Common.Symbols.terminals symbols @ Common.Symbols.defined symbols) + (pp_print_list ~pp_sep:(fun _ () -> ()) (fun fmt x -> + let cx = command (alias x) in + fprintf fmt "\\WithSuffix\\newcommand\\%s*{\\%s{\\%s}}@;" + cx (command "gramterm") cx)) + (Common.Symbols.terminals symbols) + (pp_print_list ~pp_sep:(fun _ () -> ()) (fun fmt x -> + let cx = command x in + fprintf fmt "\\WithSuffix\\newcommand\\%s*{\\%s{\\%s}}@;" + cx (command "gramnonterm") cx)) + (Common.Symbols.non_terminals symbols) + (pp_print_list ~pp_sep:(fun _ () -> ()) (fun fmt x -> + let cx = command x in + fprintf fmt "\\WithSuffix\\newcommand\\%s*{\\%s{\\%s}}@;" + cx (command "gramfunc") cx)) + (Common.Symbols.functionals symbols) in commands symbols; let pre = pre () in - print_fmt_package + fprintf !formatter_package "\\newcommand\\%sgramopt[1]{[#1]}@;\ \\newcommand\\%sgramplus[1]{{#1}\\ensuremath{^+}}@;\ \\newcommand\\%sgramstar[1]{{#1}\\ensuremath{^*}}@;\ @@ -134,82 +130,78 @@ let begin_document misc symbols = pre pre pre pre pre pre pre pre pre pre pre pre pre pre pre pre pre pre pre pre; - print_fmt "%s%s\\begin{%s}@;" + fprintf fmt "%s%t\\begin{%s}@;" (if use () then "" else "\n\n\\begin{document}\n\n") - (if misc = "" then "" else misc ^ "\n") + misc grammarname -let newcommand x n o cmd = - "\\newcommand\\" ^ pre () ^ x ^ - begin match n with - | 0 -> "" - | _ -> "[" ^ string_of_int n ^ "]" - end - ^ begin match o with - | None -> "" - | Some y -> "[" ^ y ^ "]" - end - ^ "{" ^ cmd ^ "}@;" - -let end_document () = - print_fmt "\\end{%s}%s@]@." +let newcommand fmt (x, n, o, cmd) = + fprintf fmt "\\newcommand\\%s%s%a%a{%t}@;" + (pre ()) x + (fun fmt n -> match n with + | 0 -> () + | _ -> fprintf fmt "[%d]" n) + n + (pp_print_option (fun fmt -> fprintf fmt "[%s]")) o + cmd + +let end_document fmt = + fprintf fmt "\\end{%s}%s@]@." grammarname (if use () then "" else "\n\n\\end{document}") let print_footer = end_document let opt, plus, star = - let cmd s e (print: unit -> unit) = - print_fmt "\\%s%s%s{" (pre ()) s (if e then "*" else ""); - print (); - print_string "}" + let cmd s e print fmt = + fprintf fmt "\\%s%s%s{%t}" (pre ()) s (if e then "*" else "") print in cmd "gramopt", cmd "gramplus", cmd "gramstar" -let print_sep_list e nonempty print_sep print_x = - print_fmt "\\%sgramsep%slist%s{" +let print_sep_list e nonempty print_sep print_x fmt = + fprintf fmt "\\%sgramsep%slist%s{%t}{%t}" (pre ()) (if nonempty then "ne" else "") - (if e then "*" else ""); - print_sep (); - print_string "}{"; - print_x (); - print_string "}" + (if e then "*" else "") + print_sep + print_x -let print_comm star c = - print_fmt "\\%s%s{}" (command c) (if star then "*" else "") +let print_comm star fmt c = + fprintf fmt "\\%s%s{}" (command c) (if star then "*" else "") let print_term = print_comm true let print_non_term = print_comm true -let print_fun f print_params = - print_fmt "\\%s{" (command "gramfunc"); - print_comm false f; - print_params (); - print_string "}" +let print_fun print_params fmt f = + fprintf fmt "\\%s{%a%t}" + (command "gramfunc") + (print_comm false) f + print_params -let print_undef u = - print_fmt "%s" (Re.Str.global_replace (Re.Str.regexp "_") "\\_" u) +let print_undef fmt u = + fprintf fmt "%s" (Re.Str.global_replace (Re.Str.regexp "_") "\\_" u) let print_param = print_undef -let print_symbol_aux term non_term func undef symbols s print_params = +let print_symbol_aux symbols term non_term func undef print_params fmt s = let open Common.Symbols in match is_defined s symbols with - | Some [] -> non_term s - | Some _ -> func s print_params + | Some [] -> + non_term fmt s + | Some _ -> + func print_params fmt s | None -> - if is_term s symbols then term (alias s) else undef s + if is_term s symbols then term fmt (alias s) else undef fmt s let print_symbol symbols = - print_symbol_aux + print_symbol_aux symbols print_term print_non_term print_fun print_undef - symbols -let print_rule_name_raw name = - print_comm false name; - print_opt_params +let print_rule_name_raw print_params fmt name = + fprintf fmt "%a%t" + (print_comm false) name + print_params diff --git a/src/main.ml b/src/main.ml index 4489074..a8ce83a 100644 --- a/src/main.ml +++ b/src/main.ml @@ -29,7 +29,8 @@ let get () = in formatter_package := formatter'; let p = match !mode with - | Default -> (module Printers.Default : GenericPrinter.PRINTER) + | Plain Default -> (module Printers.Default : GenericPrinter.PRINTER) + | Plain EBNF -> (module Printers.Ebnf) | Latex Tabular -> (module Printers.LatexTabular) | Latex Syntax -> (module Printers.LatexSyntax) | Latex Backnaur -> (module Printers.LatexBacknaur) @@ -37,7 +38,7 @@ let get () = | Html NoCSS -> (module Printers.Html) in let module P = (val p: GenericPrinter.PRINTER) in - let print = P.print_spec formatter in + let print symbols = P.print_spec symbols formatter in let files = rev !ifiles in let infs = map open_in files in let lexbufs = map Lexing.from_channel infs in @@ -68,6 +69,7 @@ let () = s |> Normalize.normalize |> Transform.transform symbols + |> (if !mode = Plain EBNF then Specialize.specialize symbols else fun s -> s) |> Reduce.reduce !inline |> print symbols; close () diff --git a/src/options.ml b/src/options.ml index d9a626d..0462b10 100644 --- a/src/options.ml +++ b/src/options.ml @@ -17,10 +17,15 @@ let formatter_package = ref Format.std_formatter (** The different output modes. *) type mode = - | Default (** Standard plain text format. Default. *) + | Plain of plainmode (** Standard plain text format. Default. *) | Latex of latexmode (** LaTeX output. *) | Html of htmlmode (** HTML output. *) +(** The different plain text sub-modes *) +and plainmode = + | Default (** Default BNF-like mode. *) + | EBNF (** EBNF mode. *) + (** The different LaTeX sub-modes *) and latexmode = | Tabular (** Table-based layout. Default. *) @@ -32,10 +37,10 @@ and htmlmode = | CSS | NoCSS -(** The chosen mode, default to {!mode.Default}. *) -let mode = ref Default +(** The chosen mode, default to {!mode.Plain !plainmode.Default}. *) +let mode = ref (Plain Default) -(** Do we inline inferred patterns ? [false] by default. *) +(** Do we inline inferred patterns? [false] by default. *) let inline = ref false (** Do we substitute token aliases? [false] by default. *) @@ -48,6 +53,10 @@ let options = ref (Arg.align [ "-noaliases", Arg.Set no_aliases, " Do not substitute token aliases. Has no effect in LaTeX modes." ]) +(** Specify the plain sub-mode to use. *) +let set_plainmode m () = + mode := Plain m + (** Specify the LaTeX sub-mode to use. *) let set_latexmode m () = mode := Latex m @@ -81,6 +90,10 @@ let msg = "Obelisk version %%VERSION_NUM%%\n\ let parse_cmd = let cpt = ref 0 in function + | "ebnf" when !cpt < 1 -> + incr cpt; + set_plainmode EBNF (); + options := Arg.align (!options @ latex_opt) | "latex" when !cpt < 1 -> incr cpt; set_latexmode Tabular (); diff --git a/src/printer.mli b/src/printer.mli index 4d9b9d0..708b468 100644 --- a/src/printer.mli +++ b/src/printer.mli @@ -4,6 +4,7 @@ open Format open Common open ExtendedAst -(** [print_spec fmt symbs s] print the grammar [s] with symbols [symbs] on +(** [print_spec symbs fmt s] print the grammar [s] with symbols [symbs] on the specified formatter [fmt]. *) -val print_spec: formatter -> Symbols.t -> spec -> unit +val print_spec: Symbols.t -> formatter -> spec -> unit + diff --git a/src/printers.ml b/src/printers.ml index 5b31724..80a3344 100644 --- a/src/printers.ml +++ b/src/printers.ml @@ -5,6 +5,9 @@ open GenericPrinter (** The default plain text printer. *) module Default = Make(Default) +(** The EBNF plain text printer. *) +module Ebnf = Make(Ebnf) + (** The LaTeX default table-based printer. *) module LatexTabular = Make(LatexTabular) diff --git a/src/reduce.ml b/src/reduce.ml index 10682a1..23ad638 100644 --- a/src/reduce.ml +++ b/src/reduce.ml @@ -3,107 +3,35 @@ open Lazy open ExtendedAst -(** A map with identifiers keys. *) -module M = Map.Make(String) - -(** {2 Inlining} *) - -(** {3 Instantiating} *) - -(** Performs a substitution over an actual. - Only non functional symbols are substituted. *) -let rec subst_actual s a = - let subst_pattern = - let r = subst_actual s in - function - | Option x -> Option (r x) - | Pair (x, y) -> Pair (r x, r y) - | SepPair (x, sep, y) -> SepPair (r x, r sep, r y) - | Preceded (o, x) -> Preceded (r o, r x) - | Terminated (x, c) -> Terminated (r x, r c) - | Delimited (o, x, c) -> Delimited (r o, r x, r c) - | List x -> List (r x) - | NEList x -> NEList (r x) - | SepList (sep, x) -> SepList (r sep, r x) - | SepNEList (sep, x) -> SepNEList (r sep, r x) - in - let subst_production = List.map (subst_actual s) in - match a with - | Symbol (f, []) -> - begin try - M.find f s - with Not_found -> Symbol (f, []) - end - | Symbol (f, xs) -> - Symbol (f, List.map (subst_actual s) xs) - | Pattern p -> - Pattern (subst_pattern p) - | Modifier (x, m) -> - Modifier (subst_actual s x, m) - | Anonymous ps -> - Anonymous (List.map subst_production ps) - -(** [make_subst xs ys] builds a substitution that is a map linking each element - of [xs] to the corresponding (by index) element of [ys]. *) -let make_subst = - List.fold_left2 (fun s x y -> M.add x y s) M.empty +(** {2 Rewriting} *) (** [subst (xs, p) ys] substitutes the formal parameters [xs] with the actual parameters [ys] in the pattern [p].*) let subst (xs, p) ys = - let s = make_subst xs ys in - let f = subst_actual s in - let p = match p with - | Option x -> Option (f x) - | Pair (x, y) -> Pair (f x, f y) - | SepPair (x, sep, y) -> SepPair (f x, f sep, f y) - | Preceded (o, x) -> Preceded (f o, f x) - | Terminated (x, c) -> Terminated (f x, f c) - | Delimited (o, x, c) -> Delimited (f o, f x, f c) - | List x -> List (f x) - | NEList x -> NEList (f x) - | SepList (sep, x) -> SepList (f sep, f x) - | SepNEList (sep, x) -> SepNEList (f sep, f x) - in - Pattern p - -(** {3 Rewriting} *) + let s = Subst.make_subst xs ys in + Pattern (map_pattern (Subst.subst_actual s) p) (** Inline the recognized patterns in an actual. If the actual is a symbol [s], then we look in the map if [s] should be - inlined. In this case, we use {!subst} to perform the inlining, that is to - instantiate the recognized pattern obtained in the map with the actual + inlined. In this case, we use {!subst} to perform the inlining, that + is to instantiate the recognized pattern obtained in the map with the actual parameters of [s]. *) + let rec rewrite_actual rws = function | Symbol (s, xs) -> let xs = List.map (rewrite_actual rws) xs in begin try - let rw = M.find s rws in + let rw = Subst.M.find s rws in subst rw xs with Not_found -> Symbol (s, xs) end | Pattern p -> - Pattern (rewrite_pattern rws p) + Pattern (map_pattern (rewrite_actual rws) p) | Modifier (x, m) -> Modifier (rewrite_actual rws x, m) | Anonymous ps -> Anonymous (List.map (rewrite_production rws) ps) -(** Inline the recognized patterns in a pattern. *) -and rewrite_pattern rws = - let r = rewrite_actual rws in - function - | Option x -> Option (r x) - | Pair (x, y) -> Pair (r x, r y) - | SepPair (x, sep, y) -> SepPair (r x, r sep, r y) - | Preceded (o, x) -> Preceded (r o, r x) - | Terminated (x, c) -> Terminated (r x, r c) - | Delimited (o, x, c) -> Delimited (r o, r x, r c) - | List x -> List (r x) - | NEList x -> NEList (r x) - | SepList (sep, x) -> SepList (r sep, r x) - | SepNEList (sep, x) -> SepNEList (r sep, r x) - (** Inline the recognized patterns in a production. *) and rewrite_production rws = List.map (rewrite_actual rws) @@ -131,12 +59,6 @@ let compose x y = (** An infix notation for {!compose}. *) let (@@) = compose -(** Find a rule by its left-hand side (name) in the grammar. *) -let find_rule r rules = - try - Some (List.find (fun { name; _ } -> name = r) rules) - with Not_found -> None - (** Define a recursive equivalence between rules by transitivity. @@ -146,7 +68,7 @@ let find_rule r rules = let alias rules r = let rec eq r' = r = r' || - match find_rule r' rules with + match Common.find_rule r' rules with | Some { prods = [[Symbol (s, _)]]; _ } -> eq s | _ -> false in @@ -319,7 +241,7 @@ let is_sep_list rules r = let g = generalize r.params in match r.prods with | [[]; [Symbol (s, xs)]] | [[Symbol (s, xs)]; []] -> - begin match find_rule s rules with + begin match Common.find_rule s rules with | Some r' -> begin match is_sep_nonempty_list rules r' with | Some rw -> @@ -373,7 +295,7 @@ let replace_prods r (_, p) = let reduce_rule inline rules r (rs, rws) = match recognize rules r with | Some rw -> - if inline then (rs, M.add r.name rw rws) + if inline then (rs, Subst.M.add r.name rw rws) else replace_prods r rw :: rs, rws | None -> r :: rs, rws @@ -387,5 +309,7 @@ let reduce_rule inline rules r (rs, rws) = Moreover, if [inline] is [true] then the concerned rules are deleted and inlined at each of their instances. *) let reduce inline s = - let rules, rws = List.fold_right (reduce_rule inline s) s ([], M.empty) in + let rules, rws = List.fold_right (reduce_rule inline s) s + ([], Subst.M.empty) + in if inline then rewrite rws rules else rules diff --git a/src/specialize.ml b/src/specialize.ml new file mode 100644 index 0000000..8a8fc59 --- /dev/null +++ b/src/specialize.ml @@ -0,0 +1,110 @@ +(** Specialize functional non-terminals. *) + +open List +open ExtendedAst + +module S = Set.Make(String) +module M = Map.Make(Int) + +let hash = Hashtbl.hash + +let fresh = + let h = Hashtbl.create 32 in + fun symbols f -> + let n, fn = match Hashtbl.find_opt h f with + | Some n -> + let rec next n = + let n = n + 1 in + let fn = Format.sprintf "%s_%d" f n in + if Common.Symbols.is_symbol fn symbols then next (n + 1) else n, fn + in + next n + | None -> + 0, Format.sprintf "%s_0" f + in + Hashtbl.replace h f n; + fn + +(** Specialize an actual. + If a symbol is functional, we introduce a new corresponding specialized + rule. *) +let rec specialize_actual symbols spec new_rules_map = + let specialize = specialize_actual symbols spec in + function + | Symbol (_, []) as s -> + new_rules_map, s + | Symbol (f, xs) when Common.Symbols.is_defined f symbols <> None -> + let h = hash (f, xs) in + let new_rules_map, r = match M.find_opt h new_rules_map with + | Some r -> + new_rules_map, r + | None -> + begin match Common.find_rule f spec with + | Some r -> + let name = fresh symbols f in + let s = Subst.make_subst r.params xs in + let prods = map (map (Subst.subst_actual s)) r.prods in + let r = { name; prods; params = [] } in + (* to handle recursive rules *) + let new_rules_map = M.add h r new_rules_map in + let new_rules_map, prods = specialize_productions symbols spec + new_rules_map prods + in + let r = { name; prods; params = [] } in + M.add h r new_rules_map, r + | None -> assert false + end + in + new_rules_map, Symbol (r.name, []) + | Symbol (f, xs) -> + let new_rules_map, xs = specialize_actuals symbols spec new_rules_map xs in + new_rules_map, Symbol (f, xs) + | Pattern p -> + let new_rules_map, p = fold_map_pattern specialize new_rules_map p in + new_rules_map, Pattern p + | Modifier (x, m) -> + let new_rules_map, x = specialize new_rules_map x in + new_rules_map, Modifier (x, m) + | Anonymous p -> + let new_rules_map, p = specialize_productions symbols spec new_rules_map p in + new_rules_map, Anonymous p + +(** Specialize actuals. *) +and specialize_actuals symbols spec new_rules_map xs = + let new_rules_map, xs = List.fold_left (fun (new_rules_map, actuals) a -> + let new_rules_map, a = specialize_actual symbols spec new_rules_map a in + new_rules_map, a :: actuals) + (new_rules_map, []) xs + in + new_rules_map, List.rev xs + +(** Specialize a production by specializing its actuals. *) +and specialize_production symbols spec (new_rules_map, prods) p = + let new_rules_map, p = specialize_actuals symbols spec new_rules_map p in + new_rules_map, p :: prods + +(** Specialize productions. *) +and specialize_productions symbols spec new_rules_map prods = + let new_rules_map, prods = fold_left (specialize_production symbols spec) + (new_rules_map, []) prods + in + new_rules_map, rev prods + +(** Specialize a rule by specializing its productions. *) +let specialize_rule symbols spec (new_rules_map, rules) r = + if r.params = [] then + let new_rules_map, prods = specialize_productions symbols spec new_rules_map + r.prods + in + new_rules_map, + { r with prods } :: rules + else + new_rules_map, rules + +(** Specialize a grammar by specializing its rules, producing new specialized + rules. *) +let specialize symbols spec = + let new_rules_map, spec = fold_left (specialize_rule symbols spec) + (M.empty, []) spec + in + rev spec @ map snd (M.bindings new_rules_map) diff --git a/src/subst.ml b/src/subst.ml new file mode 100644 index 0000000..6888c25 --- /dev/null +++ b/src/subst.ml @@ -0,0 +1,30 @@ +(** Symbol substitution. *) + +open ExtendedAst + +(** A map with identifiers keys. *) +module M = Map.Make(String) + +(** Performs a substitution over an actual. + Only non functional symbols are substituted. *) +let rec subst_actual s a = + let subst_production = List.map (subst_actual s) in + match a with + | Symbol (f, []) -> + begin try + M.find f s + with Not_found -> Symbol (f, []) + end + | Symbol (f, xs) -> + Symbol (f, List.map (subst_actual s) xs) + | Pattern p -> + Pattern (map_pattern (subst_actual s) p) + | Modifier (x, m) -> + Modifier (subst_actual s x, m) + | Anonymous ps -> + Anonymous (List.map subst_production ps) + +(** [make_subst xs ys] builds a substitution that is a map linking each element + of [xs] to the corresponding (by index) element of [ys]. *) +let make_subst xs ys = + List.fold_left2 (fun s x y -> M.add x y s) M.empty xs ys diff --git a/src/transform.ml b/src/transform.ml index 6eab237..5390552 100644 --- a/src/transform.ml +++ b/src/transform.ml @@ -14,7 +14,9 @@ let rec transform_actual symbols = function transform_modifier symbols x m | Ast.Anonymous gs -> let gs = map (transform_group symbols) gs in - Anonymous gs + match gs with + | [[ a ]] -> a + | _ -> Anonymous gs (** Transform all non defined symbols (see {!Common.Symbols.is_defined}) corresponding to Menhir standard library symbols ([list], [pair], [option],
" -let prod_bar () = "@[
" +let def fmt = fprintf fmt "@;" +let prod_bar fmt = fprintf fmt "@[
" let print_rule_name = - print_rule_name_with "" "" + print_rule_name_with + (print_string' "") + (print_string' "") let print_symbol symbols = - print_symbol_aux "" "" symbols - -let opt _ print = - print_string ""; - print (); - print_string "" + print_symbol_aux symbols + (print_string' "") + (print_string' "") -let plus e print = - print_string ""; - par e print; - print_string "" -let star e print = - print_string ""; - par e print; - print_string "" +let opt _ print fmt = + fprintf fmt "%a%t%a" + print_string "" + print + print_string "" +let plus e print fmt = + fprintf fmt "%a%t%a" + print_string "" + (par e print) + print_string "" +let star e print fmt = + fprintf fmt "%a%t%a" + print_string "" + (par e print) + print_string "" diff --git a/src/helpers/latexBacknaur.ml b/src/helpers/latexBacknaur.ml index e6b7afc..fd0a14b 100644 --- a/src/helpers/latexBacknaur.ml +++ b/src/helpers/latexBacknaur.ml @@ -1,34 +1,40 @@ +open Format + include MiniLatex -let print_header symbols = +let print_header symbols fmt = documentclass - (usepackage "[epsilon]" "backnaur" ^ "@;" ^ - "\\newenvironment{" ^ grammarname ^ - "}{\\begin{bnf*}}{\\end{bnf*}}@;@;" ^ - newcommand "gramsp" 0 None "\\ensuremath{\\bnfsp}" ^ - newcommand "gramterm" 1 None "\\bnfts{#1}" ^ - newcommand "gramnonterm" 1 None "\\ensuremath{\\bnfpn{#1}}" ^ - newcommand "gramfunc" 1 None "\\ensuremath{\\bnfpn{#1}}" ^ - newcommand "grameps" 0 None "\\ensuremath{\\bnfes}" ^ - newcommand "gramprod" 3 (Some "\\textwidth") - "\\bnfprod{#2}{%%@;<0 2>\ - \\begin{minipage}[t]{#1}@;<0 4>\ - $#3$@;<0 2>\ - \\end{minipage}}" ^ - newcommand "grambar" 0 None "\\hspace*{-2.5em}\\bnfor\\hspace*{1.2em}" ^ - newcommand "grambaranon" 0 None "\\ensuremath{\\bnfor}" ^ "@;"); - begin_document "" symbols + (fun fmt -> fprintf fmt + "%a@;\ + \\newenvironment{%s}{\\begin{bnf*}}{\\end{bnf*}}@;@;\ + %a%a%a%a%a%a%a%a@;" + usepackage ("[epsilon]", "backnaur") + grammarname + newcommand ("gramsp", 0, None, print_string' "\\ensuremath{\\bnfsp}") + newcommand ("gramterm", 1, None, print_string' "\\bnfts{#1}") + newcommand ("gramnonterm", 1, None, print_string' "\\ensuremath{\\bnfpn{#1}}") + newcommand ("gramfunc", 1, None, print_string' "\\ensuremath{\\bnfpn{#1}}") + newcommand ("grameps", 0, None, print_string' "\\ensuremath{\\bnfes}") + newcommand ("gramprod", 3, (Some "\\textwidth"), + fun fmt -> fprintf fmt + "\\bnfprod{#2}{%%@;<0 2>\ + \\begin{minipage}[t]{#1}@;<0 4>\ + $#3$@;<0 2>\ + \\end{minipage}}") + newcommand ("grambar", 0, None, print_string' "\\hspace*{-2.5em}\\bnfor\\hspace*{1.2em}") + newcommand ("grambaranon", 0, None, print_string' "\\ensuremath{\\bnfor}")); + begin_document (fun _ -> ()) fmt symbols -let def () = "}{" -let prod_bar () = "\\" ^ command "grambar" ^ " " -let bar () = "@ \\" ^ command "grambaranon" ^ "@ " -let space () = "\\" ^ command "gramsp" ^ "@ " -let break () = "\\\\@;" -let eps () = "\\" ^ command "grameps" +let def fmt = print_string fmt "}{" +let prod_bar fmt = fprintf fmt "\\%s " (command "grambar") +let bar fmt = fprintf fmt "@ \\%s@ " (command "grambaranon") +let space fmt = fprintf fmt "\\%s@ " (command "gramsp") +let break fmt = fprintf fmt "\\\\@;" +let eps fmt = fprintf fmt "\\%s" (command "grameps") let print_rule_name = print_rule_name_raw -let rule_begin () = - print_fmt "@[\\%s{" (command "gramprod") -let rule_end () = - print_string "}\\\\@]@;" +let rule_begin fmt = + fprintf fmt "@[\\%s{" (command "gramprod") +let rule_end fmt = + fprintf fmt "}\\\\@]" diff --git a/src/helpers/latexSyntax.ml b/src/helpers/latexSyntax.ml index 309f4e2..b0ea577 100644 --- a/src/helpers/latexSyntax.ml +++ b/src/helpers/latexSyntax.ml @@ -1,8 +1,9 @@ open Common +open Format include MiniLatex -let print_header symbols = +let print_header symbols fmt = let max = let compare_length s1 s2 = compare (String.length s2) (String.length s1) in let max = try List.(hd (sort compare_length (Symbols.defined symbols))) with _ -> " " in @@ -10,10 +11,10 @@ let print_header symbols = let rec aux = function | [] -> "" | [x] -> x - | x :: xs -> x ^ ", " ^ aux xs + | x :: xs -> sprintf "%s, %s" x (aux xs) in function | [] -> "" - | xs -> "(" ^ aux xs ^ ")" + | xs -> sprintf "(%s)" (aux xs) in let max = match Common.Symbols.is_defined max symbols with | Some xs -> max ^ params xs @@ -22,37 +23,43 @@ let print_header symbols = Re.Str.global_replace (Re.Str.regexp "_") "\\_" max in documentclass - (usepackage "" "syntax" ^ "@;" ^ - "\\newenvironment{" ^ grammarname ^ - "}{\\begin{grammar}}{\\end{grammar}}@;@;" ^ - newcommand "gramterm" 1 None "\\lit{#1}" ^ - newcommand "gramnonterm" 1 None "\\synt{#1}" ^ - newcommand "gramfunc" 1 None "\\synt{#1}" ^ - newcommand "gramdef" 0 None "::=" ^ - newcommand "grambar" 0 None "\\alt" ^ - newcommand "grambaranon" 0 None "\\ensuremath{|}" ^ - newcommand "grameps" 0 None "\\ensuremath{\\epsilon}" ^ - "\\newlength{\\" ^ command "grammaxindent" ^ - "}@;\ - \\settowidth{\\" ^ command "grammaxindent" ^ - "}{\\synt{" ^ max ^ "} \\" ^ - command "gramdef" ^ "{} }@;@;"); - begin_document - ("\\setlength{\\grammarindent}{\\" ^ command "grammaxindent" ^ "}") - symbols + (fun fmt -> fprintf fmt + "%a@;\ + \\newenvironment{%s}{\\begin{grammar}}{\\end{grammar}}@;@;\ + %a%a%a%a%a%a%a\ + \\newlength{\\%s}@;\ + \\settowidth{\\%s}{\\synt{%s} \\%s{} }@;@;" + usepackage ("", "syntax") + grammarname + newcommand ("gramterm", 1, None, print_string' "\\lit{#1}") + newcommand ("gramnonterm", 1, None, print_string' "\\synt{#1}") + newcommand ("gramfunc", 1, None, print_string' "\\synt{#1}") + newcommand ("gramdef", 0, None, print_string' "::=") + newcommand ("grambar", 0, None, print_string' "\\alt") + newcommand ("grambaranon", 0, None, print_string' "\\ensuremath{|}") + newcommand ("grameps", 0, None, print_string' "\\ensuremath{\\epsilon}") + (command "grammaxindent") + (command "grammaxindent") + max + (command "gramdef")); + begin_document (fun fmt -> fprintf fmt + "\\setlength{\\grammarindent}{\\%s}" + (command "grammaxindent")) + fmt symbols -let def () = " \\" ^ command "gramdef" ^ "{} " -let prod_bar () = "\\" ^ command "grambar" ^ " " -let bar () = "@ \\" ^ command "grambaranon{}" ^ "@ " -let space () = "@ " -let break () = "@;" -let eps () = "\\" ^ command "grameps" +let def fmt = fprintf fmt " \\%s{} " (command "gramdef") +let prod_bar fmt = fprintf fmt "\\%s " (command "grambar") +let bar fmt = fprintf fmt "@ \\%s@ " (command "grambaranon{}") +let space fmt = fprintf fmt "@ " +let break fmt = fprintf fmt "@;" +let eps fmt = fprintf fmt "\\%s" (command "grameps") -let print_rule_name name print_params = - print_string "<"; - print_rule_name_raw name print_params; - print_string ">" -let rule_begin () = - print_string "@[" -let rule_end () = - print_string "@]@;@;" +let print_rule_name print_params fmt name = + fprintf fmt "%a%a%a" + print_string "<" + (print_rule_name_raw print_params) name + print_string ">" +let rule_begin fmt = + fprintf fmt "@[" +let rule_end fmt = + fprintf fmt "@]@;" diff --git a/src/helpers/latexTabular.ml b/src/helpers/latexTabular.ml index 9d8811e..1b63c86 100644 --- a/src/helpers/latexTabular.ml +++ b/src/helpers/latexTabular.ml @@ -1,37 +1,43 @@ +open Format + include MiniLatex -let print_header symbols = +let print_header symbols fmt = documentclass - (usepackage "" "longtable" ^ - usepackage "" "tabu" ^ - "@;\ - \\newenvironment{" ^ grammarname ^ "}{@;<0 2>\ - \\begin{longtabu}{r%@{}c%@{}X%@{}}@;\ - }{@;<0 2>\ - \\end{longtabu}@;}@;@;" ^ - newcommand "gramsp" 0 None "\\quad" ^ - newcommand "gramdef" 0 None ("$\\" ^ command "gramsp" ^ "::=\\" ^ command "gramsp" ^ "$") ^ - newcommand "grambar" 0 None ("$\\" ^ command "gramsp" ^ "|\\" ^ command "gramsp" ^ "$") ^ - newcommand "grambaranon" 0 None "$|$" ^ - newcommand "grameps" 0 None "\\ensuremath{\\epsilon}" ^ - newcommand "gramnonterm" 1 None "\\ensuremath{\\langle\\textnormal{#1}\\rangle}" ^ - newcommand "gramfunc" 1 None ("\\" ^ command "gramnonterm" ^ "{#1}") ^ - newcommand "gramterm" 1 None "#1" ^ "@;"); - begin_document "" symbols + (fun fmt -> fprintf fmt + "%a%a@;\ + \\newenvironment{%s}{@;<0 2>\ + \\begin{longtabu}{r%@{}c%@{}X%@{}}@;\ + }{@;<0 2>\ + \\end{longtabu}@;}@;@;\ + %a%a%a%a%a%a%a%a@;" + usepackage ("", "longtable") + usepackage ("", "tabu") + grammarname + newcommand ("gramsp" ,0, None, print_string' "\\quad") + newcommand ("gramdef", 0, None, fun fmt -> + fprintf fmt "$\\%s::=\\%s$" (command "gramsp") (command "gramsp")) + newcommand ("grambar", 0, None, fun fmt -> + fprintf fmt "$\\%s|\\%s$" (command "gramsp") (command "gramsp")) + newcommand ("grambaranon", 0, None, print_string' "$|$") + newcommand ("grameps", 0, None, print_string' "\\ensuremath{\\epsilon}") + newcommand ("gramnonterm", 1, None, print_string' "\\ensuremath{\\langle\\textnormal{#1}\\rangle}") + newcommand ("gramfunc", 1, None, fun fmt -> fprintf fmt "\\%s{#1}" (command "gramnonterm")) + newcommand ("gramterm", 1, None, print_string' "#1")); + begin_document (fun _ -> ()) fmt symbols -let def () = "& \\" ^ command "gramdef" ^ " & " -let prod_bar () = "& \\" ^ command "grambar" ^ " &" -let bar () = "@ \\" ^ command "grambaranon{}" ^ "@ " -let space () = "@ " -let break () = "\\\\@;" -let eps () = "\\" ^ command "grameps" +let def fmt = fprintf fmt "& \\%s & " (command "gramdef") +let prod_bar fmt = fprintf fmt "& \\%s &" (command "grambar") +let bar fmt = fprintf fmt "@ \\%s@ " (command "grambaranon{}") +let space fmt = fprintf fmt "@ " +let break fmt = fprintf fmt "\\\\@;" +let eps fmt = fprintf fmt "\\%s" (command "grameps") -let print_rule_name name print_params = - print_fmt "\\%s{" (command "gramfunc"); - print_rule_name_raw name print_params; - print_string "}" -let rule_begin () = - print_string "@[" -let rule_end () = - print_string "@;\\\\& & \\\\"; - print_string "@]@;@;" +let print_rule_name print_params fmt name = + fprintf fmt "\\%s{%a}" + (command "gramfunc") + (print_rule_name_raw print_params) name +let rule_begin fmt = + fprintf fmt "@[" +let rule_end fmt = + fprintf fmt "@;\\\\& & \\\\@]@;" diff --git a/src/helpers/miniHelper.ml b/src/helpers/miniHelper.ml index ec7ef03..f597a58 100644 --- a/src/helpers/miniHelper.ml +++ b/src/helpers/miniHelper.ml @@ -1,40 +1,38 @@ open Format -let p = ref std_formatter -let print_string s = fprintf !p (Scanf.format_from_string Re.Str.(global_replace (regexp "%") "%%" s) "") -let print_param = print_string -let print_fmt s = fprintf !p s +let print_string fmt s = + pp_print_string fmt s +let print_string' s fmt = print_string fmt s -let production_begin _ = - print_string "@[" -let production_end _ = - print_string "@]" +let print_param = print_string -let enclose print op cl = - print_string op; print (); print_string cl +let production_begin fmt = + fprintf fmt "@[" +let production_end fmt = + fprintf fmt "@]" -let par e print = - if e then enclose print "(" ")" else print () +let enclose print op cl fmt = + fprintf fmt "%t%t%t" + op print cl -let print_opt_params = function - | Some pps -> pps () - | None -> () +let par e print fmt = + (if e then enclose print (print_string' "(") (print_string' ")") else print) + fmt -let print_rule_name_with opening closing name print_params = - print_string (opening ^ name); - print_opt_params print_params; - print_string closing +let print_rule_name_with opening closing print_params fmt name = + fprintf fmt "%t%a%t%t" opening print_string name print_params closing -let print_symbol_aux opening closing symbols s print_params = +let print_symbol_aux symbols opening closing print_params fmt s = let is_def = match Common.Symbols.is_defined s symbols with | Some _ -> true | None -> false in - if is_def then print_string opening; let s = match Hashtbl.find_opt Lexer.tokens s with | Some a -> if !Options.no_aliases then a else "'" ^ s ^ "'" | None -> s in - print_string s; - print_params (); - if is_def then print_string closing + fprintf fmt "%t%a%t%t" + (if is_def then opening else fun _ -> ()) + print_string s + print_params + (if is_def then closing else fun _ -> ()) diff --git a/src/helpers/miniHtml.ml b/src/helpers/miniHtml.ml index 08befd1..4943d1e 100644 --- a/src/helpers/miniHtml.ml +++ b/src/helpers/miniHtml.ml @@ -1,59 +1,66 @@ +open Format + include MiniHelper -let print_header_with_style style = - print_string - ("@[@;\ - @[@;\ - @[@;\ - Grammar@;\ - @[\ - @]@;@;@;\ - @[@;@;\ - @[@;@;") +let print_header_with_style fmt style = + fprintf fmt + "@[@;\ + @[@;\ + @[@;\ + Grammar@;\ + @[\ + @]@;@;@;\ + @[@;@;\ + @[
@;@;" + style -let print_footer () = - print_string +let print_footer fmt = + fprintf fmt "@]@;
@;\ @]@;@;\ @]@;@]@." -let bar () = " | " -let space () = "@ " -let break () = "@;" -let eps () = "ε" +let bar fmt = pp_print_string fmt " | " +let space fmt = fprintf fmt "@ " +let break fmt = fprintf fmt "@;" +let eps fmt = pp_print_string fmt "ε" -let rule_begin () = - print_string "@[