diff -Nru menhir-20171013/CHANGES.md menhir-20171206/CHANGES.md --- menhir-20171013/CHANGES.md 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/CHANGES.md 2017-12-06 20:41:09.000000000 +0000 @@ -1,5 +1,18 @@ # Changes +## 2017/12/06 + +* Fix the termination test that takes place before parameterized symbols are + expanded away. The previous test was both unsound (it would accept grammars + whose termination did not terminate) and incomplete (it would reject + grammars whose termination did terminate). The new test is believed to be + sound and complete. + +## 2017/11/12 + +* Documentation: clarify the fact that `%type` declarations should carry types + whose meaning does not depend on the headers `%{ ... %}`. + ## 2017/10/13 * Remove the OCaml version check at installation time, for greater simplicity, diff -Nru menhir-20171013/debian/changelog menhir-20171206/debian/changelog --- menhir-20171013/debian/changelog 2017-11-23 07:42:12.000000000 +0000 +++ menhir-20171206/debian/changelog 2017-12-13 20:31:45.000000000 +0000 @@ -1,3 +1,11 @@ +menhir (20171206-1) unstable; urgency=medium + + * New upstream version. + * Standards-version 4.1.2 (no change) + * Drop debian/source/local-options + + -- Ralf Treinen Wed, 13 Dec 2017 21:31:45 +0100 + menhir (20171013-1) unstable; urgency=medium * New upstream version. diff -Nru menhir-20171013/debian/control menhir-20171206/debian/control --- menhir-20171013/debian/control 2017-11-23 07:42:12.000000000 +0000 +++ menhir-20171206/debian/control 2017-12-13 20:31:45.000000000 +0000 @@ -12,7 +12,7 @@ ocamlbuild, ocaml-findlib, dh-ocaml (>= 0.9) -Standards-Version: 4.1.1 +Standards-Version: 4.1.2 Homepage: http://gallium.inria.fr/~fpottier/menhir/ Vcs-Git: https://anonscm.debian.org/git/pkg-ocaml-maint/packages/menhir.git Vcs-Browser: https://anonscm.debian.org/git/pkg-ocaml-maint/packages/menhir.git diff -Nru menhir-20171013/doc/main.tex menhir-20171206/doc/main.tex --- menhir-20171013/doc/main.tex 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/doc/main.tex 2017-12-06 20:41:09.000000000 +0000 @@ -463,6 +463,7 @@ mandatory \percentpercent keyword. \subsubsection{Headers} +\label{sec:decls:headers} A header is a piece of \ocaml code, surrounded with \dheader{and}. It is copied verbatim at the beginning of the \ml file. It typically contains \ocaml @@ -539,13 +540,20 @@ For start symbols, providing an \ocaml type is mandatory, but is usually done as part of the \dstart declaration. For other symbols, it is optional. Providing type information can improve the quality of \ocaml's type error messages. -% TEMPORARY type information can be mandatory in --coq mode; document? A \dtype declaration may concern not only a nonterminal symbol, such as, say, \texttt{expression}, but also a fully applied parameterized nonterminal symbol, such as \texttt{list(expression)} or \texttt{separated\_list(COMMA, option(expression))}. +The types provided as part of \dtype declarations are copied verbatim to the +\ml and \mli files. In contrast, headers (\sref{sec:decls:headers}) are copied +to the \ml file only. For this reason, the types provided as part of \dtype +declarations must make sense both in the presence and in the absence of these +headers. They should typically be fully qualified types. + +% TEMPORARY type information can be mandatory in --coq mode; document? + \subsubsection{Start symbols} \label{sec:start} @@ -3651,15 +3659,15 @@ with dots, such as \texttt{my.name}. An attribute payload is an OCaml expression of arbitrary type, such as \texttt{1} or \verb+"&&"+ or \verb+print_int+. Following the syntax of OCaml's attributes, an attribute's name and payload -are separated with one or more spaces, and are delimited by \verb+@[+ and -\verb+]+. Thus, \verb+@[cost 1]+ and \verb+@[printer print_int]+ are examples +are separated with one or more spaces, and are delimited by \verb+[@+ and +\verb+]+. Thus, \verb+[@cost 1]+ and \verb+[@printer print_int]+ are examples of attributes. An attribute can be attached at one of four levels: % grammar-level attributes, %[@foo ...] -% terminal attribute, %token BAR @[foo ...] -% nonterminal attribute, bar @[foo ...]: ... -% producer attribute, e = expr @[foo ...] +% terminal attribute, %token BAR [@foo ...] +% nonterminal attribute, bar [@foo ...]: ... +% producer attribute, e = expr [@foo ...] \begin{enumerate} \item An attribute can be attached with the grammar. @@ -3667,14 +3675,14 @@ in the declarations section (\sref{sec:decls}). For example, the following is a valid declaration: \begin{verbatim} - %@[trace true] + %[@trace true] \end{verbatim} \item An attribute can be attached with a terminal symbol. Such an attribute must follow the declaration of this symbol. For example, the following is a valid declaration of the terminal symbol \verb+INT+: \begin{verbatim} - %token INT @[cost 0] @[printer print_int] + %token INT [@cost 0] [@printer print_int] \end{verbatim} \item An attribute can be attached with a nonterminal symbol. @@ -3682,13 +3690,13 @@ immediately after the name of this symbol. For instance, the following is a valid definition of the nonterminal symbol \verb+expr+: \begin{verbatim} - expr @[default EConst 0]: + expr [@default EConst 0]: i = INT { EConst i } | e1 = expr PLUS e2 = expr { EAdd (e1, e2) } \end{verbatim} An attribute can be attached with a parameterized nonterminal symbol: \begin{verbatim} - option @[default None] (X): + option [@default None] (X): { None } | x = X { Some x } \end{verbatim} @@ -3702,7 +3710,7 @@ an attribute is attached with the producer \verb+expr*+: \begin{verbatim} exprs: - LPAREN es = expr* @[list true] RPAREN { es } + LPAREN es = expr* [@list true] RPAREN { es } \end{verbatim} \end{enumerate} @@ -3713,11 +3721,11 @@ and nonterminal) symbols in one go, via an \dattribute declaration, which must be placed in the declarations section (\sref{sec:decls}). For instance, the following declaration attaches both of the attributes -\verb+@[cost 0]+ and \verb+@[precious false]+ +\verb+[@cost 0]+ and \verb+[@precious false]+ with each of the symbols \verb+INT+ and \verb+id+: \begin{verbatim} - %attribute INT id @[cost 0] @[precious false] + %attribute INT id [@cost 0] [@precious false] \end{verbatim} An \dattribute declaration can be considered syntactic sugar: it is desugared away in terms of the four forms of attributes presented earlier. (The command @@ -3728,12 +3736,12 @@ If an attribute is attached with a parameterized nonterminal symbol, then, when this symbol is expanded away, the attribute is transmitted to every instance. For instance, in an earlier example, the attribute -\verb+@[default None]+ was attached with the parameterized symbol +\verb+[@default None]+ was attached with the parameterized symbol \verb+option+. Then, every instance of \verb+option+, such as \verb+option(expr)+, \verb+option(COMMA)+, and so on, inherits this attribute. To attach an attribute with one specific instance only, one can use an \dattribute declaration. For instance, -the declaration \verb+%attribute option(expr) @[cost 10]+ attaches +the declaration \verb+%attribute option(expr) [@cost 10]+ attaches an attribute with the nonterminal symbol \verb+option(expr)+, but not with the symbol \verb+option(COMMA)+. diff -Nru menhir-20171013/doc/version.tex menhir-20171206/doc/version.tex --- menhir-20171013/doc/version.tex 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/doc/version.tex 2017-12-06 20:41:09.000000000 +0000 @@ -1 +1 @@ -\gdef\menhirversion{20171013} +\gdef\menhirversion{20171206} Binary files /tmp/tmpJokrW6/GiF32ufDTh/menhir-20171013/manual.pdf and /tmp/tmpJokrW6/Owgac4zBdK/menhir-20171206/manual.pdf differ diff -Nru menhir-20171013/src/CheckSafeParameterizedGrammar.ml menhir-20171206/src/CheckSafeParameterizedGrammar.ml --- menhir-20171013/src/CheckSafeParameterizedGrammar.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/CheckSafeParameterizedGrammar.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,184 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +let value = Positions.value +open Syntax + +(* This test accepts a parameterized grammar, with the restriction that all + parameters must have sort [*]. This implies that the head of every + application must be a toplevel nonterminal symbol: it cannot be a formal + parameter of the current rule. *) + +(* -------------------------------------------------------------------------- *) + +(* This flag causes graph edges to be logged on the standard error channel. *) + +let debug = false + +(* -------------------------------------------------------------------------- *) + +(* For syntactic convenience, the code is wrapped in a functor. *) + +module Run (G : sig val g : grammar end) = struct +open G + +(* -------------------------------------------------------------------------- *) + +(* We build a graph whose vertices are all formal parameters of all rules. A + formal parameter is represented as a pair of a nonterminal symbol and a + 0-based integer index (the number of this parameter within this rule). + We use OCaml's generic equality and hash functions at this type. *) + +type formal = + symbol * int + +let formals (nt, rule) : formal list = + let arity = List.length rule.pr_parameters in + Misc.mapi arity (fun i -> nt, i) + +let formals : formal array = + StringMap.bindings g.p_rules + |> List.map formals + |> List.concat + |> Array.of_list + +(* -------------------------------------------------------------------------- *) + +(* The graph edges are as follows. First, for every rule of the following form: + + F(..., X, ...): # where X is the i-th formal parameter of F + ... G(..., X, ...) ... # where X is the j-th actual parameter of G + + there is a "safe" edge from the formal parameter F/i to the formal G/j. This + reflects the fact that there is a flow from F/i to G/j. It is "safe" in the + sense that it is not size-increasing: the same parameter X is passed from F + to G. + + Second, for every rule of the following form: + + F(..., X, ...): # where X is the i-th formal parameter of F + ... G(..., H(..., X, ...) , ...) ... + # where H(...) is the j-th actual parameter of G + + there is a "dangerous" edge from the formal parameter F/i to the formal G/j. + This reflects the fact that there is a flow from F/i to G/j. This flow is + "dangerous" in the sense that it is size-increasing: X is transformed to + H(..., X, ...). *) + +type edge = + | Safe + | Dangerous + +let successors_parameter (f : edge -> formal -> unit) x (param : parameter) = + match param with + | ParameterVar _ -> + (* This is not an application. No successors. *) + () + | ParameterApp (sym, params) -> + let nt = value sym in + (* If [x] occurs in the [i]-th actual parameter of this application, + then there is an edge to the formal [nt, i]. Whether it is a safe + or dangerous edge depends on whether [x] occurs shallow or deep. *) + List.iteri (fun i param -> + if Parameters.occurs_shallow x param then + f Safe (nt, i) + else if Parameters.occurs_deep x param then + f Dangerous (nt, i) + ) params + | ParameterAnonymous _ -> + assert false + +let successors_producer f x ((_, param, _) : producer) = + successors_parameter f x param + +let successors_branch f x (branch : parameterized_branch) = + List.iter (successors_producer f x) branch.pr_producers + +let successors f ((nt, i) : formal) = + let rule = try StringMap.find nt g.p_rules with Not_found -> assert false in + let x = try List.nth rule.pr_parameters i with Failure _ -> assert false in + List.iter (successors_branch f x) rule.pr_branches + +(* -------------------------------------------------------------------------- *) + +(* We now have a full description of the graph. *) + +module G = struct + type node = formal + let n = Array.length formals + let index = Misc.inverse formals + let successors f = successors (fun _ target -> f target) + let iter f = Array.iter f formals +end + +(* -------------------------------------------------------------------------- *) + +(* Display the graph. *) + +let () = + if debug then + G.iter (fun (x, i) -> + successors (fun edge (y, j) -> + let kind = match edge with Safe -> "safe" | Dangerous -> "dangerous" in + Printf.eprintf "%s/%d ->(%s) %s/%d\n" x i kind y j + ) (x, i) + ) + +(* -------------------------------------------------------------------------- *) + +(* Compute its strongly connected components, ignoring the distinction between + safe and dangerous edges. *) + +module T = Tarjan.Run(G) + +(* -------------------------------------------------------------------------- *) + +(* The safety criterion is: no dangerous edge is part of a cycle. Indeed, if + this criterion is satisfied, then expansion must terminate: only a finite + number of well-sorted terms (involving toplevel symbols and applications) + can arise. (This sentence is not a proof!) Conversely, if a dangerous edge + appears in a cycle, then expansion will not terminate. (That is, unless the + dangerous cycle is unreachable. We choose to reject it anyway in that case.) + In other words, this criterion is sound and complete. *) + +(* Checking that no dangerous edge is part of a cycle is done by examining the + source and destination of every dangerous edge and ensuring that they lie + in distinct components. *) + +let () = + G.iter (fun source -> + successors (fun edge target -> + match edge with + | Safe -> + () + | Dangerous -> + if T.representative source = T.representative target then + let (nt, i) = source in + Error.error [] + "the parameterized nonterminal symbols in this grammar\n\ + cannot be expanded away: expansion would not terminate.\n\ + The %s formal parameter of \"%s\" grows without bound." + (Misc.nth (i + 1)) nt + + ) source + ) + +end (* of the functor *) + +(* -------------------------------------------------------------------------- *) + +(* Re-package the above functor as a function. *) + +let check g = + let module T = Run(struct let g = g end) in + () diff -Nru menhir-20171013/src/CheckSafeParameterizedGrammar.mli menhir-20171206/src/CheckSafeParameterizedGrammar.mli --- menhir-20171013/src/CheckSafeParameterizedGrammar.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/CheckSafeParameterizedGrammar.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,21 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* This test accepts a parameterized grammar, with the restriction that all + parameters must have sort [*]. Parameters of higher sort must be eliminated + prior to running this test: see [SelectiveExpansion]. *) + +(* This test succeeds if and only if the expansion of this grammar is safe, + that is, terminates. *) + +val check: Syntax.grammar -> unit diff -Nru menhir-20171013/src/Drop.ml menhir-20171206/src/Drop.ml --- menhir-20171013/src/Drop.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/Drop.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,146 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +let value = Positions.value +(* The source. *) +module S = Syntax +(* The target. *) +module T = UnparameterizedSyntax + +(* -------------------------------------------------------------------------- *) + +(* Most of the translation is straightforward. *) + +let drop_parameter (param : S.parameter) : S.symbol = + match param with + | S.ParameterVar sym -> + value sym + | S.ParameterApp _ -> + (* The grammar should not have any parameterized symbols. *) + assert false + | S.ParameterAnonymous _ -> + assert false + +let drop_producer ((id, param, attrs) : S.producer) : T.producer = + { + T.producer_identifier = value id; + T.producer_symbol = drop_parameter param; + T.producer_attributes = attrs + } + +let drop_branch (branch : S.parameterized_branch) : T.branch = + { + T.branch_position = branch.S.pr_branch_position; + T.producers = List.map drop_producer branch.S.pr_producers; + T.action = branch.S.pr_action; + T.branch_prec_annotation = branch.S.pr_branch_prec_annotation; + T.branch_production_level = branch.S.pr_branch_production_level + } + +let drop_rule (rule : S.parameterized_rule) : T.rule = + (* The grammar should not have any parameterized symbols. *) + assert (rule.S.pr_parameters = []); + (* The [%public] flag is dropped. *) + { + T.branches = List.map drop_branch rule.S.pr_branches; + T.positions = rule.S.pr_positions; + T.inline_flag = rule.S.pr_inline_flag; + T.attributes = rule.S.pr_attributes; + } + +(* -------------------------------------------------------------------------- *) + +(* We must store [%type] declarations and [%on_error_reduce] declarations in + StringMaps, whereas so far they were represented as lists. *) + +let drop_declarations + (kind : string) + (f : 'info1 -> 'info2) + (decls : (S.parameter * 'info1) list) +: 'info2 StringMap.t = + + (* Now is as good a time as any to check against multiple declarations + concerning a single nonterminal symbol. Indeed, if we did not rule out + this situation, then we would have to keep only one (arbitrarily chosen) + declaration. To do this, we first build a map of symbols to info *and* + position... *) + List.fold_left (fun accu (param, info) -> + let symbol = drop_parameter param in + begin match StringMap.find symbol accu with + | exception Not_found -> + () + | (_, position) -> + Error.error [position; Parameters.position param] + "there are multiple %s declarations for the symbol %s." + kind symbol + end; + StringMap.add symbol (f info, Parameters.position param) accu + ) StringMap.empty decls + (* ... then drop the positions. *) + |> StringMap.map (fun (info, _) -> info) + +let drop_type_declarations = + drop_declarations "%type" value + +let drop_on_error_reduce_declarations = + drop_declarations "%on_error_reduce" (fun x -> x) + +(* -------------------------------------------------------------------------- *) + +(* We must eliminate (that is, desugar) [%attribute] declarations. We examine + them one by one and attach these attributes with terminal or nonterminal + symbols, as appropriate. This is entirely straightforward. *) + +let add_attribute (g : T.grammar) param attr : T.grammar = + let symbol = drop_parameter param in + match StringMap.find symbol g.T.tokens with + | props -> + (* This is a terminal symbol. *) + let props = { props with S.tk_attributes = attr :: props.S.tk_attributes } in + { g with T.tokens = StringMap.add symbol props g.T.tokens } + | exception Not_found -> + match StringMap.find symbol g.T.rules with + | rule -> + (* This is a nonterminal symbol. *) + let rule = { rule with T.attributes = attr :: rule.T.attributes } in + { g with T.rules = StringMap.add symbol rule g.T.rules } + | exception Not_found -> + (* This is an unknown symbol. This should not happen. *) + assert false + +let add_attributes g (params, attrs) = + List.fold_left (fun g param -> + List.fold_left (fun g attr -> + add_attribute g param attr + ) g attrs + ) g params + +let add_attributes (decls : (S.parameter list * S.attributes) list) g = + List.fold_left add_attributes g decls + +(* -------------------------------------------------------------------------- *) + +(* Putting it all together. *) + +let drop (g : S.grammar) : T.grammar = + { + T.preludes = g.S.p_preludes; + T.postludes = g.S.p_postludes; + T.parameters = g.S.p_parameters; + T.start_symbols = StringMap.domain g.S.p_start_symbols; + T.types = drop_type_declarations g.S.p_types; + T.tokens = g.S.p_tokens; + T.on_error_reduce = drop_on_error_reduce_declarations g.S.p_on_error_reduce; + T.gr_attributes = g.S.p_grammar_attributes; + T.rules = StringMap.map drop_rule g.S.p_rules + } |> add_attributes g.S.p_symbol_attributes diff -Nru menhir-20171013/src/Drop.mli menhir-20171206/src/Drop.mli --- menhir-20171013/src/Drop.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/Drop.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,19 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* This function translates a grammar from the [Syntax] format + to the [UnparameterizedSyntax] format. Naturally, the grammar + must not have any parameterized symbols, since these are not + allowed by the latter format. *) + +val drop: Syntax.grammar -> UnparameterizedSyntax.grammar diff -Nru menhir-20171013/src/front.ml menhir-20171206/src/front.ml --- menhir-20171013/src/front.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/front.ml 2017-12-06 20:41:09.000000000 +0000 @@ -17,7 +17,7 @@ (* Reading a grammar from a file. *) -let load_partial_grammar filename = +let load_partial_grammar filename : Syntax.partial_grammar = let validExt = if Settings.coq then ".vy" else ".mly" in if not (Filename.check_suffix filename validExt) then Error.error [] @@ -43,7 +43,7 @@ (* Read all of the grammar files that are named on the command line. *) -let partial_grammars = +let grammars : Syntax.partial_grammar list = List.map load_partial_grammar Settings.filenames let () = @@ -53,23 +53,44 @@ (* Eliminate anonymous rules. *) -let partial_grammars = - List.map Anonymous.transform_partial_grammar partial_grammars +let grammars : Syntax.partial_grammar list = + List.map Anonymous.transform_partial_grammar grammars (* ------------------------------------------------------------------------- *) (* If several grammar files were specified, merge them. *) -let parameterized_grammar = - PartialGrammar.join_partial_grammars partial_grammars +let grammar : Syntax.grammar = + PartialGrammar.join_partial_grammars grammars (* ------------------------------------------------------------------------- *) -(* Expand away all applications of parameterized nonterminal symbols, so as to - obtain a grammar without parameterized nonterminal symbols. *) +(* Check that the grammar is well-sorted; infer the sort of every symbol. *) -let grammar = - ParameterizedGrammar.expand parameterized_grammar +let sorts = + SortInference.infer grammar + +(* ------------------------------------------------------------------------- *) + +(* Expand away all applications of parameterized nonterminal symbols, so as + to obtain a grammar without parameterized nonterminal symbols. *) + +let grammar : UnparameterizedSyntax.grammar = + let module S = SelectiveExpansion in + (* First, perform a selective expansion: expand away all parameters of + higher sort, keeping the parameters of sort [*]. This process always + terminates. *) + let grammar1 = S.expand S.ExpandHigherSort sorts grammar in + (* This "first-order parameterized grammar" can then be submitted to + the termination check. *) + CheckSafeParameterizedGrammar.check grammar1; + (* If it passes the check, then full expansion is safe. We drop [grammar1] + and start over from [grammar]. This is required in order to get correct + names. (Expanding [grammar1] would yield an equivalent grammar, with + more complicated names, reflecting the two steps of expansion.) *) + let grammar = S.expand S.ExpandAll sorts grammar in + (* This yields an unparameterized grammar. *) + Drop.drop grammar let () = Time.tick "Joining and expanding" diff -Nru menhir-20171013/src/GroundSort.ml menhir-20171206/src/GroundSort.ml --- menhir-20171013/src/GroundSort.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/GroundSort.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,22 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +type sort = + | GArrow of sort list + +let star = + GArrow [] + +let domain sort = + let GArrow sorts = sort in + sorts diff -Nru menhir-20171013/src/GroundSort.mli menhir-20171206/src/GroundSort.mli --- menhir-20171013/src/GroundSort.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/GroundSort.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,26 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* The syntax of sorts is: + + sort ::= (sort, ..., sort) -> * + + where the arity (the number of sorts on the left-hand side of the arrow) + can be zero. *) + +type sort = + | GArrow of sort list + +val star: sort + +val domain: sort -> sort list diff -Nru menhir-20171013/src/invariant.ml menhir-20171206/src/invariant.ml --- menhir-20171013/src/invariant.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/invariant.ml 2017-12-06 20:41:09.000000000 +0000 @@ -296,7 +296,7 @@ (* Setters. *) let represent state = - UnionFind.change (represented state) true + UnionFind.set (represented state) true let represents states = represent (Lr1.NodeSet.choose states) @@ -307,7 +307,7 @@ List.iter (fun states -> let dummy = UnionFind.fresh false in Lr1.NodeSet.iter (fun state -> - UnionFind.eunion dummy (represented state) + UnionFind.union dummy (represented state) ) states ) v @@ -384,7 +384,7 @@ (* Define accessors. *) let represented state = - UnionFind.find (represented state) + UnionFind.get (represented state) let representeds states = if Lr1.NodeSet.is_empty states then diff -Nru menhir-20171013/src/Memoize.ml menhir-20171206/src/Memoize.ml --- menhir-20171013/src/Memoize.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/Memoize.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,63 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +module type MEMOIZER = sig + (* A fixed type of keys. *) + type key + (* A memoization combinator for this type. *) + val memoize: (key -> 'a) -> (key -> 'a) +end + +module type IMPERATIVE_MAP = sig + (* A type of keys. *) + type key + (* A type of imperative maps. *) + type 'a t + (* Creation, insertion, lookup. *) + val create: int -> 'a t + val add: 'a t -> key -> 'a -> unit + val find: 'a t -> key -> 'a +end + +module Make (M : IMPERATIVE_MAP) = struct + type key = M.key + let memoize (f : key -> 'a) = + let table = M.create 127 in + fun x -> + try + M.find table x + with Not_found -> + let y = f x in + M.add table x y; + y +end + +module MakeViaMap (O : Map.OrderedType) = + Make(struct + module M = Map.Make(O) + type key = O.t + type 'a t = 'a M.t ref + let create _ = ref M.empty + let add table key data = table := M.add key data !table + let find table key = M.find key !table + end) + +module MakeViaHashtbl (H : Hashtbl.HashedType) = + Make(Hashtbl.Make(H)) + +module Int = + MakeViaHashtbl(struct + type t = int + let equal = (=) + let hash = Hashtbl.hash +end) diff -Nru menhir-20171013/src/Memoize.mli menhir-20171206/src/Memoize.mli --- menhir-20171013/src/Memoize.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/Memoize.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,38 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +module type MEMOIZER = sig + (* A type of keys. *) + type key + (* A memoization combinator for this type. *) + val memoize: (key -> 'a) -> (key -> 'a) +end + +module type IMPERATIVE_MAP = sig + (* A type of keys. *) + type key + (* A type of imperative maps. *) + type 'a t + (* Creation, insertion, lookup. *) + val create: int -> 'a t + val add: 'a t -> key -> 'a -> unit + val find: 'a t -> key -> 'a +end + +module Make (M : IMPERATIVE_MAP) : MEMOIZER with type key = M.key + +module MakeViaMap (O : Map.OrderedType) : MEMOIZER with type key = O.t + +module MakeViaHashtbl (H : Hashtbl.HashedType) : MEMOIZER with type key = H.t + +module Int : MEMOIZER with type key = int diff -Nru menhir-20171013/src/META menhir-20171206/src/META --- menhir-20171013/src/META 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/META 2017-12-06 20:41:09.000000000 +0000 @@ -1 +1 @@ -version = "20171013" +version = "20171206" diff -Nru menhir-20171013/src/misc.ml menhir-20171206/src/misc.ml --- menhir-20171013/src/misc.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/misc.ml 2017-12-06 20:41:09.000000000 +0000 @@ -48,6 +48,12 @@ if element then count + 1 else count ) 0 a +(* [tabulatef number fold n dummy f] returns a function that is extensionally + equal to [f], but relies on an internal array. Arguments to [f] are of type + ['a] and are mapped by [number] into the range [0..n). [fold] allows + folding over the domain of [f]. [dummy] is used to initialize the internal + array. Its value has no impact if [fold] is surjective. *) + let tabulatef number fold n dummy f = let a = Array.make n dummy in let () = fold (fun () element -> @@ -74,11 +80,6 @@ in get, !c -module IntSet = Set.Make (struct - type t = int - let compare = ( - ) - end) - type 'a iter = ('a -> unit) -> unit let separated_iter_to_string printer separator iter = @@ -99,29 +100,17 @@ let separated_list_to_string printer separator xs = separated_iter_to_string printer separator (fun f -> List.iter f xs) -let terminated_iter_to_string printer terminator iter = - let b = Buffer.create 32 in - iter (fun x -> - Buffer.add_string b (printer x); - Buffer.add_string b terminator - ); - Buffer.contents b - -let terminated_list_to_string printer terminator xs = - terminated_iter_to_string printer terminator (fun f -> List.iter f xs) - -let index_map string_map = - let n = StringMap.cardinal string_map in - let a = Array.make n None in - let conv, _ = StringMap.fold - (fun k v (conv, idx) -> - a.(idx) <- Some (k, v); - StringMap.add k idx conv, idx + 1) - string_map (StringMap.empty, 0) - in - ((fun n -> snd (unSome a.(n))), - (fun k -> StringMap.find k conv), - (fun n -> fst (unSome a.(n)))) +let inverse (a : 'a array) : 'a -> int = + let table = Hashtbl.create (Array.length a) in + Array.iteri (fun i data -> + assert (not (Hashtbl.mem table data)); + Hashtbl.add table data i + ) a; + fun data -> + try + Hashtbl.find table data + with Not_found -> + assert false let support_assoc l x = try @@ -270,31 +259,6 @@ | Some y -> y :: ys ) [] l)) -let new_intern capacity = - (* Set up a a hash table, mapping strings to unique integers. *) - let module H = Hashtbl.Make(struct - type t = string - let equal = (=) - let hash = Hashtbl.hash - end) in - let table = H.create capacity in - (* This counts the calls to [intern]. *) - let c = ref 0 in - (* A string is mapped to a unique string, as follows. *) - let intern s = - c := !c + 1; - try - H.find table s - with Not_found -> - H.add table s s; - s - and verbose () = - Printf.fprintf stderr - "%d calls to intern; %d unique strings.\n%!" - !c (H.length table) - in - intern, verbose - let new_encode_decode capacity = (* Set up a a hash table, mapping strings to unique integers. *) let module H = Hashtbl.Make(struct @@ -329,6 +293,15 @@ in encode, decode, verbose +let new_claim () = + let names = ref StringSet.empty in + let claim name = + if StringSet.mem name !names then + Error.error [] "internal name clash over %s" name; + names := StringSet.add name !names + in + claim + let rec best (preferable : 'a -> 'a -> bool) (xs : 'a list) : 'a option = match xs with | [] -> @@ -376,9 +349,38 @@ let ys1, yss = levels1 cmp x1 xs in ys1 :: yss +let rec dup1 cmp x ys = + match ys with + | [] -> + None + | y :: ys -> + if cmp x y = 0 then + Some x + else + dup1 cmp y ys + +let dup cmp xs = + match xs with + | [] -> + None + | x :: xs -> + dup1 cmp x xs + let once x y = let s = ref x in fun () -> let result = !s in s := y; result + +module ListExtras = struct + let equal = List.for_all2 + let hash hash xs = + Hashtbl.hash (List.map hash xs) +end + +let nth = function + | 1 -> "first" + | 2 -> "second" + | 3 -> "third" + | i -> Printf.sprintf "%dth" i diff -Nru menhir-20171013/src/misc.mli menhir-20171206/src/misc.mli --- menhir-20171013/src/misc.mli 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/misc.mli 2017-12-06 20:41:09.000000000 +0000 @@ -45,20 +45,6 @@ val tabulateb: int -> (int -> bool) -> (int -> bool) * int -(* [tabulatef number fold n dummy f] returns a function that is extensionally - equal to [f], but relies on an internal array. Arguments to [f] are of type - ['a] and are mapped by [number] into the range [0..n). [fold] allows - folding over the domain of [f]. [dummy] is used to initialize the internal - array. Its value has no impact if [fold] is surjective. *) - -val tabulatef: - ('a -> int) -> - ((unit -> 'a -> unit) -> unit -> unit) -> - int -> - 'b -> - ('a -> 'b) -> - ('a -> 'b) - (* [tabulateo number fold n f] returns a function that is extensionally equal to [f], but relies on an internal array. Arguments to [f] are of type ['a] and are mapped by [number] @@ -72,10 +58,6 @@ val ( $$ ) : 'a -> ('a -> 'b) -> 'b -(* Sets of strings and maps over strings. *) - -module IntSet : Set.S with type elt = int - (* [separated_list_to_string printer sep l] converts [l] into a string representation built by using [printer] on each element and [sep] as a separator. *) @@ -85,24 +67,12 @@ val separated_iter_to_string: ('a -> string) -> string -> 'a iter -> string val separated_list_to_string: ('a -> string) -> string -> 'a list -> string -(* [terminated_list_to_string printer term l] converts [l] into a string - representation built by using [printer] on each element and [term] as - a terminator. *) - -val terminated_list_to_string: ('a -> string) -> string -> 'a list -> string -val terminated_iter_to_string: ('a -> string) -> string -> 'a iter -> string - -(* [index_map f] returns a triple (indexed_f, domain_indexation, domain_array). - [indexed_f] is a mapping from [0..n-1] to the elements of the map [f] - ([n] being the size of the image of [f]). - [domain_indexation] is a mapping from the domain of the map [f] to indexes. - [domain_array] is a mapping from the indexes to the domain of [f]. - The indexation implements [f] ie: - - forall x in domain(m), indexed_f (domain_indexation x) = f (x). - - forall x in domain(m), domain_array (domain_indexation x) = x. *) +(* If [a] is an array, therefore a mapping of integers to elements, then + [inverse a] computes its inverse, a mapping of elements to integers. + The type ['a] of elements must support the use of OCaml's generic + equality and hashing functions. *) -val index_map - : 'a StringMap.t -> (int -> 'a) * (string -> int) * (int -> string) +val inverse: 'a array -> ('a -> int) (* [support_assoc l x] returns the second component of the first couple in [l] whose first component is [x]. If it does not exist, it returns @@ -176,13 +146,6 @@ is in [l], preserving the order of elements of [l]. *) val map_opt : ('a -> 'b option) -> 'a list -> 'b list -(* [new_intern capacity] creates a new service for interning (hash-consing) - strings. [capacity] is the initial capacity of the internal hash table. - [new_intern] returns a pair [intern, verbose] where [intern] is the - hash-consing service and [verbose] prints statistics about the use of - the service so far. *) -val new_intern: int -> (string -> string) * (unit -> unit) - (* [new_encode_decode capacity] creates a new service for assigning unique integer codes to strings. [capacity] is the initial capacity of the internal hash table. [new_encode_decode] returns a triple [encode, decode, @@ -191,6 +154,11 @@ so far. *) val new_encode_decode: int -> (string -> int) * (int -> string) * (unit -> unit) +(* [new_claim()] creates a new service for claiming names. It returns a + function [claim] of type [int -> unit] such that the call [claim x] + succeeds if and only if [claim x] has never been called before. *) +val new_claim: unit -> (string -> unit) + (* If [preferable] is a partial order on elements, then [best preferable xs] returns the best (least) element of [xs], if there is one. Its complexity is quadratic. *) @@ -203,7 +171,24 @@ val levels: ('a -> 'a -> int) -> 'a list -> 'a list list +(* Assuming that the list [xs] is sorted with respect to the ordering [cmp], + [dup cmp xs] returns a duplicate element of the list [xs], if one exists. *) + +val dup: ('a -> 'a -> int) -> 'a list -> 'a option + (* [once x y] produces a function [f] which produces [x] the first time it is called and produces [y] forever thereafter. *) val once: 'a -> 'a -> (unit -> 'a) + +(* Equality and hashing for lists, parameterized over equality and hashing + for elements. *) + +module ListExtras : sig + val equal: ('a -> 'a -> bool) -> 'a list -> 'a list -> bool + val hash: ('a -> int) -> 'a list -> int +end + +(* A nice way of printing "nth" in English, for concrete values of [n]. *) + +val nth: int -> string diff -Nru menhir-20171013/src/option.ml menhir-20171206/src/option.ml --- menhir-20171013/src/option.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/option.ml 2017-12-06 20:41:09.000000000 +0000 @@ -37,3 +37,19 @@ | None -> (* Presumably, an error message has already been printed. *) exit 1 + +let equal equal o1 o2 = + match o1, o2 with + | None, None -> + true + | Some x1, Some x2 -> + equal x1 x2 + | None, Some _ + | Some _, None -> + false + +let hash hash = function + | Some x -> + hash x + | None -> + Hashtbl.hash None diff -Nru menhir-20171013/src/option.mli menhir-20171206/src/option.mli --- menhir-20171013/src/option.mli 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/option.mli 2017-12-06 20:41:09.000000000 +0000 @@ -15,3 +15,5 @@ val iter: ('a -> unit) -> 'a option -> unit val fold: ('a -> 'b -> 'b) -> 'a option -> 'b -> 'b val project: 'a option -> 'a (* careful: calls [exit 1] in case of failure *) +val equal: ('a -> 'b -> bool) -> 'a option -> 'b option -> bool +val hash: ('a -> int) -> 'a option -> int diff -Nru menhir-20171013/src/parameterizedGrammar.ml menhir-20171206/src/parameterizedGrammar.ml --- menhir-20171013/src/parameterizedGrammar.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/parameterizedGrammar.ml 1970-01-01 00:00:00.000000000 +0000 @@ -1,784 +0,0 @@ -(******************************************************************************) -(* *) -(* Menhir *) -(* *) -(* François Pottier, Inria Paris *) -(* Yann Régis-Gianas, PPS, Université Paris Diderot *) -(* *) -(* Copyright Inria. All rights reserved. This file is distributed under the *) -(* terms of the GNU General Public License version 2, as described in the *) -(* file LICENSE. *) -(* *) -(******************************************************************************) - -open Positions -open Syntax -open UnparameterizedSyntax -open Misc - -(* Inside its own definition, a parameterized nonterminal symbol foo(X, Y) - can be applied only to its formal parameters: that is, using foo(X, Y) - is permitted, but using foo(Y, X) or foo(X, list(Y)) is not. - - If foo(X, Y) is defined in a mutually recursive manner with bar(X, Y), - then this restriction extends to both foo and bar. Furthermore, in such - a case, foo and bar must have the same arity, that is, the same formal - parameters. - - These conditions are sufficient to ensure the termination of expansion. - - For example: - C(x) : ... // This definition does not involve A or B. - A(x,y) : B(x,y) C(Y) // This mutually recursive definition is ok. - B(x,y) : A(x,y) - D(x) : E(D(x)) // This one is incorrect. - E(y) : D(y) - -*) - -(* -------------------------------------------------------------------------- *) - -(* Sort inference for nonterminal symbols. *) - -(* Unification variables convey [variable_info] to describe - the multi-equation they take part of. *) -type variable_info = - { - mutable structure : nt_type option; - mutable name : string option; - mutable mark : Mark.t - } - -(* [UnionFind] is used to improve the union and the equality test - between multi-equations. *) -and variable = variable_info UnionFind.point - -(* Types are simple types. - [star] denotes the type of ground symbol (non terminal or terminal). - [Arrow] describes the type of a parameterized non terminal. *) -and nt_type = - Arrow of variable list - -let star = - Arrow [] - -(* [var_name] is a name generator for unification variables. *) -let var_name = - let name_counter = ref (-1) in - let next_name () = - incr name_counter; - String.make 1 (char_of_int (97 + !name_counter mod 26)) - ^ let d = !name_counter / 26 in if d = 0 then "" else string_of_int d - in - fun v -> - let repr = UnionFind.find v in - match repr.name with - None -> let name = next_name () in repr.name <- Some name; name - | Some x -> x - -(* [string_of_nt_type] is a simple pretty printer for types (they can be - recursive). *) - -(* 2011/04/05: types can no longer be recursive, but I won't touch the printer -fpottier *) - -let string_of paren_fun ?paren ?colors t : string = - let colors = - match colors with - None -> (Mark.fresh (), Mark.fresh ()) - | Some cs -> cs - in - let s, p = paren_fun colors t in - if paren <> None && p = true then - "("^ s ^")" - else s - -let rec paren_nt_type colors = function - (* [colors] is a pair [white, black] *) - - Arrow [] -> - "*", false - - | Arrow ins -> - let args = separated_list_to_string - (string_of paren_var ~paren:true ~colors) ", " ins - in - let args = - if List.length ins > 1 then - "("^ args ^ ")" - else - args - in - args^" -> *", true - -and paren_var (white, black) x = - let descr = UnionFind.find x in - if Mark.same descr.mark white then begin - descr.mark <- black; - var_name x, false - end - else begin - descr.mark <- white; - let s, p = match descr.structure with - None -> var_name x, false - | Some t -> paren_nt_type (white, black) t - in - if Mark.same descr.mark black then - (var_name x ^ " = " ^ s, true) - else - (s, p) - end - -let string_of_nt_type t = - string_of paren_nt_type t - -let string_of_var v = - string_of paren_var v - -(* for debugging: - -(* [print_env env] returns a string description of the typing environment. *) -let print_env = - List.iter (fun (k, (_, v)) -> - Printf.eprintf "%s: %s\n" k (string_of_var v)) - -*) - -(* [occurs_check x y] checks that [x] does not occur within [y]. *) - -let dfs action x = - - let black = Mark.fresh () in - - let rec visit_var x = - let descr = UnionFind.find x in - if not (Mark.same descr.mark black) then begin - descr.mark <- black; - action x; - match descr.structure with - | None -> - () - | Some t -> - visit_term t - end - - and visit_term (Arrow ins) = - List.iter visit_var ins - - in - visit_var x - -exception OccursError of variable * variable - -let occurs_check x y = - dfs (fun z -> if UnionFind.equivalent x z then raise (OccursError (x, y))) y - -(* First order unification. *) - -(* 2011/04/05: perform an eager occurs check and prevent the construction - of any cycles. *) - -let fresh_flexible_variable () = - UnionFind.fresh { structure = None; name = None; mark = Mark.none } - -let fresh_structured_variable t = - UnionFind.fresh { structure = Some t; name = None; mark = Mark.none } - -let star_variable = - fresh_structured_variable star - -exception UnificationError of nt_type * nt_type -exception BadArityError of int * int - -let rec unify_var toplevel x y = - if not (UnionFind.equivalent x y) then - let reprx, repry = UnionFind.find x, UnionFind.find y in - match reprx.structure, repry.structure with - None, Some _ -> occurs_check x y; UnionFind.union x y - | Some _, None -> occurs_check y x; UnionFind.union y x - | None, None -> UnionFind.union x y - | Some t, Some t' -> unify toplevel t t'; UnionFind.union x y - -and unify toplevel t1 t2 = - match t1, t2 with - - | Arrow ins, Arrow ins' -> - let n1, n2 = List.length ins, List.length ins' in - if n1 <> n2 then - if n1 = 0 || n2 = 0 || not toplevel then - raise (UnificationError (t1, t2)) - else - (* the flag [toplevel] is used only here and influences which - exception is raised; BadArityError is raised only at toplevel *) - raise (BadArityError (n1, n2)); - List.iter2 (unify_var false) ins ins' - -let unify_var x y = - unify_var true x y - -(* Typing environment. *) -type environment = - (string * (Positions.t list * variable)) list - -(* [lookup x env] returns the type related to [x] in the typing environment - [env]. - By convention, identifiers that are not in [env] are terminals. They are - given the type [Star]. (This seems a rather fragile convention, as it - relies on the fact that the well-definedness of every identifier has - been previously checked; see [PartialGrammar]. -fpottier) *) -let lookup (x : string) (env: environment) = - try - snd (List.assoc x env) - with Not_found -> star_variable - -(* This function checks that the symbol [k] has the type [expected_type]. *) -let check positions env k expected_type : unit = - let inference_var = lookup k env in - let checking_var = fresh_structured_variable expected_type in - try - unify_var inference_var checking_var - with - UnificationError (t1, t2) -> - Error.error - positions - "how is this symbol parameterized?\n\ - It is used at sorts %s and %s.\n\ - The sort %s is not compatible with the sort %s." - (string_of_var inference_var) (string_of_var checking_var) - (string_of_nt_type t1) (string_of_nt_type t2) - - | BadArityError (n1, n2) -> - Error.error - positions - "does this symbol expect %d or %d arguments?" - (min n1 n2) (max n1 n2) - - | OccursError (x, y) -> - Error.error - positions - "how is this symbol parameterized?\n\ - It is used at sorts %s and %s.\n\ - The sort %s cannot be unified with the sort %s." - (string_of_var inference_var) (string_of_var checking_var) - (string_of_var x) (string_of_var y) - - - -(* An identifier can be used either in a total application or as a - higher-order nonterminal (no partial application is allowed). *) -let rec parameter_type env = function - | ParameterVar x -> - lookup x.value env - - | ParameterApp (x, args) -> - assert (args <> []); - let expected_type = - (* [x] is applied, it must be to the exact number - of arguments. *) - Arrow (List.map (parameter_type env) args) - in - (* Check the well-formedness of the application. *) - check [x.position] env x.value expected_type; - - (* Similarly, if it was a total application the result is - [Star] otherwise it is the flexible variable. *) - star_variable - - | ParameterAnonymous _ -> - (* Anonymous rules are eliminated early on. *) - assert false - -let check_parameter_type env p : unit = - let symbol, actuals = Parameters.unapp p in - let expected_ty = - if actuals = [] then star - else Arrow (List.map (parameter_type env) actuals) - in - check [ symbol.position ] env symbol.value expected_ty - -let check_grammar (p_grammar : Syntax.grammar) = - (* [n] is the grammar size. *) - let n = StringMap.cardinal p_grammar.p_rules in - - (* The successors of the non terminal [N] are its producers. It - induce a graph over the non terminals and its successor function - is implemented by [successors]. Non terminals are indexed using - [nt]. - *) - let nt, conv, _iconv = index_map p_grammar.p_rules in - let parameters, name, branches, positions = - (fun n -> (nt n).pr_parameters), (fun n -> (nt n).pr_nt), - (fun n -> (nt n).pr_branches), (fun n -> (nt n).pr_positions) - in - - (* The successors function is implemented as an array using the - indexing previously created. *) - let successors = - Array.init n (fun node -> - (* We only are interested by parameterized non terminals. *) - if parameters node <> [] then - List.fold_left (fun succs { pr_producers = symbols } -> - List.fold_left (fun succs (_, p, _) -> - let symbol, _ = Parameters.unapp p in - try - let symbol_node = conv symbol.value in - (* [symbol] is a parameterized non terminal, we add it - to the successors set. *) - if parameters symbol_node <> [] then - IntSet.add symbol_node succs - else - succs - with Not_found -> - (* [symbol] is a token, it is not interesting for type inference - purpose. *) - succs - ) succs symbols - ) IntSet.empty (branches node) - else - Misc.IntSet.empty - ) - in - - (* The successors function and the indexing induce the following graph - module. *) - let module RulesGraph = - struct - - type node = int - - let n = n - - let index node = - node - - let successors f node = - IntSet.iter f successors.(node) - - let iter f = - for i = 0 to n - 1 do - f i - done - - end - in - let module ConnectedComponents = Tarjan.Run (RulesGraph) in - (* We check that: - - all the parameterized definitions of a particular component - have the same number of parameters. - - every parameterized non terminal definition always uses - parameterized definitions of the same component with its - formal parameters. - - Components are marked during the traversal: - -1 means unvisited - n with n > 0 is the number of parameters of the clique. - *) - let unseen = -1 in - let marked_components = Array.make n unseen in - - let flexible_arrow args = - let ty = Arrow (List.map (fun _ -> fresh_flexible_variable ()) args) in - fresh_structured_variable ty - in - - (* [nt_type i] is the type of the i-th non terminal. *) - let nt_type i = - match parameters i with - | [] -> - star_variable - - | x -> - flexible_arrow x - in - - (* [actual_parameters_as_formal] is the well-formedness checker for - parameterized non terminal application. *) - let actual_parameters_as_formal actual_parameters formal_parameters = - List.for_all2 (fun y -> (function ParameterVar x -> x.value = y - | _ -> false)) - formal_parameters actual_parameters - in - - (* The environment is initialized. *) - let env : environment = StringMap.fold - (fun k r acu -> - (k, (r.pr_positions, nt_type (conv k))) - :: acu) - p_grammar.p_rules [] - in - - (* We traverse the graph checking each parameterized non terminal - definition is well-formed. *) - RulesGraph.iter (fun i -> - let params = parameters i - and iname = name i - and repr = ConnectedComponents.representative i - and positions = positions i - in - - (* The environment is augmented with the parameters whose types are - unknown. *) - let env' = List.map - (fun k -> (k, (positions, fresh_flexible_variable ()))) params - in - let env = env' @ env in - - (* The type of the parameterized non terminal is constrained to be - [expected_ty]. *) - let check_type () = - check positions env iname (Arrow (List.map (fun (_, (_, t)) -> t) env')) - in - - (* We check the number of parameters. *) - let check_parameters () = - let parameters_len = List.length params in - (* The component is visited for the first time. *) - if marked_components.(repr) = unseen then - marked_components.(repr) <- parameters_len - else (* Otherwise, we check that the arity is homogeneous - in the component. *) - if marked_components.(repr) <> parameters_len then - Error.error positions - "mutually recursive definitions must have the same parameters.\n\ - This is not the case for %s and %s." - (name repr) iname - in - - (* In each production rule, the parameterized non terminal - of the same component must be instantiated with the same - formal arguments. *) - let check_producers () = - List.iter - (fun { pr_producers = symbols } -> List.iter - (function (_, p, _) -> - (* We take the use of each symbol into account. *) - check_parameter_type env p; - (* If it is in the same component, check in addition that - the arguments are the formal arguments. *) - let symbol, actuals = Parameters.unapp p in - try - let idx = conv symbol.value in - if ConnectedComponents.representative idx = repr then - if not (actual_parameters_as_formal actuals params) - then - Error.error [ symbol.position ] - "mutually recursive definitions must have the same \ - parameters.\n\ - This is not the case for %s." - (let name1, name2 = (name idx), (name i) in - if name1 <> name2 then name1 ^ " and "^ name2 - else name1) - with _ -> ()) - symbols) (branches i) - in - - check_type(); - check_parameters(); - check_producers() - ); - - (* Check that every %type and %on_error_reduce declaration mentions a - well-typed term. *) - List.iter (fun (p, _) -> - check_parameter_type env p - ) p_grammar.p_types; - List.iter (fun (p, _) -> - check_parameter_type env p - ) p_grammar.p_on_error_reduce - -(* -------------------------------------------------------------------------- *) - -(* Auxiliary functions for expansion. *) - -let rec subst_parameter subst = function - | ParameterVar x -> - (try - List.assoc x.value subst - with Not_found -> - ParameterVar x) - - | ParameterApp (x, ps) -> - (try - match List.assoc x.value subst with - | ParameterVar y -> - ParameterApp (y, List.map (subst_parameter subst) ps) - - | ParameterApp _ -> - (* Type-checking ensures that we cannot do partial - application. Consequently, if a higher-order nonterminal - is an actual argument, it cannot be the result of a - partial application. *) - assert false - - | ParameterAnonymous _ -> - (* Anonymous rules are eliminated early on. *) - assert false - - with Not_found -> - ParameterApp (x, List.map (subst_parameter subst) ps)) - - | ParameterAnonymous _ -> - (* Anonymous rules are eliminated early on. *) - assert false - - -let subst_parameters subst = - List.map (subst_parameter subst) - -let dummy : rule = - { branches = []; - positions = []; - inline_flag = false; - attributes = []; - } - -(* [mangle p] chooses a name for the new nonterminal symbol that corresponds - to the parameter [p]. *) - -let rec mangle = function - | ParameterVar x - | ParameterApp (x, []) -> - Positions.value x - | ParameterApp (x, ps) -> - - (* We include parentheses and commas in the names that we - assign to expanded nonterminals, because that is more - readable and acceptable in many situations. We replace them - with underscores in situations where these characters are - not valid. *) - - Printf.sprintf "%s(%s)" - (Positions.value x) - (separated_list_to_string mangle "," ps) - - | ParameterAnonymous _ -> - (* Anonymous rules are eliminated early on. *) - assert false - -(* -------------------------------------------------------------------------- *) - -(* Expansion. *) - -module Expand (X : sig - val p_grammar: Syntax.grammar -end) = struct - open X - - (* Check that it is safe to expand this parameterized grammar. *) - let () = - check_grammar p_grammar - - (* Set up a mechanism that ensures that names are unique -- and, in - fact, ensures the stronger condition that normalized names are - unique. *) - - let names = - ref (StringSet.empty) - - let ensure_fresh name = - let normalized_name = Misc.normalize name in - if StringSet.mem normalized_name !names then - Error.error [] - "internal name clash over %s" normalized_name; - names := StringSet.add normalized_name !names; - name - - let expanded_rules = - Hashtbl.create 13 - - module InstanceTable = - Hashtbl.Make (Parameters) - - let rule_names = - InstanceTable.create 13 - - let name_of symbol parameters = - let param = ParameterApp (symbol, parameters) in - try - InstanceTable.find rule_names param - with Not_found -> - let name = ensure_fresh (mangle param) in - InstanceTable.add rule_names param name; - name - - (* Now is the time to eliminate (desugar) %attribute declarations. We build - a table of these declarations, and look up this table so as to place - appropriate attributes on terminal and nonterminal symbols. *) - - let symbol_attributes_table = - InstanceTable.create 7 - - let () = - List.iter (fun (actuals, attributes) -> - List.iter (fun actual -> - let attributes', used = - try InstanceTable.find symbol_attributes_table actual - with Not_found -> [], ref false - in - InstanceTable.replace symbol_attributes_table actual - (attributes @ attributes', used) - ) actuals - ) p_grammar.p_symbol_attributes - - let symbol_attributes (actual : parameter) : attributes = - try - let attrs, used = InstanceTable.find symbol_attributes_table actual in - used := true; - attrs - with Not_found -> - [] - - let symbol_attributes_warnings () = - InstanceTable.iter (fun actual (attributes, used) -> - if not !used then - List.iter (fun (id, _payload) -> - Error.warning [Positions.position id] - "this attribute could not be transferred to the symbol %s" - (Parameters.print actual) - ) attributes - ) symbol_attributes_table - - (* This auxiliary function transfers information from the - table [symbol_attributes] towards terminal symbols. *) - - let decorate tok prop : token_properties = - let attrs = symbol_attributes (ParameterVar (Positions.unknown_pos tok)) in - { prop with tk_attributes = attrs @ prop.tk_attributes } - - (* Given the substitution [subst] from parameters to non terminal, we - instantiate the parameterized branch. *) - let rec expand_branch subst pbranch = - let new_producers = List.map (fun (ido, p, attrs) -> - let sym, actual_parameters = Parameters.unapp p in - let sym, actual_parameters = - try - match List.assoc sym.value subst with - | ParameterVar x -> - x, subst_parameters subst actual_parameters - - | ParameterApp (x, ps) -> - assert (actual_parameters = []); - x, ps - - | ParameterAnonymous _ -> - (* Anonymous rules are eliminated early on. *) - assert false - - with Not_found -> - sym, subst_parameters subst actual_parameters - in - (* Instantiate the definition of the producer. *) - { producer_identifier = Positions.value ido; - producer_symbol = expand_branches subst sym actual_parameters; - producer_attributes = attrs } - ) pbranch.pr_producers - in - { - branch_position = pbranch.pr_branch_position; - producers = new_producers; - action = pbranch.pr_action; - branch_prec_annotation = pbranch.pr_branch_prec_annotation; - branch_production_level = pbranch.pr_branch_production_level; - } - - (* Instantiate the branches of sym for a particular set of actual - parameters. *) - and expand_branches subst sym actual_parameters : symbol = - match StringMap.find (Positions.value sym) p_grammar.p_rules with - | exception Not_found -> - (* [sym] is a terminal symbol. Expansion is not needed. *) - Positions.value sym - | prule -> - let nsym = name_of sym actual_parameters in - (* Check up front if [nsym] is marked, so as to deal with it just once. *) - if Hashtbl.mem expanded_rules nsym then - nsym - else begin - (* Type checking ensures that parameterized nonterminal symbols - are applied to an appropriate number of arguments. *) - assert (List.length prule.pr_parameters = - List.length actual_parameters); - let subst = - List.combine prule.pr_parameters actual_parameters @ subst - in - (* Mark [nsym] up front, so as to avoid running in circles. *) - Hashtbl.add expanded_rules nsym dummy; - (* The attributes carried by the expanded symbol [nsym] are those - carried by the original parameterized symbol [sym], plus those - found in %attribute declarations for [nsym], plus those found - in %attribute declarations for [sym]. *) - let attributes = - symbol_attributes (ParameterApp (sym, actual_parameters)) @ - symbol_attributes (ParameterVar sym) @ - prule.pr_attributes - in - Hashtbl.replace expanded_rules nsym { - branches = List.map (expand_branch subst) prule.pr_branches; - positions = prule.pr_positions; - inline_flag = prule.pr_inline_flag; - attributes = attributes; - }; - nsym - end - - (* Process %type declarations. *) - let rec types_from_list - (ps : (Syntax.parameter * 'a Positions.located) list) - : 'a StringMap.t = - match ps with - | [] -> StringMap.empty - | (nt, ty)::q -> - let accu = types_from_list q in - let mangled = mangle nt in - if StringMap.mem mangled accu then - Error.error [Parameters.position nt] - "there are multiple %%type declarations for nonterminal %s." - mangled; - StringMap.add mangled (Positions.value ty) accu - - (* Process %on_error_reduce declarations. *) - let rec on_error_reduce_from_list (ps : (Syntax.parameter * 'p) list) : 'p StringMap.t = - match ps with - | [] -> - StringMap.empty - | (nt, prec) :: ps -> - let accu = on_error_reduce_from_list ps in - let mangled = mangle nt in - if StringMap.mem mangled accu then - Error.error [Parameters.position nt] - "there are multiple %%on_error_reduce declarations for nonterminal %s." - mangled; - StringMap.add mangled prec accu - - (* The entry points are the nonparameterized nonterminal symbols. (Not just - the start symbols, as we haven't run the reachability analysis, and the - grammar may contain unreachable parts, which we still want to expand.) - Because a start symbol cannot be parameterized (which the type analysis - guarantees), all of the start symbols are entry points. *) - let () = - StringMap.iter (fun nt prule -> - if prule.pr_parameters = [] then - let (_ : symbol) = expand_branches [] (Positions.unknown_pos nt) [] in - () - ) p_grammar.p_rules - - let grammar = { - preludes = p_grammar.p_preludes; - postludes = p_grammar.p_postludes; - parameters = p_grammar.p_parameters; - start_symbols = StringMap.domain (p_grammar.p_start_symbols); - types = types_from_list p_grammar.p_types; - on_error_reduce = on_error_reduce_from_list p_grammar.p_on_error_reduce; - tokens = StringMap.mapi decorate p_grammar.p_tokens; - gr_attributes = p_grammar.p_grammar_attributes; - rules = Hashtbl.fold StringMap.add expanded_rules StringMap.empty - } - - (* If some %attribute declarations are unused, warn about it. *) - let () = - symbol_attributes_warnings() - -end - -let expand p_grammar = - let module E = Expand(struct let p_grammar = p_grammar end) in - E.grammar diff -Nru menhir-20171013/src/parameterizedGrammar.mli menhir-20171206/src/parameterizedGrammar.mli --- menhir-20171013/src/parameterizedGrammar.mli 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/parameterizedGrammar.mli 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -(******************************************************************************) -(* *) -(* Menhir *) -(* *) -(* François Pottier, Inria Paris *) -(* Yann Régis-Gianas, PPS, Université Paris Diderot *) -(* *) -(* Copyright Inria. All rights reserved. This file is distributed under the *) -(* terms of the GNU General Public License version 2, as described in the *) -(* file LICENSE. *) -(* *) -(******************************************************************************) - -(* This turns a grammar where nonterminal symbols can be parameterized - into a grammar where nonterminal symbols are not parameterized. The - transformation is a textual expansion process, whose termination is - guaranteed by a simple type system. - - Expansion creates new nonterminal symbols whose names contain - parentheses and commas. These names can be printed directly in - informational messages (error messages, conflict reports, - descriptions of the automaton, etc.). However, they must be - sanitized via [Misc.normalize] when printed in a context where a - valid identifier is expected. *) - -val expand : Syntax.grammar -> UnparameterizedSyntax.grammar diff -Nru menhir-20171013/src/parameters.ml menhir-20171206/src/parameters.ml --- menhir-20171013/src/parameters.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/parameters.ml 2017-12-06 20:41:09.000000000 +0000 @@ -32,6 +32,13 @@ (* Anonymous rules are eliminated early on. *) assert false +let unvar = function + | ParameterVar x -> + x + | ParameterApp _ + | ParameterAnonymous _ -> + assert false + let rec map f = function | ParameterVar x -> ParameterVar (f x) @@ -53,6 +60,34 @@ let identifiers m p = fold (fun accu x -> StringMap.add x.value x.position accu) m p +let rec occurs (x : symbol) (p : parameter) = + match p with + | ParameterVar y -> + x = y.value + | ParameterApp (y, ps) -> + x = y.value || List.exists (occurs x) ps + | ParameterAnonymous _ -> + assert false + +let occurs_shallow (x : symbol) (p : parameter) = + match p with + | ParameterVar y -> + x = y.value + | ParameterApp (y, _) -> + assert (x <> y.value); + false + | ParameterAnonymous _ -> + assert false + +let occurs_deep (x : symbol) (p : parameter) = + match p with + | ParameterVar _ -> + false + | ParameterApp (_, ps) -> + List.exists (occurs x) ps + | ParameterAnonymous _ -> + assert false + type t = parameter let rec equal x y = @@ -83,10 +118,14 @@ let with_pos p = Positions.with_pos (position p) p -let rec print = function - | ParameterVar x -> +let rec print with_spaces = function + | ParameterVar x + | ParameterApp (x, []) -> x.value | ParameterApp (x, ps) -> - x.value ^ "(" ^ Misc.separated_list_to_string print ", " ps ^ ")" + let separator = if with_spaces then ", " else "," in + Printf.sprintf "%s(%s)" + x.value + (Misc.separated_list_to_string (print with_spaces) separator ps) | ParameterAnonymous _ -> assert false diff -Nru menhir-20171013/src/partialGrammar.ml menhir-20171206/src/partialGrammar.ml --- menhir-20171013/src/partialGrammar.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/partialGrammar.ml 2017-12-06 20:41:09.000000000 +0000 @@ -636,13 +636,21 @@ if (must_be_nonterminal && not is_nonterminal) then Error.error [Parameters.position p] "%s is a terminal symbol,\n\ - but %s declarations are applicable only to nonterminal symbols." (Parameters.print p) kind; + but %s declarations are applicable only to nonterminal symbols." + (Parameters.print true p) kind; (* Then, check the arguments. *) List.iter (check kind false) ps in - List.iter (check "%type" true) (List.map fst grammar.p_types); - List.iter (check "%on_error_reduce" true) (List.map fst grammar.p_on_error_reduce); + let check_fst kind must_be_nonterminal (p, _) = + check kind must_be_nonterminal p + in + + List.iter (check_fst "%type" true) grammar.p_types; + List.iter (check_fst "%on_error_reduce" true) grammar.p_on_error_reduce; + List.iter (fun (params, _) -> + List.iter (check "%attribute" false) params + ) grammar.p_symbol_attributes; (* Every reference to a symbol is well defined. *) let used_tokens = ref StringSet.empty in @@ -661,10 +669,19 @@ Error.error [ p ] "%s is undefined." s in StringMap.iter - (fun k prule -> List.iter + (fun k prule -> + + (* The formal parameters of each rule must have distinct names. *) + prule.pr_parameters + |> List.sort compare + |> Misc.dup compare + |> Option.iter (fun x -> + Error.error prule.pr_positions + "several parameters of this rule are named \"%s\"." x + ); (* Check each branch. *) - (fun { pr_producers = producers; + List.iter (fun { pr_producers = producers; pr_branch_prec_annotation; } -> ignore (List.fold_left diff -Nru menhir-20171013/src/SelectiveExpansion.ml menhir-20171206/src/SelectiveExpansion.ml --- menhir-20171013/src/SelectiveExpansion.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/SelectiveExpansion.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,523 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +let value = Positions.value +let unknown = Positions.unknown_pos +open Syntax +open GroundSort + +(* -------------------------------------------------------------------------- *) + +(* Expansion modes. *) + +type mode = + | ExpandHigherSort + | ExpandAll + +(* -------------------------------------------------------------------------- *) + +(* Expansion can be understood as traversing a graph where every vertex is + labeled with a pair of a nonterminal symbol [nt] and an instantiation of + the formal parameters of [nt]. *) + +(* We allow partial instantiations, where some of the formal parameters of + [nt] are instantiated, while others remain present. For this reason, we + represent an instantation as a list of *optional* actual parameters. *) + +(* The actual parameters that appear in an instantiation make sense *in the + source namespace* (at the toplevel). That is, they refer to (terminal and + nonterminal) symbols that exist (at the toplevel) in the original + grammar. *) + +type instantiation = + parameter option list + +type label = + nonterminal * instantiation + +(* Equality and hashing for labels. *) + +module Label = struct + type t = label + let equal (nt1, inst1) (nt2, inst2) = + nt1 = nt2 && + Misc.ListExtras.equal (Option.equal Parameters.equal) inst1 inst2 + let hash (nt, inst) = + Hashtbl.hash (nt, Misc.ListExtras.hash (Option.hash Parameters.hash) inst) +end + +(* -------------------------------------------------------------------------- *) + +(* [mangle label] chooses a concrete name for the new nonterminal symbol that + corresponds to the label [label]. *) + +(* We include parentheses and commas in this name, because that is readable + and acceptable in many situations. We replace them with underscores in + situations where these characters are not valid; see [Misc.normalize]. *) + +let mangle_po (po : parameter option) = + match po with + | None -> + (* When a parameter remains uninstantiated, + we put an underscore in its place. *) + "_" + | Some p -> + Parameters.print false p + +let mangle ((nt, pos) : label) : nonterminal = + if pos = [] then nt else + Printf.sprintf "%s(%s)" + nt + (Misc.separated_list_to_string mangle_po "," pos) + +(* -------------------------------------------------------------------------- *) + +(* An environment maps all of the formal parameters of a rule to actual + parameters, which make sense in the source namespace. *) + +module Env = + StringMap + +type env = + parameter Env.t + +let subst_symbol env sym : parameter = + try + Env.find (value sym) env + with Not_found -> + (* [x] is not a formal parameter. It is a toplevel symbol. *) + ParameterVar sym + +let apply (param : parameter) (params : parameter list) : parameter = + match param with + | ParameterVar sym -> + assert (params <> []); + ParameterApp (sym, params) + | ParameterApp _ -> + (* In a well-sorted grammar, only a variable can have higher + sort. Here, [param] must have higher sort, so [param] must + be a variable. This case cannot arise. *) + assert false + | ParameterAnonymous _ -> + (* Anonymous rules are eliminated early on. *) + assert false + +let rec subst_parameter env param : parameter = + match param with + | ParameterVar sym -> + subst_symbol env sym + | ParameterApp (sym, params) -> + assert (params <> []); + apply (subst_symbol env sym) (subst_parameters env params) + | ParameterAnonymous _ -> + (* Anonymous rules are eliminated early on. *) + assert false + +and subst_parameters env params = + List.map (subst_parameter env) params + +(* -------------------------------------------------------------------------- *) +(* -------------------------------------------------------------------------- *) + +(* For syntactic convenience, the rest of this file is a functor. *) + +module Run (G : sig + (* Expansion mode. *) + val mode: mode + (* Sort information. *) + val sorts: SortInference.sorts + (* The grammar [g] whose expansion is desired. *) + val g : grammar +end) = struct + +open G + +(* -------------------------------------------------------------------------- *) + +(* Determining the sort of a symbol or parameter. *) + +(* Be careful: these functions use the toplevel sort environment [sorts], + so they must not be used within a rule. (The sort environment would have + to be extended with information about the formal parameters.) *) + +let sort symbol = + try + StringMap.find (value symbol) sorts + with Not_found -> + assert false + +let sort param = + match param with + | ParameterVar sym -> + sort sym + | ParameterApp (_, params) -> + assert (params <> []); + (* An application always has sort [*]. *) + star + | ParameterAnonymous _ -> + assert false + +(* -------------------------------------------------------------------------- *) + +(* Looking up the [%attribute] declarations, looking for attributes attached + with a nonterminal symbol [nt]. This is used when we create a specialized + version of this symbol. *) + +(* We use an inefficient linear search, but that shouldn't be a problem. *) + +let global_attributes (nt : symbol) : attribute list = + let param = ParameterVar (unknown nt) in + List.concat (List.map (fun (params, attrs) -> + if List.exists (Parameters.equal param) params then attrs else [] + ) g.p_symbol_attributes) + +(* -------------------------------------------------------------------------- *) + +(* A queue keeps track of the graph vertices that have been discovered but + not yet visited. *) + +let enqueue, repeatedly = + let queue = Queue.create() in + let enqueue label = + Queue.add label queue + and repeatedly visit = + Misc.qiter visit queue + in + enqueue, repeatedly + +(* -------------------------------------------------------------------------- *) + +(* A hash table is used to mark the graph vertices that have been discovered. *) + +let mark, marked = + let module H = Hashtbl.Make(Label) in + let table = H.create 128 in + let mark label = + H.add table label () + and marked label = + H.mem table label + in + mark, marked + +(* -------------------------------------------------------------------------- *) + +(* The rules of the expanded grammar are gradually collected. *) + +let emit, rules = + let rules = ref StringMap.empty in + let emit rule = + assert (not (StringMap.mem rule.pr_nt !rules)); + rules := StringMap.add rule.pr_nt rule !rules + and rules() = + !rules + in + emit, rules + +(* -------------------------------------------------------------------------- *) + +(* On top of the function [mangle], we set up a mechanism that checks that + every (normalized) mangled name is unique. (Indeed, in principle, there + could be clashes, although in practice this is unlikely.) We must check + that every application of [mangle] to a *new* argument yields a *new* + (normalized) result. This is succinctly expressed by combining a claim + and a memoizer. *) + +let mangle : label -> nonterminal = + let ensure_fresh = Misc.new_claim() in + let module M = Memoize.MakeViaHashtbl(Label) in + M.memoize (fun label -> + let name = mangle label in + ensure_fresh (Misc.normalize name); + name + ) + +(* -------------------------------------------------------------------------- *) + +(* [recognize] receives an actual parameter [param] that makes sense in the + source namespace and transforms it into a parameter that makes sense in the + target namespace. This involves examining each application and + "recognizing" it as an application of a label to a sequence of residual + actual parameters, as explained next. All labels thus recognized are + enqueued. *) + +(* [recognize] governs how much specialization is performed. For instance, + [F(X, Y, Z)] could be recognized as: + + - an application of the symbol [F] to the residual arguments [X, Y, Z]. + Then, no specialization at all takes place. + + - an application of the symbol [F(X,Y,Z)] to no residual arguments. + Then, [F] is fully specialized for [X, Y, Z]. + + - in between these extremes, say, + an application of the symbol [F(X,_,Z)] to the residual argument [Y]. + Then, [F] is partially specialized. + + If there are any residual arguments, then they must be recursively + recognized. For instance, [F(X,G(Y),Z)] could be recognized as an + application of the symbol [F(X,_,Z)] to [G(Y)], which itself could + be recognized as an application of the symbol [G(Y)] to no residual + arguments. *) + +let rec recognize (param : parameter) : parameter = + (* [param] must have sort [star], in an appropriate sort environment. *) + match param with + | ParameterAnonymous _ -> + assert false + | ParameterVar _ -> + param + | ParameterApp (sym, ps) -> + assert (ps <> []); + let x = value sym in + (* This symbol is applied to at least one argument, so cannot be + a terminal symbol. It must be either a nonterminal symbol or + an (uninstantiated) formal parameter of the current rule. *) + (* Actually, in both modes, formal parameters of higher sort are + expanded away, so [sym] cannot be an uninstantiated parameter + of the current rule. It must be a nonterminal symbol. We can + therefore look up its sort in the toplevel environment [sorts]. *) + let inst, residuals = + match mode with + | ExpandAll -> + (* Expansion of all parameters. *) + let inst = List.map (fun p -> Some p) ps + and residuals = [] in + inst, residuals + | ExpandHigherSort -> + (* Expansion of only the parameters of higher sort. *) + let ss : sort list = domain (sort (ParameterVar sym)) in + assert (List.length ps = List.length ss); + let pss = List.combine ps ss in + let inst = + pss + |> List.map (fun (param, sort) -> + if sort = star then None else Some param) + in + let residuals = + pss + |> List.filter (fun (_, sort) -> sort = star) + |> List.map (fun (param, _) -> recognize param) + in + inst, residuals + in + let label = (x, inst) in + enqueue label; + let sym = mangle label in + Parameters.app (unknown sym) residuals + +(* -------------------------------------------------------------------------- *) + +(* The following functions take arguments in the source namespace and produce + results in the target namespace. *) + +let subst_parameter env param = + (* [param] must have sort [star], in an appropriate sort environment. *) + recognize (subst_parameter env param) + +let subst_producer env (id, param, attrs) = + let param = subst_parameter env param in + (id, param, attrs) + +let subst_producers env producers = + List.map (subst_producer env) producers + +let subst_branch env branch = + { branch with pr_producers = subst_producers env branch.pr_producers } + +let subst_branches env branches = + List.map (subst_branch env) branches + +(* -------------------------------------------------------------------------- *) + +(* A quick and dirty way of mapping a name to a fresh name. *) + +let freshen : string -> string = + let c = ref 0 in + fun x -> + Printf.sprintf "%s__menhir__%d" x (Misc.postincrement c) + +(* -------------------------------------------------------------------------- *) + +(* [instantiation_env] expects the formal parameters of a rule, [formals], and + an instantiation [inst] that dictates how this rule must be specialized. It + returns an environment [env] that can be used to perform specialization and + a list of residual formal parameters (those that are not specialized). *) + +let instantiation_env formals inst : env * symbol list = + assert (List.length formals = List.length inst); + let env, residuals = + List.fold_right2 (fun formal po (env, residuals) -> + let param, residuals = + match po with + | Some param -> + (* This formal parameter is instantiated. *) + param, residuals + | None -> + (* This formal parameter is not instantiated. *) + (* We would like to map it to itself. *) + (* However, we must in principle be a bit careful: if a toplevel + symbol by the same name as [formal] appears free in the codomain + of the environment that we are building, then we will run intro + trouble. We avoid this problem by systematically renaming every + formal parameter to a fresh unlikely name. *) + let formal = freshen formal in + ParameterVar (unknown formal), + formal :: residuals + in + Env.add formal param env, residuals + ) formals inst (Env.empty, []) + in + env, residuals + +(* -------------------------------------------------------------------------- *) + +(* [visit label] visits a vertex labeled [label] in the graph. This label is a + pair of a nonterminal symbol [nt] and an instantiation [inst]. Unless this + vertex has been visited already, we create a specialized copy of [nt] for + this instantiation. This involves a call to [subst_branches], which can + cause more vertices to be discovered and enqueued. *) + +(* The specialized symbol retains any attributes carried by the original + parameterized symbol. These attributes could be either attached with this + rule ([rule.pr_attributes]) or specified via an [%attribute] declaration. + We have to look up [%attribute] declarations now (as opposed to letting + [Drop] handle them) if this is a parameterized symbol, as the connection + between the original parameterized symbol and its specialized version is + evident here but is lost afterwards. *) + +let visit label = + if not (marked label) then begin + mark label; + let (nt, inst) = label in + let rule = StringMap.find nt g.p_rules in + let formals = rule.pr_parameters in + let env, residuals = instantiation_env formals inst in + emit { + rule with + pr_nt = mangle label; + pr_parameters = residuals; + pr_branches = subst_branches env rule.pr_branches; + pr_attributes = + (if formals = [] then [] else global_attributes nt) @ + rule.pr_attributes + } + end + +(* -------------------------------------------------------------------------- *) + +(* The entry points of the graph traversal include the nonterminal symbols of + sort [*]. (Not just the start symbols, as we haven't run the reachability + analysis, and the grammar may contain unreachable parts, which we still + want to expand.) Because a start symbol must have sort [*], this includes + the start symbols. *) + +let () = + StringMap.iter (fun nt prule -> + if prule.pr_parameters = [] then + let label = (nt, []) in + enqueue label + ) g.p_rules + +(* -------------------------------------------------------------------------- *) + +(* The parameters that appear in [%type] declarations and [%on_error_reduce] + declarations are also considered entry points. They have sort [*]. *) + +let subst_parameter param = + subst_parameter Env.empty param + +let subst_declaration (param, info) = + assert (sort param = star); + (subst_parameter param, info) + +let subst_declarations decls = + List.map subst_declaration decls + +(* -------------------------------------------------------------------------- *) + +(* An [%attribute] declaration for a parameter of sort [*] is treated as an + entry point. An [%attribute] declaration for a symbol of higher sort is not + regarded as an entry point, and at the end, is kept only if this symbol + still appears in the expanded grammar. *) + +(* This is done in two passes over the list of [%attribute] declarations, + named [thingify] and [unthingify]. The first pass runs as part of the + discovery of entry points, before the graph traversal. The second pass runs + after the graph traversal is complete. *) + +type thing = + | TargetParameterOfSortStar of parameter + | SourceParameterOfHigherSort of parameter + +let thingify_parameter param : thing = + if sort param = star then + TargetParameterOfSortStar (subst_parameter param) + else + SourceParameterOfHigherSort param + +let thingify_attribute_declaration (params, attrs) = + (List.map thingify_parameter params, attrs) + +let thingify_attribute_declarations decls = + List.map thingify_attribute_declaration decls + +let unthingify_parameter rules thing = + match thing with + | TargetParameterOfSortStar param -> + (* This parameter has sort [star]. Keep it. *) + Some param + | SourceParameterOfHigherSort param -> + (* This parameter has higher sort. It must be a symbol. + Keep it if it still appears in the expanded grammar. *) + let symbol = value (Parameters.unvar param) in + if StringMap.mem symbol rules then Some param else None + +let unthingify_attribute_declaration rules (params, attrs) = + (Misc.map_opt (unthingify_parameter rules) params, attrs) + +let unthingify_attribute_declarations rules decls = + List.map (unthingify_attribute_declaration rules) decls + +(* -------------------------------------------------------------------------- *) + +(* Put everything together a construct a new grammar. *) + +let g = + (* Discovery of entry points. *) + let p_types = subst_declarations g.p_types + and p_on_error_reduce = subst_declarations g.p_on_error_reduce + and things = thingify_attribute_declarations g.p_symbol_attributes in + (* Graph traversal. *) + repeatedly visit; + (* Construction of the new grammar. *) + let p_rules = rules() in + let p_symbol_attributes = unthingify_attribute_declarations p_rules things in + { g with + p_types; + p_on_error_reduce; + p_symbol_attributes; + p_rules } + +end (* of the functor *) + +(* -------------------------------------------------------------------------- *) + +(* Re-package the above functor as a function. *) + +let expand mode sorts g = + let module G = Run(struct + let mode = mode + let sorts = sorts + let g = g + end) in + G.g diff -Nru menhir-20171013/src/SelectiveExpansion.mli menhir-20171206/src/SelectiveExpansion.mli --- menhir-20171013/src/SelectiveExpansion.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/SelectiveExpansion.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,38 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +open Syntax +open SortInference + +(* [expand sorts g] expands away some or all of the parameterized nonterminal + symbols in the grammar [g], producing a new grammar. [sorts] is the sort + environment produced by [SortInference]. *) + +(* The mode [ExpandHigherSort] causes a partial expansion: only the parameters + of higher sort (i.e., of sort other than [*]) are expanded away. This mode + is safe, in the sense that expansion always terminates. A proof sketch is + as follows: 1- an application always has sort [*]; 2- therefore, only a + variable can have higher sort; 3- therefore, only a finite number of terms + can appear during expansion. *) + +(* The mode [ExpandAll] causes a complete expansion: all parameters are + expanded away. This process is potentially nonterminating. One must first + run the termination test in [CheckSafeParameterizedGrammar] (which itself + is applicable only after the parameters of higher sort have been expanded + away). *) + +type mode = + | ExpandHigherSort + | ExpandAll + +val expand: mode -> sorts -> grammar -> grammar diff -Nru menhir-20171013/src/SortInference.ml menhir-20171206/src/SortInference.ml --- menhir-20171013/src/SortInference.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/SortInference.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,269 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +let value = Positions.value +let position = Positions.position +let error = Error.error +open Syntax +open SortUnification + +(* -------------------------------------------------------------------------- *) + +(* Error handling. *) + +(* In [check_arity], in principle, [arity1] is the expected arity and [arity2] + is the actual arity. This distinction does not make much sense, though, as + we do not know which is wrong, the declaration site or the use site. So, we + display a neutral error message. *) + +let check_arity sym arity1 arity2 = + let plural = max arity1 arity2 > 1 in + if arity1 <> arity2 then + error [position sym] + "does the symbol \"%s\" expect %d or %d argument%s?" + (value sym) + (min arity1 arity2) + (max arity1 arity2) + (if plural then "s" else "") + +(* This variant of [unify] is used when no unification error can arise. *) + +let unify_cannot_fail sort1 sort2 = + try + unify sort1 sort2 + with + | Unify _ | Occurs _ -> + (* If the caller is right, this unification step cannot fail! *) + assert false + +(* In [unify], in principle, [sort1] is the expected sort and [sort2] is the + actual sort. Again, this distinction does not make much sense, so we + display a neutral error message. *) + +let unify sym sort1 sort2 = + try + unify sort1 sort2 + with + | Unify (v1, v2) -> + let print v = print (decode v) in + error [position sym] + "how is the symbol \"%s\" parameterized?\n\ + It is used at sorts %s and %s.\n\ + The sort %s is not compatible with the sort %s." + (value sym) (print sort1) (print sort2) (print v1) (print v2) + | Occurs (v1, v2) -> + let print v = print (decode v) in + error [position sym] + "how is the symbol \"%s\" parameterized?\n\ + It is used at sorts %s and %s.\n\ + The sort %s cannot be unified with the sort %s." + (value sym) (print sort1) (print sort2) (print v1) (print v2) + +(* -------------------------------------------------------------------------- *) + +(* An environment maps (terminal and nonterminal) symbols to unification + variables. *) + +type symbol = + string + +module Env = + StringMap + +type env = + variable Env.t + +let find x env : variable = + try + Env.find x env + with Not_found -> + assert false (* unbound terminal or nonterminal symbol *) + +let extend env (xvs : (symbol * variable) list) = + List.fold_left (fun env (x, v) -> + Env.add x v env + ) env xvs + +(* -------------------------------------------------------------------------- *) + +(* [allocate xs] allocates a fresh unification variable [v] for every element + [x] of the list [xs]. It returns the lists [xvs] and [vs]. *) + +let allocate (xs : 'a list) : ('a * variable) list * variable list = + let xvs = List.map (fun x -> x, fresh()) xs in + let vs = List.map snd xvs in + xvs, vs + +(* -------------------------------------------------------------------------- *) + +(* [check_parameter env param expected] checks that the parameter [param] has + sort [expected]. A parameter is either a symbol or an application of a + symbol to a number of parameters. Every application is total -- the + language does not have partial applications. The sort of every application + is [star], but the sort of a variable is unrestricted. *) + +let rec check_parameter env (param : parameter) (expected : variable) = + match param with + | ParameterVar sym -> + let x = value sym in + unify sym expected (find x env) + | ParameterApp (sym, actuals) -> + let x = value sym in + (* This application has sort [star]. *) + unify sym expected star; + (* Retrieve the expected sort of each parameter. Two cases arise: if [x] + has already been assigned an arrow sort, then we can retrieve its + domain, which gives us the expected sort of each actual parameter; + otherwise, we just make up a fresh arrow sort of appropriate arity. + We could avoid this case distinction and always use the latter + method, but the former method, when applicable, yields better error + messages. If [sym] is a toplevel (nonterminal or terminal) symbol, + then we will be in the first case, as we have been careful to + initially assign an arrow sort of appropriate arity to each such + symbol. *) + let v = find x env in + let expected = + match domain v with + | Some expected -> + check_arity sym (List.length expected) (List.length actuals); + expected + | None -> + let _, expected = allocate actuals in + unify_cannot_fail v (arrow expected); + expected + in + (* Check the sort of each actual parameter. *) + List.iter2 (check_parameter env) actuals expected + | ParameterAnonymous _ -> + (* Anonymous rules have been eliminated already. *) + assert false + +(* -------------------------------------------------------------------------- *) + +(* The following functions respectively check that a producer, a branch, + a rule, and a grammar are well-sorted under an environment [env]. *) + +let check_producer env (producer : producer) = + let (_, param, _) = producer in + (* A producer must have sort [star]. *) + check_parameter env param star + +let check_branch env (branch : parameterized_branch) = + List.iter (check_producer env) branch.pr_producers + +let enter_rule env (nt : symbol) (rule : parameterized_rule) : env = + + (* For each formal parameter, allocate a fresh variable. *) + let formals, domain = allocate rule.pr_parameters in + + (* Connect these variables with the sort of the symbol [nt]. *) + (* Because it is performed first, this particular unification + cannot fail. *) + unify_cannot_fail (find nt env) (arrow domain); + + (* Extend the environment. *) + extend env formals + +let check_rule env (nt : symbol) (rule : parameterized_rule) = + + (* Extend the environment within this rule. *) + let env = enter_rule env nt rule in + + (* Check each branch in this extended environment. *) + List.iter (check_branch env) rule.pr_branches + +let check_grammar env g = + + (* Each rule must be well-sorted. *) + StringMap.iter (check_rule env) g.p_rules; + + (* The start symbols must have sort [star]. *) + StringMap.iter (fun nt position -> + let sym = Positions.with_pos position nt in + unify sym star (find nt env) + ) g.p_start_symbols; + + (* Every symbol that appears in a [%type] declaration must have sort [star]. *) + List.iter (fun (param, _) -> + check_parameter env param star + ) g.p_types; + + (* Same rule for [%on_error_reduce] declarations. *) + List.iter (fun (param, _) -> + check_parameter env param star + ) g.p_on_error_reduce; + + (* The symbols that appear in [%attribute] declarations must be well-sorted. + Their sort is not necessarily [star]: it is legal to attach an attribute + with a parameterized symbol. *) + List.iter (fun (params, _) -> + List.iter (fun param -> + check_parameter env param (fresh()) + ) params + ) g.p_symbol_attributes + +(* -------------------------------------------------------------------------- *) + +type sorts = + GroundSort.sort Env.t + +let infer (g : grammar) : sorts = + + (* For each (terminal or nonterminal) symbol, allocate a unification + variable. The terminal symbols have sort [star], so we can use + this particular variable. *) + + let env = + StringMap.fold (fun tok _ env -> + Env.add tok star env + ) g.p_tokens Env.empty + in + let env = + Env.add "error" star env + in + + let env = + StringMap.fold (fun nt rule env -> + let env = Env.add nt (fresh()) env in + (* The following line unifies the sort of [nt] with an arrow of + appropriate arity. It cannot fail. This strategy should lead + to slightly better unification error messages. *) + let _ : env = enter_rule env nt rule in + env + ) g.p_rules env + in + + (* Impose sort equality constraints. *) + + check_grammar env g; + + (* Decode the environment, so our user doesn't have to deal with + unification variables. *) + + let env = Env.map decode env in + + (* Ground any unassigned sort variables. (These should occur only in + unreachable parts of the grammar.) This guarantees that the user + does not have to deal with sort variables. *) + + let env = Env.map ground env in + + (* At log level 3, display the inferred sort of every symbol. *) + + Error.logG 3 (fun f -> + Env.iter (fun x gsort -> + Printf.fprintf f "%s :: %s\n" x (print (unground gsort)) + ) env + ); + + env diff -Nru menhir-20171013/src/SortInference.mli menhir-20171206/src/SortInference.mli --- menhir-20171013/src/SortInference.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/SortInference.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,23 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +open Syntax +open GroundSort + +(* [infer_grammar g] performs sort inference for the grammar [g], + rejecting the grammar if it is ill-sorted. It returns a map of + (terminal and nonterminal) symbols to ground sorts. *) + +type sorts = sort StringMap.t + +val infer: grammar -> sorts diff -Nru menhir-20171013/src/SortUnification.ml menhir-20171206/src/SortUnification.ml --- menhir-20171013/src/SortUnification.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/SortUnification.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,142 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* This module implements sort inference. *) + +(* -------------------------------------------------------------------------- *) + +(* The syntax of sorts is: + + sort ::= (sort, ..., sort) -> * + + where the arity (the number of sorts on the left-hand side of the arrow) + can be zero. *) + +module S = struct + + type 'a structure = + | Arrow of 'a list + + let map f (Arrow xs) = + Arrow (List.map f xs) + + let iter f (Arrow xs) = + List.iter f xs + + exception Iter2 + + let iter2 f (Arrow xs1) (Arrow xs2) = + let n1 = List.length xs1 + and n2 = List.length xs2 in + if n1 = n2 then + List.iter2 f xs1 xs2 + else + raise Iter2 + +end + +include S + +(* -------------------------------------------------------------------------- *) + +(* Instantiate the unification algorithm with the above signature. *) + +include Unifier.Make(S) + +type sort = term = + | TVar of int + | TNode of sort structure + +(* -------------------------------------------------------------------------- *) + +(* Sort constructors. *) + +let arrow (args : variable list) : variable = + fresh (Some (Arrow args)) + +let star : variable = + arrow [] + +let fresh () = + fresh None + +(* Sort accessors. *) + +let domain (x : variable) : variable list option = + match structure x with + | Some (Arrow xs) -> + Some xs + | None -> + None + +(* -------------------------------------------------------------------------- *) + +(* Converting between sorts and ground sorts. *) + +let rec ground s = + match s with + | TVar _ -> + (* All variables are replaced with [*]. *) + GroundSort.GArrow [] + | TNode (Arrow ss) -> + GroundSort.GArrow (List.map ground ss) + +let rec unground (GroundSort.GArrow ss) = + TNode (Arrow (List.map unground ss)) + +(* -------------------------------------------------------------------------- *) + +(* A name generator for unification variables. *) + +let make_gensym () : unit -> string = + let c = ref 0 in + let gensym () = + let n = Misc.postincrement c in + Printf.sprintf "%c%s" + (char_of_int (Char.code 'a' + n mod 26)) + (let d = n / 26 in if d = 0 then "" else string_of_int d) + in + gensym + +(* A memoized name generator. *) + +let make_name () : int -> string = + let gensym = make_gensym() in + Memoize.Int.memoize (fun _x -> gensym()) + +(* -------------------------------------------------------------------------- *) + +(* A printer. *) + +let rec print name (b : Buffer.t) (sort : sort) = + match sort with + | TVar x -> + Printf.bprintf b "%s" (name x) + | TNode (S.Arrow []) -> + Printf.bprintf b "*" + | TNode (S.Arrow (sort :: sorts)) -> + (* Always parenthesize the domain, so there is no ambiguity. *) + Printf.bprintf b "(%a%a) -> *" + (print name) sort + (print_comma_sorts name) sorts + +and print_comma_sorts name b sorts = + List.iter (print_comma_sort name b) sorts + +and print_comma_sort name b sort = + Printf.bprintf b ", %a" (print name) sort + +let print sort : string = + let b = Buffer.create 32 in + print (make_name()) b sort; + Buffer.contents b diff -Nru menhir-20171013/src/SortUnification.mli menhir-20171206/src/SortUnification.mli --- menhir-20171013/src/SortUnification.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/SortUnification.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,64 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* This module implements sort inference. *) + +(* -------------------------------------------------------------------------- *) + +(* The syntax of sorts is: + + sort ::= (sort, ..., sort) -> * + + where the arity (the number of sorts on the left-hand side of the arrow) + can be zero. See [GroundSort]. *) + +type 'a structure = + | Arrow of 'a list + +type sort = + | TVar of int + | TNode of sort structure + +(* -------------------------------------------------------------------------- *) + +(* Sort unification. *) + +type variable + +val star: variable +val arrow: variable list -> variable +val fresh: unit -> variable + +(* [domain] is the opposite of [arrow]. If [x] has been unified with an arrow, + then [domain x] returns its domain. Otherwise, it returns [None]. Use with + caution. *) +val domain: variable -> variable list option + +exception Unify of variable * variable +exception Occurs of variable * variable +val unify: variable -> variable -> unit + +(* Once unification is over, a unification variable can be decoded as a sort. *) + +val decode: variable -> sort + +(* Grounding a sort replaces all sort variables with the sort [*]. *) + +val ground: sort -> GroundSort.sort +val unground: GroundSort.sort -> sort + +(* -------------------------------------------------------------------------- *) + +(* A sort can be printed. *) + +val print: sort -> string diff -Nru menhir-20171013/src/StaticVersion.ml menhir-20171206/src/StaticVersion.ml --- menhir-20171013/src/StaticVersion.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/StaticVersion.ml 2017-12-06 20:41:09.000000000 +0000 @@ -1 +1 @@ -let require_20171013 = () +let require_20171206 = () diff -Nru menhir-20171013/src/StaticVersion.mli menhir-20171206/src/StaticVersion.mli --- menhir-20171013/src/StaticVersion.mli 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/StaticVersion.mli 2017-12-06 20:41:09.000000000 +0000 @@ -1 +1 @@ -val require_20171013 : unit +val require_20171206 : unit diff -Nru menhir-20171013/src/syntax.ml menhir-20171206/src/syntax.ml --- menhir-20171013/src/syntax.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/syntax.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,266 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* The type [partial_grammar] describes the abstract syntax that is produced + by the parsers (yacc-parser and fancy-parser). + + The type [grammar] describes the abstract syntax that is obtained after one + or more partial grammars are joined (see [PartialGrammar]). It differs in + that declarations are organized in a more useful way and a number of + well-formedness checks have been performed. *) + +(* ------------------------------------------------------------------------ *) + +(* Terminals and nonterminal symbols are strings. Identifiers + (which are used to refer to a symbol's semantic value) are + strings. A file name is a string. *) + +type terminal = + string + +type nonterminal = + string + +type symbol = + string + +type identifier = + string + +type filename = + string + +(* ------------------------------------------------------------------------ *) + +(* A postlude is a source file fragment. *) + +type postlude = + Stretch.t + +(* ------------------------------------------------------------------------ *) + +(* OCaml semantic actions are represented as stretches. *) + +type action = + Action.t + +(* ------------------------------------------------------------------------ *) + +(* An attribute consists of an attribute name and an attribute payload. The + payload is an uninterpreted stretch of source text. *) + +type attribute = + string Positions.located * Stretch.t + +type attributes = + attribute list + +(* Attributes allow the user to annotate the grammar with information that is + ignored by Menhir, but can be exploited by other tools, via the SDK. *) + +(* Attributes can be attached in the following places: + + - with the grammar: %[@bar ...] + - with a terminal symbol: %token FOO [@bar ...] + - with a rule: foo(X) [@bar ...]: ... + - with a producer: e = foo(quux) [@bar ...] + - with an arbitrary symbol: %attribute FOO foo(quux) [@bar ...] + + After expanding away parameterized nonterminal symbols, things become + a bit simpler, as %attribute declarations are desugared away. *) + +(* ------------------------------------------------------------------------ *) + +(* Information about tokens. (Only after joining.) *) + +type token_associativity = + LeftAssoc + | RightAssoc + | NonAssoc + | UndefinedAssoc + +type precedence_level = + UndefinedPrecedence + + (* Items are incomparable when they originate in different files. A + value of type [input_file] is used to record an item's origin. The + positions allow locating certain warnings. *) + + | PrecedenceLevel of InputFile.input_file * int * Lexing.position * Lexing.position + +type token_properties = + { + tk_filename : filename; + tk_ocamltype : Stretch.ocamltype option; + tk_position : Positions.t; + tk_attributes : attributes; + mutable tk_associativity : token_associativity; + mutable tk_precedence : precedence_level; + mutable tk_is_declared : bool; + } + +(* ------------------------------------------------------------------------ *) + +(* A [%prec] annotation is optional. A production can carry at most one. + If there is one, it is a symbol name. See [ParserAux]. *) + +type branch_prec_annotation = + symbol Positions.located option + +(* ------------------------------------------------------------------------ *) + +(* A "production level" is used to solve reduce/reduce conflicts. It reflects + which production appears first in the grammar. See [ParserAux]. *) + +type branch_production_level = + | ProductionLevel of InputFile.input_file * int + +(* ------------------------------------------------------------------------ *) + +(* A level is attached to every [%on_error_reduce] declaration. It is used + to decide what to do when several such declarations are applicable in a + single state. *) + +type on_error_reduce_level = + branch_production_level (* we re-use the above type, to save code *) + +(* ------------------------------------------------------------------------ *) + +(* A parameter is either just a symbol or an application of a symbol to a + nonempty tuple of parameters. Before anonymous rules have been eliminated, + it can also be an anonymous rule, represented as a list of branches. *) + +type parameter = + | ParameterVar of symbol Positions.located + | ParameterApp of symbol Positions.located * parameters + | ParameterAnonymous of parameterized_branch list Positions.located + +and parameters = + parameter list + +(* ------------------------------------------------------------------------ *) + +(* A producer is a pair of identifier and a parameter. In concrete syntax, + it could be [e = expr], for instance. It carries a number of attributes. *) + +and producer = + identifier Positions.located * parameter * attributes + +(* ------------------------------------------------------------------------ *) + +(* A branch contains a series of producers and a semantic action. *) + +and parameterized_branch = + { + pr_branch_position : Positions.t; + pr_producers : producer list; + pr_action : action; + pr_branch_prec_annotation : branch_prec_annotation; + pr_branch_production_level : branch_production_level + } + +(* ------------------------------------------------------------------------ *) + +(* A rule has a header and several branches. *) + +type parameterized_rule = + { + pr_public_flag : bool; + pr_inline_flag : bool; + pr_nt : nonterminal; + pr_positions : Positions.t list; + pr_attributes : attributes; + pr_parameters : symbol list; + pr_branches : parameterized_branch list; + } + +(* ------------------------------------------------------------------------ *) + +(* A declaration. (Only before joining.) *) + +type declaration = + + (* Raw OCaml code. *) + + | DCode of Stretch.t + + (* Raw OCaml functor parameter. *) + + | DParameter of Stretch.ocamltype (* really a stretch *) + + (* Terminal symbol (token) declaration. *) + + | DToken of Stretch.ocamltype option * terminal * attributes + + (* Start symbol declaration. *) + + | DStart of nonterminal + + (* Priority and associativity declaration. *) + + | DTokenProperties of terminal * token_associativity * precedence_level + + (* Type declaration. *) + + | DType of Stretch.ocamltype * parameter + + (* Grammar-level attribute declaration. *) + + | DGrammarAttribute of attribute + + (* Attributes shared among multiple symbols, i.e., [%attribute]. *) + + | DSymbolAttributes of parameter list * attributes + + (* On-error-reduce declaration. *) + + | DOnErrorReduce of parameter * on_error_reduce_level + +(* ------------------------------------------------------------------------ *) + +(* A partial grammar. (Only before joining.) *) + +type partial_grammar = + { + pg_filename : filename; + pg_postlude : postlude option; + pg_declarations : declaration Positions.located list; + pg_rules : parameterized_rule list; + } + +(* ------------------------------------------------------------------------ *) + +(* A grammar. (Only after joining.) *) + +(* The differences with partial grammars (above) are as follows: + 1. the file name is gone (there could be several file names, anyway). + 2. there can be several postludes. + 3. declarations are organized by kind: preludes, postludes, + functor %parameters, %start symbols, %types, %tokens, %on_error_reduce, + grammar attributes, %attributes. + 4. rules are stored in a map, indexed by symbol names, instead of a list. + *) + +type grammar = + { + p_preludes : Stretch.t list; + p_postludes : postlude list; + p_parameters : Stretch.t list; + p_start_symbols : Positions.t StringMap.t; + p_types : (parameter * Stretch.ocamltype Positions.located) list; + p_tokens : token_properties StringMap.t; + p_on_error_reduce : (parameter * on_error_reduce_level) list; + p_grammar_attributes : attributes; + p_symbol_attributes : (parameter list * attributes) list; + p_rules : parameterized_rule StringMap.t; + } diff -Nru menhir-20171013/src/syntax.mli menhir-20171206/src/syntax.mli --- menhir-20171013/src/syntax.mli 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/syntax.mli 1970-01-01 00:00:00.000000000 +0000 @@ -1,266 +0,0 @@ -(******************************************************************************) -(* *) -(* Menhir *) -(* *) -(* François Pottier, Inria Paris *) -(* Yann Régis-Gianas, PPS, Université Paris Diderot *) -(* *) -(* Copyright Inria. All rights reserved. This file is distributed under the *) -(* terms of the GNU General Public License version 2, as described in the *) -(* file LICENSE. *) -(* *) -(******************************************************************************) - -(* The type [partial_grammar] describes the abstract syntax that is produced - by the parsers (yacc-parser and fancy-parser). - - The type [grammar] describes the abstract syntax that is obtained after one - or more partial grammars are joined (see [PartialGrammar]). It differs in - that declarations are organized in a more useful way and a number of - well-formedness checks have been performed. *) - -(* ------------------------------------------------------------------------ *) - -(* Terminals and nonterminal symbols are strings. Identifiers - (which are used to refer to a symbol's semantic value) are - strings. A file name is a string. *) - -type terminal = - string - -type nonterminal = - string - -type symbol = - string - -type identifier = - string - -type filename = - string - -(* ------------------------------------------------------------------------ *) - -(* A postlude is a source file fragment. *) - -type postlude = - Stretch.t - -(* ------------------------------------------------------------------------ *) - -(* OCaml semantic actions are represented as stretches. *) - -type action = - Action.t - -(* ------------------------------------------------------------------------ *) - -(* An attribute consists of an attribute name and an attribute payload. The - payload is an uninterpreted stretch of source text. *) - -type attribute = - string Positions.located * Stretch.t - -type attributes = - attribute list - -(* Attributes allow the user to annotate the grammar with information that is - ignored by Menhir, but can be exploited by other tools, via the SDK. *) - -(* Attributes can be attached in the following places: - - - with the grammar: %[@bar ...] - - with a terminal symbol: %token FOO [@bar ...] - - with a rule: foo(X) [@bar ...]: ... - - with a producer: e = foo(quux) [@bar ...] - - with an arbitrary symbol: %attribute FOO foo(quux) [@bar ...] - - After expanding away parameterized nonterminal symbols, things become - a bit simpler, as %attribute declarations are desugared away. *) - -(* ------------------------------------------------------------------------ *) - -(* Information about tokens. (Only after joining.) *) - -type token_associativity = - LeftAssoc - | RightAssoc - | NonAssoc - | UndefinedAssoc - -type precedence_level = - UndefinedPrecedence - - (* Items are incomparable when they originate in different files. A - value of type [input_file] is used to record an item's origin. The - positions allow locating certain warnings. *) - - | PrecedenceLevel of InputFile.input_file * int * Lexing.position * Lexing.position - -type token_properties = - { - tk_filename : filename; - tk_ocamltype : Stretch.ocamltype option; - tk_position : Positions.t; - tk_attributes : attributes; - mutable tk_associativity : token_associativity; - mutable tk_precedence : precedence_level; - mutable tk_is_declared : bool; - } - -(* ------------------------------------------------------------------------ *) - -(* A [%prec] annotation is optional. A production can carry at most one. - If there is one, it is a symbol name. See [ParserAux]. *) - -type branch_prec_annotation = - symbol Positions.located option - -(* ------------------------------------------------------------------------ *) - -(* A "production level" is used to solve reduce/reduce conflicts. It reflects - which production appears first in the grammar. See [ParserAux]. *) - -type branch_production_level = - | ProductionLevel of InputFile.input_file * int - -(* ------------------------------------------------------------------------ *) - -(* A level is attached to every [%on_error_reduce] declaration. It is used - to decide what to do when several such declarations are applicable in a - single state. *) - -type on_error_reduce_level = - branch_production_level (* we re-use the above type, to save code *) - -(* ------------------------------------------------------------------------ *) - -(* A parameter is either just a symbol or an application of a symbol to a - nonempty tuple of parameters. Before anonymous rules have been eliminated, - it can also be an anonymous rule, represented as a list of branches. *) - -type parameter = - | ParameterVar of symbol Positions.located - | ParameterApp of symbol Positions.located * parameters - | ParameterAnonymous of parameterized_branch list Positions.located - -and parameters = - parameter list - -(* ------------------------------------------------------------------------ *) - -(* A producer is a pair of identifier and a parameter. In concrete syntax, - it could be [e = expr], for instance. It carries a number of attributes. *) - -and producer = - identifier Positions.located * parameter * attributes - -(* ------------------------------------------------------------------------ *) - -(* A branch contains a series of producers and a semantic action. *) - -and parameterized_branch = - { - pr_branch_position : Positions.t; - pr_producers : producer list; - pr_action : action; - pr_branch_prec_annotation : branch_prec_annotation; - pr_branch_production_level : branch_production_level - } - -(* ------------------------------------------------------------------------ *) - -(* A rule has a header and several branches. *) - -type parameterized_rule = - { - pr_public_flag : bool; - pr_inline_flag : bool; - pr_nt : nonterminal; - pr_positions : Positions.t list; - pr_attributes : attributes; - pr_parameters : symbol list; - pr_branches : parameterized_branch list; - } - -(* ------------------------------------------------------------------------ *) - -(* A declaration. (Only before joining.) *) - -type declaration = - - (* Raw OCaml code. *) - - | DCode of Stretch.t - - (* Raw OCaml functor parameter. *) - - | DParameter of Stretch.ocamltype (* really a stretch *) - - (* Terminal symbol (token) declaration. *) - - | DToken of Stretch.ocamltype option * terminal * attributes - - (* Start symbol declaration. *) - - | DStart of nonterminal - - (* Priority and associativity declaration. *) - - | DTokenProperties of terminal * token_associativity * precedence_level - - (* Type declaration. *) - - | DType of Stretch.ocamltype * parameter - - (* Grammar-level attribute declaration. *) - - | DGrammarAttribute of attribute - - (* Attributes shared among multiple symbols, i.e., [%attribute]. *) - - | DSymbolAttributes of parameter list * attributes - - (* On-error-reduce declaration. *) - - | DOnErrorReduce of parameter * on_error_reduce_level - -(* ------------------------------------------------------------------------ *) - -(* A partial grammar. (Only before joining.) *) - -type partial_grammar = - { - pg_filename : filename; - pg_postlude : postlude option; - pg_declarations : declaration Positions.located list; - pg_rules : parameterized_rule list; - } - -(* ------------------------------------------------------------------------ *) - -(* A grammar. (Only after joining.) *) - -(* The differences with partial grammars (above) are as follows: - 1. the file name is gone (there could be several file names, anyway). - 2. there can be several postludes. - 3. declarations are organized by kind: preludes, postludes, - functor %parameters, %start symbols, %types, %tokens, %on_error_reduce, - grammar attributes, %attributes. - 4. rules are stored in a map, indexed by symbol names, instead of a list. - *) - -type grammar = - { - p_preludes : Stretch.t list; - p_postludes : postlude list; - p_parameters : Stretch.t list; - p_start_symbols : Positions.t StringMap.t; - p_types : (parameter * Stretch.ocamltype Positions.located) list; - p_tokens : token_properties StringMap.t; - p_on_error_reduce : (parameter * on_error_reduce_level) list; - p_grammar_attributes : attributes; - p_symbol_attributes : (parameter list * attributes) list; - p_rules : parameterized_rule StringMap.t; - } diff -Nru menhir-20171013/src/Unifier.ml menhir-20171206/src/Unifier.ml --- menhir-20171013/src/Unifier.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/Unifier.ml 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,195 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* This module provides a simple-minded implementation of first-order + unification over an arbitrary signature. *) + +(* -------------------------------------------------------------------------- *) + +(* The signature must be described by the client, as follows. *) + +module type STRUCTURE = sig + + (* The type ['a structure] should be understood as a type of shallow terms + whose leaves have type ['a]. *) + type 'a structure + + val map: ('a -> 'b) -> 'a structure -> 'b structure + + val iter: ('a -> unit) -> 'a structure -> unit + + (* [iter2] fails if the head constructors differ. *) + exception Iter2 + val iter2: ('a -> 'b -> unit) -> 'a structure -> 'b structure -> unit + +end + +(* -------------------------------------------------------------------------- *) + +(* The unifier. *) + +module Make (S : STRUCTURE) = struct + +type 'a structure = 'a S.structure + +(* The data structure maintained by the unifier is as follows. *) + +(* A unifier variable is a point of the union-find algorithm. *) + +type variable = + descriptor UnionFind.point + +and descriptor = { + + (* Every equivalence class carries a globally unique identifier. When + a new equivalence class is created, a fresh identifier is chosen, + and when two classes are merged, one of the two identifiers is kept. + This identifier can be used as a key in a hash table. One should be + aware, though, that identifiers are stable only as long as no unions + are performed. *) + + id : int; + + (* Every equivalence class carries a structure, which is either [None], + which means that the variable is just that, a variable; or [Some t], + which means that the variable represents (has been equated with) the + term [t]. *) + + structure : variable structure option; + + (* Every equivalence class carries a mutable mark, which is used only by the + occurs check. We could also remove this field altogether and use a + separate hash table, where [id]s serve as keys, but this should be + faster. The occurs check is performed eagerly, so this could matter. *) + + mutable mark : Mark.t; + +} + +(* -------------------------------------------------------------------------- *) + +(* Accessors. *) + +let id v = + (UnionFind.get v).id + +let structure v = + (UnionFind.get v).structure + +(* -------------------------------------------------------------------------- *) + +(* [fresh] creates a fresh variable with specified structure. *) + +let fresh = + let c = ref 0 in + fun structure -> + let id = Misc.postincrement c in + let mark = Mark.none in + UnionFind.fresh { id; structure; mark } + +(* -------------------------------------------------------------------------- *) + +(* [occurs_check x y] checks that [x] does not occur within [y]. *) + +exception Occurs of variable * variable + +let occurs_check x y = + (* Generate a fresh color for this particular traversal. *) + let black = Mark.fresh () in + (* The traversal code -- a depth-first search. *) + let rec visit z = + let desc = UnionFind.get z in + if not (Mark.same desc.mark black) then begin + desc.mark <- black; + (* We are looking for [x]. *) + if UnionFind.equivalent x z then + raise (Occurs (x, y)) + else + Option.iter (S.iter visit) desc.structure + end + in + (* The root is [y]. *) + visit y + +(* -------------------------------------------------------------------------- *) + +(* The internal function [unify v1 v2] equates the variables [v1] and [v2] and + propagates the consequences of this equation until a cycle is detected, an + inconsistency is found, or a solved form is reached. The exceptions that + can be raised are [Occurs] and [S.Iter2]. *) + +let rec unify (v1 : variable) (v2 : variable) : unit = + if not (UnionFind.equivalent v1 v2) then begin + let desc1 = UnionFind.get v1 + and desc2 = UnionFind.get v2 in + (* Unify the two descriptors. *) + let desc = + match desc1.structure, desc2.structure with + | None, None -> + (* variable/variable *) + desc1 + | None, Some _ -> + (* variable/term *) + occurs_check v1 v2; + desc2 + | Some _, None -> + (* term/variable *) + occurs_check v2 v1; + desc1 + | Some s1, Some s2 -> + (* term/term *) + S.iter2 unify s1 s2; + { desc1 with structure = Some s1 } + in + (* Merge the equivalence classes. Do this last, so we get more meaningful + output if the recursive call (above) fails and we have to print the + two terms. *) + UnionFind.union v1 v2; + UnionFind.set v1 desc + end + +(* -------------------------------------------------------------------------- *) + +(* The public version of [unify]. *) + +exception Unify of variable * variable + +let unify v1 v2 = + try + unify v1 v2 + with S.Iter2 -> + raise (Unify (v1, v2)) + +(* -------------------------------------------------------------------------- *) + +(* Decoding an acyclic graph as a deep term. *) + +(* This is a simple-minded version of the code, where sharing is lost. Its + cost could be exponential if there is a lot of sharing. In practice, its + use is usually appropriate, especially in the scenario where the term is + meant to be printed as a tree. *) + +type term = + | TVar of int + | TNode of term structure + +let rec decode (v : variable) : term = + match structure v with + | None -> + TVar (id v) + | Some t -> + TNode (S.map decode t) + +(* -------------------------------------------------------------------------- *) + +end diff -Nru menhir-20171013/src/Unifier.mli menhir-20171206/src/Unifier.mli --- menhir-20171013/src/Unifier.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20171206/src/Unifier.mli 2017-12-06 20:41:09.000000000 +0000 @@ -0,0 +1,75 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* This module provides a simple-minded implementation of first-order + unification over an arbitrary signature. *) + +(* -------------------------------------------------------------------------- *) + +(* The signature must be described by the client, as follows. *) + +module type STRUCTURE = sig + + (* The type ['a structure] should be understood as a type of shallow terms + whose leaves have type ['a]. *) + type 'a structure + + val map: ('a -> 'b) -> 'a structure -> 'b structure + + val iter: ('a -> unit) -> 'a structure -> unit + + (* [iter2] fails if the head constructors differ. *) + exception Iter2 + val iter2: ('a -> 'b -> unit) -> 'a structure -> 'b structure -> unit + +end + +(* -------------------------------------------------------------------------- *) + +(* The unifier. *) + +module Make (S : STRUCTURE) : sig + + (* The type of unification variables. *) + + type variable + + (* [fresh s] creates a fresh variable that carries the structure [s]. *) + + val fresh: variable S.structure option -> variable + + (* [structure x] returns the structure (currently) carried by variable [x]. *) + + val structure: variable -> variable S.structure option + + (* [unify x y] attempts to unify the terms represented by the variables + [x] and [y]. The creation of cycles is not permitted; an eager occurs + check rules them out. *) + + exception Unify of variable * variable + exception Occurs of variable * variable + val unify: variable -> variable -> unit + + (* This is the type of deep terms over the signature [S]. *) + + type term = + | TVar of int (* the variable's unique identity *) + | TNode of term S.structure + + (* [decode x] turns the variable [x] into the term that it represents. + Sharing is lost, so this operation can in the worst case have + exponential cost. *) + + val decode: variable -> term + +end diff -Nru menhir-20171013/src/unionFind.ml menhir-20171206/src/unionFind.ml --- menhir-20171013/src/unionFind.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/unionFind.ml 2017-12-06 20:41:09.000000000 +0000 @@ -70,9 +70,9 @@ | Info _ -> point -(** [find point] returns the descriptor associated with [point]'s +(** [get point] returns the descriptor associated with [point]'s equivalence class. *) -let rec find point = +let rec get point = (* By not calling [repr] immediately, we optimize the common cases where the path starting at [point] has length 0 or 1, at the @@ -83,22 +83,20 @@ | Link { link = Info info } -> info.descriptor | Link { link = Link _ } -> - find (repr point) + get (repr point) -let rec change point v = +let rec set point v = match point.link with | Info info | Link { link = Info info } -> info.descriptor <- v | Link { link = Link _ } -> - change (repr point) v + set (repr point) v (** [union point1 point2] merges the equivalence classes associated - with [point1] and [point2] (which must be distinct) into a single - class whose descriptor is that originally associated with [point2]. - - The fact that [point1] and [point2] do not originally belong to the - same class guarantees that we do not create a cycle in the graph. + with [point1] and [point2] into a single class whose descriptor is + that originally associated with [point2]. It does nothing if [point1] + and [point2] already are in the same class. The weights are used to determine whether [point1] should be made to point to [point2], or vice-versa. By making the representative @@ -108,39 +106,24 @@ let union point1 point2 = let point1 = repr point1 and point2 = repr point2 in - assert (point1 != point2); - match point1.link, point2.link with - | Info info1, Info info2 -> - let weight1 = info1.weight - and weight2 = info2.weight in - if weight1 >= weight2 then begin - point2.link <- Link point1; - info1.weight <- weight1 + weight2; - info1.descriptor <- info2.descriptor - end - else begin - point1.link <- Link point2; - info2.weight <- weight1 + weight2 - end - | _, _ -> - assert false (* [repr] guarantees that [link] matches [Info _]. *) + if point1 != point2 then + match point1.link, point2.link with + | Info info1, Info info2 -> + let weight1 = info1.weight + and weight2 = info2.weight in + if weight1 >= weight2 then begin + point2.link <- Link point1; + info1.weight <- weight1 + weight2; + info1.descriptor <- info2.descriptor + end + else begin + point1.link <- Link point2; + info2.weight <- weight1 + weight2 + end + | _, _ -> + assert false (* [repr] guarantees that [link] matches [Info _]. *) (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) let equivalent point1 point2 = repr point1 == repr point2 - -(** [eunion point1 point2] is identical to [union], except it does - nothing if [point1] and [point2] are already equivalent. *) -let eunion point1 point2 = - if not (equivalent point1 point2) then - union point1 point2 - -(** [redundant] maps all members of an equivalence class, but one, to - [true]. *) -let redundant = function - | { link = Link _ } -> - true - | { link = Info _ } -> - false - diff -Nru menhir-20171013/src/unionFind.mli menhir-20171206/src/unionFind.mli --- menhir-20171013/src/unionFind.mli 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/unionFind.mli 2017-12-06 20:41:09.000000000 +0000 @@ -25,26 +25,19 @@ equivalence class of its own, whose descriptor is [desc]. *) val fresh: 'a -> 'a point -(** [find point] returns the descriptor associated with [point]'s +(** [get point] returns the descriptor associated with [point]'s equivalence class. *) -val find: 'a point -> 'a +val get: 'a point -> 'a (** [union point1 point2] merges the equivalence classes associated - with [point1] and [point2] (which must be distinct) into a single - class whose descriptor is that originally associated with [point2]. *) + with [point1] and [point2] into a single class whose descriptor is + that originally associated with [point2]. It does nothing if [point1] + and [point2] already are in the same class. *) val union: 'a point -> 'a point -> unit (** [equivalent point1 point2] tells whether [point1] and [point2] belong to the same equivalence class. *) val equivalent: 'a point -> 'a point -> bool -(** [eunion point1 point2] is identical to [union], except it does - nothing if [point1] and [point2] are already equivalent. *) -val eunion: 'a point -> 'a point -> unit - -(** [redundant] maps all members of an equivalence class, but one, to - [true]. *) -val redundant: 'a point -> bool - -(** [change p d] updates the descriptor of [p] to [d]. *) -val change: 'a point -> 'a -> unit +(** [set p d] updates the descriptor of [p] to [d]. *) +val set: 'a point -> 'a -> unit diff -Nru menhir-20171013/src/unparameterizedSyntax.ml menhir-20171206/src/unparameterizedSyntax.ml --- menhir-20171013/src/unparameterizedSyntax.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/unparameterizedSyntax.ml 2017-12-06 20:41:09.000000000 +0000 @@ -74,7 +74,7 @@ (* A grammar is essentially the same as in the surface syntax; see [Syntax]. The main difference is that [%attribute] declarations, represented by - the field [p_symbol_attributes] in the surface syntax, has disappeared. *) + the field [p_symbol_attributes] in the surface syntax, have disappeared. *) type grammar = { preludes : Stretch.t list; diff -Nru menhir-20171013/src/version.ml menhir-20171206/src/version.ml --- menhir-20171013/src/version.ml 2017-10-13 16:14:06.000000000 +0000 +++ menhir-20171206/src/version.ml 2017-12-06 20:41:09.000000000 +0000 @@ -1 +1 @@ -let version = "20171013" +let version = "20171206"