diff -Nru menhir-20201201/coq-menhirlib/src/.gitignore menhir-20201216/coq-menhirlib/src/.gitignore --- menhir-20201201/coq-menhirlib/src/.gitignore 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/coq-menhirlib/src/.gitignore 2020-12-16 19:18:28.000000000 +0000 @@ -3,3 +3,6 @@ *.v.d .*.aux _CoqProject +*.vok +*.vos +.lia.cache diff -Nru menhir-20201201/coq-menhirlib/src/Interpreter.v menhir-20201216/coq-menhirlib/src/Interpreter.v --- menhir-20201201/coq-menhirlib/src/Interpreter.v 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/coq-menhirlib/src/Interpreter.v 2020-12-16 19:18:28.000000000 +0000 @@ -83,6 +83,9 @@ CoInductive buffer : Type := Buf_cons { buf_head : token; buf_tail : buffer }. +(* Note: Coq 8.12.0 wants a Declare Scope command, + but this breaks compatibility with Coq < 8.10. + Declare Scope buffer_scope. *) Delimit Scope buffer_scope with buf. Bind Scope buffer_scope with buffer. diff -Nru menhir-20201201/coq-menhirlib/src/Makefile menhir-20201216/coq-menhirlib/src/Makefile --- menhir-20201201/coq-menhirlib/src/Makefile 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/coq-menhirlib/src/Makefile 2020-12-16 19:18:28.000000000 +0000 @@ -1,6 +1,6 @@ PWD := $(shell pwd) -COQINCLUDE := -R $(PWD) MenhirLib \ +COQINCLUDE := -R . MenhirLib \ export COQINCLUDE @@ -22,7 +22,7 @@ # A nonempty value can be used to perform a dummy installation # in a different location. -CONTRIB = $(shell $(COQBIN)coqc -where)/user-contrib +CONTRIB = $(shell $(COQBIN)coqc -where | tr -d '\r' | tr '\\' '/')/user-contrib TARGET = $(DESTDIR)$(CONTRIB)/MenhirLib install: all diff -Nru menhir-20201201/coq-menhirlib/src/Makefile.coq menhir-20201216/coq-menhirlib/src/Makefile.coq --- menhir-20201201/coq-menhirlib/src/Makefile.coq 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/coq-menhirlib/src/Makefile.coq 2020-12-16 19:18:28.000000000 +0000 @@ -11,7 +11,7 @@ # # # This Makefile relies on the following variables: -# ROOTDIR (default: `pwd`) +# ROOTDIR (default: .) # COQBIN (default: empty) # COQINCLUDE (default: empty) # VV (default: *.v) @@ -22,17 +22,8 @@ # VERBOSE (default: undefined) # (if defined, commands are displayed) -# We usually refer to the .v files using relative paths (such as Foo.v) -# but [coqdep -R] produces dependencies that refer to absolute paths -# (such as /bar/Foo.v). This confuses [make], which does not recognize -# that these files are the same. As a result, [make] does not respect -# the dependencies. - -# We fix this by using ABSOLUTE PATHS EVERYWHERE. The paths used in targets, -# in -R options, etc., must be absolute paths. - ifndef ROOTDIR - ROOTDIR := $(shell pwd) + ROOTDIR := . endif ifndef VV diff -Nru menhir-20201201/coq-menhirlib/src/Version.v menhir-20201216/coq-menhirlib/src/Version.v --- menhir-20201201/coq-menhirlib/src/Version.v 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/coq-menhirlib/src/Version.v 2020-12-16 19:18:28.000000000 +0000 @@ -1 +1 @@ -Definition require_20201201 := tt. +Definition require_20201216 := tt. diff -Nru menhir-20201201/debian/changelog menhir-20201216/debian/changelog --- menhir-20201201/debian/changelog 2020-12-06 09:29:09.000000000 +0000 +++ menhir-20201216/debian/changelog 2021-01-17 20:01:58.000000000 +0000 @@ -1,3 +1,9 @@ +menhir (20201216-1) unstable; urgency=medium + + * New upstream release + + -- Mehdi Dogguy Sun, 17 Jan 2021 21:01:58 +0100 + menhir (20201201-1) unstable; urgency=medium * New upstream version. diff -Nru menhir-20201201/debian/rules menhir-20201216/debian/rules --- menhir-20201201/debian/rules 2020-12-06 09:29:09.000000000 +0000 +++ menhir-20201216/debian/rules 2021-01-17 20:01:02.000000000 +0000 @@ -14,4 +14,4 @@ dune build @install override_dh_auto_install: - DESTDIR=$(DESTDIR) dune install + DESTDIR=$(DESTDIR) dune install --prefix=usr --libdir=lib/ocaml diff -Nru menhir-20201201/doc/macros.tex menhir-20201216/doc/macros.tex --- menhir-20201201/doc/macros.tex 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/doc/macros.tex 2020-12-16 19:18:28.000000000 +0000 @@ -94,6 +94,7 @@ \newcommand{\menhir}{Menhir\xspace} \newcommand{\menhirlib}{\texttt{MenhirLib}\xspace} \newcommand{\menhirsdk}{\texttt{MenhirSdk}\xspace} +\newcommand{\coqmenhirlib}{\texttt{MenhirLib}\xspace} % for now \newcommand{\menhirinterpreter}{\texttt{MenhirInterpreter}\xspace} \newcommand{\cmenhir}{\texttt{menhir}\xspace} \newcommand{\ml}{\texttt{.ml}\xspace} @@ -119,6 +120,8 @@ \newcommand{\automatonresolved}{\texttt{.automaton.resolved}\xspace} \newcommand{\conflicts}{\texttt{.conflicts}\xspace} \newcommand{\dott}{\texttt{.dot}\xspace} +\newcommand{\legacy}{\texttt{legacy}\xspace} +\newcommand{\simplified}{\texttt{simplified}\xspace} % Environments. @@ -174,6 +177,7 @@ \newcommand{\oocamldep}{\oo{ocamldep}} \newcommand{\oonlypreprocess}{\oo{only-preprocess}} \newcommand{\oonlytokens}{\oo{only-tokens}} +\newcommand{\orequirealiases}{\oo{require-aliases}} \newcommand{\ostrict}{\oo{strict}} \newcommand{\osuggestcomp}{\oo{suggest-comp-flags}} \newcommand{\osuggestlinkb}{\oo{suggest-link-flags-byte}} @@ -191,13 +195,16 @@ \newcommand{\ocoqlibnopath}{\oo{coq-lib-no-path}} \newcommand{\ocoqnocomplete}{\oo{coq-no-complete}} \newcommand{\ocoqnoactions}{\oo{coq-no-actions}} +\newcommand{\ocoqnoversioncheck}{\oo{coq-no-version-check}} \newcommand{\olisterrors}{\oo{list-errors}} \newcommand{\ointerpreterror}{\oo{interpret-error}} \newcommand{\ocompileerrors}{\oo{compile-errors}} \newcommand{\ocompareerrors}{\oo{compare-errors}} \newcommand{\oupdateerrors}{\oo{update-errors}} \newcommand{\oechoerrors}{\oo{echo-errors}} +\newcommand{\oechoerrorsconcrete}{\oo{echo-errors-concrete}} \newcommand{\omergeerrors}{\oo{merge-errors}} +\newcommand{\ostrategy}{\oo{strategy}} % The .messages file format. \newcommand{\messages}{\text{\tt .messages}\xspace} Binary files /tmp/tmpqEkA0c/q9fyCBiJLe/menhir-20201201/doc/manual001.png and /tmp/tmpqEkA0c/T1sE1vd8pc/menhir-20201216/doc/manual001.png differ Binary files /tmp/tmpqEkA0c/q9fyCBiJLe/menhir-20201201/doc/manual002.png and /tmp/tmpqEkA0c/T1sE1vd8pc/menhir-20201216/doc/manual002.png differ Binary files /tmp/tmpqEkA0c/q9fyCBiJLe/menhir-20201201/doc/manual003.png and /tmp/tmpqEkA0c/T1sE1vd8pc/menhir-20201216/doc/manual003.png differ diff -Nru menhir-20201201/doc/manual.html menhir-20201216/doc/manual.html --- menhir-20201201/doc/manual.html 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/doc/manual.html 2020-12-16 19:18:28.000000000 +0000 @@ -58,13 +58,13 @@ } Menhir Reference Manual -(version 20201201) +(version 20201216)

Menhir Reference Manual
-(version 20201201)

François Pottier and Yann Régis-Gianas
+(version 20201216)

François Pottier and Yann Régis-Gianas
INRIA
{Francois.Pottier, Yann.Regis-Gianas}@inria.fr

@@ -211,12 +211,14 @@ it to an OCaml function that maps a state number to a message. The OCaml code is sent to the standard output channel. At the same time, Menhir checks that the collection of input sentences in the file filename is correct and -irredundant. For more information, see §11.

--coq.  This switch causes Menhir to produce Coq code. See §12.

--coq-lib-path path.  This switch allows specifying under what name (or path) the Coq support library -MenhirLib is known to Coq. When Menhir runs in --coq mode, the generated -parser contains references to several modules in this library. This path is -used to qualify these references. Its default value is MenhirLib.

--coq-lib-no-path.  This switch indicates that references to the Coq library MenhirLib -should not be qualified. This was the default behavior of Menhir prior to 2018/05/30. -This switch is provided for compatibility, but normally should not be used.

--coq-no-actions.  (Used in conjunction with --coq.) This switch +irredundant. For more information, see §11.

--coq.  This switch causes Menhir to produce Coq code. See §12.

--coq-lib-path path.  This switch allows specifying under what +name (or path) the Coq support library is known to Coq. When Menhir runs in +--coq mode, the generated parser contains references to several modules in +this library. This path is used to qualify these references. Its default value +is MenhirLib.

--coq-lib-no-path.  This switch indicates that references to the Coq +library MenhirLib should not be qualified. This was the default +behavior of Menhir prior to 2018/05/30. This switch is provided for +compatibility, but normally should not be used.

--coq-no-actions.  (Used in conjunction with --coq.) This switch causes the semantic actions present in the .vy file to be ignored and replaced with tt, the unique inhabitant of Coq’s unit type. This feature can be used to test the Coq back-end with a standard grammar, that is, a @@ -227,7 +229,9 @@ possible only if the grammar has no conflict (not even a benign one, in the sense of §6.1). This can be desirable also because, for a complex grammar, completeness may require a heavy certificate and its -validation by Coq may take time.

--depend.  See §14.

--dump.  This switch causes a description of the automaton to be +validation by Coq may take time.

--coq-no-version-check.  (Used in conjunction with --coq.) This switch +prevents the generation of the check that verifies that the versions of +Menhir and MenhirLib match.

--depend.  See §14.

--dump.  This switch causes a description of the automaton to be written to the file basename.automaton. This description is written after benign conflicts have been resolved, before severe conflicts are resolved (§6), and before extra reductions are introduced @@ -238,7 +242,12 @@ (§4.1.8).

--echo-errors filename.  This switch causes Menhir to read the .messages file filename and to produce on the standard output channel just the input sentences. (That is, all messages, blank lines, and -comments are filtered out.) For more information, see §11.

--explain.  This switch causes conflict explanations to be +comments are filtered out.) For more information, see §11.

--echo-errors-concrete filename.  This switch causes Menhir to +read the .messages file filename and to produce on the standard output +channel just the input sentences. Each sentence is followed with a comment of +the form ## Concrete syntax: ... that shows this sentence in concrete +syntax. This comment is printed only if the user has defined an alias for +every token (§4.1.3).

--explain.  This switch causes conflict explanations to be written to the file basename.conflicts. See also §6.

--external-tokens T.  This switch causes the definition of the token type to be omitted in basename.ml and basename.mli. Instead, the generated parser relies on @@ -292,13 +301,23 @@ the grammar specification to be translated into a definition of the token type, which is written to the files basename.ml and basename.mli. No code is generated. This is useful when a single set of tokens is to be shared between several parsers. The directory -demos/calc-two contains a demo that illustrates the use of this switch.

--raw-depend.  See §14.

--stdlib directory.  This switch exists only for -backwards compatibility and is ignored. It may be removed in the future.

--strict.  This switch causes several warnings about the grammar +demos/calc-two contains a demo that illustrates the use of this switch.

--raw-depend.  See §14.

--require-aliases.  This switch causes Menhir to check that a token +alias (§4.1.3) has been defined for every token. There is no +requirement for this alias to be actually used; it must simply exist. A +missing alias gives rise to a warning (and, in --strict mode, to an error).

--stdlib directory.  This switch exists only for +backwards compatibility and is ignored. It may be removed in the future.

--strategy strategy.  This switch selects an error handling +strategy, to be used by the code back-end, the table back-end, and the +reference interpreter. The available strategies are legacy and +simplified. (However, at the time of writing, the code back-end does +not yet support the simplified strategy.) When this switch is +omitted, the legacy strategy is used. The choice of a strategy +matters only if the grammar uses the error token. For more details, see +§10.

--strict.  This switch causes several warnings about the grammar and about the automaton to be considered errors. This includes warnings about useless precedence declarations, non-terminal symbols that produce the empty language, unreachable non-terminal symbols, productions that are never -reduced, conflicts that are not resolved by precedence declarations, and -end-of-stream conflicts.

--suggest-*.  See §14.

--table.  This switch causes Menhir to use its table-based +reduced, conflicts that are not resolved by precedence declarations, +end-of-stream conflicts, and missing token aliases.

--suggest-*.  See §14.

--table.  This switch causes Menhir to use its table-based back-end, as opposed to its (default) code-based back-end. When --table is used, Menhir produces significantly more compact and somewhat slower parsers. See §16 for a speed comparison.

The table-based back-end produces rather compact tables, which are analogous @@ -1997,13 +2016,17 @@ checkpoint, which again can be an intermediate checkpoint or a final checkpoint. It does not raise any exception. (The exception Error is used only in the monolithic API.)

  val resume:
+    ?strategy:[ `Legacy | `Simplified ] ->
     'a checkpoint ->
     'a checkpoint
 

The function resume allows the user to resume the parser after the parser has suspended itself with a checkpoint of the form AboutToReduce (env, prod) or HandlingError env. This function expects just the previous checkpoint checkpoint. It produces a new -checkpoint. It does not raise any exception.

The incremental API subsumes the monolithic API. Indeed, main can be +checkpoint. It does not raise any exception. +The optional argument strategy influences the manner in which +resume deals with checkpoints of the form ErrorHandling _. Its +default value is `Legacy. For more details, see §10.

The incremental API subsumes the monolithic API. Indeed, main can be (and is in fact) implemented by first using Incremental.main, then calling offer and resume in a loop, until a final checkpoint is obtained.

  type supplier =
@@ -2019,13 +2042,18 @@
 is why we expose offer and resume in the first place.
 Nevertheless, some variations are so common that it is worth providing them,
 ready for use. The following functions are implemented on top of offer
-and resume.

  val loop: supplier -> 'a checkpoint -> 'a
+and resume.

  val loop:
+    ?strategy:[ `Legacy | `Simplified ] ->
+    supplier -> 'a checkpoint -> 'a
 

loop supplier checkpoint begins parsing from checkpoint, reading tokens from supplier. It continues parsing until it reaches a checkpoint of the form Accepted v or Rejected. In the former case, it returns v. In the latter case, it raises the exception Error. (By the way, this is how we implement the monolithic -API on top of the incremental API.)

  val loop_handle:
+API on top of the incremental API.)
+The optional argument strategy influences the manner in which
+loop deals with checkpoints of the form ErrorHandling _. Its
+default value is `Legacy. For more details, see §10.

  val loop_handle:
     ('a -> 'answer) ->
     ('a checkpoint -> 'answer) ->
     supplier -> 'a checkpoint -> 'answer
@@ -2298,16 +2326,41 @@
 by the lexical analyzer. Instead, when an error is detected, the current
 lookahead token is discarded and replaced with the error token, which becomes
 the current lookahead token. At this point, the parser enters error
-handling mode.

In error handling mode, automaton states are popped off the automaton’s stack -until a state that can act on error is found. This includes -both shift and reduce actions. (yacc and ocamlyacc do not -trigger reduce actions on error. It is somewhat unclear why this is so.)

When a state that can reduce on error is found, reduction is performed. -Since the lookahead token is still error, the automaton remains in error -handling mode.

When a state that can shift on error is found, the error token is shifted. -At this point, the parser returns to normal mode.

When no state that can act on error is found on the automaton’s stack, the -parser stops and raises the exception Error. This exception carries -no information. The position of the error can be obtained by reading the -lexical analyzer’s environment record.

+handling mode. +In error handling mode, the parser behaves as follows: +

  • +If the current state has a shift action on the error token, then this +action takes place. Under the legacy strategy, the parser then +reads the next token and returns to normal mode. Under the +simplified strategy, it does not request the next token, so +the current token remains error, and the parser remains in error handling +mode. +
  • If the current state has a reduce action on the error token, then this +action takes place. (This behavior differs from that of yacc and +ocamlyacc, which do not reduce on error. It is somewhat unclear why not.) +The current token remains error and the parser remains in error handling +mode. +
  • If the current state has no action on the error token, then, under the +simplified strategy, the parser rejects the input. Under the +legacy strategy, the parser pops a cell off its stack and remains +in error handling mode. If the stack is empty, then the parser rejects the +input. +

In the monolithic API, the parser rejects the input by raising the exception +Error. This exception carries no information. The position of the +error can be obtained by reading the lexical analyzer’s environment record. In +the incremental API, the parser rejects the input by returning the checkpoint +Rejected.

Which strategy should one choose? First, let us note that the difference +between the strategies legacy and simplified matters only if the grammar +uses the error token. The following rule of thumb can be used to select +between them: +

  • +If the error token is used only to catch an error and stop, then the +simplified strategy should be preferred. (In this this restricted style, +the error token always appears at the end of a production, whose semantic +action raises an exception.) +
  • If the error token is used to survive an error and continue parsing, +then the legacy strategy should be selected. +

Error recovery

ocamlyacc offers an error recovery mode, which is entered immediately after an error token was successfully shifted. In this mode, tokens are repeatedly Binary files /tmp/tmpqEkA0c/q9fyCBiJLe/menhir-20201201/doc/manual.pdf and /tmp/tmpqEkA0c/T1sE1vd8pc/menhir-20201216/doc/manual.pdf differ diff -Nru menhir-20201201/doc/manual.tex menhir-20201216/doc/manual.tex --- menhir-20201201/doc/manual.tex 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/doc/manual.tex 2020-12-16 19:18:28.000000000 +0000 @@ -130,14 +130,16 @@ \docswitch{\ocoq} This switch causes \menhir to produce Coq code. See \sref{sec:coq}. -\docswitch{\ocoqlibpath \nt{path}} This switch allows specifying under what name (or path) the Coq support library -MenhirLib is known to Coq. When \menhir runs in \ocoq mode, the generated -parser contains references to several modules in this library. This path is -used to qualify these references. Its default value is \texttt{MenhirLib}. - -\docswitch{\ocoqlibnopath} This switch indicates that references to the Coq library MenhirLib -should \emph{not} be qualified. This was the default behavior of \menhir prior to 2018/05/30. -This switch is provided for compatibility, but normally should not be used. +\docswitch{\ocoqlibpath \nt{path}} This switch allows specifying under what +name (or path) the Coq support library is known to Coq. When \menhir runs in +\ocoq mode, the generated parser contains references to several modules in +this library. This path is used to qualify these references. Its default value +is \coqmenhirlib. + +\docswitch{\ocoqlibnopath} This switch indicates that references to the Coq +library \coqmenhirlib should \emph{not} be qualified. This was the default +behavior of \menhir prior to 2018/05/30. This switch is provided for +compatibility, but normally should not be used. \docswitch{\ocoqnoactions} (Used in conjunction with \ocoq.) This switch causes the semantic actions present in the \vy file to be ignored and @@ -154,6 +156,10 @@ a complex grammar, completeness may require a heavy certificate and its validation by Coq may take time. +\docswitch{\ocoqnoversioncheck} (Used in conjunction with \ocoq.) This switch +prevents the generation of the check that verifies that the versions of +\menhir and \coqmenhirlib match. + \docswitch{\odepend} See \sref{sec:build}. \docswitch{\odump} This switch causes a description of the automaton to be @@ -173,6 +179,13 @@ channel just the input sentences. (That is, all messages, blank lines, and comments are filtered out.) For more information, see \sref{sec:errors:new}. +\docswitch{\oechoerrorsconcrete \nt{filename}} This switch causes \menhir to +read the \messages file \nt{filename} and to produce on the standard output +channel just the input sentences. Each sentence is followed with a comment of +the form \verb+## Concrete syntax: ...+ that shows this sentence in concrete +syntax. This comment is printed only if the user has defined an alias for +every token (\sref{sec:tokens}). + \docswitch{\oexplain} This switch causes conflict explanations to be written to the file \nt{basename}\conflicts. See also \sref{sec:conflicts}. @@ -274,15 +287,29 @@ \docswitch{\orawdepend} See \sref{sec:build}. +\docswitch{\orequirealiases} This switch causes \menhir to check that a token +alias (\sref{sec:tokens}) has been defined for every token. There is no +requirement for this alias to be actually used; it must simply exist. A +missing alias gives rise to a warning (and, in \ostrict mode, to an error). + \docswitch{\ostdlib \nt{directory}} This switch exists only for backwards compatibility and is ignored. It may be removed in the future. +\docswitch{\ostrategy \nt{strategy}} This switch selects an error handling +strategy, to be used by the code back-end, the table back-end, and the +reference interpreter. The available strategies are \legacy and +simplified. (However, at the time of writing, the code back-end does +not yet support the simplified strategy.) When this switch is +omitted, the \legacy strategy is used. The choice of a strategy +matters only if the grammar uses the \error token. For more details, see +\sref{sec:errors}. + \docswitch{\ostrict} This switch causes several warnings about the grammar and about the automaton to be considered errors. This includes warnings about useless precedence declarations, non-terminal symbols that produce the empty language, unreachable non-terminal symbols, productions that are never -reduced, conflicts that are not resolved by precedence declarations, and -end-of-stream conflicts. +reduced, conflicts that are not resolved by precedence declarations, +end-of-stream conflicts, and missing token aliases. \docswitch{\oo{suggest-*}} See \sref{sec:build}. @@ -2514,6 +2541,10 @@ does everything: it pulls tokens from the lexer, parses, and eventually returns a semantic value (or fails by throwing the exception \texttt{Error}). +% We may wish to note that the behavior of the function \verb+main+ +% is influenced by the strategy that is chosen at compile time via +% \ostrategy. + % ------------------------------------------------------------------------------ \subsection{Incremental API} @@ -2681,6 +2712,7 @@ \begin{verbatim} val resume: + ?strategy:[ `Legacy | `Simplified ] -> 'a checkpoint -> 'a checkpoint \end{verbatim} @@ -2690,6 +2722,10 @@ \verb+AboutToReduce (env, prod)+ or \verb+HandlingError env+. This function expects just the previous checkpoint \verb+checkpoint+. It produces a new checkpoint. It does not raise any exception. +% +The optional argument \verb+strategy+ influences the manner in which +\verb+resume+ deals with checkpoints of the form \verb+ErrorHandling _+. Its +default value is \verb+`Legacy+. For more details, see \sref{sec:errors}. The incremental API subsumes the monolithic API. Indeed, \verb+main+ can be (and is in fact) implemented by first using @@ -2730,7 +2766,9 @@ %% val loop \begin{verbatim} - val loop: supplier -> 'a checkpoint -> 'a + val loop: + ?strategy:[ `Legacy | `Simplified ] -> + supplier -> 'a checkpoint -> 'a \end{verbatim} \verb+loop supplier checkpoint+ begins parsing from \verb+checkpoint+, reading @@ -2739,6 +2777,10 @@ case, it returns \verb+v+. In the latter case, it raises the exception \verb+Error+. (By the way, this is how we implement the monolithic API on top of the incremental API.) +% +The optional argument \verb+strategy+ influences the manner in which +\verb+loop+ deals with checkpoints of the form \verb+ErrorHandling _+. Its +default value is \verb+`Legacy+. For more details, see \sref{sec:errors}. \begin{verbatim} val loop_handle: @@ -3255,6 +3297,7 @@ \menhir's traditional error handling mechanism is considered deprecated: although it is still supported for the time being, it might be removed in the future. +% We recommend setting up an error handling mechanism using the new tools offered by \menhir (\sref{sec:errors:new}). @@ -3268,23 +3311,45 @@ lookahead token is discarded and replaced with the \error token, which becomes the current lookahead token. At this point, the parser enters \emph{error handling} mode. +% +In error handling mode, the parser behaves as follows: +\begin{itemize} +\item If the current state has a shift action on the \error token, then this + action takes place. Under the \legacy strategy, the parser then + reads the next token and returns to normal mode. Under the + simplified strategy, it does \emph{not} request the next token, so + the current token remains \error, and the parser remains in error handling + mode. +\item If the current state has a reduce action on the \error token, then this + action takes place. (This behavior differs from that of \yacc and + \ocamlyacc, which do not reduce on \error. It is somewhat unclear why not.) + The current token remains \error and the parser remains in error handling + mode. +\item If the current state has no action on the \error token, then, under the + simplified strategy, the parser rejects the input. Under the + \legacy strategy, the parser pops a cell off its stack and remains + in error handling mode. If the stack is empty, then the parser rejects the + input. +\end{itemize} -In error handling mode, automaton states are popped off the automaton's stack -until a state that can \emph{act} on \error is found. This includes -\emph{both} shift \emph{and} reduce actions. (\yacc and \ocamlyacc do not -trigger reduce actions on \error. It is somewhat unclear why this is so.) - -When a state that can reduce on \error is found, reduction is performed. -Since the lookahead token is still \error, the automaton remains in error -handling mode. - -When a state that can shift on \error is found, the \error token is shifted. -At this point, the parser returns to normal mode. - -When no state that can act on \error is found on the automaton's stack, the -parser stops and raises the exception \texttt{Error}. This exception carries -no information. The position of the error can be obtained by reading the -lexical analyzer's environment record. +In the monolithic API, the parser rejects the input by raising the exception +\texttt{Error}. This exception carries no information. The position of the +error can be obtained by reading the lexical analyzer's environment record. In +the incremental API, the parser rejects the input by returning the checkpoint +\texttt{Rejected}. + +Which strategy should one choose? First, let us note that the difference +between the strategies \legacy and \simplified matters only if the grammar +uses the \error token. The following rule of thumb can be used to select +between them: +\begin{itemize} +\item If the \error token is used only to catch an error and stop, then the + \simplified strategy should be preferred. (In this this restricted style, + the \error token always appears at the end of a production, whose semantic + action raises an exception.) +\item If the \error token is used to survive an error and continue parsing, + then the legacy strategy should be selected. +\end{itemize} \paragraph{Error recovery} diff -Nru menhir-20201201/doc/version.tex menhir-20201216/doc/version.tex --- menhir-20201201/doc/version.tex 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/doc/version.tex 2020-12-16 19:18:28.000000000 +0000 @@ -1 +1 @@ -\gdef\menhirversion{20201201} +\gdef\menhirversion{20201216} diff -Nru menhir-20201201/dune-project menhir-20201216/dune-project --- menhir-20201201/dune-project 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/dune-project 2020-12-16 19:18:28.000000000 +0000 @@ -1,7 +1,7 @@ (lang dune 2.0) (name menhir) (using menhir 2.0) -(version 20201201) +(version 20201216) (package (name menhirLib) diff -Nru menhir-20201201/lib/Engine.ml menhir-20201216/lib/Engine.ml --- menhir-20201201/lib/Engine.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/lib/Engine.ml 2020-12-16 19:18:28.000000000 +0000 @@ -58,6 +58,74 @@ (* ------------------------------------------------------------------------ *) + (* As of 2020/12/16, we introduce a choice between multiple error handling + strategies. *) + + (* Regardless of the strategy, when a syntax error is encountered, the + function [initiate] is called, a [HandlingError] checkpoint is produced, + and (after resuming) the function [error] is called. This function checks + whether the current state allows shifting, reducing, or neither, when the + lookahead token is [error]. Its behavior, then, depends on the strategy, + as follows. *) + + (* In the legacy strategy, which until now was the only strategy, + + - If shifting is possible, then a [Shifting] checkpoint is produced, + whose field [please_discard] is [true], so (after resuming) an + [InputNeeded] checkpoint is produced, and (after a new token + has been provided) the parser leaves error-handling mode and + returns to normal mode. + + - If reducing is possible, then one or more reductions are performed. + Default reductions are announced via [AboutToReduce] checkpoints, + whereas ordinary reductions are performed silently. (It is unclear + why this is so.) The parser remains in error-handling mode, so + another [HandlingError] checkpoint is produced, and the function + [error] is called again. + + - If neither action is possible and if the stack is nonempty, then a + cell is popped off the stack, then a [HandlingError] checkpoint is + produced, and the function [error] is called again. + + - If neither action is possible and if the stack is empty, then the + parse dies with a [Reject] checkpoint. *) + + (* The simplified strategy differs from the legacy strategy as follows: + + - When shifting, a [Shifting] checkpoint is produced, whose field + [please_discard] is [false], so the parser does not request another + token, and the parser remains in error-handling mode. (If the + destination state of this shift transition has a default reduction, + then the parser will perform this reduction as its next step.) + + - When reducing, all reductions are announced by [AboutToReduce] + checkpoints. + + - If neither shifting [error] nor reducing on [error] is possible, + then the parser dies with a [Reject] checkpoint. (The parser does + not attempt to pop cells off the stack one by one.) + + This simplified strategy is appropriate when the grammar uses the [error] + token in a limited way, where the [error] token always appears at the end + of a production whose semantic action raises an exception (whose purpose + is to signal a syntax error and perhaps produce a custom message). Then, + the parser must not request one token past the syntax error. (In a REPL, + that would be undesirable.) It must perform as many reductions on [error] + as possible, then (if possible) shift the [error] token and move to a new + state where a default reduction will be possible. (Because the [error] + token always appears at the end of a production, no other action can + exist in that state, so a default reduction must exist.) The semantic + action raises an exception, and that is it. *) + + (* Let us note that it is also possible to perform no error handling at + all, or to perform customized error handling, by stopping as soon as + the first [ErrorHandling] checkpoint appears. *) + + type strategy = + [ `Legacy | `Simplified ] + + (* ------------------------------------------------------------------------ *) + (* In the code-based back-end, the [run] function is sometimes responsible for pushing a new cell on the stack. This is motivated by code sharing concerns. In this interpreter, there is no such concern; [run]'s caller @@ -143,8 +211,9 @@ (* Note that, if [please_discard] was true, then we have just called [discard], so the lookahead token cannot be [error]. *) - (* Returning [HandlingError env] is equivalent to calling [error env] - directly, except it allows the user to regain control. *) + (* Returning [HandlingError env] is like calling [error ~strategy env] + directly, except it allows the user to regain control and choose an + error-handling strategy. *) if env.error then begin if log then @@ -295,7 +364,7 @@ (* [error] handles errors. *) - and error env = + and error ~strategy env = assert env.error; (* Consult the column associated with the [error] pseudo-token in the @@ -305,39 +374,64 @@ env.current (* determines a row *) T.error_terminal (* determines a column *) T.error_value - error_shift (* shift continuation *) - error_reduce (* reduce continuation *) - error_fail (* failure continuation *) + (error_shift ~strategy) (* shift continuation *) + (error_reduce ~strategy) (* reduce continuation *) + (error_fail ~strategy) (* failure continuation *) env - and error_shift env please_discard terminal value s' = - - (* Here, [terminal] is [T.error_terminal], - and [value] is [T.error_value]. *) - + and error_shift ~strategy env please_discard terminal value s' = assert (terminal = T.error_terminal && value = T.error_value); (* This state is capable of shifting the [error] token. *) if log then Log.handling_error env.current; + + (* In the simplified strategy, we change [please_discard] to [false], + which means that we won't request the next token and (therefore) + we will remain in error-handling mode after shifting the [error] + token. *) + + let please_discard = + match strategy with `Legacy -> please_discard | `Simplified -> false + in + shift env please_discard terminal value s' - and error_reduce env prod = + and error_reduce ~strategy env prod = (* This state is capable of performing a reduction on [error]. *) if log then Log.handling_error env.current; - reduce env prod - (* Intentionally calling [reduce] instead of [announce_reduce]. - It does not seem very useful, and it could be confusing, to - expose the reduction steps taken during error handling. *) - and error_fail env = + (* In the legacy strategy, we call [reduce] instead of [announce_reduce], + apparently in an attempt to hide the reduction steps performed during + error handling. This seems inconsistent, as the default reduction steps + are still announced. In the simplified strategy, all reductions are + announced. *) + + match strategy with + | `Legacy -> + reduce env prod + | `Simplified -> + announce_reduce env prod + + and error_fail ~strategy env = + + (* This state is unable to handle errors. In the simplified strategy, we + die immediately. In the legacy strategy, we attempt to pop a stack + cell. (This amounts to forgetting part of what we have just read, in + the hope of reaching a state where we can shift the [error] token and + resume parsing in normal mode. Forgetting past input is not appropriate + when the goal is merely to produce a good syntax error message.) *) + + match strategy with + | `Simplified -> + Rejected + | `Legacy -> - (* This state is unable to handle errors. Attempt to pop a stack - cell. *) + (* Attempt to pop a stack cell. *) let cell = env.stack in let next = cell.next in @@ -447,9 +541,11 @@ | _ -> invalid_arg "offer expects InputNeeded" - let resume : 'a . 'a checkpoint -> 'a checkpoint = function + let resume : 'a . ?strategy:strategy -> 'a checkpoint -> 'a checkpoint = + fun ?(strategy=`Legacy) checkpoint -> + match checkpoint with | HandlingError env -> - Obj.magic error env + Obj.magic error ~strategy env | Shifting (_, env, please_discard) -> Obj.magic run env please_discard | AboutToReduce (env, prod) -> @@ -493,8 +589,8 @@ All of the cheating resides in the types assigned to [offer] and [handle] above. *) - let rec loop : 'a . supplier -> 'a checkpoint -> 'a = - fun read checkpoint -> + let rec loop : 'a . ?strategy:strategy -> supplier -> 'a checkpoint -> 'a = + fun ?(strategy=`Legacy) read checkpoint -> match checkpoint with | InputNeeded _ -> (* The parser needs a token. Request one from the lexer, @@ -502,14 +598,14 @@ checkpoint. Then, repeat. *) let triple = read() in let checkpoint = offer checkpoint triple in - loop read checkpoint + loop ~strategy read checkpoint | Shifting _ | AboutToReduce _ | HandlingError _ -> (* The parser has suspended itself, but does not need new input. Just resume the parser. Then, repeat. *) - let checkpoint = resume checkpoint in - loop read checkpoint + let checkpoint = resume ~strategy checkpoint in + loop ~strategy read checkpoint | Accepted v -> (* The parser has succeeded and produced a semantic value. Return this semantic value to the user. *) @@ -518,9 +614,9 @@ (* The parser rejects this input. Raise an exception. *) raise Error - let entry (s : state) lexer lexbuf : semantic_value = + let entry strategy (s : state) lexer lexbuf : semantic_value = let initial = lexbuf.Lexing.lex_curr_p in - loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial) + loop ~strategy (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial) (* ------------------------------------------------------------------------ *) @@ -536,6 +632,8 @@ loop_handle succeed fail read checkpoint | Shifting _ | AboutToReduce _ -> + (* Which strategy is passed to [resume] here is irrelevant, + since this checkpoint is not [HandlingError _]. *) let checkpoint = resume checkpoint in loop_handle succeed fail read checkpoint | HandlingError _ @@ -569,6 +667,8 @@ loop_handle_undo succeed fail read (inputneeded, checkpoint) | Shifting _ | AboutToReduce _ -> + (* Which strategy is passed to [resume] here is irrelevant, + since this checkpoint is not [HandlingError _]. *) let checkpoint = resume checkpoint in loop_handle_undo succeed fail read (inputneeded, checkpoint) | HandlingError _ @@ -602,6 +702,8 @@ Some env | AboutToReduce _ -> (* The parser wishes to reduce. Just follow. *) + (* Which strategy is passed to [resume] here is irrelevant, + since this checkpoint is not [HandlingError _]. *) shifts (resume checkpoint) | HandlingError _ -> (* The parser fails, which means it rejects the terminal symbol diff -Nru menhir-20201201/lib/EngineTypes.ml menhir-20201216/lib/EngineTypes.ml --- menhir-20201201/lib/EngineTypes.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/lib/EngineTypes.ml 2020-12-16 19:18:28.000000000 +0000 @@ -333,6 +333,7 @@ exception Error val entry: + (* strategy: *) [ `Legacy | `Simplified ] -> (* see [IncrementalEngine] *) state -> (Lexing.lexbuf -> token) -> Lexing.lexbuf -> diff -Nru menhir-20201201/lib/IncrementalEngine.ml menhir-20201216/lib/IncrementalEngine.ml --- menhir-20201201/lib/IncrementalEngine.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/lib/IncrementalEngine.ml 2020-12-16 19:18:28.000000000 +0000 @@ -78,9 +78,9 @@ | Rejected (* [offer] allows the user to resume the parser after it has suspended - itself with a checkpoint of the form [InputNeeded env]. [offer] expects the - old checkpoint as well as a new token and produces a new checkpoint. It does not - raise any exception. *) + itself with a checkpoint of the form [InputNeeded env]. [offer] expects + the old checkpoint as well as a new token and produces a new checkpoint. + It does not raise any exception. *) val offer: 'a checkpoint -> @@ -89,10 +89,30 @@ (* [resume] allows the user to resume the parser after it has suspended itself with a checkpoint of the form [AboutToReduce (env, prod)] or - [HandlingError env]. [resume] expects the old checkpoint and produces a new - checkpoint. It does not raise any exception. *) + [HandlingError env]. [resume] expects the old checkpoint and produces a + new checkpoint. It does not raise any exception. *) + + (* The optional argument [strategy] influences the manner in which [resume] + deals with checkpoints of the form [ErrorHandling _]. Its default value + is [`Legacy]. It can be briefly described as follows: + + - If the [error] token is used only to report errors (that is, if the + [error] token appears only at the end of a production, whose semantic + action raises an exception) then the simplified strategy should be + preferred. (This includes the case where the [error] token does not + appear at all in the grammar.) + + - If the [error] token is used to recover after an error, or if + perfect backward compatibility is required, the legacy strategy + should be selected. + + More details on these strategies appear in the file [Engine.ml]. *) + + type strategy = + [ `Legacy | `Simplified ] val resume: + ?strategy:strategy -> 'a checkpoint -> 'a checkpoint @@ -102,7 +122,8 @@ type supplier = unit -> token * position * position - (* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *) + (* A pair of a lexer and a lexing buffer can be easily turned into a + supplier. *) val lexer_lexbuf_to_supplier: (Lexing.lexbuf -> token) -> @@ -117,9 +138,11 @@ (* [loop supplier checkpoint] begins parsing from [checkpoint], reading tokens from [supplier]. It continues parsing until it reaches a checkpoint of the form [Accepted v] or [Rejected]. In the former case, it - returns [v]. In the latter case, it raises the exception [Error]. *) + returns [v]. In the latter case, it raises the exception [Error]. + The optional argument [strategy], whose default value is [Legacy], + is passed to [resume] and influences the error-handling strategy. *) - val loop: supplier -> 'a checkpoint -> 'a + val loop: ?strategy:strategy -> supplier -> 'a checkpoint -> 'a (* [loop_handle succeed fail supplier checkpoint] begins parsing from [checkpoint], reading tokens from [supplier]. It continues parsing until @@ -128,10 +151,10 @@ observed first). In the former case, it calls [succeed v]. In the latter case, it calls [fail] with this checkpoint. It cannot raise [Error]. - This means that Menhir's traditional error-handling procedure (which pops - the stack until a state that can act on the [error] token is found) does - not get a chance to run. Instead, the user can implement her own error - handling code, in the [fail] continuation. *) + This means that Menhir's error-handling procedure does not get a chance + to run. For this reason, there is no [strategy] parameter. Instead, the + user can implement her own error handling code, in the [fail] + continuation. *) val loop_handle: ('a -> 'answer) -> diff -Nru menhir-20201201/src/AutomatonGraph.ml menhir-20201216/src/AutomatonGraph.ml --- menhir-20201201/src/AutomatonGraph.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20201216/src/AutomatonGraph.ml 2020-12-16 19:18:28.000000000 +0000 @@ -0,0 +1,41 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +open Printf +open Grammar + +module P = Dot.Print (struct + + type vertex = Lr1.node + + let name node = + sprintf "s%d" (Lr1.number node) + + let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) source = + SymbolMap.iter (fun symbol target -> + let label = Symbol.print symbol in + f ~label target + ) (Lr1.transitions source) + + let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) = + Lr1.iter (fun node -> + let label = sprintf "%d" (Lr1.number node) in + f ~label node + ) + +end) + +let print_automaton_graph() = + let f = open_out (Settings.base ^ ".dot") in + P.print f; + close_out f diff -Nru menhir-20201201/src/AutomatonGraph.mli menhir-20201216/src/AutomatonGraph.mli --- menhir-20201201/src/AutomatonGraph.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20201216/src/AutomatonGraph.mli 2020-12-16 19:18:28.000000000 +0000 @@ -0,0 +1,18 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* Build and print the LR(1) automaton as a graph. Each state of the automaton + gives rise to a node. Edges are labeled with nonterminal and terminal + symbols. The reduction actions that exist in each state are not shown. *) + +val print_automaton_graph: unit -> unit diff -Nru menhir-20201201/src/back.ml menhir-20201216/src/back.ml --- menhir-20201201/src/back.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/back.ml 2020-12-16 19:18:28.000000000 +0000 @@ -21,6 +21,10 @@ let module D = Dump.Make(Default) in D.dump (Settings.base ^ ".automaton.resolved") +let () = + if Settings.automaton_graph then + AutomatonGraph.print_automaton_graph() + (* Let [Interpret] handle the command line options [--interpret], [--interpret-error], [--compile-errors], [--compare-errors]. *) @@ -66,30 +70,25 @@ if Settings.cmly then Cmly_write.write (Settings.base ^ ".cmly") -(* Construct the code, using either the table-based or the code-based - back-end, and pass it on to the printer. (This continuation-passing - style is imposed by the fact that there is no conditional in ocaml's - module language.) *) +(* Construct and print the code using an appropriate back-end. *) let () = - if Settings.coq then + if Settings.table then begin + let module B = TableBackend.Run (struct end) in + write B.program; + Interface.write Front.grammar () + end + else if Settings.coq then begin let module B = CoqBackend.Run (struct end) in let filename = Settings.base ^ ".v" in let f = open_out filename in - B.write_all f; - exit 0 - else - if Settings.table then - let module B = TableBackend.Run (struct end) in - write B.program - else - let module B = CodeBackend.Run (struct end) in - write (CodeInliner.inline B.program) - -(* Write the interface file. *) - -let () = - Interface.write Front.grammar () + B.write_all f + end + else begin + let module B = CodeBackend.Run (struct end) in + write (CodeInliner.inline B.program); + Interface.write Front.grammar () + end let () = Time.tick "Printing" diff -Nru menhir-20201201/src/codeBackend.ml menhir-20201216/src/codeBackend.ml --- menhir-20201201/src/codeBackend.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/codeBackend.ml 2020-12-16 19:18:28.000000000 +0000 @@ -39,13 +39,6 @@ cell's contents are passed as extra parameters, and it is [run]'s responsibility to allocate that cell. - (When [run] is the target of a shift transition, the position - parameters [startp] and [endp] are redundant with the [env] - parameter, because they are always equal to [env.startp] and - [env.endp]. However, this does not appear to make a great - difference in terms of code size, and makes our life easier, so we - do not attempt to eliminate this redundancy.) - The first thing in [run] is to discard a token, if the state was entered through a shift transition, and to peek at the lookahead token. When the current token is to be discarded, the [discard] function is invoked. It @@ -118,6 +111,21 @@ that branch with a simple [assert false]. TEMPORARY do it *) (* ------------------------------------------------------------------------ *) + +(* At this time, the code back-end supports only the legacy error handling + strategy. It does not (yet?) support the simplified strategy. Let the + user know about this. *) + +let () = + match Settings.strategy with + | `Legacy -> + () + | `Simplified -> + Error.error [] + "The code back-end does not support --strategy simplified.\n\ + Please use either --strategy legacy or --table." + +(* ------------------------------------------------------------------------ *) (* Here is a description of our error handling mechanism. With every state [s], we associate an [error] function. @@ -167,6 +175,12 @@ I note that a state that can handle [error] and has a default reduction must in fact have a reduction action on [error]. *) +(* The variable that holds the environment. This is a parameter of all + functions. *) + +let env = + prefix "env" + (* The type of environments. *) let tcenv = @@ -186,11 +200,6 @@ let discard = prefix "discard" -(* The [initenv] function. *) - -let initenv = - prefix "init" - (* The [run] function associated with a state [s]. *) let run s = @@ -499,13 +508,6 @@ (* This is the type of parser environments. *) -let field modifiable name t = - { - modifiable = modifiable; - fieldname = name; - fieldtype = type2scheme t - } - let envtypedef = { typename = tcenv; typeparams = []; @@ -819,7 +821,7 @@ (* Calls to [assertfalse]. *) let call_assertfalse = - EApp (EVar assertfalse, [ EVar "()" ]) + EApp (EVar assertfalse, [ EUnit ]) (* ------------------------------------------------------------------------ *) (* Code production for the automaton functions. *) @@ -915,12 +917,12 @@ The parameter [defred] tells which default reduction, if any, we are about to perform. *) -(* 2014/12/06 New convention regarding initial states (i.e., states - which have no incoming symbol). The function [initenv] does not - invoke the lexer, so the [run] function for an initial state must - do it. (Except in the very special case where the initial state - has a default reduction on [#] -- this means the grammar recognizes - only the empty word. We have ruled out this case.) *) +(* 2014/12/06 New convention regarding initial states (i.e., states which have + no incoming symbol). We donot invoke the lexer when we construct the + initial environment, so the [run] function for an initial state must do it. + (Except in the very special case where the initial state has a default + reduction on [#] -- this means the grammar recognizes only the empty word. + We have ruled out this case.) *) let gettoken s defred e = match Lr1.incoming_symbol s, defred with @@ -1484,6 +1486,10 @@ a sentinel cell with a valid [endp] field at offset 1. For simplicity, we always create a sentinel cell. *) +(* When we allocate a fresh parser environment, the [token] field receives a + dummy value. It will be overwritten by the first call to [run], which will + invoke [discard]. This allows us to invoke the lexer in just one place. *) + let entrydef s = let nt = Item.startnt (Lr1.start2item s) in let lexer = "lexer" @@ -1497,15 +1503,23 @@ { valpublic = true; valpat = PVar (Nonterminal.print true nt); - valval = EAnnot ( - EFun ( [ PVar lexer; PVar lexbuf ], - blet ( - [ PVar env, EApp (EVar initenv, [ EVar lexer; EVar lexbuf ]) ], - EMagic (EApp (EVar (run s), [ EVar env; initial_stack ])) - ) - ), - entrytypescheme Front.grammar (Nonterminal.print true nt) - ) + valval = + EAnnot ( + EFun ( [ PVar lexer; PVar lexbuf ], + blet ( + [ PVar env, + ERecord ([ + (flexer, EVar lexer); + (flexbuf, EVar lexbuf); + (ftoken, EMagic EUnit); + (ferror, efalse) + ]) + ], + EMagic (EApp (EVar (run s), [ EVar env; initial_stack ])) + ) + ), + entrytypescheme Front.grammar (Nonterminal.print true nt) + ) } (* ------------------------------------------------------------------------ *) @@ -1591,38 +1605,6 @@ (arrow tenv tenv) } -(* This is [initenv], used to allocate a fresh parser environment. - It fills in all fields in a straightforward way. The [token] - field receives a dummy value. It will be overwritten by the - first call to [run], which will invoke [discard]. This allows - us to invoke the lexer in just one place. *) - -let initenvdef = - let lexer = "lexer" - and lexbuf = "lexbuf" in - { - valpublic = false; - valpat = PVar initenv; - valval = - annotate ( - EFun ( [ PVar lexer; PVar lexbuf ], - blet ( - (* We do not have a dummy token at hand, so we forge one. *) - (* It will be overwritten by the first call to the lexer. *) - [ PVar token, EMagic EUnit ], - ERecord ([ - (flexer, EVar lexer); - (flexbuf, EVar lexbuf); - (ftoken, EVar token); - (ferror, efalse) - ] - ) - ) - ) - ) - (marrow [ tlexer; tlexbuf ] tenv) - } - (* ------------------------------------------------------------------------ *) (* Here is complete code for the parser. *) @@ -1655,7 +1637,7 @@ defs else reducedef prod :: defs - ) [ discarddef; initenvdef; printtokendef; assertfalsedef; errorcasedef ]))) + ) [ discarddef; printtokendef; assertfalsedef; errorcasedef ]))) ) :: SIStretch grammar.postludes :: diff -Nru menhir-20201201/src/codeBits.ml menhir-20201216/src/codeBits.ml --- menhir-20201201/src/codeBits.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/codeBits.ml 2020-12-16 19:18:28.000000000 +0000 @@ -313,3 +313,10 @@ let mapp me1 mes2 = List.fold_left mapp me1 mes2 + +let field modifiable name t = + { + modifiable = modifiable; + fieldname = name; + fieldtype = type2scheme t + } diff -Nru menhir-20201201/src/codeBits.mli menhir-20201216/src/codeBits.mli --- menhir-20201201/src/codeBits.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/codeBits.mli 2020-12-16 19:18:28.000000000 +0000 @@ -121,7 +121,10 @@ (* Constructing a named module type together with a list of "with type" constraints. *) -val with_types: IL.with_kind -> string -> (string list * string * IL.typ) list -> IL.module_type +val with_types: with_kind -> string -> (string list * string * typ) list -> module_type (* Functor applications. *) -val mapp: IL.modexpr -> IL.modexpr list -> IL.modexpr +val mapp: modexpr -> modexpr list -> modexpr + +(* Record fields. *) +val field: bool -> string -> typ -> fielddef diff -Nru menhir-20201201/src/codePieces.ml menhir-20201216/src/codePieces.ml --- menhir-20201201/src/codePieces.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/codePieces.ml 2020-12-16 19:18:28.000000000 +0000 @@ -28,13 +28,6 @@ let ntvar nt = Infer.ntvar (Nonterminal.print true nt) -(* The variable that holds the environment. This is a parameter to all - functions. We do not make it a global variable because we wish to - preserve re-entrancy. *) - -let env = - prefix "env" - (* A variable used to hold a semantic value. *) let semv = diff -Nru menhir-20201201/src/codePieces.mli menhir-20201216/src/codePieces.mli --- menhir-20201201/src/codePieces.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/codePieces.mli 2020-12-16 19:18:28.000000000 +0000 @@ -22,12 +22,6 @@ (* Naming conventions. *) -(* The variable that holds the environment. This is a parameter to all - functions. We do not make it a global variable because we wish to - preserve re-entrancy. *) - -val env : string - (* A variable used to hold a semantic value. *) val semv : string diff -Nru menhir-20201201/src/conflict.ml menhir-20201216/src/conflict.ml --- menhir-20201201/src/conflict.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/conflict.ml 2020-12-16 19:18:28.000000000 +0000 @@ -17,8 +17,8 @@ (* I suppose now is as good a time as any to do this. *) let () = - if Settings.graph then - DependencyGraph.print_dependency_graph() + if Settings.reference_graph then + ReferenceGraph.print_reference_graph() (* -------------------------------------------------------------------------- *) (* If [--dump] is present, honor it before performing conflict resolution. *) diff -Nru menhir-20201201/src/DependencyGraph.ml menhir-20201216/src/DependencyGraph.ml --- menhir-20201201/src/DependencyGraph.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/DependencyGraph.ml 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -(******************************************************************************) -(* *) -(* Menhir *) -(* *) -(* François Pottier, Inria Paris *) -(* Yann Régis-Gianas, PPS, Université Paris Diderot *) -(* *) -(* Copyright Inria. All rights reserved. This file is distributed under the *) -(* terms of the GNU General Public License version 2, as described in the *) -(* file LICENSE. *) -(* *) -(******************************************************************************) - -open Grammar - -let print_dependency_graph() = - - (* Allocate. *) - - let forward : NonterminalSet.t NonterminalMap.t ref = - ref NonterminalMap.empty - in - - let successors nt = - try - NonterminalMap.find nt !forward - with Not_found -> - NonterminalSet.empty - in - - (* Populate. *) - - Production.iter (fun prod -> - let nt1 = Production.nt prod - and rhs = Production.rhs prod in - Array.iter (function - | Symbol.T _ -> - () - | Symbol.N nt2 -> - forward := NonterminalMap.add - nt1 - (NonterminalSet.add nt2 (successors nt1)) - !forward - ) rhs - ); - - (* Print. *) - - let module P = Dot.Print (struct - type vertex = Nonterminal.t - let name nt = - Printf.sprintf "nt%d" (Nonterminal.n2i nt) - let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt = - NonterminalSet.iter (fun successor -> - f ~label:"" successor - ) (successors nt) - let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) = - Nonterminal.iter (fun nt -> - f ~label:(Nonterminal.print false nt) nt - ) - end) in - let f = open_out (Settings.base ^ ".dot") in - P.print f; - close_out f - diff -Nru menhir-20201201/src/DependencyGraph.mli menhir-20201216/src/DependencyGraph.mli --- menhir-20201201/src/DependencyGraph.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/DependencyGraph.mli 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -(******************************************************************************) -(* *) -(* Menhir *) -(* *) -(* François Pottier, Inria Paris *) -(* Yann Régis-Gianas, PPS, Université Paris Diderot *) -(* *) -(* Copyright Inria. All rights reserved. This file is distributed under the *) -(* terms of the GNU General Public License version 2, as described in the *) -(* file LICENSE. *) -(* *) -(******************************************************************************) - -(* Build and print the forward reference graph of the grammar. There is an edge - of a nonterminal symbol [nt1] to every nonterminal symbol [nt2] that occurs - in the definition of [nt1]. *) - -val print_dependency_graph: unit -> unit - diff -Nru menhir-20201201/src/grammarFunctor.ml menhir-20201216/src/grammarFunctor.ml --- menhir-20201201/src/grammarFunctor.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/grammarFunctor.ml 2020-12-16 19:18:28.000000000 +0000 @@ -259,6 +259,7 @@ tk_is_declared = true; tk_position = Positions.dummy; tk_attributes = []; + tk_alias = None; } in Array.init n (fun tok -> @@ -310,6 +311,41 @@ f i done + let tokens_without_an_alias = + let accu = ref [] in + iter_real begin fun tok -> + let properties = token_properties.(tok) in + if properties.tk_alias = None then + accu := tok :: !accu + end; + List.rev !accu + + let () = + if verbose && Settings.require_aliases then + tokens_without_an_alias |> List.iter begin fun tok -> + let properties = token_properties.(tok) in + let pos = properties.tk_position in + Error.grammar_warning [pos] + "no alias has been defined for the token %s." + (print tok) + end + + let every_token_has_an_alias = + tokens_without_an_alias = [] + + let alias tok = + token_properties.(tok).tk_alias + + let unquoted_alias tok = + alias tok |> Option.map (fun qid -> + assert (qid.[0] = '"'); + (* Skip the opening quote and decode the remainder using the lexer + [decode_string]. *) + let qid = String.sub qid 1 (String.length qid - 1) in + let lexbuf = Lexing.from_string qid in + Misc.with_buffer 8 (fun b -> Lexer.decode_string b lexbuf) + ) + (* If a token named [EOF] exists, then it is assumed to represent ocamllex's [eof] pattern. *) diff -Nru menhir-20201201/src/grammarFunctor.mli menhir-20201216/src/grammarFunctor.mli --- menhir-20201201/src/grammarFunctor.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/grammarFunctor.mli 2020-12-16 19:18:28.000000000 +0000 @@ -226,6 +226,19 @@ val attributes: t -> Syntax.attribute list + (* [every_token_has_an_alias] is true if a token alias has been defined + by the user for every token. *) + + val every_token_has_an_alias: bool + + (* [alias t] is the token alias that has been defined by the user + for the terminal symbol [t], if there is one. It is a quoted + escaped string literal. [unquoted_alias t] is the same string, + deprived of its opening and closing quotes, and unescaped. *) + + val alias: t -> string option + val unquoted_alias: t -> string option + (* The sub-module [Word] offers an implementation of words (that is, sequences) of terminal symbols. It is used by [LRijkstra]. We make it a functor, because it has internal state (a hash table) diff -Nru menhir-20201201/src/interpret.ml menhir-20201216/src/interpret.ml --- menhir-20201201/src/interpret.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/interpret.ml 2020-12-16 19:18:28.000000000 +0000 @@ -13,6 +13,7 @@ (* --------------------------------------------------------------------------- *) +open Printf open Grammar open SentenceParserAux @@ -59,16 +60,24 @@ (* Display and debugging. *) let print_sentence (nto, terminals) : string = - let b = Buffer.create 128 in - Option.iter (fun nt -> - Printf.bprintf b "%s: " (Nonterminal.print false nt) - ) nto; - let separator = Misc.once "" " " in - List.iter (fun t -> - Printf.bprintf b "%s%s" (separator()) (Terminal.print t) - ) terminals; - Printf.bprintf b "\n"; - Buffer.contents b + Misc.with_buffer 128 (fun b -> + Option.iter (fun nt -> + bprintf b "%s: " (Nonterminal.print false nt) + ) nto; + let separator = Misc.once "" " " in + List.iter (fun t -> + bprintf b "%s%s" (separator()) (Terminal.print t) + ) terminals; + bprintf b "\n"; + ) + +let print_concrete_sentence (_nto, terminals) : string = + Misc.with_buffer 128 (fun b -> + let separator = Misc.once "" " " in + List.iter (fun t -> + bprintf b "%s%s" (separator()) (Misc.unSome (Terminal.unquoted_alias t)) + ) terminals + ) (* --------------------------------------------------------------------------- *) @@ -175,7 +184,7 @@ (* Success. *) - Printf.printf "ACCEPT"; + printf "ACCEPT"; if Settings.interpret_show_cst then begin print_newline(); Cst.show stdout cst @@ -185,13 +194,13 @@ (* Parser failure. *) - Printf.printf "REJECT" + printf "REJECT" with EndOfStream -> (* Lexer failure. *) - Printf.printf "OVERSHOOT" + printf "OVERSHOOT" end; print_newline() @@ -229,14 +238,33 @@ module SS = StackSymbols.Run() -(* [print_messages_auto] displays just the sentence and the auto-generated - comments. *) +(* [print_messages_auto (nt, sentence, target)] displays the sentence + defined by [nt] and [sentence], leading to the state [target]. It + then displays a bunch of auto-generated comments. *) let print_messages_auto (nt, sentence, target) : unit = - (* Print the sentence, followed with auto-generated comments. *) + + (* Print the sentence. *) print_string (print_sentence (Some nt, sentence)); + + (* If a token alias has been defined for every terminal symbol, then + we can convert this sentence into concrete syntax. Do so. We make + a few assumptions about the concrete syntax of the language: + 1. It is permitted to insert one space between two tokens; + 2. No token contains a newline character. + (Our lexer enforces this assumption.) + The name of the start symbol cannot be printed in a meaningful + manner, so it is omitted. *) + if Terminal.every_token_has_an_alias then + printf + "##\n\ + ## Concrete syntax: %s\n" + (print_concrete_sentence (Some nt, sentence)) + ; + + (* Show which state this sentence leads to. *) let (s', spurious) = target in - Printf.printf + printf "##\n\ ## Ends in an error in state: %d.\n\ ##\n\ @@ -246,26 +274,32 @@ could sometimes be helpful, but is usually intolerably verbose. *) (Lr0.print "## " (Lr1.state s')) ; - Printf.printf + + (* Show the known suffix of the stack in this state. *) + printf "## The known suffix of the stack is as follows:\n\ ##%s\n\ ##\n" (SS.print_stack_symbols s') ; + + (* If interpreting this sentence causes spurious reductions (that is, + reductions that take place after the last terminal symbol has been + shifted), say so, and show them. *) if spurious <> [] then begin - Printf.printf + printf "## WARNING: This example involves spurious reductions.\n\ ## This implies that, although the LR(1) items shown above provide an\n\ ## accurate view of the past (what has been recognized so far), they\n\ ## may provide an INCOMPLETE view of the future (what was expected next).\n" ; List.iter (fun (s, prod) -> - Printf.printf + printf "## In state %d, spurious reduction of production %s\n" (Lr1.number s) (Production.print prod) ) spurious; - Printf.printf "##\n" + printf "##\n" end (* [print_messages_item] displays one data item. The item is of the form [nt, @@ -278,7 +312,7 @@ (* Print the sentence, followed with auto-generated comments. *) print_messages_auto (nt, sentence, target); (* Then, print a proposed error message, between two blank lines. *) - Printf.printf "\n%s\n" default_message + printf "\n%s\n" default_message (* --------------------------------------------------------------------------- *) @@ -445,7 +479,7 @@ | Comment _ -> () ) runs; - Printf.eprintf + eprintf "Read %d sample input sentences and %d error messages.\n%!" !s !m; runs @@ -601,9 +635,9 @@ valval = EFun ([ PVar "s" ], EMatch (EVar "s", branches)) } in let program = [ - SIComment (Printf.sprintf + SIComment (sprintf "This file was auto-generated based on \"%s\"." filename); - SIComment (Printf.sprintf + SIComment (sprintf "Please note that the function [%s] can raise [Not_found]." name); SIValDefs (false, [ messagedef ]); @@ -635,7 +669,7 @@ let () = if Settings.interpret then let read = setup() in - Printf.printf "Ready!\n%!"; + printf "Ready!\n%!"; while true do match read() with | None -> @@ -809,7 +843,7 @@ end let conflict_comment filename = - Printf.sprintf + sprintf "#@ WARNING:\n\ #@ The following sentence has been copied from \"%s\".\n\ #@ It is redundant with a sentence that appears earlier in this file,\n\ @@ -817,7 +851,7 @@ filename let toplevel_comment filename = - Printf.sprintf + sprintf "#@ WARNING:\n\ #@ The following comment has been copied from \"%s\".\n\ #@ It may need to be proofread, updated, moved, or removed.\n" @@ -1174,6 +1208,30 @@ )) run.elements )) runs; + exit 0 + ) + +(* [--echo-errors-concrete] works like [--echo-errors], except every sentence + is followed with an auto-generated comment that shows its concrete syntax. *) + +let () = + Settings.echo_errors_concrete |> Option.iter (fun filename -> + + (* Read the file. *) + let strict = false in + let runs : run or_comment list = read_messages strict filename in + + (* Echo. *) + List.iter (or_comment_iter (fun run -> + List.iter (or_comment_iter (fun ((_, sentence), _target) -> + print_string (print_sentence sentence); + if Terminal.every_token_has_an_alias then + printf + "## Concrete syntax: %s\n" + (print_concrete_sentence sentence) + )) run.elements + )) runs; + exit 0 ) diff -Nru menhir-20201201/src/invariant.ml menhir-20201216/src/invariant.ml --- menhir-20201201/src/invariant.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/invariant.ml 2020-12-16 19:18:28.000000000 +0000 @@ -57,39 +57,51 @@ (* Vectors of sets of states. *) -module StateVector = struct +module StateSetVector = struct type property = - Lr1.NodeSet.t list + Lr1.NodeSet.t array + + let bottom height = + Array.make height Lr1.NodeSet.empty let empty = - [] + [||] - let rec leq_join v1 v2 = - match v1, v2 with - | [], [] -> - [] - | states1 :: vtail1, states2 :: vtail2 -> - let states = Lr1.NodeSet.leq_join states1 states2 - and vtail = leq_join vtail1 vtail2 in - if states2 == states && vtail2 == vtail then - v2 - else - states :: vtail - | _, _ -> - (* Because all heights are known ahead of time, we are able - to (and careful to) compare only vectors of equal length. *) - assert false + let leq_join v1 v2 = + let n = Array.length v1 in + (* Because all heights are known ahead of time, we are able (and careful) + to compare only vectors of equal length. *) + assert (n = Array.length v2); + let v = Array.init n (fun i -> Lr1.NodeSet.leq_join v1.(i) v2.(i)) in + if Misc.array_for_all2 (==) v2 v then v2 else v let push v x = - x :: v - - let truncate = - MenhirLib.General.take + (* Push [x] onto the right end of [v]. *) + let n = Array.length v in + Array.init (n+1) (fun i -> if i < n then v.(i) else x) + + let truncate k v = + (* Keep a suffix of length [k] of [v]. *) + let n = Array.length v in + Array.sub v (n-k) k + + let print v = + if Array.length v = 0 then + "epsilon" + else + Misc.separated_list_to_string Lr1.NodeSet.print "; " (Array.to_list v) + + let iter = + Array.iter + + let get = + (* The index 0 corresponds to the cell that lies deepest in the stack. *) + Array.get end -open StateVector +open StateSetVector (* Define the data flow graph. *) @@ -97,7 +109,7 @@ type variable = Lr1.node - type property = StateVector.property + type property = StateSetVector.property (* At each start state of the automaton, the stack is empty. *) @@ -130,7 +142,7 @@ (Fix.Glue.ArraysAsImperativeMaps(Key)) (Key) in - let module F = Fix.DataFlow.Run(M)(StateVector)(G) in + let module F = Fix.DataFlow.Run(M)(StateSetVector)(G) in F.solution (* If every state is reachable, then the least fixed point must be non-[None] @@ -156,10 +168,30 @@ Production.tabulate (fun prod -> let sites = Lr1.production_where prod in let height = Production.length prod in - let bottom = Misc.list_make height Lr1.NodeSet.empty in Lr1.NodeSet.fold (fun node accu -> leq_join (truncate height (stack_states node)) accu - ) sites bottom + ) sites (bottom height) + ) + +(* ------------------------------------------------------------------------ *) +(* If requested, print the information that has been computed above. *) + +let () = + Error.logC 3 (fun f -> + Lr1.iter (fun node -> + Printf.fprintf f "stack(%s) = %s\n" + (Lr1.print node) + (print (stack_states node)) + ) + ) + +let () = + Error.logC 3 (fun f -> + Production.iterx (fun prod -> + Printf.fprintf f "stack when reducing %s = %s\n" + (Production.print prod) + (print (production_states prod)) + ) ) (* ------------------------------------------------------------------------ *) @@ -211,7 +243,7 @@ (* Enforce condition (1) above. *) let share (v : property) = - List.iter (fun states -> + StateSetVector.iter (fun states -> let dummy = UnionFind.fresh false in Lr1.NodeSet.iter (fun state -> UnionFind.union dummy (represented state) @@ -260,7 +292,7 @@ let () = Lr1.iter (fun node -> let v = stack_states node in - List.iter (fun states -> + StateSetVector.iter (fun states -> if Lr1.NodeSet.cardinal states >= 2 && handlers states then represents states ) v @@ -276,7 +308,7 @@ if length = 0 then Lr1.NodeSet.iter represent sites else - let states = List.nth (production_states prod) (length - 1) in + let states = StateSetVector.get (production_states prod) 0 in represents states ) @@ -303,6 +335,16 @@ Printf.fprintf f "%d out of %d states are represented.\n" count Lr1.n ) +(* If requested, show a detailed table of which states are represented. *) + +let () = + Error.logC 3 (fun f -> + Lr1.iter (fun node -> + Printf.fprintf f "represented(%s) = %b\n" + (Lr1.print node) (represented node) + ) + ) + (* ------------------------------------------------------------------------ *) (* Accessors for information about the stack. *) @@ -319,27 +361,34 @@ (* This auxiliary function converts a stack-as-an-array (top of stack at the right end) to a stack-as-a-list (top of stack at list head). *) -let convert a = +let _convert a = let n = Array.length a in let rec loop i accu = if i = n then accu else loop (i + 1) (a.(i) :: accu) in loop 0 [] +(* This auxiliary function converts a pair of stacks-as-arrays to a + stack-as-a-list-of-pairs. *) + +let convert2 a b = + let n = Array.length a in + assert (n = Array.length b); + let rec loop i accu = + if i = n then accu else loop (i + 1) ((a.(i), b.(i)) :: accu) + in + loop 0 [] + (* [stack s] describes the stack when the automaton is in state [s]. *) let stack node : word = - List.combine - (convert (stack_symbols node)) - (stack_states node) + convert2 (stack_symbols node) (stack_states node) (* [prodstack prod] describes the stack when production [prod] is about to be reduced. *) let prodstack prod : word = - List.combine - (convert (Production.rhs prod)) - (production_states prod) + convert2 (Production.rhs prod) (production_states prod) (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It diff -Nru menhir-20201201/src/lexer.mll menhir-20201216/src/lexer.mll --- menhir-20201201/src/lexer.mll 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/lexer.mll 2020-12-16 19:18:28.000000000 +0000 @@ -404,6 +404,17 @@ "on_error_reduce", ON_ERROR_REDUCE; ] +(* ------------------------------------------------------------------------ *) + +(* Decoding escaped characters. *) + +let char_for_backslash = function + | 'n' -> '\010' + | 'r' -> '\013' + | 'b' -> '\008' + | 't' -> '\009' + | c -> c + } (* ------------------------------------------------------------------------ *) @@ -498,11 +509,16 @@ } | (uppercase identchar *) as id { UID (with_pos (cpos lexbuf) id) } -(* Quoted strings, which are used as aliases for tokens. - For simplicity, we just disallow double quotes and backslash outright. - Given the use of terminal strings in grammars, this is fine. *) -| ( "\"" ( [' ' - '~'] # ['"' '\\'] + ) "\"" ) as id - { QID (with_pos (cpos lexbuf) id) } +(* Quoted strings are used as aliases for tokens. *) +(* A quoted string is stored as is -- with the quotes + and with its escape sequences. *) +| '"' + { let buffer = Buffer.create 16 in + let openingpos = lexeme_start_p lexbuf in + let content = record_string openingpos buffer lexbuf in + let id = Printf.sprintf "\"%s\"" content in + let pos = import (openingpos, lexbuf.lex_curr_p) in + QID (with_pos pos id) } | "//" [^ '\010' '\013']* newline (* skip C++ style comment *) | newline { new_line lexbuf; main lexbuf } @@ -746,18 +762,51 @@ and string openingpos = parse | '"' - { () } + { () } | '\\' newline | newline - { new_line lexbuf; string openingpos lexbuf } + { new_line lexbuf; string openingpos lexbuf } | '\\' _ - (* Upon finding a backslash, skip the character that follows, - unless it is a newline. Pretty crude, but should work. *) - { string openingpos lexbuf } + (* Upon finding a backslash, skip the character that follows, + unless it is a newline. Pretty crude, but should work. *) + { string openingpos lexbuf } | eof - { error1 openingpos "unterminated OCaml string." } + { error1 openingpos "unterminated OCaml string." } | _ - { string openingpos lexbuf } + { string openingpos lexbuf } + +(* ------------------------------------------------------------------------ *) + +(* Recording on OCaml string. (This is used for token aliases.) *) + +and record_string openingpos buffer = parse +| '"' + { Buffer.contents buffer } +| ('\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']) as sequence + { (* This escape sequence is recognized as such, but not decoded. *) + Buffer.add_string buffer sequence; + record_string openingpos buffer lexbuf } +| '\\' _ + { error2 lexbuf "illegal backslash escape in string." } +| newline + { error2 lexbuf "illegal newline in string." } +| eof + { error1 openingpos "unterminated string." } +| _ as c + { Buffer.add_char buffer c; + record_string openingpos buffer lexbuf } + +(* Decoding a string that may contain escaped characters. *) + +and decode_string buffer = parse +| '"' + { (* The final double quote is skipped. *) } +| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c) + { Buffer.add_char buffer (char_for_backslash c); + decode_string buffer lexbuf } +| _ as c + { Buffer.add_char buffer c; + decode_string buffer lexbuf } (* ------------------------------------------------------------------------ *) diff -Nru menhir-20201201/src/lr0.ml menhir-20201216/src/lr0.ml --- menhir-20201201/src/lr0.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/lr0.ml 2020-12-16 19:18:28.000000000 +0000 @@ -304,14 +304,14 @@ (* Displaying a concrete state. *) let print_concrete leading (state : concretelr1state) = - let buffer = Buffer.create 1024 in - Item.Map.iter (fun item toks -> - Printf.bprintf buffer "%s%s [ %s ]\n" - leading - (Item.print item) - (TerminalSet.print toks) - ) state; - Buffer.contents buffer + Misc.with_buffer 1024 (fun buffer -> + Item.Map.iter (fun item toks -> + Printf.bprintf buffer "%s%s [ %s ]\n" + leading + (Item.print item) + (TerminalSet.print toks) + ) state + ) (* Displaying a state. By default, only the kernel is displayed, not the closure. *) diff -Nru menhir-20201201/src/lr1.ml menhir-20201216/src/lr1.ml --- menhir-20201201/src/lr1.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/lr1.ml 2020-12-16 19:18:28.000000000 +0000 @@ -466,6 +466,9 @@ let number node = node +let print node = + Printf.sprintf "%d" (number node) + let entry = ProductionMap.map transport Raw.entry @@ -695,6 +698,11 @@ let leq_join s1 s2 = if subset s1 s2 then s2 else union s1 s2 + let print s = + Printf.sprintf "{ %s }" ( + Misc.separated_iter_to_string print ", " (fun f -> iter f s) + ) + end module NodeMap = @@ -933,7 +941,7 @@ is preferable to every other (so priority plays a role). *) let prioritized = - ref 0 + ref [] (* The set of nonterminal symbols in the left-hand side of an extra reduction. *) @@ -961,7 +969,8 @@ with a reduction, update [extra] and [extra_nts]. *) let triggered = lazy ( incr extra; - if List.length prods > 1 then incr prioritized; + if List.length prods > 1 then + prioritized := node :: !prioritized; extra_nts := NonterminalSet.add (Production.nt prod) !extra_nts ) in Terminal.iter_real (fun tok -> @@ -980,13 +989,19 @@ extra_reductions_in_node node ); (* Info message. *) - if !extra > 0 then + if !extra > 0 then begin Error.logA 1 (fun f -> Printf.fprintf f "Extra reductions on error were added in %d states.\n" !extra; Printf.fprintf f "Priority played a role in %d of these states.\n" - !prioritized + (List.length !prioritized) ); + Error.logA 2 (fun f -> + if !prioritized <> [] then + Printf.fprintf f "These states are %s.\n" + (NodeSet.print (NodeSet.of_list !prioritized)) + ) + end; (* Warn about useless %on_error_reduce declarations. *) OnErrorReduce.iter (fun nt -> if not (NonterminalSet.mem nt !extra_nts) then diff -Nru menhir-20201201/src/lr1.mli menhir-20201216/src/lr1.mli --- menhir-20201201/src/lr1.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/lr1.mli 2020-12-16 19:18:28.000000000 +0000 @@ -53,6 +53,7 @@ module NodeSet : sig include Set.S with type elt = node val leq_join: t -> t -> t + val print: t -> string end module NodeMap : Map.S with type key = node @@ -80,6 +81,10 @@ val n: int val number: node -> int +(* A state is printed simply as its number. *) + +val print: node -> string + (* This provides access to the LR(1) state that a node stands for. *) val state: node -> Lr0.lr1state diff -Nru menhir-20201201/src/misc.ml menhir-20201216/src/misc.ml --- menhir-20201201/src/misc.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/misc.ml 2020-12-16 19:18:28.000000000 +0000 @@ -46,22 +46,27 @@ done; !sum +let with_buffer n f = + let b = Buffer.create n in + f b; + Buffer.contents b + type 'a iter = ('a -> unit) -> unit let separated_iter_to_string printer separator iter = - let b = Buffer.create 32 in - let first = ref true in - iter (fun x -> - if !first then begin - Buffer.add_string b (printer x); - first := false - end - else begin - Buffer.add_string b separator; - Buffer.add_string b (printer x) - end - ); - Buffer.contents b + with_buffer 32 (fun b -> + let first = ref true in + iter (fun x -> + if !first then begin + Buffer.add_string b (printer x); + first := false + end + else begin + Buffer.add_string b separator; + Buffer.add_string b (printer x) + end + ) + ) let separated_list_to_string printer separator xs = separated_iter_to_string printer separator (fun f -> List.iter f xs) @@ -417,5 +422,15 @@ else false in loop 0 +let array_fold_left2 f accu a1 a2 = + let n1 = Array.length a1 + and n2 = Array.length a2 in + if n1 <> n2 then invalid_arg "Array.fold_left2"; + let accu = ref accu in + for i = 0 to n1 - 1 do + accu := f !accu (Array.unsafe_get a1 i) (Array.unsafe_get a2 i) + done; + !accu + let rec list_make n x = if n = 0 then [] else x :: list_make (n - 1) x diff -Nru menhir-20201201/src/misc.mli menhir-20201216/src/misc.mli --- menhir-20201201/src/misc.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/misc.mli 2020-12-16 19:18:28.000000000 +0000 @@ -40,6 +40,11 @@ val sum: int -> (int -> int) -> int +(* [with_buffer n f] creates a fresh buffer of size [n], passes it to [f], + and returns the final content of the buffer. *) + +val with_buffer: int -> (Buffer.t -> unit) -> string + (* [separated_list_to_string printer sep l] converts [l] into a string representation built by using [printer] on each element and [sep] as a separator. *) @@ -191,6 +196,9 @@ val array_for_all : ('a -> bool) -> 'a array -> bool val array_for_all2 : ('a -> 'b -> bool) -> 'a array -> 'b array -> bool +val array_fold_left2: + ('a -> 'b1 -> 'b2 -> 'a) -> 'a -> 'b1 array -> 'b2 array -> 'a + (* [List.make] *) val list_make: int -> 'a -> 'a list diff -Nru menhir-20201201/src/partialGrammar.ml menhir-20201216/src/partialGrammar.ml --- menhir-20201201/src/partialGrammar.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/partialGrammar.ml 2020-12-16 19:18:28.000000000 +0000 @@ -36,9 +36,9 @@ difficult by the fact that %token and %left-%right-%nonassoc declarations are independent. *) - (* Declarations of token aliases are lost at this point. *) + (* If a token carries an alias, it is recorded in the field [tk_alias]. *) - | DToken (ocamltype, terminal, _alias, attributes) -> + | DToken (ocamltype, terminal, alias, attributes) -> let token_property = try @@ -64,6 +64,7 @@ tk_filename = filename; tk_position = decl.position; tk_attributes = attributes; + tk_alias = alias; } with Not_found -> @@ -77,7 +78,8 @@ tk_precedence = UndefinedPrecedence; tk_position = decl.position; tk_attributes = attributes; - tk_is_declared = true + tk_is_declared = true; + tk_alias = alias; } in @@ -121,6 +123,7 @@ tk_precedence = prec; tk_is_declared = false; tk_attributes = []; + tk_alias = None; (* Will be updated later. *) tk_position = decl.position; } in diff -Nru menhir-20201201/src/printer.ml menhir-20201216/src/printer.ml --- menhir-20201201/src/printer.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/printer.ml 2020-12-16 19:18:28.000000000 +0000 @@ -824,12 +824,12 @@ P.expr e let string_of_expr e = - let b = Buffer.create 512 in - let module P = - MakeBuffered (struct - let f = b - let locate_stretches = None - end) - in - P.expr e; - Buffer.contents b + Misc.with_buffer 512 (fun b -> + let module P = + MakeBuffered (struct + let f = b + let locate_stretches = None + end) + in + P.expr e + ) diff -Nru menhir-20201201/src/ReferenceGraph.ml menhir-20201216/src/ReferenceGraph.ml --- menhir-20201201/src/ReferenceGraph.ml 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20201216/src/ReferenceGraph.ml 2020-12-16 19:18:28.000000000 +0000 @@ -0,0 +1,64 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +open Grammar + +let print_reference_graph() = + + (* Allocate. *) + + let forward : NonterminalSet.t NonterminalMap.t ref = + ref NonterminalMap.empty + in + + let successors nt = + try + NonterminalMap.find nt !forward + with Not_found -> + NonterminalSet.empty + in + + (* Populate. *) + + Production.iter (fun prod -> + let nt1 = Production.nt prod + and rhs = Production.rhs prod in + Array.iter (function + | Symbol.T _ -> + () + | Symbol.N nt2 -> + forward := NonterminalMap.add + nt1 + (NonterminalSet.add nt2 (successors nt1)) + !forward + ) rhs + ); + + (* Print. *) + + let module P = Dot.Print (struct + type vertex = Nonterminal.t + let name nt = + Printf.sprintf "nt%d" (Nonterminal.n2i nt) + let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt = + NonterminalSet.iter (fun successor -> + f ~label:"" successor + ) (successors nt) + let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) = + Nonterminal.iter (fun nt -> + f ~label:(Nonterminal.print false nt) nt + ) + end) in + let f = open_out (Settings.base ^ ".dot") in + P.print f; + close_out f diff -Nru menhir-20201201/src/ReferenceGraph.mli menhir-20201216/src/ReferenceGraph.mli --- menhir-20201201/src/ReferenceGraph.mli 1970-01-01 00:00:00.000000000 +0000 +++ menhir-20201216/src/ReferenceGraph.mli 2020-12-16 19:18:28.000000000 +0000 @@ -0,0 +1,18 @@ +(******************************************************************************) +(* *) +(* Menhir *) +(* *) +(* François Pottier, Inria Paris *) +(* Yann Régis-Gianas, PPS, Université Paris Diderot *) +(* *) +(* Copyright Inria. All rights reserved. This file is distributed under the *) +(* terms of the GNU General Public License version 2, as described in the *) +(* file LICENSE. *) +(* *) +(******************************************************************************) + +(* Build and print the forward reference graph of the grammar. There is an edge + of a nonterminal symbol [nt1] to every nonterminal symbol [nt2] that occurs + in the definition of [nt1]. *) + +val print_reference_graph: unit -> unit diff -Nru menhir-20201201/src/referenceInterpreter.ml menhir-20201216/src/referenceInterpreter.ml --- menhir-20201201/src/referenceInterpreter.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/referenceInterpreter.ml 2020-12-16 19:18:28.000000000 +0000 @@ -245,6 +245,14 @@ (* ------------------------------------------------------------------------ *) +(* The strategy used by the reference interpreter is determined by the + command line switch [--strategy]. *) + +let strategy = + Settings.strategy + +(* ------------------------------------------------------------------------ *) + (* Define a palatable user entry point. *) let interpret log nt lexer lexbuf = @@ -261,7 +269,7 @@ (* Run it. *) try - Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf) + Some (E.entry strategy (Lr1.entry_of_nt nt) lexer lexbuf) with T.Error -> None @@ -355,7 +363,7 @@ loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious end | E.Shifting _ -> - loop (E.resume checkpoint) spurious + loop (E.resume ~strategy checkpoint) spurious | E.AboutToReduce (env, prod) -> (* If we have requested the last input token and if this is not a default reduction, then this is a spurious reduction. @@ -369,7 +377,7 @@ else spurious in - loop (E.resume checkpoint) spurious + loop (E.resume ~strategy checkpoint) spurious | E.HandlingError env -> (* Check that all of the input has been read. Otherwise, the error has occurred sooner than expected. *) diff -Nru menhir-20201201/src/settings.ml menhir-20201216/src/settings.ml --- menhir-20201201/src/settings.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/settings.ml 2020-12-16 19:18:28.000000000 +0000 @@ -71,7 +71,10 @@ let dump_resolved = ref false -let graph = +let reference_graph = + ref false + +let automaton_graph = ref false let trace = @@ -279,6 +282,12 @@ let set_echo_errors filename = echo_errors := Some filename +let echo_errors_concrete = + ref None + +let set_echo_errors_concrete filename = + echo_errors_concrete := Some filename + let cmly = ref false @@ -292,10 +301,26 @@ let dollars = ref DollarsAllowed +let require_aliases = + ref false + +let strategy = + ref `Legacy + +let set_strategy = function + | "legacy" -> + strategy := `Legacy + | "simplified" -> + strategy := `Simplified + | _ -> + eprintf "Error: --strategy should be followed with legacy | simplified.\n"; + exit 1 + (* When new command line options are added, please update both the manual in [doc/manual.tex] and the man page in [doc/menhir.1]. *) let options = Arg.align [ + "--automaton-graph", Arg.Set automaton_graph, " (undocumented)"; "--base", Arg.Set_string base, " Specifies a base name for the output file(s)"; "--canonical", Arg.Unit (fun () -> construction_mode := ModeCanonical), " Construct a canonical Knuth LR(1) automaton"; "--cmly", Arg.Set cmly, " Write a .cmly file"; @@ -305,18 +330,18 @@ "--coq", Arg.Set coq, " Generate a formally verified parser, in Coq"; "--coq-lib-path", Arg.String (fun path -> coq_lib_path := Some path), " How to qualify references to MenhirLib"; "--coq-lib-no-path", Arg.Unit (fun () -> coq_lib_path := None), " Do *not* qualify references to MenhirLib"; - "--coq-no-version-check", Arg.Set coq_no_version_check, " The generated parser will not check that the versions of Menhir and MenhirLib match."; + "--coq-no-version-check", Arg.Set coq_no_version_check, " Do not generate a version check."; "--coq-no-actions", Arg.Set coq_no_actions, " Ignore semantic actions in the Coq output"; "--coq-no-complete", Arg.Set coq_no_complete, " Do not generate a proof of completeness"; "--depend", Arg.Unit enable_depend, " Invoke ocamldep and display dependencies"; "--dump", Arg.Set dump, " Write an .automaton file"; "--dump-resolved", Arg.Set dump_resolved, " Write an .automaton.resolved file"; "--echo-errors", Arg.String set_echo_errors, " Echo the sentences in a .messages file"; + "--echo-errors-concrete", Arg.String set_echo_errors_concrete, " Echo the sentences in a .messages file"; "--error-recovery", Arg.Set recovery, " (no longer supported)"; "--explain", Arg.Set explain, " Explain conflicts in .conflicts"; "--external-tokens", Arg.String codeonly, " Import token type definition from "; "--fixed-exception", Arg.Set fixedexc, " Declares Error = Parsing.Parse_error"; - "--graph", Arg.Set graph, " Write a dependency graph to a .dot file"; "--infer", Arg.Unit enable_infer, " Invoke ocamlc to do type inference"; "--infer-protocol-supported", Arg.Unit (fun () -> exit 0), " Stop with exit code 0"; "--infer-write-query", Arg.String enable_write_query, " Write mock .ml file"; @@ -349,7 +374,10 @@ " Print grammar with unit actions & tokens"; "--only-tokens", Arg.Unit tokentypeonly, " Generate token type definition only, no code"; "--raw-depend", Arg.Unit enable_raw_depend, " Invoke ocamldep and echo its raw output"; + "--reference-graph", Arg.Set reference_graph, " (undocumented)"; + "--require-aliases", Arg.Set require_aliases, " Check that every token has a token alias"; "--stdlib", Arg.String ignore, " Ignored (deprecated)"; + "--strategy", Arg.String set_strategy, " Choose an error-handling strategy"; "--strict", Arg.Set strict, " Warnings about the grammar are errors"; "--suggest-comp-flags", Arg.Unit (fun () -> suggestion := SuggestCompFlags), " Suggest compilation flags for ocaml{c,opt}"; @@ -465,8 +493,11 @@ let dump_resolved = !dump_resolved -let graph = - !graph +let reference_graph = + !reference_graph + +let automaton_graph = + !automaton_graph let trace = !trace @@ -591,6 +622,9 @@ let echo_errors = !echo_errors +let echo_errors_concrete = + !echo_errors_concrete + let cmly = !cmly @@ -600,6 +634,12 @@ let dollars = !dollars +let require_aliases = + !require_aliases + +let strategy = + !strategy + let infer = !infer diff -Nru menhir-20201201/src/settings.mli menhir-20201216/src/settings.mli --- menhir-20201201/src/settings.mli 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/settings.mli 2020-12-16 19:18:28.000000000 +0000 @@ -51,9 +51,13 @@ val dump_resolved: bool -(* Whether the grammar's dependence graph should be dumped. *) +(* Whether the grammar's reference graph should be dumped. *) -val graph: bool +val reference_graph: bool + +(* Whether the automaton's graph should be dumped. *) + +val automaton_graph: bool (* Whether tracing instructions should be generated. *) @@ -240,6 +244,12 @@ val echo_errors: string option +(* This flag causes Menhir to read the error message descriptions stored in + [filename] and echo the error sentences, including the concrete syntax + of each sentence, in an auto-comment. *) + +val echo_errors_concrete: string option + (* This flag causes Menhir to produce a [.cmly] file, which contains a binary-format description of the grammar and automaton. *) @@ -258,3 +268,14 @@ | DollarsAllowed val dollars: dollars + +(* This flag requires every token to come with a token alias. If that is + not the case, warnings are emitted. *) + +val require_aliases : bool + +(* The error handling strategy that should be used by the code back-end, the + table back-end, and the reference interpreter. See [IncrementalEngine] for + an explanation of the available strategies. *) + +val strategy: [`Legacy | `Simplified] diff -Nru menhir-20201201/src/stage2/menhir_flags menhir-20201216/src/stage2/menhir_flags --- menhir-20201201/src/stage2/menhir_flags 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/stage2/menhir_flags 2020-12-16 19:18:28.000000000 +0000 @@ -1,3 +1,5 @@ --no-pager --table --fixed-exception +--require-aliases +--strict diff -Nru menhir-20201201/src/stage2/parser.mly menhir-20201216/src/stage2/parser.mly --- menhir-20201201/src/stage2/parser.mly 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/stage2/parser.mly 2020-12-16 19:18:28.000000000 +0000 @@ -77,17 +77,59 @@ /* ------------------------------------------------------------------------- */ /* Tokens. */ -%token TOKEN TYPE LEFT RIGHT NONASSOC START PREC PUBLIC COLON BAR EOF EQUAL -%token INLINE LPAREN RPAREN COMMA QUESTION STAR PLUS PARAMETER ON_ERROR_REDUCE -%token PERCENTATTRIBUTE SEMI -%token LID UID QID -%token HEADER -%token OCAMLTYPE -%token PERCENTPERCENT -%token ACTION -%token ATTRIBUTE GRAMMARATTRIBUTE +%token + TOKEN "%token" + TYPE "%type" + LEFT "%left" + RIGHT "%right" + NONASSOC "%nonassoc" + START "%start" + PREC "%prec" + PUBLIC "%public" + COLON ":" + BAR "|" + EOF "" + EQUAL "=" + INLINE "%inline" + LPAREN "(" + RPAREN ")" + COMMA "," + QUESTION "?" + STAR "*" + PLUS "+" + PARAMETER "%parameter" + ON_ERROR_REDUCE "%on_error_reduce" + PERCENTATTRIBUTE "%attribute" + SEMI ";" + +%token + LID "lident" + UID "UIdent" + QID "\"alias\"" + +%token + HEADER "%{ header %}" + +%token + OCAMLTYPE "" + +%token + PERCENTPERCENT "%%" + +%token + ACTION "{}" + +%token + ATTRIBUTE "[@foo]" + GRAMMARATTRIBUTE "%[@foo]" + /* For the new rule syntax: */ -%token LET TILDE UNDERSCORE COLONEQUAL EQUALEQUAL +%token + LET "let" + TILDE "~" + UNDERSCORE "_" + COLONEQUAL ":=" + EQUALEQUAL "==" /* ------------------------------------------------------------------------- */ /* Type annotations and start symbol. */ diff -Nru menhir-20201201/src/syntax.ml menhir-20201216/src/syntax.ml --- menhir-20201201/src/syntax.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/syntax.ml 2020-12-16 19:18:28.000000000 +0000 @@ -41,10 +41,13 @@ case it is a token alias. Token aliases are eliminated by replacing them with the corresponding - terminal symbols very early on during the joining of the partial grammars - -- see the function [dealias_pg] in [PartialGrammar]. + terminal symbols very early on during the joining of the partial grammars; + see the module [ExpandTokenAliases]. - In a complete grammar, there are no token aliases any longer. *) + In a complete grammar, there are no token aliases any longer. That is, + we keep track of the aliases that have been declared (they can be found + via the field [tk_alias]), but we never use them, since they have been + eliminated up front. *) type alias = string option @@ -123,6 +126,7 @@ tk_filename : filename; tk_ocamltype : Stretch.ocamltype option; tk_position : Positions.t; + tk_alias : alias; tk_attributes : attributes; mutable tk_associativity : token_associativity; mutable tk_precedence : precedence_level; diff -Nru menhir-20201201/src/tableBackend.ml menhir-20201216/src/tableBackend.ml --- menhir-20201201/src/tableBackend.ml 2020-12-01 15:15:50.000000000 +0000 +++ menhir-20201216/src/tableBackend.ml 2020-12-16 19:18:28.000000000 +0000 @@ -200,9 +200,8 @@ let endpos_of_top_stack_cell = ERecordAccess(EVar stack, fendp) -(* This is the body of the [reduce] function associated with - production [prod]. It assumes that the variables [env] and [stack] - have been bound. *) +(* This is the body of the [reduce] function associated with production + [prod]. It assumes that the variable [stack] is bound. *) let reducebody prod = @@ -284,7 +283,11 @@ ) ) +(* This is the body of the semantic action associated with production + [prod]. It takes just one parameter, namely the environment [env]. *) + let semantic_action prod = + let env = prefix "env" in EFun ( [ PVar env ], @@ -684,9 +687,24 @@ (* ------------------------------------------------------------------------ *) -(* The client APIs invoke the interpreter with an appropriate start state. - The monolithic API calls [entry] (see [Engine]), while the incremental - API calls [start]. *) +(* The client APIs invoke the interpreter with an appropriate start state. The + monolithic API uses the function [entry], which performs the entire parsing + process, while the incremental API relies on the function [start], which + returns just an initial checkpoint. Both functions are defined in + [lib/Engine.ml]. *) + +(* The function [entry] takes a [strategy] parameter, whose value is fixed at + compile time, based on [Settings.strategy]. For users of the incremental + API, the value of [Settings.strategy] is irrelevant; the functions [resume] + and [loop] offered by the incremental API take a [strategy] parameter at + runtime. *) + +let strategy = + match Settings.strategy with + | `Legacy -> + EData ("`Legacy", []) + | `Simplified -> + EData ("`Simplified", []) (* An entry point to the monolithic API. *) @@ -701,6 +719,7 @@ EMagic ( EApp ( EVar entry, [ + strategy; EIntConst (Lr1.number state); EVar lexer; EVar lexbuf