diff -Nru menhir-20201201/coq-menhirlib/src/.gitignore menhir-20201216/coq-menhirlib/src/.gitignore
--- menhir-20201201/coq-menhirlib/src/.gitignore 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/coq-menhirlib/src/.gitignore 2020-12-16 19:18:28.000000000 +0000
@@ -3,3 +3,6 @@
*.v.d
.*.aux
_CoqProject
+*.vok
+*.vos
+.lia.cache
diff -Nru menhir-20201201/coq-menhirlib/src/Interpreter.v menhir-20201216/coq-menhirlib/src/Interpreter.v
--- menhir-20201201/coq-menhirlib/src/Interpreter.v 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/coq-menhirlib/src/Interpreter.v 2020-12-16 19:18:28.000000000 +0000
@@ -83,6 +83,9 @@
CoInductive buffer : Type :=
Buf_cons { buf_head : token; buf_tail : buffer }.
+(* Note: Coq 8.12.0 wants a Declare Scope command,
+ but this breaks compatibility with Coq < 8.10.
+ Declare Scope buffer_scope. *)
Delimit Scope buffer_scope with buf.
Bind Scope buffer_scope with buffer.
diff -Nru menhir-20201201/coq-menhirlib/src/Makefile menhir-20201216/coq-menhirlib/src/Makefile
--- menhir-20201201/coq-menhirlib/src/Makefile 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/coq-menhirlib/src/Makefile 2020-12-16 19:18:28.000000000 +0000
@@ -1,6 +1,6 @@
PWD := $(shell pwd)
-COQINCLUDE := -R $(PWD) MenhirLib \
+COQINCLUDE := -R . MenhirLib \
export COQINCLUDE
@@ -22,7 +22,7 @@
# A nonempty value can be used to perform a dummy installation
# in a different location.
-CONTRIB = $(shell $(COQBIN)coqc -where)/user-contrib
+CONTRIB = $(shell $(COQBIN)coqc -where | tr -d '\r' | tr '\\' '/')/user-contrib
TARGET = $(DESTDIR)$(CONTRIB)/MenhirLib
install: all
diff -Nru menhir-20201201/coq-menhirlib/src/Makefile.coq menhir-20201216/coq-menhirlib/src/Makefile.coq
--- menhir-20201201/coq-menhirlib/src/Makefile.coq 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/coq-menhirlib/src/Makefile.coq 2020-12-16 19:18:28.000000000 +0000
@@ -11,7 +11,7 @@
#
#
# This Makefile relies on the following variables:
-# ROOTDIR (default: `pwd`)
+# ROOTDIR (default: .)
# COQBIN (default: empty)
# COQINCLUDE (default: empty)
# VV (default: *.v)
@@ -22,17 +22,8 @@
# VERBOSE (default: undefined)
# (if defined, commands are displayed)
-# We usually refer to the .v files using relative paths (such as Foo.v)
-# but [coqdep -R] produces dependencies that refer to absolute paths
-# (such as /bar/Foo.v). This confuses [make], which does not recognize
-# that these files are the same. As a result, [make] does not respect
-# the dependencies.
-
-# We fix this by using ABSOLUTE PATHS EVERYWHERE. The paths used in targets,
-# in -R options, etc., must be absolute paths.
-
ifndef ROOTDIR
- ROOTDIR := $(shell pwd)
+ ROOTDIR := .
endif
ifndef VV
diff -Nru menhir-20201201/coq-menhirlib/src/Version.v menhir-20201216/coq-menhirlib/src/Version.v
--- menhir-20201201/coq-menhirlib/src/Version.v 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/coq-menhirlib/src/Version.v 2020-12-16 19:18:28.000000000 +0000
@@ -1 +1 @@
-Definition require_20201201 := tt.
+Definition require_20201216 := tt.
diff -Nru menhir-20201201/debian/changelog menhir-20201216/debian/changelog
--- menhir-20201201/debian/changelog 2020-12-06 09:29:09.000000000 +0000
+++ menhir-20201216/debian/changelog 2021-01-17 20:01:58.000000000 +0000
@@ -1,3 +1,9 @@
+menhir (20201216-1) unstable; urgency=medium
+
+ * New upstream release
+
+ -- Mehdi Dogguy
@@ -211,12 +211,14 @@
it to an OCaml function that maps a state number to a message. The OCaml code
is sent to the standard output channel. At the same time, Menhir checks that
the collection of input sentences in the file filename is correct and
-irredundant. For more information, see §11.Menhir Reference Manual
-(version 20201201)François Pottier and Yann Régis-Gianas
+(version 20201216)François Pottier and Yann Régis-Gianas
INRIA
{Francois.Pottier, Yann.Regis-Gianas}@inria.fr
--coq. This switch causes Menhir to produce Coq code. See §12.
--coq-lib-path path. This switch allows specifying under what name (or path) the Coq support library -MenhirLib is known to Coq. When Menhir runs in --coq mode, the generated -parser contains references to several modules in this library. This path is -used to qualify these references. Its default value is MenhirLib.
--coq-lib-no-path. This switch indicates that references to the Coq library MenhirLib -should not be qualified. This was the default behavior of Menhir prior to 2018/05/30. -This switch is provided for compatibility, but normally should not be used.
--coq-no-actions. (Used in conjunction with --coq.) This switch +irredundant. For more information, see §11.
--coq. This switch causes Menhir to produce Coq code. See §12.
--coq-lib-path path. This switch allows specifying under what +name (or path) the Coq support library is known to Coq. When Menhir runs in +--coq mode, the generated parser contains references to several modules in +this library. This path is used to qualify these references. Its default value +is MenhirLib.
--coq-lib-no-path. This switch indicates that references to the Coq +library MenhirLib should not be qualified. This was the default +behavior of Menhir prior to 2018/05/30. This switch is provided for +compatibility, but normally should not be used.
--coq-no-actions. (Used in conjunction with --coq.) This switch
causes the semantic actions present in the .vy file to be ignored and
replaced with tt
, the unique inhabitant of Coq’s unit
type. This
feature can be used to test the Coq back-end with a standard grammar, that is, a
@@ -227,7 +229,9 @@
possible only if the grammar has no conflict (not even a benign one, in the
sense of §6.1). This can be desirable also because, for
a complex grammar, completeness may require a heavy certificate and its
-validation by Coq may take time.
--depend. See §14.
--dump. This switch causes a description of the automaton to be +validation by Coq may take time.
--coq-no-version-check. (Used in conjunction with --coq.) This switch +prevents the generation of the check that verifies that the versions of +Menhir and MenhirLib match.
--depend. See §14.
--dump. This switch causes a description of the automaton to be written to the file basename.automaton. This description is written after benign conflicts have been resolved, before severe conflicts are resolved (§6), and before extra reductions are introduced @@ -238,7 +242,12 @@ (§4.1.8).
--echo-errors filename. This switch causes Menhir to read the .messages file filename and to produce on the standard output channel just the input sentences. (That is, all messages, blank lines, and -comments are filtered out.) For more information, see §11.
--explain. This switch causes conflict explanations to be +comments are filtered out.) For more information, see §11.
--echo-errors-concrete filename. This switch causes Menhir to
+read the .messages file filename and to produce on the standard output
+channel just the input sentences. Each sentence is followed with a comment of
+the form ## Concrete syntax: ...
that shows this sentence in concrete
+syntax. This comment is printed only if the user has defined an alias for
+every token (§4.1.3).
--explain. This switch causes conflict explanations to be written to the file basename.conflicts. See also §6.
--external-tokens T. This switch causes the definition of the token type to be omitted in basename.ml and basename.mli. Instead, the generated parser relies on @@ -292,13 +301,23 @@ the grammar specification to be translated into a definition of the token type, which is written to the files basename.ml and basename.mli. No code is generated. This is useful when a single set of tokens is to be shared between several parsers. The directory -demos/calc-two contains a demo that illustrates the use of this switch.
--raw-depend. See §14.
--stdlib directory. This switch exists only for -backwards compatibility and is ignored. It may be removed in the future.
--strict. This switch causes several warnings about the grammar +demos/calc-two contains a demo that illustrates the use of this switch.
--raw-depend. See §14.
--require-aliases. This switch causes Menhir to check that a token +alias (§4.1.3) has been defined for every token. There is no +requirement for this alias to be actually used; it must simply exist. A +missing alias gives rise to a warning (and, in --strict mode, to an error).
--stdlib directory. This switch exists only for +backwards compatibility and is ignored. It may be removed in the future.
--strategy strategy. This switch selects an error handling +strategy, to be used by the code back-end, the table back-end, and the +reference interpreter. The available strategies are legacy and +simplified. (However, at the time of writing, the code back-end does +not yet support the simplified strategy.) When this switch is +omitted, the legacy strategy is used. The choice of a strategy +matters only if the grammar uses the error token. For more details, see +§10.
--strict. This switch causes several warnings about the grammar and about the automaton to be considered errors. This includes warnings about useless precedence declarations, non-terminal symbols that produce the empty language, unreachable non-terminal symbols, productions that are never -reduced, conflicts that are not resolved by precedence declarations, and -end-of-stream conflicts.
--suggest-*. See §14.
--table. This switch causes Menhir to use its table-based +reduced, conflicts that are not resolved by precedence declarations, +end-of-stream conflicts, and missing token aliases.
--suggest-*. See §14.
--table. This switch causes Menhir to use its table-based back-end, as opposed to its (default) code-based back-end. When --table is used, Menhir produces significantly more compact and somewhat slower parsers. See §16 for a speed comparison.
The table-based back-end produces rather compact tables, which are analogous @@ -1997,13 +2016,17 @@ checkpoint, which again can be an intermediate checkpoint or a final checkpoint. It does not raise any exception. (The exception Error is used only in the monolithic API.)
val resume: + ?strategy:[ `Legacy | `Simplified ] -> 'a checkpoint -> 'a checkpoint
The function resume
allows the user to resume the parser after the
parser has suspended itself with a checkpoint of the form
AboutToReduce (env, prod)
or HandlingError env
.
This function expects just the previous checkpoint checkpoint
. It produces a new
-checkpoint. It does not raise any exception.
The incremental API subsumes the monolithic API. Indeed, main
can be
+checkpoint. It does not raise any exception.
+The optional argument strategy
influences the manner in which
+resume
deals with checkpoints of the form ErrorHandling _
. Its
+default value is `Legacy
. For more details, see §10.
The incremental API subsumes the monolithic API. Indeed, main
can be
(and is in fact) implemented by first using
Incremental.main
, then calling offer
and
resume
in a loop, until a final checkpoint is obtained.
type supplier = @@ -2019,13 +2042,18 @@ is why we exposeoffer
andresume
in the first place. Nevertheless, some variations are so common that it is worth providing them, ready for use. The following functions are implemented on top ofoffer
-andresume
.val loop: supplier -> 'a checkpoint -> 'a +andresume
.val loop: + ?strategy:[ `Legacy | `Simplified ] -> + supplier -> 'a checkpoint -> 'a
loop supplier checkpoint
begins parsing fromcheckpoint
, reading tokens fromsupplier
. It continues parsing until it reaches a checkpoint of the formAccepted v
orRejected
. In the former case, it returnsv
. In the latter case, it raises the exceptionError
. (By the way, this is how we implement the monolithic -API on top of the incremental API.)val loop_handle: +API on top of the incremental API.) +The optional argumentstrategy
influences the manner in which +loop
deals with checkpoints of the formErrorHandling _
. Its +default value is`Legacy
. For more details, see §10.val loop_handle: ('a -> 'answer) -> ('a checkpoint -> 'answer) -> supplier -> 'a checkpoint -> 'answer @@ -2298,16 +2326,41 @@ by the lexical analyzer. Instead, when an error is detected, the current lookahead token is discarded and replaced with the error token, which becomes the current lookahead token. At this point, the parser enters error -handling mode.In error handling mode, automaton states are popped off the automaton’s stack -until a state that can act on error is found. This includes -both shift and reduce actions. (yacc and ocamlyacc do not -trigger reduce actions on error. It is somewhat unclear why this is so.)
When a state that can reduce on error is found, reduction is performed. -Since the lookahead token is still error, the automaton remains in error -handling mode.
When a state that can shift on error is found, the error token is shifted. -At this point, the parser returns to normal mode.
When no state that can act on error is found on the automaton’s stack, the -parser stops and raises the exception Error. This exception carries -no information. The position of the error can be obtained by reading the -lexical analyzer’s environment record.
+handling mode. +In error handling mode, the parser behaves as follows: +
In the monolithic API, the parser rejects the input by raising the exception +Error. This exception carries no information. The position of the +error can be obtained by reading the lexical analyzer’s environment record. In +the incremental API, the parser rejects the input by returning the checkpoint +Rejected.
Which strategy should one choose? First, let us note that the difference +between the strategies legacy and simplified matters only if the grammar +uses the error token. The following rule of thumb can be used to select +between them: +
ocamlyacc offers an error recovery mode, which is entered immediately after
an error token was successfully shifted. In this mode, tokens are repeatedly
Binary files /tmp/tmpqEkA0c/q9fyCBiJLe/menhir-20201201/doc/manual.pdf and /tmp/tmpqEkA0c/T1sE1vd8pc/menhir-20201216/doc/manual.pdf differ
diff -Nru menhir-20201201/doc/manual.tex menhir-20201216/doc/manual.tex
--- menhir-20201201/doc/manual.tex 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/doc/manual.tex 2020-12-16 19:18:28.000000000 +0000
@@ -130,14 +130,16 @@
\docswitch{\ocoq} This switch causes \menhir to produce Coq code. See \sref{sec:coq}.
-\docswitch{\ocoqlibpath \nt{path}} This switch allows specifying under what name (or path) the Coq support library
-MenhirLib is known to Coq. When \menhir runs in \ocoq mode, the generated
-parser contains references to several modules in this library. This path is
-used to qualify these references. Its default value is \texttt{MenhirLib}.
-
-\docswitch{\ocoqlibnopath} This switch indicates that references to the Coq library MenhirLib
-should \emph{not} be qualified. This was the default behavior of \menhir prior to 2018/05/30.
-This switch is provided for compatibility, but normally should not be used.
+\docswitch{\ocoqlibpath \nt{path}} This switch allows specifying under what
+name (or path) the Coq support library is known to Coq. When \menhir runs in
+\ocoq mode, the generated parser contains references to several modules in
+this library. This path is used to qualify these references. Its default value
+is \coqmenhirlib.
+
+\docswitch{\ocoqlibnopath} This switch indicates that references to the Coq
+library \coqmenhirlib should \emph{not} be qualified. This was the default
+behavior of \menhir prior to 2018/05/30. This switch is provided for
+compatibility, but normally should not be used.
\docswitch{\ocoqnoactions} (Used in conjunction with \ocoq.) This switch
causes the semantic actions present in the \vy file to be ignored and
@@ -154,6 +156,10 @@
a complex grammar, completeness may require a heavy certificate and its
validation by Coq may take time.
+\docswitch{\ocoqnoversioncheck} (Used in conjunction with \ocoq.) This switch
+prevents the generation of the check that verifies that the versions of
+\menhir and \coqmenhirlib match.
+
\docswitch{\odepend} See \sref{sec:build}.
\docswitch{\odump} This switch causes a description of the automaton to be
@@ -173,6 +179,13 @@
channel just the input sentences. (That is, all messages, blank lines, and
comments are filtered out.) For more information, see \sref{sec:errors:new}.
+\docswitch{\oechoerrorsconcrete \nt{filename}} This switch causes \menhir to
+read the \messages file \nt{filename} and to produce on the standard output
+channel just the input sentences. Each sentence is followed with a comment of
+the form \verb+## Concrete syntax: ...+ that shows this sentence in concrete
+syntax. This comment is printed only if the user has defined an alias for
+every token (\sref{sec:tokens}).
+
\docswitch{\oexplain} This switch causes conflict explanations to be
written to the file \nt{basename}\conflicts. See also \sref{sec:conflicts}.
@@ -274,15 +287,29 @@
\docswitch{\orawdepend} See \sref{sec:build}.
+\docswitch{\orequirealiases} This switch causes \menhir to check that a token
+alias (\sref{sec:tokens}) has been defined for every token. There is no
+requirement for this alias to be actually used; it must simply exist. A
+missing alias gives rise to a warning (and, in \ostrict mode, to an error).
+
\docswitch{\ostdlib \nt{directory}} This switch exists only for
backwards compatibility and is ignored. It may be removed in the future.
+\docswitch{\ostrategy \nt{strategy}} This switch selects an error handling
+strategy, to be used by the code back-end, the table back-end, and the
+reference interpreter. The available strategies are \legacy and
+simplified. (However, at the time of writing, the code back-end does
+not yet support the simplified strategy.) When this switch is
+omitted, the \legacy strategy is used. The choice of a strategy
+matters only if the grammar uses the \error token. For more details, see
+\sref{sec:errors}.
+
\docswitch{\ostrict} This switch causes several warnings about the grammar
and about the automaton to be considered errors. This includes warnings about
useless precedence declarations, non-terminal symbols that produce the empty
language, unreachable non-terminal symbols, productions that are never
-reduced, conflicts that are not resolved by precedence declarations, and
-end-of-stream conflicts.
+reduced, conflicts that are not resolved by precedence declarations,
+end-of-stream conflicts, and missing token aliases.
\docswitch{\oo{suggest-*}} See \sref{sec:build}.
@@ -2514,6 +2541,10 @@
does everything: it pulls tokens from the lexer, parses, and eventually
returns a semantic value (or fails by throwing the exception \texttt{Error}).
+% We may wish to note that the behavior of the function \verb+main+
+% is influenced by the strategy that is chosen at compile time via
+% \ostrategy.
+
% ------------------------------------------------------------------------------
\subsection{Incremental API}
@@ -2681,6 +2712,7 @@
\begin{verbatim}
val resume:
+ ?strategy:[ `Legacy | `Simplified ] ->
'a checkpoint ->
'a checkpoint
\end{verbatim}
@@ -2690,6 +2722,10 @@
\verb+AboutToReduce (env, prod)+ or \verb+HandlingError env+.
This function expects just the previous checkpoint \verb+checkpoint+. It produces a new
checkpoint. It does not raise any exception.
+%
+The optional argument \verb+strategy+ influences the manner in which
+\verb+resume+ deals with checkpoints of the form \verb+ErrorHandling _+. Its
+default value is \verb+`Legacy+. For more details, see \sref{sec:errors}.
The incremental API subsumes the monolithic API. Indeed, \verb+main+ can be
(and is in fact) implemented by first using
@@ -2730,7 +2766,9 @@
%% val loop
\begin{verbatim}
- val loop: supplier -> 'a checkpoint -> 'a
+ val loop:
+ ?strategy:[ `Legacy | `Simplified ] ->
+ supplier -> 'a checkpoint -> 'a
\end{verbatim}
\verb+loop supplier checkpoint+ begins parsing from \verb+checkpoint+, reading
@@ -2739,6 +2777,10 @@
case, it returns \verb+v+. In the latter case, it raises the
exception \verb+Error+. (By the way, this is how we implement the monolithic
API on top of the incremental API.)
+%
+The optional argument \verb+strategy+ influences the manner in which
+\verb+loop+ deals with checkpoints of the form \verb+ErrorHandling _+. Its
+default value is \verb+`Legacy+. For more details, see \sref{sec:errors}.
\begin{verbatim}
val loop_handle:
@@ -3255,6 +3297,7 @@
\menhir's traditional error handling mechanism is considered deprecated: although
it is still supported for the time being, it might be removed in the future.
+%
We recommend setting up an error handling mechanism using the new tools
offered by \menhir (\sref{sec:errors:new}).
@@ -3268,23 +3311,45 @@
lookahead token is discarded and replaced with the \error token, which becomes
the current lookahead token. At this point, the parser enters \emph{error
handling} mode.
+%
+In error handling mode, the parser behaves as follows:
+\begin{itemize}
+\item If the current state has a shift action on the \error token, then this
+ action takes place. Under the \legacy strategy, the parser then
+ reads the next token and returns to normal mode. Under the
+ simplified strategy, it does \emph{not} request the next token, so
+ the current token remains \error, and the parser remains in error handling
+ mode.
+\item If the current state has a reduce action on the \error token, then this
+ action takes place. (This behavior differs from that of \yacc and
+ \ocamlyacc, which do not reduce on \error. It is somewhat unclear why not.)
+ The current token remains \error and the parser remains in error handling
+ mode.
+\item If the current state has no action on the \error token, then, under the
+ simplified strategy, the parser rejects the input. Under the
+ \legacy strategy, the parser pops a cell off its stack and remains
+ in error handling mode. If the stack is empty, then the parser rejects the
+ input.
+\end{itemize}
-In error handling mode, automaton states are popped off the automaton's stack
-until a state that can \emph{act} on \error is found. This includes
-\emph{both} shift \emph{and} reduce actions. (\yacc and \ocamlyacc do not
-trigger reduce actions on \error. It is somewhat unclear why this is so.)
-
-When a state that can reduce on \error is found, reduction is performed.
-Since the lookahead token is still \error, the automaton remains in error
-handling mode.
-
-When a state that can shift on \error is found, the \error token is shifted.
-At this point, the parser returns to normal mode.
-
-When no state that can act on \error is found on the automaton's stack, the
-parser stops and raises the exception \texttt{Error}. This exception carries
-no information. The position of the error can be obtained by reading the
-lexical analyzer's environment record.
+In the monolithic API, the parser rejects the input by raising the exception
+\texttt{Error}. This exception carries no information. The position of the
+error can be obtained by reading the lexical analyzer's environment record. In
+the incremental API, the parser rejects the input by returning the checkpoint
+\texttt{Rejected}.
+
+Which strategy should one choose? First, let us note that the difference
+between the strategies \legacy and \simplified matters only if the grammar
+uses the \error token. The following rule of thumb can be used to select
+between them:
+\begin{itemize}
+\item If the \error token is used only to catch an error and stop, then the
+ \simplified strategy should be preferred. (In this this restricted style,
+ the \error token always appears at the end of a production, whose semantic
+ action raises an exception.)
+\item If the \error token is used to survive an error and continue parsing,
+ then the legacy strategy should be selected.
+\end{itemize}
\paragraph{Error recovery}
diff -Nru menhir-20201201/doc/version.tex menhir-20201216/doc/version.tex
--- menhir-20201201/doc/version.tex 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/doc/version.tex 2020-12-16 19:18:28.000000000 +0000
@@ -1 +1 @@
-\gdef\menhirversion{20201201}
+\gdef\menhirversion{20201216}
diff -Nru menhir-20201201/dune-project menhir-20201216/dune-project
--- menhir-20201201/dune-project 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/dune-project 2020-12-16 19:18:28.000000000 +0000
@@ -1,7 +1,7 @@
(lang dune 2.0)
(name menhir)
(using menhir 2.0)
-(version 20201201)
+(version 20201216)
(package
(name menhirLib)
diff -Nru menhir-20201201/lib/Engine.ml menhir-20201216/lib/Engine.ml
--- menhir-20201201/lib/Engine.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/lib/Engine.ml 2020-12-16 19:18:28.000000000 +0000
@@ -58,6 +58,74 @@
(* ------------------------------------------------------------------------ *)
+ (* As of 2020/12/16, we introduce a choice between multiple error handling
+ strategies. *)
+
+ (* Regardless of the strategy, when a syntax error is encountered, the
+ function [initiate] is called, a [HandlingError] checkpoint is produced,
+ and (after resuming) the function [error] is called. This function checks
+ whether the current state allows shifting, reducing, or neither, when the
+ lookahead token is [error]. Its behavior, then, depends on the strategy,
+ as follows. *)
+
+ (* In the legacy strategy, which until now was the only strategy,
+
+ - If shifting is possible, then a [Shifting] checkpoint is produced,
+ whose field [please_discard] is [true], so (after resuming) an
+ [InputNeeded] checkpoint is produced, and (after a new token
+ has been provided) the parser leaves error-handling mode and
+ returns to normal mode.
+
+ - If reducing is possible, then one or more reductions are performed.
+ Default reductions are announced via [AboutToReduce] checkpoints,
+ whereas ordinary reductions are performed silently. (It is unclear
+ why this is so.) The parser remains in error-handling mode, so
+ another [HandlingError] checkpoint is produced, and the function
+ [error] is called again.
+
+ - If neither action is possible and if the stack is nonempty, then a
+ cell is popped off the stack, then a [HandlingError] checkpoint is
+ produced, and the function [error] is called again.
+
+ - If neither action is possible and if the stack is empty, then the
+ parse dies with a [Reject] checkpoint. *)
+
+ (* The simplified strategy differs from the legacy strategy as follows:
+
+ - When shifting, a [Shifting] checkpoint is produced, whose field
+ [please_discard] is [false], so the parser does not request another
+ token, and the parser remains in error-handling mode. (If the
+ destination state of this shift transition has a default reduction,
+ then the parser will perform this reduction as its next step.)
+
+ - When reducing, all reductions are announced by [AboutToReduce]
+ checkpoints.
+
+ - If neither shifting [error] nor reducing on [error] is possible,
+ then the parser dies with a [Reject] checkpoint. (The parser does
+ not attempt to pop cells off the stack one by one.)
+
+ This simplified strategy is appropriate when the grammar uses the [error]
+ token in a limited way, where the [error] token always appears at the end
+ of a production whose semantic action raises an exception (whose purpose
+ is to signal a syntax error and perhaps produce a custom message). Then,
+ the parser must not request one token past the syntax error. (In a REPL,
+ that would be undesirable.) It must perform as many reductions on [error]
+ as possible, then (if possible) shift the [error] token and move to a new
+ state where a default reduction will be possible. (Because the [error]
+ token always appears at the end of a production, no other action can
+ exist in that state, so a default reduction must exist.) The semantic
+ action raises an exception, and that is it. *)
+
+ (* Let us note that it is also possible to perform no error handling at
+ all, or to perform customized error handling, by stopping as soon as
+ the first [ErrorHandling] checkpoint appears. *)
+
+ type strategy =
+ [ `Legacy | `Simplified ]
+
+ (* ------------------------------------------------------------------------ *)
+
(* In the code-based back-end, the [run] function is sometimes responsible
for pushing a new cell on the stack. This is motivated by code sharing
concerns. In this interpreter, there is no such concern; [run]'s caller
@@ -143,8 +211,9 @@
(* Note that, if [please_discard] was true, then we have just called
[discard], so the lookahead token cannot be [error]. *)
- (* Returning [HandlingError env] is equivalent to calling [error env]
- directly, except it allows the user to regain control. *)
+ (* Returning [HandlingError env] is like calling [error ~strategy env]
+ directly, except it allows the user to regain control and choose an
+ error-handling strategy. *)
if env.error then begin
if log then
@@ -295,7 +364,7 @@
(* [error] handles errors. *)
- and error env =
+ and error ~strategy env =
assert env.error;
(* Consult the column associated with the [error] pseudo-token in the
@@ -305,39 +374,64 @@
env.current (* determines a row *)
T.error_terminal (* determines a column *)
T.error_value
- error_shift (* shift continuation *)
- error_reduce (* reduce continuation *)
- error_fail (* failure continuation *)
+ (error_shift ~strategy) (* shift continuation *)
+ (error_reduce ~strategy) (* reduce continuation *)
+ (error_fail ~strategy) (* failure continuation *)
env
- and error_shift env please_discard terminal value s' =
-
- (* Here, [terminal] is [T.error_terminal],
- and [value] is [T.error_value]. *)
-
+ and error_shift ~strategy env please_discard terminal value s' =
assert (terminal = T.error_terminal && value = T.error_value);
(* This state is capable of shifting the [error] token. *)
if log then
Log.handling_error env.current;
+
+ (* In the simplified strategy, we change [please_discard] to [false],
+ which means that we won't request the next token and (therefore)
+ we will remain in error-handling mode after shifting the [error]
+ token. *)
+
+ let please_discard =
+ match strategy with `Legacy -> please_discard | `Simplified -> false
+ in
+
shift env please_discard terminal value s'
- and error_reduce env prod =
+ and error_reduce ~strategy env prod =
(* This state is capable of performing a reduction on [error]. *)
if log then
Log.handling_error env.current;
- reduce env prod
- (* Intentionally calling [reduce] instead of [announce_reduce].
- It does not seem very useful, and it could be confusing, to
- expose the reduction steps taken during error handling. *)
- and error_fail env =
+ (* In the legacy strategy, we call [reduce] instead of [announce_reduce],
+ apparently in an attempt to hide the reduction steps performed during
+ error handling. This seems inconsistent, as the default reduction steps
+ are still announced. In the simplified strategy, all reductions are
+ announced. *)
+
+ match strategy with
+ | `Legacy ->
+ reduce env prod
+ | `Simplified ->
+ announce_reduce env prod
+
+ and error_fail ~strategy env =
+
+ (* This state is unable to handle errors. In the simplified strategy, we
+ die immediately. In the legacy strategy, we attempt to pop a stack
+ cell. (This amounts to forgetting part of what we have just read, in
+ the hope of reaching a state where we can shift the [error] token and
+ resume parsing in normal mode. Forgetting past input is not appropriate
+ when the goal is merely to produce a good syntax error message.) *)
+
+ match strategy with
+ | `Simplified ->
+ Rejected
+ | `Legacy ->
- (* This state is unable to handle errors. Attempt to pop a stack
- cell. *)
+ (* Attempt to pop a stack cell. *)
let cell = env.stack in
let next = cell.next in
@@ -447,9 +541,11 @@
| _ ->
invalid_arg "offer expects InputNeeded"
- let resume : 'a . 'a checkpoint -> 'a checkpoint = function
+ let resume : 'a . ?strategy:strategy -> 'a checkpoint -> 'a checkpoint =
+ fun ?(strategy=`Legacy) checkpoint ->
+ match checkpoint with
| HandlingError env ->
- Obj.magic error env
+ Obj.magic error ~strategy env
| Shifting (_, env, please_discard) ->
Obj.magic run env please_discard
| AboutToReduce (env, prod) ->
@@ -493,8 +589,8 @@
All of the cheating resides in the types assigned to [offer] and [handle]
above. *)
- let rec loop : 'a . supplier -> 'a checkpoint -> 'a =
- fun read checkpoint ->
+ let rec loop : 'a . ?strategy:strategy -> supplier -> 'a checkpoint -> 'a =
+ fun ?(strategy=`Legacy) read checkpoint ->
match checkpoint with
| InputNeeded _ ->
(* The parser needs a token. Request one from the lexer,
@@ -502,14 +598,14 @@
checkpoint. Then, repeat. *)
let triple = read() in
let checkpoint = offer checkpoint triple in
- loop read checkpoint
+ loop ~strategy read checkpoint
| Shifting _
| AboutToReduce _
| HandlingError _ ->
(* The parser has suspended itself, but does not need
new input. Just resume the parser. Then, repeat. *)
- let checkpoint = resume checkpoint in
- loop read checkpoint
+ let checkpoint = resume ~strategy checkpoint in
+ loop ~strategy read checkpoint
| Accepted v ->
(* The parser has succeeded and produced a semantic value.
Return this semantic value to the user. *)
@@ -518,9 +614,9 @@
(* The parser rejects this input. Raise an exception. *)
raise Error
- let entry (s : state) lexer lexbuf : semantic_value =
+ let entry strategy (s : state) lexer lexbuf : semantic_value =
let initial = lexbuf.Lexing.lex_curr_p in
- loop (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial)
+ loop ~strategy (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial)
(* ------------------------------------------------------------------------ *)
@@ -536,6 +632,8 @@
loop_handle succeed fail read checkpoint
| Shifting _
| AboutToReduce _ ->
+ (* Which strategy is passed to [resume] here is irrelevant,
+ since this checkpoint is not [HandlingError _]. *)
let checkpoint = resume checkpoint in
loop_handle succeed fail read checkpoint
| HandlingError _
@@ -569,6 +667,8 @@
loop_handle_undo succeed fail read (inputneeded, checkpoint)
| Shifting _
| AboutToReduce _ ->
+ (* Which strategy is passed to [resume] here is irrelevant,
+ since this checkpoint is not [HandlingError _]. *)
let checkpoint = resume checkpoint in
loop_handle_undo succeed fail read (inputneeded, checkpoint)
| HandlingError _
@@ -602,6 +702,8 @@
Some env
| AboutToReduce _ ->
(* The parser wishes to reduce. Just follow. *)
+ (* Which strategy is passed to [resume] here is irrelevant,
+ since this checkpoint is not [HandlingError _]. *)
shifts (resume checkpoint)
| HandlingError _ ->
(* The parser fails, which means it rejects the terminal symbol
diff -Nru menhir-20201201/lib/EngineTypes.ml menhir-20201216/lib/EngineTypes.ml
--- menhir-20201201/lib/EngineTypes.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/lib/EngineTypes.ml 2020-12-16 19:18:28.000000000 +0000
@@ -333,6 +333,7 @@
exception Error
val entry:
+ (* strategy: *) [ `Legacy | `Simplified ] -> (* see [IncrementalEngine] *)
state ->
(Lexing.lexbuf -> token) ->
Lexing.lexbuf ->
diff -Nru menhir-20201201/lib/IncrementalEngine.ml menhir-20201216/lib/IncrementalEngine.ml
--- menhir-20201201/lib/IncrementalEngine.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/lib/IncrementalEngine.ml 2020-12-16 19:18:28.000000000 +0000
@@ -78,9 +78,9 @@
| Rejected
(* [offer] allows the user to resume the parser after it has suspended
- itself with a checkpoint of the form [InputNeeded env]. [offer] expects the
- old checkpoint as well as a new token and produces a new checkpoint. It does not
- raise any exception. *)
+ itself with a checkpoint of the form [InputNeeded env]. [offer] expects
+ the old checkpoint as well as a new token and produces a new checkpoint.
+ It does not raise any exception. *)
val offer:
'a checkpoint ->
@@ -89,10 +89,30 @@
(* [resume] allows the user to resume the parser after it has suspended
itself with a checkpoint of the form [AboutToReduce (env, prod)] or
- [HandlingError env]. [resume] expects the old checkpoint and produces a new
- checkpoint. It does not raise any exception. *)
+ [HandlingError env]. [resume] expects the old checkpoint and produces a
+ new checkpoint. It does not raise any exception. *)
+
+ (* The optional argument [strategy] influences the manner in which [resume]
+ deals with checkpoints of the form [ErrorHandling _]. Its default value
+ is [`Legacy]. It can be briefly described as follows:
+
+ - If the [error] token is used only to report errors (that is, if the
+ [error] token appears only at the end of a production, whose semantic
+ action raises an exception) then the simplified strategy should be
+ preferred. (This includes the case where the [error] token does not
+ appear at all in the grammar.)
+
+ - If the [error] token is used to recover after an error, or if
+ perfect backward compatibility is required, the legacy strategy
+ should be selected.
+
+ More details on these strategies appear in the file [Engine.ml]. *)
+
+ type strategy =
+ [ `Legacy | `Simplified ]
val resume:
+ ?strategy:strategy ->
'a checkpoint ->
'a checkpoint
@@ -102,7 +122,8 @@
type supplier =
unit -> token * position * position
- (* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *)
+ (* A pair of a lexer and a lexing buffer can be easily turned into a
+ supplier. *)
val lexer_lexbuf_to_supplier:
(Lexing.lexbuf -> token) ->
@@ -117,9 +138,11 @@
(* [loop supplier checkpoint] begins parsing from [checkpoint], reading
tokens from [supplier]. It continues parsing until it reaches a
checkpoint of the form [Accepted v] or [Rejected]. In the former case, it
- returns [v]. In the latter case, it raises the exception [Error]. *)
+ returns [v]. In the latter case, it raises the exception [Error].
+ The optional argument [strategy], whose default value is [Legacy],
+ is passed to [resume] and influences the error-handling strategy. *)
- val loop: supplier -> 'a checkpoint -> 'a
+ val loop: ?strategy:strategy -> supplier -> 'a checkpoint -> 'a
(* [loop_handle succeed fail supplier checkpoint] begins parsing from
[checkpoint], reading tokens from [supplier]. It continues parsing until
@@ -128,10 +151,10 @@
observed first). In the former case, it calls [succeed v]. In the latter
case, it calls [fail] with this checkpoint. It cannot raise [Error].
- This means that Menhir's traditional error-handling procedure (which pops
- the stack until a state that can act on the [error] token is found) does
- not get a chance to run. Instead, the user can implement her own error
- handling code, in the [fail] continuation. *)
+ This means that Menhir's error-handling procedure does not get a chance
+ to run. For this reason, there is no [strategy] parameter. Instead, the
+ user can implement her own error handling code, in the [fail]
+ continuation. *)
val loop_handle:
('a -> 'answer) ->
diff -Nru menhir-20201201/src/AutomatonGraph.ml menhir-20201216/src/AutomatonGraph.ml
--- menhir-20201201/src/AutomatonGraph.ml 1970-01-01 00:00:00.000000000 +0000
+++ menhir-20201216/src/AutomatonGraph.ml 2020-12-16 19:18:28.000000000 +0000
@@ -0,0 +1,41 @@
+(******************************************************************************)
+(* *)
+(* Menhir *)
+(* *)
+(* François Pottier, Inria Paris *)
+(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
+(* *)
+(* Copyright Inria. All rights reserved. This file is distributed under the *)
+(* terms of the GNU General Public License version 2, as described in the *)
+(* file LICENSE. *)
+(* *)
+(******************************************************************************)
+
+open Printf
+open Grammar
+
+module P = Dot.Print (struct
+
+ type vertex = Lr1.node
+
+ let name node =
+ sprintf "s%d" (Lr1.number node)
+
+ let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) source =
+ SymbolMap.iter (fun symbol target ->
+ let label = Symbol.print symbol in
+ f ~label target
+ ) (Lr1.transitions source)
+
+ let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) =
+ Lr1.iter (fun node ->
+ let label = sprintf "%d" (Lr1.number node) in
+ f ~label node
+ )
+
+end)
+
+let print_automaton_graph() =
+ let f = open_out (Settings.base ^ ".dot") in
+ P.print f;
+ close_out f
diff -Nru menhir-20201201/src/AutomatonGraph.mli menhir-20201216/src/AutomatonGraph.mli
--- menhir-20201201/src/AutomatonGraph.mli 1970-01-01 00:00:00.000000000 +0000
+++ menhir-20201216/src/AutomatonGraph.mli 2020-12-16 19:18:28.000000000 +0000
@@ -0,0 +1,18 @@
+(******************************************************************************)
+(* *)
+(* Menhir *)
+(* *)
+(* François Pottier, Inria Paris *)
+(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
+(* *)
+(* Copyright Inria. All rights reserved. This file is distributed under the *)
+(* terms of the GNU General Public License version 2, as described in the *)
+(* file LICENSE. *)
+(* *)
+(******************************************************************************)
+
+(* Build and print the LR(1) automaton as a graph. Each state of the automaton
+ gives rise to a node. Edges are labeled with nonterminal and terminal
+ symbols. The reduction actions that exist in each state are not shown. *)
+
+val print_automaton_graph: unit -> unit
diff -Nru menhir-20201201/src/back.ml menhir-20201216/src/back.ml
--- menhir-20201201/src/back.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/back.ml 2020-12-16 19:18:28.000000000 +0000
@@ -21,6 +21,10 @@
let module D = Dump.Make(Default) in
D.dump (Settings.base ^ ".automaton.resolved")
+let () =
+ if Settings.automaton_graph then
+ AutomatonGraph.print_automaton_graph()
+
(* Let [Interpret] handle the command line options [--interpret],
[--interpret-error], [--compile-errors], [--compare-errors]. *)
@@ -66,30 +70,25 @@
if Settings.cmly then
Cmly_write.write (Settings.base ^ ".cmly")
-(* Construct the code, using either the table-based or the code-based
- back-end, and pass it on to the printer. (This continuation-passing
- style is imposed by the fact that there is no conditional in ocaml's
- module language.) *)
+(* Construct and print the code using an appropriate back-end. *)
let () =
- if Settings.coq then
+ if Settings.table then begin
+ let module B = TableBackend.Run (struct end) in
+ write B.program;
+ Interface.write Front.grammar ()
+ end
+ else if Settings.coq then begin
let module B = CoqBackend.Run (struct end) in
let filename = Settings.base ^ ".v" in
let f = open_out filename in
- B.write_all f;
- exit 0
- else
- if Settings.table then
- let module B = TableBackend.Run (struct end) in
- write B.program
- else
- let module B = CodeBackend.Run (struct end) in
- write (CodeInliner.inline B.program)
-
-(* Write the interface file. *)
-
-let () =
- Interface.write Front.grammar ()
+ B.write_all f
+ end
+ else begin
+ let module B = CodeBackend.Run (struct end) in
+ write (CodeInliner.inline B.program);
+ Interface.write Front.grammar ()
+ end
let () =
Time.tick "Printing"
diff -Nru menhir-20201201/src/codeBackend.ml menhir-20201216/src/codeBackend.ml
--- menhir-20201201/src/codeBackend.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/codeBackend.ml 2020-12-16 19:18:28.000000000 +0000
@@ -39,13 +39,6 @@
cell's contents are passed as extra parameters, and it is [run]'s
responsibility to allocate that cell.
- (When [run] is the target of a shift transition, the position
- parameters [startp] and [endp] are redundant with the [env]
- parameter, because they are always equal to [env.startp] and
- [env.endp]. However, this does not appear to make a great
- difference in terms of code size, and makes our life easier, so we
- do not attempt to eliminate this redundancy.)
-
The first thing in [run] is to discard a token, if the state was entered
through a shift transition, and to peek at the lookahead token. When the
current token is to be discarded, the [discard] function is invoked. It
@@ -118,6 +111,21 @@
that branch with a simple [assert false]. TEMPORARY do it *)
(* ------------------------------------------------------------------------ *)
+
+(* At this time, the code back-end supports only the legacy error handling
+ strategy. It does not (yet?) support the simplified strategy. Let the
+ user know about this. *)
+
+let () =
+ match Settings.strategy with
+ | `Legacy ->
+ ()
+ | `Simplified ->
+ Error.error []
+ "The code back-end does not support --strategy simplified.\n\
+ Please use either --strategy legacy or --table."
+
+(* ------------------------------------------------------------------------ *)
(* Here is a description of our error handling mechanism.
With every state [s], we associate an [error] function.
@@ -167,6 +175,12 @@
I note that a state that can handle [error] and has a default
reduction must in fact have a reduction action on [error]. *)
+(* The variable that holds the environment. This is a parameter of all
+ functions. *)
+
+let env =
+ prefix "env"
+
(* The type of environments. *)
let tcenv =
@@ -186,11 +200,6 @@
let discard =
prefix "discard"
-(* The [initenv] function. *)
-
-let initenv =
- prefix "init"
-
(* The [run] function associated with a state [s]. *)
let run s =
@@ -499,13 +508,6 @@
(* This is the type of parser environments. *)
-let field modifiable name t =
- {
- modifiable = modifiable;
- fieldname = name;
- fieldtype = type2scheme t
- }
-
let envtypedef = {
typename = tcenv;
typeparams = [];
@@ -819,7 +821,7 @@
(* Calls to [assertfalse]. *)
let call_assertfalse =
- EApp (EVar assertfalse, [ EVar "()" ])
+ EApp (EVar assertfalse, [ EUnit ])
(* ------------------------------------------------------------------------ *)
(* Code production for the automaton functions. *)
@@ -915,12 +917,12 @@
The parameter [defred] tells which default reduction, if any, we
are about to perform. *)
-(* 2014/12/06 New convention regarding initial states (i.e., states
- which have no incoming symbol). The function [initenv] does not
- invoke the lexer, so the [run] function for an initial state must
- do it. (Except in the very special case where the initial state
- has a default reduction on [#] -- this means the grammar recognizes
- only the empty word. We have ruled out this case.) *)
+(* 2014/12/06 New convention regarding initial states (i.e., states which have
+ no incoming symbol). We donot invoke the lexer when we construct the
+ initial environment, so the [run] function for an initial state must do it.
+ (Except in the very special case where the initial state has a default
+ reduction on [#] -- this means the grammar recognizes only the empty word.
+ We have ruled out this case.) *)
let gettoken s defred e =
match Lr1.incoming_symbol s, defred with
@@ -1484,6 +1486,10 @@
a sentinel cell with a valid [endp] field at offset 1. For simplicity, we
always create a sentinel cell. *)
+(* When we allocate a fresh parser environment, the [token] field receives a
+ dummy value. It will be overwritten by the first call to [run], which will
+ invoke [discard]. This allows us to invoke the lexer in just one place. *)
+
let entrydef s =
let nt = Item.startnt (Lr1.start2item s) in
let lexer = "lexer"
@@ -1497,15 +1503,23 @@
{
valpublic = true;
valpat = PVar (Nonterminal.print true nt);
- valval = EAnnot (
- EFun ( [ PVar lexer; PVar lexbuf ],
- blet (
- [ PVar env, EApp (EVar initenv, [ EVar lexer; EVar lexbuf ]) ],
- EMagic (EApp (EVar (run s), [ EVar env; initial_stack ]))
- )
- ),
- entrytypescheme Front.grammar (Nonterminal.print true nt)
- )
+ valval =
+ EAnnot (
+ EFun ( [ PVar lexer; PVar lexbuf ],
+ blet (
+ [ PVar env,
+ ERecord ([
+ (flexer, EVar lexer);
+ (flexbuf, EVar lexbuf);
+ (ftoken, EMagic EUnit);
+ (ferror, efalse)
+ ])
+ ],
+ EMagic (EApp (EVar (run s), [ EVar env; initial_stack ]))
+ )
+ ),
+ entrytypescheme Front.grammar (Nonterminal.print true nt)
+ )
}
(* ------------------------------------------------------------------------ *)
@@ -1591,38 +1605,6 @@
(arrow tenv tenv)
}
-(* This is [initenv], used to allocate a fresh parser environment.
- It fills in all fields in a straightforward way. The [token]
- field receives a dummy value. It will be overwritten by the
- first call to [run], which will invoke [discard]. This allows
- us to invoke the lexer in just one place. *)
-
-let initenvdef =
- let lexer = "lexer"
- and lexbuf = "lexbuf" in
- {
- valpublic = false;
- valpat = PVar initenv;
- valval =
- annotate (
- EFun ( [ PVar lexer; PVar lexbuf ],
- blet (
- (* We do not have a dummy token at hand, so we forge one. *)
- (* It will be overwritten by the first call to the lexer. *)
- [ PVar token, EMagic EUnit ],
- ERecord ([
- (flexer, EVar lexer);
- (flexbuf, EVar lexbuf);
- (ftoken, EVar token);
- (ferror, efalse)
- ]
- )
- )
- )
- )
- (marrow [ tlexer; tlexbuf ] tenv)
- }
-
(* ------------------------------------------------------------------------ *)
(* Here is complete code for the parser. *)
@@ -1655,7 +1637,7 @@
defs
else
reducedef prod :: defs
- ) [ discarddef; initenvdef; printtokendef; assertfalsedef; errorcasedef ])))
+ ) [ discarddef; printtokendef; assertfalsedef; errorcasedef ])))
) ::
SIStretch grammar.postludes ::
diff -Nru menhir-20201201/src/codeBits.ml menhir-20201216/src/codeBits.ml
--- menhir-20201201/src/codeBits.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/codeBits.ml 2020-12-16 19:18:28.000000000 +0000
@@ -313,3 +313,10 @@
let mapp me1 mes2 =
List.fold_left mapp me1 mes2
+
+let field modifiable name t =
+ {
+ modifiable = modifiable;
+ fieldname = name;
+ fieldtype = type2scheme t
+ }
diff -Nru menhir-20201201/src/codeBits.mli menhir-20201216/src/codeBits.mli
--- menhir-20201201/src/codeBits.mli 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/codeBits.mli 2020-12-16 19:18:28.000000000 +0000
@@ -121,7 +121,10 @@
(* Constructing a named module type together with a list of "with type"
constraints. *)
-val with_types: IL.with_kind -> string -> (string list * string * IL.typ) list -> IL.module_type
+val with_types: with_kind -> string -> (string list * string * typ) list -> module_type
(* Functor applications. *)
-val mapp: IL.modexpr -> IL.modexpr list -> IL.modexpr
+val mapp: modexpr -> modexpr list -> modexpr
+
+(* Record fields. *)
+val field: bool -> string -> typ -> fielddef
diff -Nru menhir-20201201/src/codePieces.ml menhir-20201216/src/codePieces.ml
--- menhir-20201201/src/codePieces.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/codePieces.ml 2020-12-16 19:18:28.000000000 +0000
@@ -28,13 +28,6 @@
let ntvar nt =
Infer.ntvar (Nonterminal.print true nt)
-(* The variable that holds the environment. This is a parameter to all
- functions. We do not make it a global variable because we wish to
- preserve re-entrancy. *)
-
-let env =
- prefix "env"
-
(* A variable used to hold a semantic value. *)
let semv =
diff -Nru menhir-20201201/src/codePieces.mli menhir-20201216/src/codePieces.mli
--- menhir-20201201/src/codePieces.mli 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/codePieces.mli 2020-12-16 19:18:28.000000000 +0000
@@ -22,12 +22,6 @@
(* Naming conventions. *)
-(* The variable that holds the environment. This is a parameter to all
- functions. We do not make it a global variable because we wish to
- preserve re-entrancy. *)
-
-val env : string
-
(* A variable used to hold a semantic value. *)
val semv : string
diff -Nru menhir-20201201/src/conflict.ml menhir-20201216/src/conflict.ml
--- menhir-20201201/src/conflict.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/conflict.ml 2020-12-16 19:18:28.000000000 +0000
@@ -17,8 +17,8 @@
(* I suppose now is as good a time as any to do this. *)
let () =
- if Settings.graph then
- DependencyGraph.print_dependency_graph()
+ if Settings.reference_graph then
+ ReferenceGraph.print_reference_graph()
(* -------------------------------------------------------------------------- *)
(* If [--dump] is present, honor it before performing conflict resolution. *)
diff -Nru menhir-20201201/src/DependencyGraph.ml menhir-20201216/src/DependencyGraph.ml
--- menhir-20201201/src/DependencyGraph.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/DependencyGraph.ml 1970-01-01 00:00:00.000000000 +0000
@@ -1,65 +0,0 @@
-(******************************************************************************)
-(* *)
-(* Menhir *)
-(* *)
-(* François Pottier, Inria Paris *)
-(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
-(* *)
-(* Copyright Inria. All rights reserved. This file is distributed under the *)
-(* terms of the GNU General Public License version 2, as described in the *)
-(* file LICENSE. *)
-(* *)
-(******************************************************************************)
-
-open Grammar
-
-let print_dependency_graph() =
-
- (* Allocate. *)
-
- let forward : NonterminalSet.t NonterminalMap.t ref =
- ref NonterminalMap.empty
- in
-
- let successors nt =
- try
- NonterminalMap.find nt !forward
- with Not_found ->
- NonterminalSet.empty
- in
-
- (* Populate. *)
-
- Production.iter (fun prod ->
- let nt1 = Production.nt prod
- and rhs = Production.rhs prod in
- Array.iter (function
- | Symbol.T _ ->
- ()
- | Symbol.N nt2 ->
- forward := NonterminalMap.add
- nt1
- (NonterminalSet.add nt2 (successors nt1))
- !forward
- ) rhs
- );
-
- (* Print. *)
-
- let module P = Dot.Print (struct
- type vertex = Nonterminal.t
- let name nt =
- Printf.sprintf "nt%d" (Nonterminal.n2i nt)
- let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt =
- NonterminalSet.iter (fun successor ->
- f ~label:"" successor
- ) (successors nt)
- let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) =
- Nonterminal.iter (fun nt ->
- f ~label:(Nonterminal.print false nt) nt
- )
- end) in
- let f = open_out (Settings.base ^ ".dot") in
- P.print f;
- close_out f
-
diff -Nru menhir-20201201/src/DependencyGraph.mli menhir-20201216/src/DependencyGraph.mli
--- menhir-20201201/src/DependencyGraph.mli 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/DependencyGraph.mli 1970-01-01 00:00:00.000000000 +0000
@@ -1,19 +0,0 @@
-(******************************************************************************)
-(* *)
-(* Menhir *)
-(* *)
-(* François Pottier, Inria Paris *)
-(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
-(* *)
-(* Copyright Inria. All rights reserved. This file is distributed under the *)
-(* terms of the GNU General Public License version 2, as described in the *)
-(* file LICENSE. *)
-(* *)
-(******************************************************************************)
-
-(* Build and print the forward reference graph of the grammar. There is an edge
- of a nonterminal symbol [nt1] to every nonterminal symbol [nt2] that occurs
- in the definition of [nt1]. *)
-
-val print_dependency_graph: unit -> unit
-
diff -Nru menhir-20201201/src/grammarFunctor.ml menhir-20201216/src/grammarFunctor.ml
--- menhir-20201201/src/grammarFunctor.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/grammarFunctor.ml 2020-12-16 19:18:28.000000000 +0000
@@ -259,6 +259,7 @@
tk_is_declared = true;
tk_position = Positions.dummy;
tk_attributes = [];
+ tk_alias = None;
}
in
Array.init n (fun tok ->
@@ -310,6 +311,41 @@
f i
done
+ let tokens_without_an_alias =
+ let accu = ref [] in
+ iter_real begin fun tok ->
+ let properties = token_properties.(tok) in
+ if properties.tk_alias = None then
+ accu := tok :: !accu
+ end;
+ List.rev !accu
+
+ let () =
+ if verbose && Settings.require_aliases then
+ tokens_without_an_alias |> List.iter begin fun tok ->
+ let properties = token_properties.(tok) in
+ let pos = properties.tk_position in
+ Error.grammar_warning [pos]
+ "no alias has been defined for the token %s."
+ (print tok)
+ end
+
+ let every_token_has_an_alias =
+ tokens_without_an_alias = []
+
+ let alias tok =
+ token_properties.(tok).tk_alias
+
+ let unquoted_alias tok =
+ alias tok |> Option.map (fun qid ->
+ assert (qid.[0] = '"');
+ (* Skip the opening quote and decode the remainder using the lexer
+ [decode_string]. *)
+ let qid = String.sub qid 1 (String.length qid - 1) in
+ let lexbuf = Lexing.from_string qid in
+ Misc.with_buffer 8 (fun b -> Lexer.decode_string b lexbuf)
+ )
+
(* If a token named [EOF] exists, then it is assumed to represent
ocamllex's [eof] pattern. *)
diff -Nru menhir-20201201/src/grammarFunctor.mli menhir-20201216/src/grammarFunctor.mli
--- menhir-20201201/src/grammarFunctor.mli 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/grammarFunctor.mli 2020-12-16 19:18:28.000000000 +0000
@@ -226,6 +226,19 @@
val attributes: t -> Syntax.attribute list
+ (* [every_token_has_an_alias] is true if a token alias has been defined
+ by the user for every token. *)
+
+ val every_token_has_an_alias: bool
+
+ (* [alias t] is the token alias that has been defined by the user
+ for the terminal symbol [t], if there is one. It is a quoted
+ escaped string literal. [unquoted_alias t] is the same string,
+ deprived of its opening and closing quotes, and unescaped. *)
+
+ val alias: t -> string option
+ val unquoted_alias: t -> string option
+
(* The sub-module [Word] offers an implementation of words (that is,
sequences) of terminal symbols. It is used by [LRijkstra]. We
make it a functor, because it has internal state (a hash table)
diff -Nru menhir-20201201/src/interpret.ml menhir-20201216/src/interpret.ml
--- menhir-20201201/src/interpret.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/interpret.ml 2020-12-16 19:18:28.000000000 +0000
@@ -13,6 +13,7 @@
(* --------------------------------------------------------------------------- *)
+open Printf
open Grammar
open SentenceParserAux
@@ -59,16 +60,24 @@
(* Display and debugging. *)
let print_sentence (nto, terminals) : string =
- let b = Buffer.create 128 in
- Option.iter (fun nt ->
- Printf.bprintf b "%s: " (Nonterminal.print false nt)
- ) nto;
- let separator = Misc.once "" " " in
- List.iter (fun t ->
- Printf.bprintf b "%s%s" (separator()) (Terminal.print t)
- ) terminals;
- Printf.bprintf b "\n";
- Buffer.contents b
+ Misc.with_buffer 128 (fun b ->
+ Option.iter (fun nt ->
+ bprintf b "%s: " (Nonterminal.print false nt)
+ ) nto;
+ let separator = Misc.once "" " " in
+ List.iter (fun t ->
+ bprintf b "%s%s" (separator()) (Terminal.print t)
+ ) terminals;
+ bprintf b "\n";
+ )
+
+let print_concrete_sentence (_nto, terminals) : string =
+ Misc.with_buffer 128 (fun b ->
+ let separator = Misc.once "" " " in
+ List.iter (fun t ->
+ bprintf b "%s%s" (separator()) (Misc.unSome (Terminal.unquoted_alias t))
+ ) terminals
+ )
(* --------------------------------------------------------------------------- *)
@@ -175,7 +184,7 @@
(* Success. *)
- Printf.printf "ACCEPT";
+ printf "ACCEPT";
if Settings.interpret_show_cst then begin
print_newline();
Cst.show stdout cst
@@ -185,13 +194,13 @@
(* Parser failure. *)
- Printf.printf "REJECT"
+ printf "REJECT"
with EndOfStream ->
(* Lexer failure. *)
- Printf.printf "OVERSHOOT"
+ printf "OVERSHOOT"
end;
print_newline()
@@ -229,14 +238,33 @@
module SS = StackSymbols.Run()
-(* [print_messages_auto] displays just the sentence and the auto-generated
- comments. *)
+(* [print_messages_auto (nt, sentence, target)] displays the sentence
+ defined by [nt] and [sentence], leading to the state [target]. It
+ then displays a bunch of auto-generated comments. *)
let print_messages_auto (nt, sentence, target) : unit =
- (* Print the sentence, followed with auto-generated comments. *)
+
+ (* Print the sentence. *)
print_string (print_sentence (Some nt, sentence));
+
+ (* If a token alias has been defined for every terminal symbol, then
+ we can convert this sentence into concrete syntax. Do so. We make
+ a few assumptions about the concrete syntax of the language:
+ 1. It is permitted to insert one space between two tokens;
+ 2. No token contains a newline character.
+ (Our lexer enforces this assumption.)
+ The name of the start symbol cannot be printed in a meaningful
+ manner, so it is omitted. *)
+ if Terminal.every_token_has_an_alias then
+ printf
+ "##\n\
+ ## Concrete syntax: %s\n"
+ (print_concrete_sentence (Some nt, sentence))
+ ;
+
+ (* Show which state this sentence leads to. *)
let (s', spurious) = target in
- Printf.printf
+ printf
"##\n\
## Ends in an error in state: %d.\n\
##\n\
@@ -246,26 +274,32 @@
could sometimes be helpful, but is usually intolerably verbose. *)
(Lr0.print "## " (Lr1.state s'))
;
- Printf.printf
+
+ (* Show the known suffix of the stack in this state. *)
+ printf
"## The known suffix of the stack is as follows:\n\
##%s\n\
##\n"
(SS.print_stack_symbols s')
;
+
+ (* If interpreting this sentence causes spurious reductions (that is,
+ reductions that take place after the last terminal symbol has been
+ shifted), say so, and show them. *)
if spurious <> [] then begin
- Printf.printf
+ printf
"## WARNING: This example involves spurious reductions.\n\
## This implies that, although the LR(1) items shown above provide an\n\
## accurate view of the past (what has been recognized so far), they\n\
## may provide an INCOMPLETE view of the future (what was expected next).\n"
;
List.iter (fun (s, prod) ->
- Printf.printf
+ printf
"## In state %d, spurious reduction of production %s\n"
(Lr1.number s)
(Production.print prod)
) spurious;
- Printf.printf "##\n"
+ printf "##\n"
end
(* [print_messages_item] displays one data item. The item is of the form [nt,
@@ -278,7 +312,7 @@
(* Print the sentence, followed with auto-generated comments. *)
print_messages_auto (nt, sentence, target);
(* Then, print a proposed error message, between two blank lines. *)
- Printf.printf "\n%s\n" default_message
+ printf "\n%s\n" default_message
(* --------------------------------------------------------------------------- *)
@@ -445,7 +479,7 @@
| Comment _ ->
()
) runs;
- Printf.eprintf
+ eprintf
"Read %d sample input sentences and %d error messages.\n%!"
!s !m;
runs
@@ -601,9 +635,9 @@
valval = EFun ([ PVar "s" ], EMatch (EVar "s", branches))
} in
let program = [
- SIComment (Printf.sprintf
+ SIComment (sprintf
"This file was auto-generated based on \"%s\"." filename);
- SIComment (Printf.sprintf
+ SIComment (sprintf
"Please note that the function [%s] can raise [Not_found]." name);
SIValDefs (false,
[ messagedef ]);
@@ -635,7 +669,7 @@
let () =
if Settings.interpret then
let read = setup() in
- Printf.printf "Ready!\n%!";
+ printf "Ready!\n%!";
while true do
match read() with
| None ->
@@ -809,7 +843,7 @@
end
let conflict_comment filename =
- Printf.sprintf
+ sprintf
"#@ WARNING:\n\
#@ The following sentence has been copied from \"%s\".\n\
#@ It is redundant with a sentence that appears earlier in this file,\n\
@@ -817,7 +851,7 @@
filename
let toplevel_comment filename =
- Printf.sprintf
+ sprintf
"#@ WARNING:\n\
#@ The following comment has been copied from \"%s\".\n\
#@ It may need to be proofread, updated, moved, or removed.\n"
@@ -1174,6 +1208,30 @@
)) run.elements
)) runs;
+ exit 0
+ )
+
+(* [--echo-errors-concrete] works like [--echo-errors], except every sentence
+ is followed with an auto-generated comment that shows its concrete syntax. *)
+
+let () =
+ Settings.echo_errors_concrete |> Option.iter (fun filename ->
+
+ (* Read the file. *)
+ let strict = false in
+ let runs : run or_comment list = read_messages strict filename in
+
+ (* Echo. *)
+ List.iter (or_comment_iter (fun run ->
+ List.iter (or_comment_iter (fun ((_, sentence), _target) ->
+ print_string (print_sentence sentence);
+ if Terminal.every_token_has_an_alias then
+ printf
+ "## Concrete syntax: %s\n"
+ (print_concrete_sentence sentence)
+ )) run.elements
+ )) runs;
+
exit 0
)
diff -Nru menhir-20201201/src/invariant.ml menhir-20201216/src/invariant.ml
--- menhir-20201201/src/invariant.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/invariant.ml 2020-12-16 19:18:28.000000000 +0000
@@ -57,39 +57,51 @@
(* Vectors of sets of states. *)
-module StateVector = struct
+module StateSetVector = struct
type property =
- Lr1.NodeSet.t list
+ Lr1.NodeSet.t array
+
+ let bottom height =
+ Array.make height Lr1.NodeSet.empty
let empty =
- []
+ [||]
- let rec leq_join v1 v2 =
- match v1, v2 with
- | [], [] ->
- []
- | states1 :: vtail1, states2 :: vtail2 ->
- let states = Lr1.NodeSet.leq_join states1 states2
- and vtail = leq_join vtail1 vtail2 in
- if states2 == states && vtail2 == vtail then
- v2
- else
- states :: vtail
- | _, _ ->
- (* Because all heights are known ahead of time, we are able
- to (and careful to) compare only vectors of equal length. *)
- assert false
+ let leq_join v1 v2 =
+ let n = Array.length v1 in
+ (* Because all heights are known ahead of time, we are able (and careful)
+ to compare only vectors of equal length. *)
+ assert (n = Array.length v2);
+ let v = Array.init n (fun i -> Lr1.NodeSet.leq_join v1.(i) v2.(i)) in
+ if Misc.array_for_all2 (==) v2 v then v2 else v
let push v x =
- x :: v
-
- let truncate =
- MenhirLib.General.take
+ (* Push [x] onto the right end of [v]. *)
+ let n = Array.length v in
+ Array.init (n+1) (fun i -> if i < n then v.(i) else x)
+
+ let truncate k v =
+ (* Keep a suffix of length [k] of [v]. *)
+ let n = Array.length v in
+ Array.sub v (n-k) k
+
+ let print v =
+ if Array.length v = 0 then
+ "epsilon"
+ else
+ Misc.separated_list_to_string Lr1.NodeSet.print "; " (Array.to_list v)
+
+ let iter =
+ Array.iter
+
+ let get =
+ (* The index 0 corresponds to the cell that lies deepest in the stack. *)
+ Array.get
end
-open StateVector
+open StateSetVector
(* Define the data flow graph. *)
@@ -97,7 +109,7 @@
type variable = Lr1.node
- type property = StateVector.property
+ type property = StateSetVector.property
(* At each start state of the automaton, the stack is empty. *)
@@ -130,7 +142,7 @@
(Fix.Glue.ArraysAsImperativeMaps(Key))
(Key)
in
- let module F = Fix.DataFlow.Run(M)(StateVector)(G) in
+ let module F = Fix.DataFlow.Run(M)(StateSetVector)(G) in
F.solution
(* If every state is reachable, then the least fixed point must be non-[None]
@@ -156,10 +168,30 @@
Production.tabulate (fun prod ->
let sites = Lr1.production_where prod in
let height = Production.length prod in
- let bottom = Misc.list_make height Lr1.NodeSet.empty in
Lr1.NodeSet.fold (fun node accu ->
leq_join (truncate height (stack_states node)) accu
- ) sites bottom
+ ) sites (bottom height)
+ )
+
+(* ------------------------------------------------------------------------ *)
+(* If requested, print the information that has been computed above. *)
+
+let () =
+ Error.logC 3 (fun f ->
+ Lr1.iter (fun node ->
+ Printf.fprintf f "stack(%s) = %s\n"
+ (Lr1.print node)
+ (print (stack_states node))
+ )
+ )
+
+let () =
+ Error.logC 3 (fun f ->
+ Production.iterx (fun prod ->
+ Printf.fprintf f "stack when reducing %s = %s\n"
+ (Production.print prod)
+ (print (production_states prod))
+ )
)
(* ------------------------------------------------------------------------ *)
@@ -211,7 +243,7 @@
(* Enforce condition (1) above. *)
let share (v : property) =
- List.iter (fun states ->
+ StateSetVector.iter (fun states ->
let dummy = UnionFind.fresh false in
Lr1.NodeSet.iter (fun state ->
UnionFind.union dummy (represented state)
@@ -260,7 +292,7 @@
let () =
Lr1.iter (fun node ->
let v = stack_states node in
- List.iter (fun states ->
+ StateSetVector.iter (fun states ->
if Lr1.NodeSet.cardinal states >= 2 && handlers states then
represents states
) v
@@ -276,7 +308,7 @@
if length = 0 then
Lr1.NodeSet.iter represent sites
else
- let states = List.nth (production_states prod) (length - 1) in
+ let states = StateSetVector.get (production_states prod) 0 in
represents states
)
@@ -303,6 +335,16 @@
Printf.fprintf f "%d out of %d states are represented.\n" count Lr1.n
)
+(* If requested, show a detailed table of which states are represented. *)
+
+let () =
+ Error.logC 3 (fun f ->
+ Lr1.iter (fun node ->
+ Printf.fprintf f "represented(%s) = %b\n"
+ (Lr1.print node) (represented node)
+ )
+ )
+
(* ------------------------------------------------------------------------ *)
(* Accessors for information about the stack. *)
@@ -319,27 +361,34 @@
(* This auxiliary function converts a stack-as-an-array (top of stack
at the right end) to a stack-as-a-list (top of stack at list head). *)
-let convert a =
+let _convert a =
let n = Array.length a in
let rec loop i accu =
if i = n then accu else loop (i + 1) (a.(i) :: accu)
in
loop 0 []
+(* This auxiliary function converts a pair of stacks-as-arrays to a
+ stack-as-a-list-of-pairs. *)
+
+let convert2 a b =
+ let n = Array.length a in
+ assert (n = Array.length b);
+ let rec loop i accu =
+ if i = n then accu else loop (i + 1) ((a.(i), b.(i)) :: accu)
+ in
+ loop 0 []
+
(* [stack s] describes the stack when the automaton is in state [s]. *)
let stack node : word =
- List.combine
- (convert (stack_symbols node))
- (stack_states node)
+ convert2 (stack_symbols node) (stack_states node)
(* [prodstack prod] describes the stack when production [prod] is about to be
reduced. *)
let prodstack prod : word =
- List.combine
- (convert (Production.rhs prod))
- (production_states prod)
+ convert2 (Production.rhs prod) (production_states prod)
(* [gotostack nt] is the structure of the stack when a shift
transition over nonterminal [nt] is about to be taken. It
diff -Nru menhir-20201201/src/lexer.mll menhir-20201216/src/lexer.mll
--- menhir-20201201/src/lexer.mll 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/lexer.mll 2020-12-16 19:18:28.000000000 +0000
@@ -404,6 +404,17 @@
"on_error_reduce", ON_ERROR_REDUCE;
]
+(* ------------------------------------------------------------------------ *)
+
+(* Decoding escaped characters. *)
+
+let char_for_backslash = function
+ | 'n' -> '\010'
+ | 'r' -> '\013'
+ | 'b' -> '\008'
+ | 't' -> '\009'
+ | c -> c
+
}
(* ------------------------------------------------------------------------ *)
@@ -498,11 +509,16 @@
}
| (uppercase identchar *) as id
{ UID (with_pos (cpos lexbuf) id) }
-(* Quoted strings, which are used as aliases for tokens.
- For simplicity, we just disallow double quotes and backslash outright.
- Given the use of terminal strings in grammars, this is fine. *)
-| ( "\"" ( [' ' - '~'] # ['"' '\\'] + ) "\"" ) as id
- { QID (with_pos (cpos lexbuf) id) }
+(* Quoted strings are used as aliases for tokens. *)
+(* A quoted string is stored as is -- with the quotes
+ and with its escape sequences. *)
+| '"'
+ { let buffer = Buffer.create 16 in
+ let openingpos = lexeme_start_p lexbuf in
+ let content = record_string openingpos buffer lexbuf in
+ let id = Printf.sprintf "\"%s\"" content in
+ let pos = import (openingpos, lexbuf.lex_curr_p) in
+ QID (with_pos pos id) }
| "//" [^ '\010' '\013']* newline (* skip C++ style comment *)
| newline
{ new_line lexbuf; main lexbuf }
@@ -746,18 +762,51 @@
and string openingpos = parse
| '"'
- { () }
+ { () }
| '\\' newline
| newline
- { new_line lexbuf; string openingpos lexbuf }
+ { new_line lexbuf; string openingpos lexbuf }
| '\\' _
- (* Upon finding a backslash, skip the character that follows,
- unless it is a newline. Pretty crude, but should work. *)
- { string openingpos lexbuf }
+ (* Upon finding a backslash, skip the character that follows,
+ unless it is a newline. Pretty crude, but should work. *)
+ { string openingpos lexbuf }
| eof
- { error1 openingpos "unterminated OCaml string." }
+ { error1 openingpos "unterminated OCaml string." }
| _
- { string openingpos lexbuf }
+ { string openingpos lexbuf }
+
+(* ------------------------------------------------------------------------ *)
+
+(* Recording on OCaml string. (This is used for token aliases.) *)
+
+and record_string openingpos buffer = parse
+| '"'
+ { Buffer.contents buffer }
+| ('\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']) as sequence
+ { (* This escape sequence is recognized as such, but not decoded. *)
+ Buffer.add_string buffer sequence;
+ record_string openingpos buffer lexbuf }
+| '\\' _
+ { error2 lexbuf "illegal backslash escape in string." }
+| newline
+ { error2 lexbuf "illegal newline in string." }
+| eof
+ { error1 openingpos "unterminated string." }
+| _ as c
+ { Buffer.add_char buffer c;
+ record_string openingpos buffer lexbuf }
+
+(* Decoding a string that may contain escaped characters. *)
+
+and decode_string buffer = parse
+| '"'
+ { (* The final double quote is skipped. *) }
+| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
+ { Buffer.add_char buffer (char_for_backslash c);
+ decode_string buffer lexbuf }
+| _ as c
+ { Buffer.add_char buffer c;
+ decode_string buffer lexbuf }
(* ------------------------------------------------------------------------ *)
diff -Nru menhir-20201201/src/lr0.ml menhir-20201216/src/lr0.ml
--- menhir-20201201/src/lr0.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/lr0.ml 2020-12-16 19:18:28.000000000 +0000
@@ -304,14 +304,14 @@
(* Displaying a concrete state. *)
let print_concrete leading (state : concretelr1state) =
- let buffer = Buffer.create 1024 in
- Item.Map.iter (fun item toks ->
- Printf.bprintf buffer "%s%s [ %s ]\n"
- leading
- (Item.print item)
- (TerminalSet.print toks)
- ) state;
- Buffer.contents buffer
+ Misc.with_buffer 1024 (fun buffer ->
+ Item.Map.iter (fun item toks ->
+ Printf.bprintf buffer "%s%s [ %s ]\n"
+ leading
+ (Item.print item)
+ (TerminalSet.print toks)
+ ) state
+ )
(* Displaying a state. By default, only the kernel is displayed, not
the closure. *)
diff -Nru menhir-20201201/src/lr1.ml menhir-20201216/src/lr1.ml
--- menhir-20201201/src/lr1.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/lr1.ml 2020-12-16 19:18:28.000000000 +0000
@@ -466,6 +466,9 @@
let number node =
node
+let print node =
+ Printf.sprintf "%d" (number node)
+
let entry =
ProductionMap.map transport Raw.entry
@@ -695,6 +698,11 @@
let leq_join s1 s2 =
if subset s1 s2 then s2 else union s1 s2
+ let print s =
+ Printf.sprintf "{ %s }" (
+ Misc.separated_iter_to_string print ", " (fun f -> iter f s)
+ )
+
end
module NodeMap =
@@ -933,7 +941,7 @@
is preferable to every other (so priority plays a role). *)
let prioritized =
- ref 0
+ ref []
(* The set of nonterminal symbols in the left-hand side of an extra reduction. *)
@@ -961,7 +969,8 @@
with a reduction, update [extra] and [extra_nts]. *)
let triggered = lazy (
incr extra;
- if List.length prods > 1 then incr prioritized;
+ if List.length prods > 1 then
+ prioritized := node :: !prioritized;
extra_nts := NonterminalSet.add (Production.nt prod) !extra_nts
) in
Terminal.iter_real (fun tok ->
@@ -980,13 +989,19 @@
extra_reductions_in_node node
);
(* Info message. *)
- if !extra > 0 then
+ if !extra > 0 then begin
Error.logA 1 (fun f ->
Printf.fprintf f "Extra reductions on error were added in %d states.\n"
!extra;
Printf.fprintf f "Priority played a role in %d of these states.\n"
- !prioritized
+ (List.length !prioritized)
);
+ Error.logA 2 (fun f ->
+ if !prioritized <> [] then
+ Printf.fprintf f "These states are %s.\n"
+ (NodeSet.print (NodeSet.of_list !prioritized))
+ )
+ end;
(* Warn about useless %on_error_reduce declarations. *)
OnErrorReduce.iter (fun nt ->
if not (NonterminalSet.mem nt !extra_nts) then
diff -Nru menhir-20201201/src/lr1.mli menhir-20201216/src/lr1.mli
--- menhir-20201201/src/lr1.mli 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/lr1.mli 2020-12-16 19:18:28.000000000 +0000
@@ -53,6 +53,7 @@
module NodeSet : sig
include Set.S with type elt = node
val leq_join: t -> t -> t
+ val print: t -> string
end
module NodeMap : Map.S with type key = node
@@ -80,6 +81,10 @@
val n: int
val number: node -> int
+(* A state is printed simply as its number. *)
+
+val print: node -> string
+
(* This provides access to the LR(1) state that a node stands for. *)
val state: node -> Lr0.lr1state
diff -Nru menhir-20201201/src/misc.ml menhir-20201216/src/misc.ml
--- menhir-20201201/src/misc.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/misc.ml 2020-12-16 19:18:28.000000000 +0000
@@ -46,22 +46,27 @@
done;
!sum
+let with_buffer n f =
+ let b = Buffer.create n in
+ f b;
+ Buffer.contents b
+
type 'a iter = ('a -> unit) -> unit
let separated_iter_to_string printer separator iter =
- let b = Buffer.create 32 in
- let first = ref true in
- iter (fun x ->
- if !first then begin
- Buffer.add_string b (printer x);
- first := false
- end
- else begin
- Buffer.add_string b separator;
- Buffer.add_string b (printer x)
- end
- );
- Buffer.contents b
+ with_buffer 32 (fun b ->
+ let first = ref true in
+ iter (fun x ->
+ if !first then begin
+ Buffer.add_string b (printer x);
+ first := false
+ end
+ else begin
+ Buffer.add_string b separator;
+ Buffer.add_string b (printer x)
+ end
+ )
+ )
let separated_list_to_string printer separator xs =
separated_iter_to_string printer separator (fun f -> List.iter f xs)
@@ -417,5 +422,15 @@
else false in
loop 0
+let array_fold_left2 f accu a1 a2 =
+ let n1 = Array.length a1
+ and n2 = Array.length a2 in
+ if n1 <> n2 then invalid_arg "Array.fold_left2";
+ let accu = ref accu in
+ for i = 0 to n1 - 1 do
+ accu := f !accu (Array.unsafe_get a1 i) (Array.unsafe_get a2 i)
+ done;
+ !accu
+
let rec list_make n x =
if n = 0 then [] else x :: list_make (n - 1) x
diff -Nru menhir-20201201/src/misc.mli menhir-20201216/src/misc.mli
--- menhir-20201201/src/misc.mli 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/misc.mli 2020-12-16 19:18:28.000000000 +0000
@@ -40,6 +40,11 @@
val sum: int -> (int -> int) -> int
+(* [with_buffer n f] creates a fresh buffer of size [n], passes it to [f],
+ and returns the final content of the buffer. *)
+
+val with_buffer: int -> (Buffer.t -> unit) -> string
+
(* [separated_list_to_string printer sep l] converts [l] into a string
representation built by using [printer] on each element and [sep] as
a separator. *)
@@ -191,6 +196,9 @@
val array_for_all : ('a -> bool) -> 'a array -> bool
val array_for_all2 : ('a -> 'b -> bool) -> 'a array -> 'b array -> bool
+val array_fold_left2:
+ ('a -> 'b1 -> 'b2 -> 'a) -> 'a -> 'b1 array -> 'b2 array -> 'a
+
(* [List.make] *)
val list_make: int -> 'a -> 'a list
diff -Nru menhir-20201201/src/partialGrammar.ml menhir-20201216/src/partialGrammar.ml
--- menhir-20201201/src/partialGrammar.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/partialGrammar.ml 2020-12-16 19:18:28.000000000 +0000
@@ -36,9 +36,9 @@
difficult by the fact that %token and %left-%right-%nonassoc
declarations are independent. *)
- (* Declarations of token aliases are lost at this point. *)
+ (* If a token carries an alias, it is recorded in the field [tk_alias]. *)
- | DToken (ocamltype, terminal, _alias, attributes) ->
+ | DToken (ocamltype, terminal, alias, attributes) ->
let token_property =
try
@@ -64,6 +64,7 @@
tk_filename = filename;
tk_position = decl.position;
tk_attributes = attributes;
+ tk_alias = alias;
}
with Not_found ->
@@ -77,7 +78,8 @@
tk_precedence = UndefinedPrecedence;
tk_position = decl.position;
tk_attributes = attributes;
- tk_is_declared = true
+ tk_is_declared = true;
+ tk_alias = alias;
}
in
@@ -121,6 +123,7 @@
tk_precedence = prec;
tk_is_declared = false;
tk_attributes = [];
+ tk_alias = None;
(* Will be updated later. *)
tk_position = decl.position;
} in
diff -Nru menhir-20201201/src/printer.ml menhir-20201216/src/printer.ml
--- menhir-20201201/src/printer.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/printer.ml 2020-12-16 19:18:28.000000000 +0000
@@ -824,12 +824,12 @@
P.expr e
let string_of_expr e =
- let b = Buffer.create 512 in
- let module P =
- MakeBuffered (struct
- let f = b
- let locate_stretches = None
- end)
- in
- P.expr e;
- Buffer.contents b
+ Misc.with_buffer 512 (fun b ->
+ let module P =
+ MakeBuffered (struct
+ let f = b
+ let locate_stretches = None
+ end)
+ in
+ P.expr e
+ )
diff -Nru menhir-20201201/src/ReferenceGraph.ml menhir-20201216/src/ReferenceGraph.ml
--- menhir-20201201/src/ReferenceGraph.ml 1970-01-01 00:00:00.000000000 +0000
+++ menhir-20201216/src/ReferenceGraph.ml 2020-12-16 19:18:28.000000000 +0000
@@ -0,0 +1,64 @@
+(******************************************************************************)
+(* *)
+(* Menhir *)
+(* *)
+(* François Pottier, Inria Paris *)
+(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
+(* *)
+(* Copyright Inria. All rights reserved. This file is distributed under the *)
+(* terms of the GNU General Public License version 2, as described in the *)
+(* file LICENSE. *)
+(* *)
+(******************************************************************************)
+
+open Grammar
+
+let print_reference_graph() =
+
+ (* Allocate. *)
+
+ let forward : NonterminalSet.t NonterminalMap.t ref =
+ ref NonterminalMap.empty
+ in
+
+ let successors nt =
+ try
+ NonterminalMap.find nt !forward
+ with Not_found ->
+ NonterminalSet.empty
+ in
+
+ (* Populate. *)
+
+ Production.iter (fun prod ->
+ let nt1 = Production.nt prod
+ and rhs = Production.rhs prod in
+ Array.iter (function
+ | Symbol.T _ ->
+ ()
+ | Symbol.N nt2 ->
+ forward := NonterminalMap.add
+ nt1
+ (NonterminalSet.add nt2 (successors nt1))
+ !forward
+ ) rhs
+ );
+
+ (* Print. *)
+
+ let module P = Dot.Print (struct
+ type vertex = Nonterminal.t
+ let name nt =
+ Printf.sprintf "nt%d" (Nonterminal.n2i nt)
+ let successors (f : ?style:Dot.style -> label:string -> vertex -> unit) nt =
+ NonterminalSet.iter (fun successor ->
+ f ~label:"" successor
+ ) (successors nt)
+ let iter (f : ?shape:Dot.shape -> ?style:Dot.style -> label:string -> vertex -> unit) =
+ Nonterminal.iter (fun nt ->
+ f ~label:(Nonterminal.print false nt) nt
+ )
+ end) in
+ let f = open_out (Settings.base ^ ".dot") in
+ P.print f;
+ close_out f
diff -Nru menhir-20201201/src/ReferenceGraph.mli menhir-20201216/src/ReferenceGraph.mli
--- menhir-20201201/src/ReferenceGraph.mli 1970-01-01 00:00:00.000000000 +0000
+++ menhir-20201216/src/ReferenceGraph.mli 2020-12-16 19:18:28.000000000 +0000
@@ -0,0 +1,18 @@
+(******************************************************************************)
+(* *)
+(* Menhir *)
+(* *)
+(* François Pottier, Inria Paris *)
+(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
+(* *)
+(* Copyright Inria. All rights reserved. This file is distributed under the *)
+(* terms of the GNU General Public License version 2, as described in the *)
+(* file LICENSE. *)
+(* *)
+(******************************************************************************)
+
+(* Build and print the forward reference graph of the grammar. There is an edge
+ of a nonterminal symbol [nt1] to every nonterminal symbol [nt2] that occurs
+ in the definition of [nt1]. *)
+
+val print_reference_graph: unit -> unit
diff -Nru menhir-20201201/src/referenceInterpreter.ml menhir-20201216/src/referenceInterpreter.ml
--- menhir-20201201/src/referenceInterpreter.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/referenceInterpreter.ml 2020-12-16 19:18:28.000000000 +0000
@@ -245,6 +245,14 @@
(* ------------------------------------------------------------------------ *)
+(* The strategy used by the reference interpreter is determined by the
+ command line switch [--strategy]. *)
+
+let strategy =
+ Settings.strategy
+
+(* ------------------------------------------------------------------------ *)
+
(* Define a palatable user entry point. *)
let interpret log nt lexer lexbuf =
@@ -261,7 +269,7 @@
(* Run it. *)
try
- Some (E.entry (Lr1.entry_of_nt nt) lexer lexbuf)
+ Some (E.entry strategy (Lr1.entry_of_nt nt) lexer lexbuf)
with T.Error ->
None
@@ -355,7 +363,7 @@
loop (E.offer checkpoint (t, Lexing.dummy_pos, Lexing.dummy_pos)) spurious
end
| E.Shifting _ ->
- loop (E.resume checkpoint) spurious
+ loop (E.resume ~strategy checkpoint) spurious
| E.AboutToReduce (env, prod) ->
(* If we have requested the last input token and if this is not
a default reduction, then this is a spurious reduction.
@@ -369,7 +377,7 @@
else
spurious
in
- loop (E.resume checkpoint) spurious
+ loop (E.resume ~strategy checkpoint) spurious
| E.HandlingError env ->
(* Check that all of the input has been read. Otherwise, the error
has occurred sooner than expected. *)
diff -Nru menhir-20201201/src/settings.ml menhir-20201216/src/settings.ml
--- menhir-20201201/src/settings.ml 2020-12-01 15:15:50.000000000 +0000
+++ menhir-20201216/src/settings.ml 2020-12-16 19:18:28.000000000 +0000
@@ -71,7 +71,10 @@
let dump_resolved =
ref false
-let graph =
+let reference_graph =
+ ref false
+
+let automaton_graph =
ref false
let trace =
@@ -279,6 +282,12 @@
let set_echo_errors filename =
echo_errors := Some filename
+let echo_errors_concrete =
+ ref None
+
+let set_echo_errors_concrete filename =
+ echo_errors_concrete := Some filename
+
let cmly =
ref false
@@ -292,10 +301,26 @@
let dollars =
ref DollarsAllowed
+let require_aliases =
+ ref false
+
+let strategy =
+ ref `Legacy
+
+let set_strategy = function
+ | "legacy" ->
+ strategy := `Legacy
+ | "simplified" ->
+ strategy := `Simplified
+ | _ ->
+ eprintf "Error: --strategy should be followed with legacy | simplified.\n";
+ exit 1
+
(* When new command line options are added, please update both the manual
in [doc/manual.tex] and the man page in [doc/menhir.1]. *)
let options = Arg.align [
+ "--automaton-graph", Arg.Set automaton_graph, " (undocumented)";
"--base", Arg.Set_string base, "