diff -Nru python-ptk-1.3.2/debian/changelog python-ptk-1.3.5/debian/changelog
--- python-ptk-1.3.2/debian/changelog	2017-08-12 19:55:18.000000000 +0000
+++ python-ptk-1.3.5/debian/changelog	2018-04-23 22:23:30.000000000 +0000
@@ -1,3 +1,26 @@
+python-ptk (1.3.5-1) unstable; urgency=medium
+
+  * New upstream release. Repackage again without generated docs
+    (its removal by upstream was not deliberate).
+  * Build-Depend, Recommend: python3-async-generator,
+    The python3 tests depend on it for asynchronous streams.
+  * Add python minimal versions.
+  * Revert removal of replace_install_requires_in_setup_py.diff
+    (its absence modifies runtime dependencies).
+  * Prevent network access during build with https as well as http.
+  * Prefer pypi upstream tarballs over bitbucket ones.
+
+ -- Nicolas Boulenguez  Tue, 24 Apr 2018 00:23:30 +0200
+
+python-ptk (1.3.4-1) unstable; urgency=medium
+
+  * New upstream release. Stop repackaging, backup README.rst.
+  * Drop obsolete replace_install_requires_in_setup_py.diff.
+  * Switch to debhelper 11. Install documentation to main doc dir.
+  * Standards-Version 4.1.4.
+
+ -- Nicolas Boulenguez  Sat, 21 Apr 2018 22:45:24 +0200
+
 python-ptk (1.3.2-2) unstable; urgency=medium
 
   * Debhelper 10. Remove explicit rules target confusing dh.
diff -Nru python-ptk-1.3.2/debian/compat python-ptk-1.3.5/debian/compat
--- python-ptk-1.3.2/debian/compat	2017-08-12 19:42:46.000000000 +0000
+++ python-ptk-1.3.5/debian/compat	2018-04-22 15:56:57.000000000 +0000
@@ -1 +1 @@
-10
+11
diff -Nru python-ptk-1.3.2/debian/control python-ptk-1.3.5/debian/control
--- python-ptk-1.3.2/debian/control	2017-08-12 19:53:45.000000000 +0000
+++ python-ptk-1.3.5/debian/control	2018-04-23 22:23:30.000000000 +0000
@@ -1,16 +1,19 @@
 Source: python-ptk
 Priority: optional
 Maintainer: Nicolas Boulenguez
-Build-Depends-Indep: debhelper (>= 10)
- , dh-python
- , python-all (>= 2.7), python3-all (>= 3.5)
- , python-six, python3-six
- , python3-sphinx
-Standards-Version: 4.0.1
+Build-Depends-Indep: debhelper (>= 11),
+ dh-python,
+ python-all (>= 2.7), python3-all (>= 3.5),
+ python-six, python3-six,
+ python3-async-generator,
+ python3-sphinx,
+Standards-Version: 4.1.4
 Section: python
 Homepage: https://bitbucket.org/fraca7/ptk
-Vcs-Git: git://anonscm.debian.org/collab-maint/python-ptk.git
-Vcs-Browser: https://anonscm.debian.org/gitweb/?p=collab-maint/python-ptk.git;a=summary
+Vcs-Browser: https://salsa.debian.org/debian/python-ptk
+Vcs-Git: https://salsa.debian.org/debian/python-ptk.git
+X-Python-Version: >= 2.7
+X-Python3-Version: >= 3.5
 
 Package: python-ptk
 Architecture: all
@@ -28,6 +31,7 @@
 Package: python3-ptk
 Architecture: all
 Depends: ${misc:Depends}, ${python3:Depends}
+Recommends: python3-async-generator
 Suggests: python-ptk-doc, python3 (>= 3.5), python3-twisted
 Description: parser for Python 3 with support for asynchronous input
  PTK implements LR(1) parsing in Python. Compared to compiled tools
@@ -36,6 +40,8 @@
  generation. Various inputs are accepted: Python 3 asynchronous streams,
  PyZipFile archives, Twisted Deferred objects.
  .
+ python3-async-generator is required for asynchronous streams.
+ .
  This package installs the library for Python 3.
 
 Package: python-ptk-doc
diff -Nru python-ptk-1.3.2/debian/copyright python-ptk-1.3.5/debian/copyright
--- python-ptk-1.3.2/debian/copyright	2017-08-12 19:55:18.000000000 +0000
+++ python-ptk-1.3.5/debian/copyright	2018-04-22 16:01:01.000000000 +0000
@@ -9,11 +9,11 @@
 We also remove the generated README.rst to simplify packaging.
 
 Files: *
-Copyright: 2015-2016 Jérôme Laheurte
+Copyright: 2015-2018 Jérôme Laheurte
 License: LGPL-3+
 
 Files: debian/*
-Copyright: 2016-2017 Nicolas Boulenguez
+Copyright: 2016-2018 Nicolas Boulenguez
 License: LGPL-3+
 
 License: LGPL-3+
diff -Nru python-ptk-1.3.2/debian/python-ptk-doc.doc-base.python-ptk python-ptk-1.3.5/debian/python-ptk-doc.doc-base.python-ptk
--- python-ptk-1.3.2/debian/python-ptk-doc.doc-base.python-ptk	2016-07-08 16:31:07.000000000 +0000
+++ python-ptk-1.3.5/debian/python-ptk-doc.doc-base.python-ptk	2018-04-22 15:56:57.000000000 +0000
@@ -12,5 +12,5 @@
 Section: Programming/Python
 
 Format: HTML
-Index: /usr/share/doc/python-ptk-doc/html/index.html
-Files: /usr/share/doc/python-ptk-doc/html/*.html
+Index: /usr/share/doc/python-ptk/html/index.html
+Files: /usr/share/doc/python-ptk/html/*.html
diff -Nru python-ptk-1.3.2/debian/rules python-ptk-1.3.5/debian/rules
--- python-ptk-1.3.2/debian/rules	2017-08-12 19:55:18.000000000 +0000
+++ python-ptk-1.3.5/debian/rules	2018-04-23 22:23:30.000000000 +0000
@@ -22,6 +22,7 @@
 # Forbid internet access.
 # Select Python3, able to parse async def in imported Python modules.
 html:
+	https_proxy=127.0.0.1:9 \
 	PYTHONPATH=. http_proxy='127.0.0.1:9' $(MAKE) documentation \
 		SPHINXBUILD=/usr/share/sphinx/scripts/python3/sphinx-build
 
@@ -33,7 +34,6 @@
 # Avoid pybuild when cleaning, as it creates subdirectories.
 .PHONY: override_dh_auto_clean
 override_dh_auto_clean:
-	# Versions > 1.2.0 should leave no doc/html in the way anymore.
 	rm -fr .pybuild doc/build html
+	find . -name '*.pyc' -delete
 	rm -f README.rst
-	find . -name "*.pyc" -delete
diff -Nru python-ptk-1.3.2/debian/tests/control python-ptk-1.3.5/debian/tests/control
--- python-ptk-1.3.2/debian/tests/control	2016-01-23 15:36:45.000000000 +0000
+++ python-ptk-1.3.5/debian/tests/control	2018-04-23 12:20:31.000000000 +0000
@@ -1,7 +1,11 @@
+# As long as the dependencies differ between python2 and python3,
+# these stanzas cannot be replaced with Testsuite:
+# autopkgtest-pkg-python.
+
 Test-Command: python2 tests/test_all.py -v
 Depends: python-ptk, python-twisted-core
 Restrictions: allow-stderr
 
 Test-Command: python3 tests/test_all.py -v
-Depends: python3-ptk, python3-twisted
+Depends: python3-ptk, python3-twisted, python3-async-generator
 Restrictions: allow-stderr
diff -Nru python-ptk-1.3.2/debian/watch python-ptk-1.3.5/debian/watch
--- python-ptk-1.3.2/debian/watch	2017-08-12 19:42:35.000000000 +0000
+++ python-ptk-1.3.5/debian/watch	2018-04-23 12:13:07.000000000 +0000
@@ -1,3 +1,7 @@
 version=4
-https://bitbucket.org/fraca7/ptk/downloads/?tab=tags \
-.*/release@ANY_VERSION@@ARCHIVE_EXT@
+
+# Upstream develops on bitbucket, but releases on pypi.
+# Bitbuckets automatically generated tarballs are not easy to configure.
+ +opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ +https://pypi.debian.net/ptk/ptk@ANY_VERSION@@ARCHIVE_EXT@ diff -Nru python-ptk-1.3.2/doc/source/conf.py python-ptk-1.3.5/doc/source/conf.py --- python-ptk-1.3.2/doc/source/conf.py 2015-12-25 16:15:22.000000000 +0000 +++ python-ptk-1.3.5/doc/source/conf.py 2018-04-22 13:36:38.000000000 +0000 @@ -18,7 +18,7 @@ master_doc = 'index' project = PackageInfo.project_name -copyright = six.u('2015, %s') % PackageInfo.author_name +copyright = six.u('2018, %s') % PackageInfo.author_name author = PackageInfo.author_name release = version language = None diff -Nru python-ptk-1.3.2/doc/source/parser.rst python-ptk-1.3.5/doc/source/parser.rst --- python-ptk-1.3.2/doc/source/parser.rst 2016-01-17 14:46:18.000000000 +0000 +++ python-ptk-1.3.5/doc/source/parser.rst 2017-01-29 12:53:02.000000000 +0000 @@ -246,7 +246,8 @@ The :py:class:`AsyncLexer` and :py:class:`AsyncLRParser` classes allow you to parse an input stream asynchronously. Since this uses the new asynchronous method syntax introduced in Python 3.5, it's only -available with this version of Python. +available with this version of Python. Additionally, you must install +the `async_generator `_ module. The basic idea is that the production methods are asynchronous. Feed the input stream one byte/char at a time by awaiting on @@ -255,6 +256,9 @@ :py:func:`AsyncParser.asyncNewToken`. Semantic actions may then be awaited on as a result. +Note that if you use a consumer in your lexer, the `feed` method must +be asynchronous as well. + The samples directory contains the following example of an asynchronous parser: @@ -275,8 +279,8 @@ import six, operator, os, asyncio, sys, codecs - from ptk.lexer import token, AsyncLexer, EOF - from ptk.parser import production, leftAssoc, AsyncLRParser, ParseError + from ptk.async_lexer import token, AsyncLexer, EOF + from ptk.async_parser import production, leftAssoc, AsyncLRParser, ParseError @leftAssoc('+', '-') diff -Nru python-ptk-1.3.2/PKG-INFO python-ptk-1.3.5/PKG-INFO --- python-ptk-1.3.2/PKG-INFO 2016-06-26 15:44:55.000000000 +0000 +++ python-ptk-1.3.5/PKG-INFO 2018-04-22 13:50:28.000000000 +0000 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: ptk -Version: 1.3.2 +Version: 1.3.5 Summary: LR(1) parsing framework for Python with support for asynchronous input Home-page: https://bitbucket.org/fraca7/ptk Author: Jérôme Laheurte Author-email: jerome@jeromelaheurte.net License: UNKNOWN -Download-URL: https://pypi.python.org/packages/source/p/ptk/ptk-1.3.2.tar.gz +Download-URL: https://pypi.python.org/packages/source/p/ptk/ptk-1.3.5.tar.gz Description: UNKNOWN Keywords: parser,parsing,compiler,lr,slr Platform: UNKNOWN diff -Nru python-ptk-1.3.2/ptk/async_lexer.py python-ptk-1.3.5/ptk/async_lexer.py --- python-ptk-1.3.2/ptk/async_lexer.py 2016-01-03 12:47:40.000000000 +0000 +++ python-ptk-1.3.5/ptk/async_lexer.py 2018-04-22 13:36:58.000000000 +0000 @@ -1,12 +1,20 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt # XXXTODO: when pylint supports async, remove this... 
# pylint: skip-file +import six + from ptk.lexer import ProgressiveLexer, token, EOF, LexerError +from ptk.regex import DeadState + +try: + from async_generator import aclosing, async_generator, yield_, yield_from_ +except ImportError: + raise RuntimeError('You need to have the async_generator package installed to use the async lexer.') class AsyncLexer(ProgressiveLexer): @@ -18,6 +26,11 @@ """ + async def asyncParse(self, text): + for i, char in enumerate(text): + await self.asyncFeed(char, i+1) + return (await self.asyncFeed(EOF)) + async def asyncFeed(self, char, charPos=None): """ Asynchronous version of :py:func:`ProgressiveLexer.feed`. This @@ -27,11 +40,102 @@ self._input.append((char, charPos)) while self._input: char, charPos = self._input.pop(0) - for tok in self._feed(char, charPos): - await self.asyncNewToken(tok) + async with aclosing(self._asyncFeed(char, charPos)) as agen: + async for tok in agen: + value = await self.asyncNewToken(tok) + if value is not None: + return value + + @async_generator + async def asyncIterFeed(self, char, charPos=None): + self._input.append((char, charPos)) + while self._input: + char, charPos = self._input.pop(0) + async with aclosing(self._asyncFeed(char, charPos)) as agen: + async for tok in agen: + value = await self.asyncNewToken(tok) + if value is not None: + await yield_(value) + + @async_generator + async def asyncIterParse(self, chars): + for char in chars: + async with aclosing(self.asyncIterFeed(char)) as agen: + await yield_from_(agen) async def asyncNewToken(self, tok): """ Asynchronous version of py:func:`LexerBase.newToken`. """ raise NotImplementedError + + @async_generator + async def _asyncFeed(self, char, charPos): # pylint: disable=R0912,R0915 + # Unfortunately this is copy/pasted from ProgressiveLexer._feed to add the async stuff... 
+ if char in (six.u('\n'), six.b('\n')[0]): + self.advanceLine() + else: + self.advanceColumn() + + if self.consumer() is not None: + tok = await self.consumer().feed(char) + if tok is not None: + self.setConsumer(None) + if tok[0] is not None: + await yield_(self.Token(*tok)) + return + + try: + if char is EOF: + if self._state == 0: + self.restartLexer() + await yield_(EOF) + return + self._maxPos = max(self._maxPos, max(pos[0] for regex, callback, defaultType, pos in self._currentState)) + if self._maxPos == 0 and self._currentMatch: + raise LexerError(self._currentMatch[0][0], *self._currentMatch[0][1]) + self._matches.extend([(pos[0], callback) for regex, callback, defaultType, pos in self._currentState if pos[0] == self._maxPos]) + self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos] + else: + if self._state == 0 and self.ignore(char): + return + self._state = 1 + + newState = list() + for regex, callback, defaultType, pos in self._currentState: + try: + if regex.feed(char): + pos[0] = len(self._currentMatch) + 1 + except DeadState: + if pos[0]: + self._matches.append((pos[0], callback)) + self._maxPos = max(self._maxPos, pos[0]) + else: + newState.append((regex, callback, defaultType, pos)) + + if all([regex.isDeadEnd() for regex, callback, defaultType, pos in newState]): + for regex, callback, defaultType, pos in newState: + self._matches.append((len(self._currentMatch) + 1, callback)) + self._maxPos = max(self._maxPos, len(self._currentMatch) + 1) + newState = list() + + self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos] + self._currentState = newState + + self._currentMatch.append((char, self.position() if charPos is None else charPos)) + if self._currentState: + return + + if self._maxPos == 0: + raise LexerError(char, *self.position()) + except LexerError: + self.restartLexer() + raise + + tok = self._finalizeMatch() + if tok is not None: + await yield_(tok) + + if char is EOF: + self.restartLexer() + await yield_(EOF) diff -Nru python-ptk-1.3.2/ptk/async_parser.py python-ptk-1.3.5/ptk/async_parser.py --- python-ptk-1.3.2/ptk/async_parser.py 2016-01-03 13:09:42.000000000 +0000 +++ python-ptk-1.3.5/ptk/async_parser.py 2018-04-22 13:37:04.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt # XXXTODO: when pylint supports async, remove this... 
@@ -81,6 +81,7 @@ except _Accept as exc: self._restartParser() await self.asyncNewSentence(exc.result) + return exc.result async def asyncNewSentence(self, result): """ @@ -89,8 +90,8 @@ raise NotImplementedError @classmethod - def _createProductionParser(cls, name, priority): - return AsyncProductionParser(asyncCallbackByName(name), priority, cls) + def _createProductionParser(cls, name, priority, attrs): + return AsyncProductionParser(asyncCallbackByName(name), priority, cls, attrs) @classmethod def _createShiftAction(cls, state): diff -Nru python-ptk-1.3.2/ptk/deferred_lexer.py python-ptk-1.3.5/ptk/deferred_lexer.py --- python-ptk-1.3.2/ptk/deferred_lexer.py 2016-01-17 14:38:42.000000000 +0000 +++ python-ptk-1.3.5/ptk/deferred_lexer.py 2018-04-22 13:37:11.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt from ptk.lexer import ProgressiveLexer, token, EOF, LexerError # pylint: disable=W0611 diff -Nru python-ptk-1.3.2/ptk/deferred_parser.py python-ptk-1.3.5/ptk/deferred_parser.py --- python-ptk-1.3.2/ptk/deferred_parser.py 2016-01-17 14:38:24.000000000 +0000 +++ python-ptk-1.3.5/ptk/deferred_parser.py 2018-04-22 13:37:17.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt from ptk.parser import production, LRParser, ProductionParser, leftAssoc, rightAssoc, nonAssoc, ParseError, _Accept, _Reduce, _Shift # pylint: disable=W0611 @@ -103,8 +103,8 @@ raise NotImplementedError @classmethod - def _createProductionParser(cls, name, priority): - return DeferredProductionParser(callbackByName(name), priority, cls) + def _createProductionParser(cls, name, priority, attributes): + return DeferredProductionParser(callbackByName(name), priority, cls, attributes) @classmethod def _createShiftAction(cls, state): diff -Nru python-ptk-1.3.2/ptk/grammar.py python-ptk-1.3.5/ptk/grammar.py --- python-ptk-1.3.2/ptk/grammar.py 2016-01-17 13:44:32.000000000 +0000 +++ python-ptk-1.3.5/ptk/grammar.py 2018-04-22 13:37:24.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt """ @@ -43,10 +43,11 @@ """ Production object """ - def __init__(self, name, callback, priority=None): + def __init__(self, name, callback, priority=None, attributes=None): self.name = name self.callback = callback self.right = list() + self.attributes = attributes or {} self.__priority = priority self.__ids = dict() # position => id @@ -114,20 +115,21 @@ attrs['__productions__'] = list() attrs['__precedence__'] = list() attrs['__prepared__'] = False + attrs['__lrstates__'] = list() klass = super(_GrammarMeta, metacls).__new__(metacls, name, bases, attrs) - for func, string, priority in _PRODREGISTER: - parser = klass._createProductionParser(func.__name__, priority) # pylint: disable=W0212 + for func, string, priority, attrs in _PRODREGISTER: + parser = klass._createProductionParser(func.__name__, priority, attrs) # pylint: disable=W0212 parser.parse(string) return klass finally: _PRODREGISTER = list() -def production(prod, priority=None): +def production(prod, priority=None, **kwargs): def _wrap(func): - if any([func.__name__ == aFunc.__name__ and func != aFunc for aFunc, _, _ in _PRODREGISTER]): + if any([func.__name__ == aFunc.__name__ and func != aFunc for aFunc, _, _, _ in _PRODREGISTER]): raise TypeError('Duplicate production method name "%s"' % func.__name__) - _PRODREGISTER.append((func, prod, 
priority)) + _PRODREGISTER.append((func, prod, priority, kwargs)) return func return _wrap diff -Nru python-ptk-1.3.2/ptk/__init__.py python-ptk-1.3.5/ptk/__init__.py --- python-ptk-1.3.2/ptk/__init__.py 2015-12-13 12:30:37.000000000 +0000 +++ python-ptk-1.3.5/ptk/__init__.py 2018-04-22 13:36:51.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt from .meta import version, version_info diff -Nru python-ptk-1.3.2/ptk/lexer.py python-ptk-1.3.5/ptk/lexer.py --- python-ptk-1.3.2/ptk/lexer.py 2016-06-26 15:41:55.000000000 +0000 +++ python-ptk-1.3.5/ptk/lexer.py 2018-04-22 13:37:30.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt import six @@ -38,6 +38,12 @@ return _wrap +class SkipToken(Exception): + """ + Raise this from your consumer to ignore the token. + """ + + class LexerError(Exception): """ Unrecognized token in input @@ -98,29 +104,29 @@ def restartLexer(self, resetPos=True): if resetPos: - self.__pos = _LexerPosition(0, 1) + self._pos = _LexerPosition(0, 1) self._input = list() - self.__consumer = None + self._consumer = None def position(self): """ :return: The current position in stream as a 2-tuple (column, line). """ - return self.__pos + return self._pos def advanceColumn(self, count=1): """ Advances the current position by *count* columns. """ - col, row = self.__pos - self.__pos = _LexerPosition(col + count, row) + col, row = self._pos + self._pos = _LexerPosition(col + count, row) def advanceLine(self, count=1): """ Advances the current position by *count* lines. """ - _, row = self.__pos - self.__pos = _LexerPosition(0, row + count) + _, row = self._pos + self._pos = _LexerPosition(0, row + count) @staticmethod def ignore(char): @@ -131,7 +137,7 @@ :param char: The character to test :return: True if *char* should be ignored """ - return char in [six.b(' '), six.u(' '), six.b('\t'), six.u('\t')] + return char in [six.b(' ')[0], six.u(' '), six.b('\t')[0], six.u('\t')] def setConsumer(self, consumer): """ @@ -166,11 +172,14 @@ self.value.write(char) self.state = 0 self.setConsumer(CString()) + + You can also raise SkipToken instead of returning a token if it + is to be ignored (comments). """ - self.__consumer = consumer + self._consumer = consumer def consumer(self): - return self.__consumer + return self._consumer def parse(self, string): # pragma: no cover """ @@ -217,20 +226,19 @@ tokenize whole strings. 
""" def __init__(self): - self.__regexes = list() + self._regexes = list() for rx, callback, defaultType in self._allTokens()[1]: if six.PY2 and isinstance(rx, str) or six.PY3 and isinstance(rx, bytes): crx = re.compile(six.b('^') + rx) else: crx = re.compile(six.u('^') + rx) - self.__regexes.append((crx, callback, defaultType)) + self._regexes.append((crx, callback, defaultType)) super(ReLexer, self).__init__() - def parse(self, string): - pos = 0 + def _parse(self, string, pos): while pos < len(string): char = string[pos] - if char == '\n': + if char in (six.u('\n'), six.b('\n')[0]): self.advanceLine() else: self.advanceColumn() @@ -238,20 +246,28 @@ if self.ignore(char): pos += 1 continue - pos = self.__findMatch(string, pos) + pos = self._findMatch(string, pos) else: - tok = self.consumer().feed(char) - if tok is not None: + try: + tok = self.consumer().feed(char) + except SkipToken: self.setConsumer(None) - if tok[0] is not None: - self.newToken(self.Token(*tok)) + else: + if tok is not None: + self.setConsumer(None) + if tok[0] is not None: + self.newToken(self.Token(*tok)) pos += 1 - self.newToken(EOF) + return pos - def __findMatch(self, string, pos): + def parse(self, string): + self._parse(string, 0) + return self.newToken(EOF) + + def _findMatch(self, string, pos): match = None matchlen = 0 - for rx, callback, defaultType in self.__regexes: + for rx, callback, defaultType in self._regexes: mtc = rx.match(string[pos:]) if mtc: value = mtc.group(0) @@ -284,17 +300,15 @@ This is **slow as hell**. """ def restartLexer(self, resetPos=True): - self.__currentState = [(buildRegex(rx).start(), callback, defaultType, [0]) for rx, callback, defaultType in self._allTokens()[1]] - self.__currentMatch = list() - self.__matches = list() - self.__maxPos = 0 - self.__state = 0 + self._currentState = [(buildRegex(rx).start(), callback, defaultType, [0]) for rx, callback, defaultType in self._allTokens()[1]] + self._currentMatch = list() + self._matches = list() + self._maxPos = 0 + self._state = 0 self._input = list() super(ProgressiveLexer, self).restartLexer(resetPos=resetPos) def parse(self, string): - if six.PY3 and isinstance(string, bytes): - string = [chr(c).encode('ascii') for c in string] for char in string: self.feed(char) self.feed(EOF) @@ -311,67 +325,71 @@ self.newToken(tok) def _feed(self, char, charPos): # pylint: disable=R0912,R0915 - if char == '\n': + if char in (six.u('\n'), six.b('\n')[0]): self.advanceLine() else: self.advanceColumn() if self.consumer() is not None: - tok = self.consumer().feed(char) - if tok is not None: + try: + tok = self.consumer().feed(char) + except SkipToken: self.setConsumer(None) - if tok[0] is not None: - yield self.Token(*tok) + else: + if tok is not None: + self.setConsumer(None) + if tok[0] is not None: + yield self.Token(*tok) return try: if char is EOF: - if self.__state == 0: + if self._state == 0: self.restartLexer() yield EOF return - self.__maxPos = max(self.__maxPos, max(pos[0] for regex, callback, defaultType, pos in self.__currentState)) - if self.__maxPos == 0 and self.__currentMatch: - raise LexerError(self.__currentMatch[0][0], *self.__currentMatch[0][1]) - self.__matches.extend([(pos[0], callback) for regex, callback, defaultType, pos in self.__currentState if pos[0] == self.__maxPos]) - self.__matches = [(pos, callback) for pos, callback in self.__matches if pos == self.__maxPos] + self._maxPos = max(self._maxPos, max(pos[0] for regex, callback, defaultType, pos in self._currentState)) + if self._maxPos == 0 and self._currentMatch: 
+ raise LexerError(self._currentMatch[0][0], *self._currentMatch[0][1]) + self._matches.extend([(pos[0], callback) for regex, callback, defaultType, pos in self._currentState if pos[0] == self._maxPos]) + self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos] else: - if self.__state == 0 and self.ignore(char): + if self._state == 0 and self.ignore(char): return - self.__state = 1 + self._state = 1 newState = list() - for regex, callback, defaultType, pos in self.__currentState: + for regex, callback, defaultType, pos in self._currentState: try: if regex.feed(char): - pos[0] = len(self.__currentMatch) + 1 + pos[0] = len(self._currentMatch) + 1 except DeadState: if pos[0]: - self.__matches.append((pos[0], callback)) - self.__maxPos = max(self.__maxPos, pos[0]) + self._matches.append((pos[0], callback)) + self._maxPos = max(self._maxPos, pos[0]) else: newState.append((regex, callback, defaultType, pos)) if all([regex.isDeadEnd() for regex, callback, defaultType, pos in newState]): for regex, callback, defaultType, pos in newState: - self.__matches.append((len(self.__currentMatch) + 1, callback)) - self.__maxPos = max(self.__maxPos, len(self.__currentMatch) + 1) + self._matches.append((len(self._currentMatch) + 1, callback)) + self._maxPos = max(self._maxPos, len(self._currentMatch) + 1) newState = list() - self.__matches = [(pos, callback) for pos, callback in self.__matches if pos == self.__maxPos] - self.__currentState = newState + self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos] + self._currentState = newState - self.__currentMatch.append((char, self.position() if charPos is None else charPos)) - if self.__currentState: + self._currentMatch.append((char, self.position() if charPos is None else charPos)) + if self._currentState: return - if self.__maxPos == 0: + if self._maxPos == 0: raise LexerError(char, *self.position()) except LexerError: self.restartLexer() raise - tok = self.__finalizeMatch() + tok = self._finalizeMatch() if tok is not None: yield tok @@ -379,11 +397,16 @@ self.restartLexer() yield EOF - def __finalizeMatch(self): + def _finalizeMatch(self): # First declared token method - matches = set([callback for _, callback in self.__matches]) - match = type(self.__currentMatch[0][0])().join([char for char, pos in self.__currentMatch[:self.__maxPos]]) # byte or unicode - remain = self.__currentMatch[self.__maxPos:] + matches = set([callback for _, callback in self._matches]) + if isinstance(self._currentMatch[0][0], six.text_type): + sep = six.u('') + else: + sep = six.b('') + match = sep.join([(bytes([char]) if six.PY3 and isinstance(char, int) else char) \ + for char, pos in self._currentMatch[:self._maxPos]]) # byte or unicode + remain = self._currentMatch[self._maxPos:] self.restartLexer(False) self._input.extend(remain) for _, callback, defaultType in self._allTokens()[1]: diff -Nru python-ptk-1.3.2/ptk/meta.py python-ptk-1.3.5/ptk/meta.py --- python-ptk-1.3.2/ptk/meta.py 2016-06-26 15:43:12.000000000 +0000 +++ python-ptk-1.3.5/ptk/meta.py 2018-04-22 13:48:29.000000000 +0000 @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt import six class PackageInfo(object): - version = six.u('1.3.2') + version = six.u('1.3.5') version_info = map(int, version.split(six.u('.'))) project_name = six.u('ptk') diff -Nru python-ptk-1.3.2/ptk/parser.py python-ptk-1.3.5/ptk/parser.py --- python-ptk-1.3.2/ptk/parser.py 2016-01-17 14:25:47.000000000 
+0000 +++ python-ptk-1.3.5/ptk/parser.py 2018-04-22 13:37:44.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt import six @@ -13,7 +13,7 @@ from ptk.grammar import Grammar, Production, GrammarError # production is only imported so that client code doesn't have to import it from grammar from ptk.grammar import production # pylint: disable=W0611 -from ptk.utils import Singleton, callbackByName +from ptk.utils import Singleton, callbackByName, memoize class ParseError(Exception): @@ -24,7 +24,7 @@ """ def __init__(self, grammar, tok, state): self.token = tok - super(ParseError, self).__init__(six.u('Unexpected token "%s" in state "%s"') % (tok.value, sorted(state))) + super(ParseError, self).__init__(six.u('Unexpected token "%s" (%s) in state "%s"') % (tok.value, tok.type, sorted(state))) self._expecting = set() for terminal in grammar.tokenTypes(): @@ -95,16 +95,15 @@ @functools.total_ordering class _Item(object): + __slots__ = ('production', 'dot', 'terminal', 'index', 'shouldReduce', 'expecting') + def __init__(self, prod, dot, terminal): self.production = prod self.dot = dot self.terminal = terminal - - def shouldReduce(self): - """ - Returns True if the dot is in last position - """ - return self.dot == len(self.production.right) + self.index = None + self.shouldReduce = self.dot == len(self.production.right) + self.expecting = None if self.shouldReduce else self.production.right[self.dot] def next(self): """ @@ -184,6 +183,9 @@ super(LRParser, self).__init__() self._restartParser() + def rstack(self): + return reversed(self.__stack) + def newToken(self, tok): try: for action, stack in self._processToken(tok): @@ -191,7 +193,11 @@ break except _Accept as exc: self._restartParser() - self.newSentence(exc.result) + return self.newSentence(exc.result) + + def currentLRState(self): + for item in self.__stack[-1].state: + return item.index def _processToken(self, tok): while True: @@ -209,8 +215,8 @@ raise NotImplementedError @classmethod - def _createProductionParser(cls, name, priority): - return ProductionParser(callbackByName(name), priority, cls) + def _createProductionParser(cls, name, priority, attrs): + return ProductionParser(callbackByName(name), priority, cls, attrs) @classmethod def _createReduceAction(cls, item): @@ -264,12 +270,14 @@ logger.debug('State %d', index) for item in sorted(state): logger.debug(' %s', item) + item.index = index + cls.__lrstates__.append(sorted(state)) logger.info('%d states.', len(states)) @classmethod def __computeStates(cls, start): - allSyms = cls.tokenTypes() | cls.nonterminals() - goto = dict() + allSyms = list(cls.tokenTypes() | cls.nonterminals()) + goto = list() cls._startState = frozenset([_Item(start, 0, EOF)]) states = set([cls._startState]) stack = [cls._startState] @@ -278,31 +286,28 @@ stateClosure = cls.__itemSetClosure(state) for symbol in allSyms: # Compute goto(symbol, state) - nextState = set() - for item in stateClosure: - if not item.shouldReduce() and item.production.right[item.dot] == symbol: - nextState.add(item.next()) + nextState = frozenset([item.next() for item in stateClosure if item.expecting == symbol]) if nextState: - nextState = frozenset(nextState) - goto[(state, symbol)] = nextState + goto.append(((state, symbol), nextState)) if nextState not in states: states.add(nextState) stack.append(nextState) - return states, goto + return states, dict(goto) @classmethod def __computeActions(cls, states, goto): cls.__actions__ = dict() reachable 
= set() + tokenTypes = cls.tokenTypes() for state in states: for item in cls.__itemSetClosure(state): - if item.shouldReduce(): + if item.shouldReduce: action = cls._createReduceAction(item) reachable.add(item.production.name) cls.__addReduceAction(state, item.terminal, action) else: symbol = item.production.right[item.dot] - if symbol in cls.tokenTypes(): + if symbol in tokenTypes: cls.__addShiftAction(state, symbol, cls._createShiftAction(goto[(state, symbol)])) return reachable @@ -396,11 +401,12 @@ self.__stack = [_StackItem(self._startState, None)] @classmethod + @memoize def __itemSetClosure(cls, items): result = set(items) while True: prev = set(result) - for item in [item for item in result if not item.shouldReduce()]: + for item in [item for item in result if not item.shouldReduce]: symbol = item.production.right[item.dot] if symbol not in cls.tokenTypes(): terminals = cls.first(*tuple(item.production.right[item.dot + 1:] + [item.terminal])) @@ -414,10 +420,11 @@ class ProductionParser(LRParser, ProgressiveLexer): # pylint: disable=R0904 # pylint: disable=C0111,C0103,R0201 - def __init__(self, callback, priority, grammarClass): # pylint: disable=R0915 + def __init__(self, callback, priority, grammarClass, attributes): # pylint: disable=R0915 self.callback = callback self.priority = priority self.grammarClass = grammarClass + self.attributes = attributes super(ProductionParser, self).__init__() @@ -425,6 +432,11 @@ def prepare(cls, **kwargs): # pylint: disable=R0915 # Obviously cannot use @production here + # When mixing async and sync parsers in the same program this may be called twice, + # because AsyncProductionParser inherits from ProductionParser + if cls.productions(): + return + # DECL -> identifier "->" PRODS prod = Production('DECL', cls.DECL) prod.addSymbol('identifier', 'name') @@ -669,7 +681,7 @@ def P2(self): # 'name' is replaced in newSentence() - return [Production(None, self.callback, priority=self.priority)] + return [Production(None, self.callback, priority=self.priority, attributes=self.attributes)] def SYMNAME1(self, identifier): return identifier diff -Nru python-ptk-1.3.2/ptk/regex.py python-ptk-1.3.5/ptk/regex.py --- python-ptk-1.3.2/ptk/regex.py 2015-12-26 14:05:31.000000000 +0000 +++ python-ptk-1.3.5/ptk/regex.py 2018-04-22 13:37:50.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt """ @@ -11,6 +11,13 @@ import re import collections + +def intValue(char): + if six.PY3 and isinstance(char, int): # Byte + return int(bytes([char])) + return int(char) + + #=============================================================================== # Regex objects @@ -183,10 +190,6 @@ """ Match a whole string """ - - if six.PY3 and isinstance(string, bytes): - string = [chr(c).encode('ascii') for c in string] - self.start() try: for char in string: @@ -229,7 +232,7 @@ """The .""" def __contains__(self, char): - return char not in [six.u('\n'), six.b('\n')] + return char not in [six.u('\n'), six.b('\n')[0]] def __eq__(self, other): return isinstance(other, AnyCharacterClass) @@ -257,7 +260,7 @@ return self is other # Because of cache def __contains__(self, char): - return self._rx.match(char) is not None + return self._rx.match(bytes([char]) if six.PY3 and isinstance(char, int) else char) is not None class LitteralCharacterClass(CharacterClass): # pylint: disable=R0903 @@ -297,11 +300,7 @@ Token = collections.namedtuple('Token', ['type', 'value']) def __init__(self, regex): - if six.PY3 and 
isinstance(regex, bytes): - self._stack = [chr(c).encode('ascii') for c in reversed(regex)] - else: - self._stack = list(reversed(regex)) - + self._stack = list(reversed(regex)) self._state = 0 self._currentClass = None self._exponentValue = 0 @@ -313,7 +312,7 @@ self.bbackslash = '\\' else: self.ubackslash = '\\' - self.bbackslash = '\\'.encode('ascii') + self.bbackslash = '\\'.encode('ascii')[0] def tokens(self): """ @@ -342,30 +341,30 @@ def _state0(self, char, tokenList): # Normal state - if char in [six.u('*'), six.b('*')]: + if char in [six.u('*'), six.b('*')[0]]: tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(0, None))) - elif char in [six.u('+'), six.b('+')]: + elif char in [six.u('+'), six.b('+')[0]]: tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(1, None))) - elif char in [six.u('.'), six.b('.')]: + elif char in [six.u('.'), six.b('.')[0]]: tokenList.append(self.Token(self.TOK_CLASS, AnyCharacterClass())) - elif char in [six.u('('), six.b('(')]: + elif char in [six.u('('), six.b('(')[0]]: tokenList.append(self.Token(self.TOK_LPAREN, char)) - elif char in [six.u(')'), six.b(')')]: + elif char in [six.u(')'), six.b(')')[0]]: tokenList.append(self.Token(self.TOK_RPAREN, char)) - elif char in [six.u('|'), six.b('|')]: + elif char in [six.u('|'), six.b('|')[0]]: tokenList.append(self.Token(self.TOK_UNION, char)) elif char == six.u('['): self._currentClass = six.StringIO() self._currentClass.write(char) return 2 - elif char == six.b('['): + elif char == six.b('[')[0]: self._currentClass = six.BytesIO() - self._currentClass.write(char) + self._currentClass.write(bytes([char]) if six.PY3 else char) return 2 - elif char in [six.u('{'), six.b('{')]: + elif char in [six.u('{'), six.b('{')[0]]: return 9 - elif char in [six.u(']'), six.b(']'), six.u('}'), six.b('}')]: - raise TokenizeError('Unexpected token "%s"' % char) + elif char in [six.u(']'), six.b(']')[0], six.u('}'), six.b('}')[0]]: + raise TokenizeError('Unexpected token "%s"' % str(char)) elif char in [self.ubackslash, self.bbackslash]: return 1 else: @@ -375,16 +374,20 @@ # After a "\" in normal state if char in [six.u('d'), six.u('s'), six.u('w'), six.u('D'), six.u('S'), six.u('W')]: tokenList.append(self.Token(self.TOK_CLASS, RegexCharacterClass(self.ubackslash + char))) - elif char in [six.b('d'), six.b('s'), six.b('w'), six.b('D'), six.b('S'), six.b('W')]: + elif char in [six.b('d')[0], six.b('s')[0], six.b('w')[0], six.b('D')[0], six.b('S')[0], six.b('W')[0]]: tokenList.append(self.Token(self.TOK_CLASS, RegexCharacterClass(self.bbackslash + char))) + elif char == six.u('r'): + tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.u('\r')))) elif char == six.u('n'): tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.u('\n')))) elif char == six.u('t'): tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.u('\t')))) - elif char == six.b('n'): - tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.b('\n')))) - elif char == six.b('t'): - tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.b('\t')))) + elif char == six.b('r')[0]: + tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.b('\r')[0]))) + elif char == six.b('n')[0]: + tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.b('\n')[0]))) + elif char == six.b('t')[0]: + tokenList.append(self.Token(self.TOK_CLASS, LitteralCharacterClass(six.b('\t')[0]))) else: tokenList.append(self.Token(self.TOK_CLASS, 
LitteralCharacterClass(char))) return 0 @@ -395,64 +398,72 @@ # In character class if char in [self.ubackslash, self.bbackslash]: return 3 - if char in [six.u(']'), six.b(']')]: - self._currentClass.write(char) + if char in [six.u(']'), six.b(']')[0]]: + self._currentClass.write(bytes([char]) if six.PY3 and isinstance(char, int) else char) tokenList.append(self.Token(self.TOK_CLASS, RegexCharacterClass(self._currentClass.getvalue()))) self._currentClass = None return 0 - self._currentClass.write(char) + self._currentClass.write(bytes([char]) if six.PY3 and isinstance(char, int) else char) def _state3(self, char, tokenList): # pylint: disable=W0613 # After "\" in character class - if six.PY2 and isinstance(char, str): - self._currentClass.write(self.bbackslash + char) + if (six.PY2 and isinstance(char, str)) or (six.PY3 and isinstance(char, int)): + self._currentClass.write(bytes([self.bbackslash])) else: - self._currentClass.write(self.ubackslash + char) + self._currentClass.write(self.ubackslash) + self._currentClass.write(bytes([char]) if six.PY3 and isinstance(char, int) else char) return 2 # Exponent def _state9(self, char, tokenList): # pylint: disable=W0613 # Start of exponent - if not char.isdigit(): + try: + self._exponentValue = intValue(char) + except ValueError: raise InvalidExponentError('Exponent not starting with a number') - self._exponentValue = int(char) return 10 def _state10(self, char, tokenList): # In exponent, computing start value - if char in [six.u('-'), six.b('-')]: + if char in [six.u('-'), six.b('-')[0]]: self._startExponent = self._exponentValue return 11 - elif char in [six.u('}'), six.b('}')]: + elif char in [six.u('}'), six.b('}')[0]]: tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(self._exponentValue, self._exponentValue))) return 0 - elif char.isdigit(): - self._exponentValue *= 10 - self._exponentValue += int(char) else: - raise InvalidExponentError('Invalid character "%s"' % char) + try: + v = intValue(char) + except ValueError: + raise InvalidExponentError('Invalid character "%s"' % char) + self._exponentValue *= 10 + self._exponentValue += v def _state11(self, char, tokenList): # pylint: disable=W0613 # In exponent, expecting second term of interval - if char in [six.u('}'), six.b('}')]: + if char in [six.u('}'), six.b('}')[0]]: raise InvalidExponentError('Missing range end') - if not char.isdigit(): + try: + v = intValue(char) + except ValueError: raise InvalidExponentError('Invalid character "%s"' % char) - self._exponentValue = int(char) + self._exponentValue = v return 12 def _state12(self, char, tokenList): # In exponent, computing end value - if char in [six.u('}'), six.b('}')]: + if char in [six.u('}'), six.b('}')[0]]: if self._startExponent > self._exponentValue: raise InvalidExponentError('Invalid exponent range %d-%d' % (self._startExponent, self._exponentValue)) tokenList.append(self.Token(self.TOK_EXPONENT, ExponentToken(self._startExponent, self._exponentValue))) return 0 - if not char.isdigit(): + try: + v = intValue(char) + except ValueError: raise InvalidExponentError('Invalid character "%s"' % char) self._exponentValue *= 10 - self._exponentValue += int(char) + self._exponentValue += v #=============================================================================== # Parsing @@ -475,7 +486,7 @@ tokens = list(tokens) expr, pos = self._parse_E1(tokens, 0) if len(tokens) != pos: - raise RegexParseError('Unexpected token "%s"' % tokens[pos].value) + raise RegexParseError('Unexpected token "%s"' % str(tokens[pos].value)) 
return expr def _parse_E1(self, tokens, pos): @@ -523,7 +534,7 @@ return expr, pos + 1 elif tokens[pos].type == RegexTokenizer.TOK_CLASS: return self.klass(tokens[pos].value), pos + 1 - raise RegexParseError('Unexpected token "%s"' % tokens[pos].value) + raise RegexParseError('Unexpected token "%s"' % str(tokens[pos].value)) # Delegate diff -Nru python-ptk-1.3.2/ptk/utils.py python-ptk-1.3.5/ptk/utils.py --- python-ptk-1.3.2/ptk/utils.py 2016-01-17 12:52:01.000000000 +0000 +++ python-ptk-1.3.5/ptk/utils.py 2018-04-22 13:37:54.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt """ diff -Nru python-ptk-1.3.2/README.rst.in python-ptk-1.3.5/README.rst.in --- python-ptk-1.3.2/README.rst.in 2016-06-26 15:41:55.000000000 +0000 +++ python-ptk-1.3.5/README.rst.in 2018-04-22 13:35:53.000000000 +0000 @@ -2,7 +2,7 @@ Parser Toolkit ============== -PTK - (c) Jérôme Laheurte 2015 +PTK - (c) Jérôme Laheurte 2015-2018 .. contents:: **Table of contents** @@ -134,11 +134,25 @@ API documentation ----------------- -The full documentation is hosted `here `_. +The full documentation is hosted `here `_. Changelog --------- +Version 1.3.5: + +- Update copyright notices +- Fix some packaging issues + +Version 1.3.4: + +- Added SkipToken for consumers + +Version 1.3.3: + +- Fix a number of problems when working with bytes in Python 3 +- One couldn't mix asynchronous and synchronous parsers in the same program + Version 1.3.2: - Fix Python regular expression based lexer (use match instead of search) diff -Nru python-ptk-1.3.2/setup.py python-ptk-1.3.5/setup.py --- python-ptk-1.3.2/setup.py 2015-12-20 13:27:05.000000000 +0000 +++ python-ptk-1.3.5/setup.py 2018-04-22 13:38:01.000000000 +0000 @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -# (c) Jérôme Laheurte 2015 +# (c) Jérôme Laheurte 2015-2018 # See LICENSE.txt import six diff -Nru python-ptk-1.3.2/tests/test_deferred_lexer.py python-ptk-1.3.5/tests/test_deferred_lexer.py --- python-ptk-1.3.2/tests/test_deferred_lexer.py 2016-01-17 14:42:14.000000000 +0000 +++ python-ptk-1.3.5/tests/test_deferred_lexer.py 2017-01-29 11:48:40.000000000 +0000 @@ -26,8 +26,6 @@ return succeed(None) def doLex(self, inputString): - if six.PY3 and isinstance(inputString, bytes): - inputString = [chr(c).encode('ascii') for c in inputString] for char in inputString: d = self.lexer.deferFeed(char) self.assertTrue(d.called) diff -Nru python-ptk-1.3.2/tests/test_grammar.py python-ptk-1.3.5/tests/test_grammar.py --- python-ptk-1.3.2/tests/test_grammar.py 2015-12-27 17:23:24.000000000 +0000 +++ python-ptk-1.3.5/tests/test_grammar.py 2017-03-29 11:26:01.000000000 +0000 @@ -24,15 +24,15 @@ return cls.tokens @classmethod - def _createProductionParser(cls, name, priority): - return ProductionParser(callbackByName(name), priority, cls) + def _createProductionParser(cls, name, priority, attrs): + return ProductionParser(callbackByName(name), priority, cls, attrs) class ProductionParserTestCase(unittest.TestCase): def setUp(self): class TestGrammar(GrammarUnderTest): tokens = set() - self.parser = ProductionParser(None, None, TestGrammar) + self.parser = ProductionParser(None, None, TestGrammar, dict()) self.grammarClass = TestGrammar def assertHasProduction(self, prods, prod): diff -Nru python-ptk-1.3.2/tests/test_lexer.py python-ptk-1.3.5/tests/test_lexer.py --- python-ptk-1.3.2/tests/test_lexer.py 2016-01-17 13:28:54.000000000 +0000 +++ python-ptk-1.3.5/tests/test_lexer.py 2017-01-29 11:48:40.000000000 +0000 @@ -147,7 
+147,7 @@ self.testCase.assertTrue(tok.type is None) def newToken(self, tok): pass - + self.lexer = TestedLexer(self) def test_none(self): @@ -179,10 +179,7 @@ def test_byte_regex_gives_byte_token_value(self): tok, = self.doLex(six.b('foo')) - if six.PY2: - self.assertTrue(isinstance(tok.value, str)) - else: - self.assertTrue(isinstance(tok.value, bytes)) + self.assertTrue(isinstance(tok.value, six.binary_type)) class ProgressiveLexerByteTestCase(LexerByteTestCaseMixin, ProgressiveLexerTestCase): diff -Nru python-ptk-1.3.2/tests/test_regex_parser.py python-ptk-1.3.5/tests/test_regex_parser.py --- python-ptk-1.3.2/tests/test_regex_parser.py 2015-12-13 12:32:12.000000000 +0000 +++ python-ptk-1.3.5/tests/test_regex_parser.py 2017-01-29 12:28:05.000000000 +0000 @@ -100,6 +100,10 @@ self.assertTrue(self._match(rx, 'abab')) self.assertFalse(self._match(rx, 'abb')) + def test_crlf(self): + rx = self._parse(r'\r\n') + self.assertTrue(self._match(rx, '\r\n')) + def test_extra_tokens(self): try: rx = self._parse('ab(') diff -Nru python-ptk-1.3.2/tests/test_regex.py python-ptk-1.3.5/tests/test_regex.py --- python-ptk-1.3.2/tests/test_regex.py 2015-12-13 12:32:12.000000000 +0000 +++ python-ptk-1.3.5/tests/test_regex.py 2017-01-29 11:48:40.000000000 +0000 @@ -6,70 +6,80 @@ from ptk.regex import RegularExpression, LitteralCharacterClass -class RegexTest(unittest.TestCase): +class RegexTestMixin(object): def test_deadend(self): rx = RegularExpression.concat( - RegularExpression.fromClass(LitteralCharacterClass('<')), - RegularExpression.fromClass(LitteralCharacterClass('=')) + RegularExpression.fromClass(LitteralCharacterClass(self.t('<')[0])), + RegularExpression.fromClass(LitteralCharacterClass(self.t('=')[0])) ) rx.start() - rx.feed('<') + rx.feed(self.t('<')[0]) # byte/char self.assertFalse(rx.isDeadEnd()) def test_newline(self): - rx = RegularExpression.fromClass(LitteralCharacterClass('\n')) - self.assertTrue(rx.match('\n')) + rx = RegularExpression.fromClass(LitteralCharacterClass(self.t('\n')[0])) + self.assertTrue(rx.match(self.t('\n'))) def test_class(self): - rx = RegularExpression.fromClass(LitteralCharacterClass('a')) - self.assertTrue(rx.match('a')) - self.assertFalse(rx.match('b')) + rx = RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0])) + self.assertTrue(rx.match(self.t('a'))) + self.assertFalse(rx.match(self.t('b'))) def test_concat(self): rx = RegularExpression.concat( - RegularExpression.fromClass(LitteralCharacterClass('a')), - RegularExpression.fromClass(LitteralCharacterClass('b')), - RegularExpression.fromClass(LitteralCharacterClass('c')) + RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0])), + RegularExpression.fromClass(LitteralCharacterClass(self.t('b')[0])), + RegularExpression.fromClass(LitteralCharacterClass(self.t('c')[0])) ) - self.assertTrue(rx.match('abc')) - self.assertFalse(rx.match('ab')) + self.assertTrue(rx.match(self.t('abc'))) + self.assertFalse(rx.match(self.t('ab'))) def test_union(self): rx = RegularExpression.union( - RegularExpression.fromClass(LitteralCharacterClass('a')), - RegularExpression.fromClass(LitteralCharacterClass('b')), - RegularExpression.fromClass(LitteralCharacterClass('c')) + RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0])), + RegularExpression.fromClass(LitteralCharacterClass(self.t('b')[0])), + RegularExpression.fromClass(LitteralCharacterClass(self.t('c')[0])) ) - self.assertTrue(rx.match('a')) - self.assertTrue(rx.match('b')) - self.assertTrue(rx.match('c')) - 
self.assertFalse(rx.match('d')) + self.assertTrue(rx.match(self.t('a'))) + self.assertTrue(rx.match(self.t('b'))) + self.assertTrue(rx.match(self.t('c'))) + self.assertFalse(rx.match(self.t('d'))) def test_kleene(self): - rx = RegularExpression.kleene(RegularExpression.fromClass(LitteralCharacterClass('a'))) - self.assertTrue(rx.match('')) - self.assertTrue(rx.match('a')) - self.assertTrue(rx.match('aa')) - self.assertFalse(rx.match('ab')) + rx = RegularExpression.kleene(RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0]))) + self.assertTrue(rx.match(self.t(''))) + self.assertTrue(rx.match(self.t('a'))) + self.assertTrue(rx.match(self.t('aa'))) + self.assertFalse(rx.match(self.t('ab'))) def test_exponent(self): - rx = RegularExpression.exponent(RegularExpression.fromClass(LitteralCharacterClass('a')), 2, 3) - self.assertFalse(rx.match('a')) - self.assertTrue(rx.match('aa')) - self.assertTrue(rx.match('aaa')) - self.assertFalse(rx.match('aaaa')) + rx = RegularExpression.exponent(RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0])), 2, 3) + self.assertFalse(rx.match(self.t('a'))) + self.assertTrue(rx.match(self.t('aa'))) + self.assertTrue(rx.match(self.t('aaa'))) + self.assertFalse(rx.match(self.t('aaaa'))) def test_exponent_min(self): - rx = RegularExpression.exponent(RegularExpression.fromClass(LitteralCharacterClass('a')), 2) - self.assertFalse(rx.match('a')) - self.assertTrue(rx.match('aa')) - self.assertTrue(rx.match('aaa')) + rx = RegularExpression.exponent(RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0])), 2) + self.assertFalse(rx.match(self.t('a'))) + self.assertTrue(rx.match(self.t('aa'))) + self.assertTrue(rx.match(self.t('aaa'))) def test_exponent_null(self): - rx = RegularExpression.exponent(RegularExpression.fromClass(LitteralCharacterClass('a')), 0, 1) - self.assertTrue(rx.match('')) - self.assertTrue(rx.match('a')) - self.assertFalse(rx.match('aa')) + rx = RegularExpression.exponent(RegularExpression.fromClass(LitteralCharacterClass(self.t('a')[0])), 0, 1) + self.assertTrue(rx.match(self.t(''))) + self.assertTrue(rx.match(self.t('a'))) + self.assertFalse(rx.match(self.t('aa'))) + + +class UnicodeRegexTest(RegexTestMixin, unittest.TestCase): + def t(self, s): + return s.decode('ascii') if six.PY2 else s + + +class BytesRegexTest(RegexTestMixin, unittest.TestCase): + def t(self, s): + return s.encode('ascii') if six.PY3 else s if __name__ == '__main__': diff -Nru python-ptk-1.3.2/TODO.txt python-ptk-1.3.5/TODO.txt --- python-ptk-1.3.2/TODO.txt 2015-12-25 12:07:33.000000000 +0000 +++ python-ptk-1.3.5/TODO.txt 2017-01-29 11:48:40.000000000 +0000 @@ -1,3 +1,4 @@ * LALR ? Not sure it's worth it. * ProgressiveLexer: subset construction in byte mode * Serialization, maybe + * Include position in input as attribute of token
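
The parser.rst hunk above now states that the asynchronous classes require the async_generator module (packaged as python3-async-generator), and ptk/async_lexer.py gains an asyncParse() helper that feeds a whole string character by character and returns the semantic value of the accepted sentence. A rough sketch of an asynchronous parser in the spirit of the calculator sample referenced by the documentation; the class name, grammar and output handling below are illustrative assumptions, not the upstream sample:

    import asyncio

    from ptk.async_lexer import token, AsyncLexer
    from ptk.async_parser import production, leftAssoc, AsyncLRParser

    @leftAssoc('+')
    class AsyncAdder(AsyncLRParser, AsyncLexer):
        # Token and production methods follow the synchronous API, except that
        # semantic actions are coroutines and may themselves await.
        @token('[0-9]+')
        def number(self, tok):
            tok.value = int(tok.value)

        @production('E -> number<n>')
        async def litteral(self, n):
            return n

        @production('E -> E<left> "+" E<right>')
        async def add(self, left, right):
            return left + right

        async def asyncNewSentence(self, result):
            # Awaited once a full sentence has been accepted.
            print('accepted:', result)

    async def main():
        parser = AsyncAdder()
        # asyncParse() (new in 1.3.5) awaits asyncFeed() for every character
        # and then for EOF, returning the result of the accepted sentence.
        return await parser.asyncParse('1+2+3')

    print(asyncio.get_event_loop().run_until_complete(main()))

Feeding a byte or character stream one element at a time with asyncFeed(), as the documentation describes, works the same way; asyncParse() only wraps that loop.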
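
The SkipToken exception added to ptk/lexer.py above (listed in the upstream changelog as "Added SkipToken for consumers") extends the consumer protocol: feed() may raise SkipToken to make the lexer discard the consumer without emitting any token, as an alternative to returning a (type, value) pair. A minimal sketch of a consumer that drops line comments this way; LineComment and CommentLexer are illustrative names, not upstream code:

    from ptk.lexer import ReLexer, token, EOF, SkipToken

    class LineComment(object):
        """Consumer swallowing everything up to the end of the line."""
        def feed(self, char):
            # Returning None keeps consuming; raising SkipToken makes the lexer
            # clear the consumer without emitting any token for the comment.
            if char == '\n':
                raise SkipToken()

    class CommentLexer(ReLexer):
        @token('[a-zA-Z_]+')
        def word(self, tok):
            pass

        @token('#')
        def comment(self, tok):
            # From here on, characters bypass the regular token regexes and go
            # to the consumer until it returns a token or raises SkipToken.
            self.setConsumer(LineComment())

        def newToken(self, tok):
            if tok is not EOF:
                print(tok.type, tok.value)

    CommentLexer().parse('spam # everything here is discarded\neggs')

Per the _parse/_feed code quoted above, returning a pair whose type is None has the same visible effect; SkipToken just makes the intent explicit for consumers that never build a value at all.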