diff -Nru ldif3-3.1.1/CHANGES.rst ldif3-3.2.0/CHANGES.rst
--- ldif3-3.1.1/CHANGES.rst	2015-09-22 06:30:30.000000000 +0000
+++ ldif3-3.2.0/CHANGES.rst	2016-06-03 09:29:17.000000000 +0000
@@ -1,7 +1,24 @@
+3.2.0 (2016-06-03)
+------------------
+
+- Overhauled the unicode support to also support binary data (e.g. images)
+  encoded in LDIF.
+
+  You can now pass an encoding to the parser which will be used to decode
+  values. If decoding fails, a bytestring will be returned. If you pass an
+  encoding of ``None``, the parser will not try to do any conversion and
+  return bytes directly.
+
+  This change should be completely backwards compatible, as the parser now
+  gracefully handles a case where it crashed previously.
+
+  (See `#4 `_)
+
+
 3.1.1 (2015-09-20)
 ------------------
 
-- Allow empty values for attributes. 
+- Allow empty values for attributes.
 
 
 3.1.0 (2015-07-09)
diff -Nru ldif3-3.1.1/debian/changelog ldif3-3.2.0/debian/changelog
--- ldif3-3.1.1/debian/changelog	2015-09-25 15:26:27.000000000 +0000
+++ ldif3-3.2.0/debian/changelog	2016-06-06 12:15:48.000000000 +0000
@@ -1,3 +1,16 @@
+ldif3 (3.2.0-1) unstable; urgency=low
+
+  [ Ondřej Nový ]
+  * Fixed VCS URL (https)
+
+  [ Michael Fladischer ]
+  * New upstream release.
+  * Initialize git-dpm.
+  * Clean up files in ldif3.egg-info to allow two builds in a row.
+  * Bump Standards-Version to 3.9.8.
+
+ -- Michael Fladischer  Mon, 06 Jun 2016 14:15:28 +0200
+
 ldif3 (3.1.1-1) unstable; urgency=low
 
   * New upstream release.
diff -Nru ldif3-3.1.1/debian/clean ldif3-3.2.0/debian/clean
--- ldif3-3.1.1/debian/clean	1970-01-01 00:00:00.000000000 +0000
+++ ldif3-3.2.0/debian/clean	2016-06-06 12:15:48.000000000 +0000
@@ -0,0 +1,4 @@
+ldif3.egg-info/PKG-INFO
+ldif3.egg-info/SOURCES.txt
+ldif3.egg-info/dependency_links.txt
+ldif3.egg-info/top_level.txt
diff -Nru ldif3-3.1.1/debian/control ldif3-3.2.0/debian/control
--- ldif3-3.1.1/debian/control	2015-09-25 15:26:27.000000000 +0000
+++ ldif3-3.2.0/debian/control	2016-06-06 12:15:48.000000000 +0000
@@ -8,9 +8,9 @@
                python-setuptools,
                python3-all,
                python3-setuptools
-Standards-Version: 3.9.6
+Standards-Version: 3.9.8
 Vcs-Browser: https://anonscm.debian.org/cgit/python-modules/packages/ldif3.git
-Vcs-Git: git://anonscm.debian.org/python-modules/packages/ldif3.git
+Vcs-Git: https://anonscm.debian.org/git/python-modules/packages/ldif3.git
 Homepage: https://github.com/xi/ldif3
 X-Python-Version: >= 2.7
 X-Python3-Version: >= 3.4
diff -Nru ldif3-3.1.1/debian/.git-dpm ldif3-3.2.0/debian/.git-dpm
--- ldif3-3.1.1/debian/.git-dpm	1970-01-01 00:00:00.000000000 +0000
+++ ldif3-3.2.0/debian/.git-dpm	2016-06-06 12:15:48.000000000 +0000
@@ -0,0 +1,8 @@
+# see git-dpm(1) from git-dpm package
+e85306cf4ea77a967468e37599f3435b09befbfd
+e85306cf4ea77a967468e37599f3435b09befbfd
+e85306cf4ea77a967468e37599f3435b09befbfd
+e85306cf4ea77a967468e37599f3435b09befbfd
+ldif3_3.2.0.orig.tar.gz
+719d42ed1ee0ff644847c03db04c707e0af66cad
+11571
diff -Nru ldif3-3.1.1/ldif3.py ldif3-3.2.0/ldif3.py
--- ldif3-3.1.1/ldif3.py	2015-09-22 06:30:30.000000000 +0000
+++ ldif3-3.2.0/ldif3.py	2016-06-03 09:29:17.000000000 +0000
@@ -2,16 +2,6 @@
 
 from __future__ import unicode_literals
 
-__version__ = '3.1.1'
-
-__all__ = [
-    # constants
-    'LDIF_PATTERN',
-    # classes
-    'LDIFWriter',
-    'LDIFParser',
-]
-
 import base64
 import re
 import logging
@@ -24,6 +14,16 @@
     from urllib.parse import urlparse
     from urllib.request import urlopen
 
+__version__ = '3.2.0'
+
+__all__ = [
+    # constants
+    'LDIF_PATTERN',
+    # classes
+    'LDIFWriter',
+    'LDIFParser',
+]
+
 log = logging.getLogger('ldif3')
 
 ATTRTYPE_PATTERN = r'[\w;.-]+(;[\w_-]+)*'
@@ -73,14 +73,25 @@
 
     :type line_sep: bytearray
    :param line_sep: line separator
+
+    :type encoding: string
+    :param encoding: Encoding to use for converting values to bytes. Note that
+        the spec requires the dn field to be UTF-8 encoded, so it does not
+        really make sense to use anything else. Default: ``'utf8'``.
     """
 
     def __init__(
-            self, output_file, base64_attrs=[], cols=76, line_sep=b'\n'):
+            self,
+            output_file,
+            base64_attrs=[],
+            cols=76,
+            line_sep=b'\n',
+            encoding='utf8'):
         self._output_file = output_file
         self._base64_attrs = lower(base64_attrs)
         self._cols = cols
         self._line_sep = line_sep
+        self._encoding = encoding
 
         self.records_written = 0  #: number of records that have been written
 
@@ -107,18 +118,21 @@
             self._base64_attrs
         """
         return attr_type.lower() in self._base64_attrs or \
+            isinstance(attr_value, bytes) or \
             UNSAFE_STRING_RE.search(attr_value) is not None
 
     def _unparse_attr(self, attr_type, attr_value):
         """Write a single attribute type/value pair."""
         if self._needs_base64_encoding(attr_type, attr_value):
-            encoded = base64.encodestring(attr_value.encode('utf8'))\
+            if not isinstance(attr_value, bytes):
+                attr_value = attr_value.encode(self._encoding)
+            encoded = base64.encodestring(attr_value)\
                 .replace(b'\n', b'')\
-                .decode('utf8')
+                .decode('ascii')
             line = ':: '.join([attr_type, encoded])
         else:
             line = ': '.join([attr_type, attr_value])
-        self._fold_line(line.encode('utf8'))
+        self._fold_line(line.encode('ascii'))
 
     def _unparse_entry_record(self, entry):
         """
@@ -202,6 +216,13 @@
     :type line_sep: bytearray
     :param line_sep: line separator
 
+    :type encoding: string
+    :param encoding: Encoding to use for converting values to unicode strings.
+        If decoding fails, the raw bytestring will be used instead. You can
+        also pass ``None`` which will skip decoding and always produce
+        bytestrings. Note that this only applies to entry values. ``dn`` and
+        entry keys will always be unicode strings.
+
     :type strict: boolean
     :param strict: If set to ``False``, recoverable parse errors will produce
         log warnings rather than exceptions.
@@ -222,11 +243,13 @@
             ignored_attr_types=[],
             process_url_schemes=[],
             line_sep=b'\n',
+            encoding='utf8',
             strict=True):
         self._input_file = input_file
         self._process_url_schemes = lower(process_url_schemes)
         self._ignored_attr_types = lower(ignored_attr_types)
         self._line_sep = line_sep
+        self._encoding = encoding
         self._strict = strict
 
         self.line_counter = 0  #: number of lines that have been read
@@ -268,7 +291,8 @@
     def _parse_attr(self, line):
         """Parse a single attribute type/value pair."""
         colon_pos = line.index(b':')
-        attr_type = line[0:colon_pos]
+        attr_type = line[0:colon_pos].decode('ascii')
+
         if line[colon_pos:].startswith(b'::'):
             attr_value = base64.decodestring(line[colon_pos + 2:])
         elif line[colon_pos:].startswith(b':<'):
@@ -280,7 +304,15 @@
                 attr_value = urlopen(url.decode('ascii')).read()
         else:
             attr_value = line[colon_pos + 1:].strip()
-        return attr_type.decode('utf8'), attr_value.decode('utf8')
+
+        if attr_type == u'dn':
+            return attr_type, attr_value.decode('utf8')
+        elif self._encoding is not None:
+            try:
+                return attr_type, attr_value.decode(self._encoding)
+            except UnicodeError:
+                pass
+        return attr_type, attr_value
 
     def _error(self, msg):
         if self._strict:
diff -Nru ldif3-3.1.1/README.rst ldif3-3.2.0/README.rst
--- ldif3-3.1.1/README.rst	2015-09-22 06:30:30.000000000 +0000
+++ ldif3-3.2.0/README.rst	2016-06-03 09:29:17.000000000 +0000
@@ -32,11 +32,17 @@
 
 Unicode support
 ---------------
 
-The stream object that is passed to parser or writer must be a byte
-stream. It must use UTF-8 encoding as described in the spec.
+The stream object that is passed to parser or writer must be an ascii byte
+stream.
 
-The parsed objects (``dn`` and the keys and values of ``record``) on the
-other hand are unicode strings.
+The spec allows including arbitrary data in base64 encoding or via URL. There
+is no way of knowing the encoding of this data. To handle this, there are two
+modes:
+
+By default, the ``LDIFParser`` will try to interpret all values as UTF-8 and
+leave only the ones that fail to decode as bytes. But you can also pass an
+``encoding`` of ``None`` to the constructor, in which case the parser will not
+try to do any conversion and return bytes directly.
 
 .. _RFC 2849: https://tools.ietf.org/html/rfc2849
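
As a quick, hedged illustration of the two parser modes described in the README hunk above: the sample entry and attribute values below are made up, and the calls simply mirror the tests further down in this diff.

from io import BytesIO

import ldif3

# One tiny record with a binary attribute; '8PLz' is base64 for b'\xf0\xf2\xf3'.
LDIF = (
    b'dn: cn=Bjorn J Jensen\n'
    b'jpegPhoto:: 8PLz\n'
    b'foo: bar\n'
)

# Default mode: values are decoded as UTF-8 where possible; the jpegPhoto
# value cannot be decoded, so its raw bytes are kept.
for dn, record in ldif3.LDIFParser(BytesIO(LDIF)).parse():
    assert dn == u'cn=Bjorn J Jensen'
    assert record[u'jpegPhoto'] == [b'\xf0\xf2\xf3']
    assert record[u'foo'] == [u'bar']

# encoding=None: no decoding at all, every value stays a bytestring
# (the dn itself is still returned as a unicode string).
for dn, record in ldif3.LDIFParser(BytesIO(LDIF), encoding=None).parse():
    assert record[u'foo'] == [b'bar']
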
diff -Nru ldif3-3.1.1/setup.cfg ldif3-3.2.0/setup.cfg
--- ldif3-3.1.1/setup.cfg	2015-09-22 06:30:30.000000000 +0000
+++ ldif3-3.2.0/setup.cfg	2016-06-03 09:29:17.000000000 +0000
@@ -8,5 +8,5 @@
 cover-html-dir=.cover
 
 [flake8]
-exclude=.git,.tox,.env,build,dist
+exclude=.git,.tox,.env,build,dist,setup.py
 ignore=E127,E128
diff -Nru ldif3-3.1.1/setup.py ldif3-3.2.0/setup.py
--- ldif3-3.1.1/setup.py	2015-09-22 06:30:30.000000000 +0000
+++ ldif3-3.2.0/setup.py	2016-06-03 09:29:17.000000000 +0000
@@ -1,25 +1,24 @@
 #!/usr/bin/env python
 
 import os
-
+import re
 from setuptools import setup
 
-curdir = os.path.dirname(os.path.abspath(__file__))
+DIRNAME = os.path.abspath(os.path.dirname(__file__))
+rel = lambda *parts: os.path.abspath(os.path.join(DIRNAME, *parts))
+README = open(rel('README.rst')).read()
+MAIN = open(rel('ldif3.py')).read()
+VERSION = re.search("__version__ = '([^']+)'", MAIN).group(1)
+NAME = re.search('^"""(.*) - (.*)"""', MAIN).group(1)
+DESCRIPTION = re.search('^"""(.*) - (.*)"""', MAIN).group(2)
 
-with open(os.path.join(curdir, 'ldif3.py')) as fh:
-    for line in fh:
-        if line.startswith('"""'):
-            name, description = line.rstrip().strip('"').split(' - ')
-        elif line.startswith('__version__'):
-            version = line.split('\'')[1]
-            break
 
 
 setup(
-    name=name,
-    version=version,
-    description=description,
-    long_description=open(os.path.join(curdir, 'README.rst')).read(),
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    long_description=README,
     url='https://github.com/xi/ldif3',
     author='Tobias Bengfort',
     author_email='tobias.bengfort@posteo.de',
diff -Nru ldif3-3.1.1/tests.py ldif3-3.2.0/tests.py
--- ldif3-3.1.1/tests.py	2015-09-22 06:30:30.000000000 +0000
+++ ldif3-3.2.0/tests.py	2016-06-03 09:29:17.000000000 +0000
@@ -1,3 +1,5 @@
+# -*- encoding: utf8 -*-
+
 from __future__ import unicode_literals
 
 import unittest
@@ -242,6 +244,30 @@
             self.assertEqual(dn, DNS[i])
             self.assertEqual(record, RECORDS[i])
 
+    def test_parse_binary(self):
+        self.stream = BytesIO(b'dn: cn=Bjorn J Jensen\n'
+            b'jpegPhoto:: 8PLz\nfoo: bar')
+        self.p = ldif3.LDIFParser(self.stream)
+        items = list(self.p.parse())
+        self.assertEqual(items, [(
+            u'cn=Bjorn J Jensen', {
+                u'jpegPhoto': [b'\xf0\xf2\xf3'],
+                u'foo': [u'bar'],
+            }
+        )])
+
+    def test_parse_binary_raw(self):
+        self.stream = BytesIO(b'dn: cn=Bjorn J Jensen\n'
+            b'jpegPhoto:: 8PLz\nfoo: bar')
+        self.p = ldif3.LDIFParser(self.stream, encoding=None)
+        items = list(self.p.parse())
+        self.assertEqual(items, [(
+            'cn=Bjorn J Jensen', {
+                u'jpegPhoto': [b'\xf0\xf2\xf3'],
+                u'foo': [b'bar'],
+            }
+        )])
+
 class TestLDIFParserEmptyAttrValue(unittest.TestCase):
 
     def setUp(self):
@@ -337,3 +363,13 @@
     def test_unparse_fail(self):
         with self.assertRaises(ValueError):
             self.w.unparse(DNS[0], 'foo')
+
+    def test_unparse_binary(self):
+        self.w.unparse(u'cn=Bjorn J Jensen', {u'jpegPhoto': [b'\xf0\xf2\xf3']})
+        value = self.stream.getvalue()
+        self.assertEqual(value, b'dn: cn=Bjorn J Jensen\njpegPhoto:: 8PLz\n\n')
+
+    def test_unparse_unicode_dn(self):
+        self.w.unparse(u'cn=Björn J Jensen', {u'foo': [u'bar']})
+        value = self.stream.getvalue()
+        self.assertEqual(value, b'dn:: Y249QmrDtnJuIEogSmVuc2Vu\nfoo: bar\n\n')
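
A matching writer sketch, again illustrative rather than part of the patch, following test_unparse_binary in the hunk above: a bytes value is accepted directly and base64-encoded on output.

from io import BytesIO

import ldif3

out = BytesIO()
writer = ldif3.LDIFWriter(out)

# A bytes value cannot be written verbatim, so the writer base64-encodes it
# automatically ('jpegPhoto:: 8PLz'), matching the expected output in the test.
writer.unparse(u'cn=Bjorn J Jensen', {u'jpegPhoto': [b'\xf0\xf2\xf3']})
assert out.getvalue() == b'dn: cn=Bjorn J Jensen\njpegPhoto:: 8PLz\n\n'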