diff -Nru python-xopen-0.3.3/debian/changelog python-xopen-0.5.0/debian/changelog --- python-xopen-0.3.3/debian/changelog 2018-04-16 12:53:53.000000000 +0000 +++ python-xopen-0.5.0/debian/changelog 2019-02-06 21:26:09.000000000 +0000 @@ -1,3 +1,23 @@ +python-xopen (0.5.0-2) unstable; urgency=medium + + * Add missing Depends + Closes: #921553 + + -- Andreas Tille Wed, 06 Feb 2019 22:26:09 +0100 + +python-xopen (0.5.0-1) unstable; urgency=medium + + * New upstream version + * debhelper 12 + * Standards-Version: 4.3.0 + * Drop ancient X-Python-Version field + * Point watch file to pypi since download from Github does not lead to + a working source tarball + * Build-Depends: python-setuptools-scm + * Upstream switched to pytest + + -- Andreas Tille Wed, 30 Jan 2019 15:47:06 +0100 + python-xopen (0.3.3-1) unstable; urgency=medium * New upstream version diff -Nru python-xopen-0.3.3/debian/compat python-xopen-0.5.0/debian/compat --- python-xopen-0.3.3/debian/compat 2018-04-16 12:53:53.000000000 +0000 +++ python-xopen-0.5.0/debian/compat 2019-02-06 21:26:09.000000000 +0000 @@ -1 +1 @@ -11 +12 diff -Nru python-xopen-0.3.3/debian/control python-xopen-0.5.0/debian/control --- python-xopen-0.3.3/debian/control 2018-04-16 12:53:53.000000000 +0000 +++ python-xopen-0.5.0/debian/control 2019-02-06 21:26:09.000000000 +0000 @@ -4,27 +4,31 @@ Section: python Testsuite: autopkgtest-pkg-python Priority: optional -Build-Depends: debhelper (>= 11~), +Build-Depends: debhelper (>= 12~), dh-python, python, python-setuptools, python-nose, python-bz2file, + python-setuptools-scm, + python-pytest, python3, python3-setuptools, python3-nose, - python3-bz2file -Standards-Version: 4.1.4 + python3-bz2file, + python3-setuptools-scm, + python3-pytest +Standards-Version: 4.3.0 Vcs-Browser: https://salsa.debian.org/med-team/python-xopen Vcs-Git: https://salsa.debian.org/med-team/python-xopen.git Homepage: https://github.com/marcelm/xopen -X-Python-Version: >= 2.6 -X-Python3-Version: >= 3.2 Package: python-xopen Architecture: all Depends: ${python:Depends}, - ${misc:Depends} + ${misc:Depends}, + python-pkg-resources, + python-bz2file Provides: ${python:Provides} Description: Python module to open compressed files transparently This small Python module provides a xopen function that works like the @@ -45,7 +49,8 @@ Package: python3-xopen Architecture: all Depends: ${python3:Depends}, - ${misc:Depends} + ${misc:Depends}, + python3-pkg-resources Description: Python3 module to open compressed files transparently This small Python3 module provides a xopen function that works like the built-in open function, but can also deal with compressed files. diff -Nru python-xopen-0.3.3/debian/rules python-xopen-0.5.0/debian/rules --- python-xopen-0.3.3/debian/rules 2018-04-16 12:53:53.000000000 +0000 +++ python-xopen-0.5.0/debian/rules 2019-02-06 21:26:09.000000000 +0000 @@ -3,18 +3,5 @@ DH_VERBOSE := 1 export PYBUILD_NAME=xopen -PYTHON2 := $(shell pyversions -r) -PYTHON3 := $(shell py3versions -r) - %: dh $@ --with python2,python3 --buildsystem=pybuild - -override_dh_auto_test: -ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),) - for py in $(PYTHON2) ; do\ - $$py setup.py nosetests -v ; \ - done - for py in $(PYTHON3) ; do\ - $$py setup.py nosetests -v ; \ - done -endif diff -Nru python-xopen-0.3.3/debian/watch python-xopen-0.5.0/debian/watch --- python-xopen-0.3.3/debian/watch 2018-04-16 12:53:53.000000000 +0000 +++ python-xopen-0.5.0/debian/watch 2019-02-06 21:26:09.000000000 +0000 @@ -1,3 +1,6 @@ version=4 -https://github.com/marcelm/xopen/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz) +https://pypi.python.org/simple/xopen .*/xopen-(\d[\d.]+)@ARCHIVE_EXT@#sha256=.* + +# This leads to tarball which does not work +# https://github.com/marcelm/xopen/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz) diff -Nru python-xopen-0.3.3/.editorconfig python-xopen-0.5.0/.editorconfig --- python-xopen-0.3.3/.editorconfig 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/.editorconfig 2019-01-30 12:10:28.000000000 +0000 @@ -0,0 +1,6 @@ +[*.py] +charset=utf-8 +end_of_line=lf +insert_final_newline=true +indent_style=space +indent_size=4 diff -Nru python-xopen-0.3.3/LICENSE python-xopen-0.5.0/LICENSE --- python-xopen-0.3.3/LICENSE 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/LICENSE 2019-01-30 12:10:28.000000000 +0000 @@ -1,4 +1,4 @@ -Copyright (c) 2010-2016 Marcel Martin +Copyright (c) 2010-2019 Marcel Martin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff -Nru python-xopen-0.3.3/PKG-INFO python-xopen-0.5.0/PKG-INFO --- python-xopen-0.3.3/PKG-INFO 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/PKG-INFO 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1,107 @@ +Metadata-Version: 2.1 +Name: xopen +Version: 0.5.0 +Summary: Open compressed files transparently +Home-page: https://github.com/marcelm/xopen/ +Author: Marcel Martin +Author-email: mail@marcelm.net +License: MIT +Description: .. image:: https://travis-ci.org/marcelm/xopen.svg?branch=master + :target: https://travis-ci.org/marcelm/xopen + + .. image:: https://img.shields.io/pypi/v/xopen.svg?branch=master + :target: https://pypi.python.org/pypi/xopen + + ===== + xopen + ===== + + This small Python module provides an ``xopen`` function that works like the + built-in ``open`` function, but can also deal with compressed files. + Supported compression formats are gzip, bzip2 and xz. They are automatically + recognized by their file extensions `.gz`, `.bz2` or `.xz`. + + The focus is on being as efficient as possible on all supported Python versions. + For example, simply using ``gzip.open`` is very slow in older Pythons, and + it is a lot faster to use a ``gzip`` subprocess. For writing to gzip files, + ``xopen`` uses ``pigz`` when available. + + This module has originally been developed as part of the `cutadapt + tool `_ that is used in bioinformatics to + manipulate sequencing data. It has been in successful use within that software + for a few years. + + ``xopen`` is compatible with Python versions 2.7 and 3.4 to 3.7. + + + Usage + ----- + + Open a file for reading:: + + from xopen import xopen + + with xopen('file.txt.xz') as f: + content = f.read() + + Or without context manager:: + + from xopen import xopen + + f = xopen('file.txt.xz') + content = f.read() + f.close() + + Open a file for writing:: + + from xopen import xopen + + with xopen('file.txt.gz', mode='w') as f: + f.write('Hello') + + + Credits + ------- + + The name ``xopen`` was taken from the C function of the same name in the + `utils.h file which is part of BWA `_. + + Kyle Beauchamp has contributed support for appending to files. + + Some ideas were taken from the `canopener project `_. + If you also want to open S3 files, you may want to use that module instead. + + + Changes + ------- + + v0.5.0 + ~~~~~~ + * By default, pigz is now only allowed to use at most four threads. This hopefully reduces + problems some users had with too many threads when opening many files at the same time. + * xopen now accepts pathlib.Path objects. + + + Author + ------ + + Marcel Martin (`@marcelm_ on Twitter `_) + + Links + ----- + + * `Source code `_ + * `Report an issue `_ + * `Project page on PyPI (Python package index) `_ + +Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4 +Provides-Extra: dev diff -Nru python-xopen-0.3.3/pyproject.toml python-xopen-0.5.0/pyproject.toml --- python-xopen-0.3.3/pyproject.toml 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/pyproject.toml 2019-01-30 12:10:28.000000000 +0000 @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "setuptools_scm"] diff -Nru python-xopen-0.3.3/README.rst python-xopen-0.5.0/README.rst --- python-xopen-0.3.3/README.rst 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/README.rst 2019-01-30 12:10:28.000000000 +0000 @@ -23,7 +23,7 @@ manipulate sequencing data. It has been in successful use within that software for a few years. -``xopen`` is compatible with Python 2.7, 3.3, 3.4, 3.5 and 3.6. +``xopen`` is compatible with Python versions 2.7 and 3.4 to 3.7. Usage @@ -64,6 +64,16 @@ If you also want to open S3 files, you may want to use that module instead. +Changes +------- + +v0.5.0 +~~~~~~ +* By default, pigz is now only allowed to use at most four threads. This hopefully reduces + problems some users had with too many threads when opening many files at the same time. +* xopen now accepts pathlib.Path objects. + + Author ------ diff -Nru python-xopen-0.3.3/setup.cfg python-xopen-0.5.0/setup.cfg --- python-xopen-0.3.3/setup.cfg 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/setup.cfg 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1,7 @@ +[bdist_wheel] +universal = 1 + +[egg_info] +tag_build = +tag_date = 0 + diff -Nru python-xopen-0.3.3/setup.py python-xopen-0.5.0/setup.py --- python-xopen-0.3.3/setup.py 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/setup.py 2019-01-30 12:10:28.000000000 +0000 @@ -2,36 +2,38 @@ from setuptools import setup if sys.version_info < (2, 7): - sys.stdout.write("At least Python 2.7 is required.\n") - sys.exit(1) + sys.stdout.write("At least Python 2.7 is required.\n") + sys.exit(1) with open('README.rst') as f: - long_description = f.read() - -if sys.version_info < (3, ): - requires = ['bz2file'] -else: - requires = [] + long_description = f.read() setup( - name='xopen', - version='0.3.3', - author='Marcel Martin', - author_email='mail@marcelm.net', - url='https://github.com/marcelm/xopen/', - description='Open compressed files transparently', - long_description=long_description, - license='MIT', - py_modules=['xopen'], - install_requires=requires, - classifiers=[ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - ] + name='xopen', + use_scm_version=True, + setup_requires=['setuptools_scm'], # Support pip versions that don't know about pyproject.toml + author='Marcel Martin', + author_email='mail@marcelm.net', + url='https://github.com/marcelm/xopen/', + description='Open compressed files transparently', + long_description=long_description, + license='MIT', + py_modules=['xopen'], + install_requires=[ + 'bz2file; python_version=="2.7"', + ], + extras_require={ + 'dev': ['pytest'], + }, + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4', + classifiers=[ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + ] ) diff -Nru python-xopen-0.3.3/tests/test_xopen.py python-xopen-0.5.0/tests/test_xopen.py --- python-xopen-0.3.3/tests/test_xopen.py 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/tests/test_xopen.py 2019-01-30 12:10:28.000000000 +0000 @@ -1,241 +1,251 @@ # coding: utf-8 from __future__ import print_function, division, absolute_import -import gzip + import os import random import sys import signal from contextlib import contextmanager -from nose.tools import raises +import pytest from xopen import xopen, PipedGzipReader -base = "tests/file.txt" -files = [ base + ext for ext in ['', '.gz', '.bz2' ] ] -try: - import lzma - files.append(base + '.xz') -except ImportError: - lzma = None +extensions = ["", ".gz", ".bz2"] try: - import bz2 + import lzma + extensions.append(".xz") except ImportError: - bz2 = None + lzma = None -major, minor = sys.version_info[0:2] +base = "tests/file.txt" +files = [base + ext for ext in extensions] +CONTENT = 'Testing, testing ...\nThe second line.\n' + +# File extensions for which appending is supported +append_extensions = extensions[:] +if sys.version_info[0] == 2: + append_extensions.remove(".bz2") @contextmanager def temporary_path(name): - directory = os.path.join(os.path.dirname(__file__), 'testtmp') - if not os.path.isdir(directory): - os.mkdir(directory) - path = os.path.join(directory, name) - yield path - os.remove(path) - - -def test_xopen_text(): - for name in files: - with xopen(name, 'rt') as f: - lines = list(f) - assert len(lines) == 2 - assert lines[1] == 'The second line.\n', name - - -def test_xopen_binary(): - for name in files: - with xopen(name, 'rb') as f: - lines = list(f) - assert len(lines) == 2 - assert lines[1] == b'The second line.\n', name - - -def test_no_context_manager_text(): - for name in files: - f = xopen(name, 'rt') - lines = list(f) - assert len(lines) == 2 - assert lines[1] == 'The second line.\n', name - f.close() - assert f.closed - - -def test_no_context_manager_binary(): - for name in files: - f = xopen(name, 'rb') - lines = list(f) - assert len(lines) == 2 - assert lines[1] == b'The second line.\n', name - f.close() - assert f.closed - - -@raises(IOError) -def test_nonexisting_file(): - with xopen('this-file-does-not-exist') as f: - pass - - -@raises(IOError) -def test_nonexisting_file_gz(): - with xopen('this-file-does-not-exist.gz') as f: - pass - - -@raises(IOError) -def test_nonexisting_file_bz2(): - with xopen('this-file-does-not-exist.bz2') as f: - pass - - -if lzma: - @raises(IOError) - def test_nonexisting_file_xz(): - with xopen('this-file-does-not-exist.xz') as f: - pass - - -@raises(IOError) -def test_write_to_nonexisting_dir(): - with xopen('this/path/does/not/exist/file.txt', 'w') as f: - pass - - -@raises(IOError) -def test_write_to_nonexisting_dir_gz(): - with xopen('this/path/does/not/exist/file.gz', 'w') as f: - pass - - -@raises(IOError) -def test_write_to_nonexisting_dir_bz2(): - with xopen('this/path/does/not/exist/file.bz2', 'w') as f: - pass - - -if lzma: - @raises(IOError) - def test_write_to_nonexisting_dir(): - with xopen('this/path/does/not/exist/file.xz', 'w') as f: - pass - - -def test_append(): - cases = ["", ".gz"] - if bz2 and sys.version_info > (3,): - # BZ2 does NOT support append in Py 2. - cases.append(".bz2") - if lzma: - cases.append(".xz") - for ext in cases: - # On Py3, need to send BYTES, not unicode. Let's do it for all. - text = "AB".encode("utf-8") - reference = text + text - with temporary_path('truncated.fastq' + ext) as path: - try: - os.unlink(path) - except OSError: - pass - with xopen(path, 'ab') as f: - f.write(text) - with xopen(path, 'ab') as f: - f.write(text) - with xopen(path, 'r') as f: - for appended in f: - pass - try: - reference = reference.decode("utf-8") - except AttributeError: - pass - assert appended == reference - - -def test_append_text(): - cases = ["", ".gz"] - if bz2 and sys.version_info > (3,): - # BZ2 does NOT support append in Py 2. - cases.append(".bz2") - if lzma: - cases.append(".xz") - for ext in cases: # BZ2 does NOT support append - text = "AB" - reference = text + text - with temporary_path('truncated.fastq' + ext) as path: - try: - os.unlink(path) - except OSError: - pass - with xopen(path, 'at') as f: - f.write(text) - with xopen(path, 'at') as f: - f.write(text) - with xopen(path, 'rt') as f: - for appended in f: - pass - assert appended == reference + directory = os.path.join(os.path.dirname(__file__), 'testtmp') + if not os.path.isdir(directory): + os.mkdir(directory) + path = os.path.join(directory, name) + yield path + os.remove(path) + + +@pytest.mark.parametrize("name", files) +def test_xopen_text(name): + with xopen(name, 'rt') as f: + lines = list(f) + assert len(lines) == 2 + assert lines[1] == 'The second line.\n', name + + +@pytest.mark.parametrize("name", files) +def test_xopen_binary(name): + with xopen(name, 'rb') as f: + lines = list(f) + assert len(lines) == 2 + assert lines[1] == b'The second line.\n', name + + +@pytest.mark.parametrize("name", files) +def test_no_context_manager_text(name): + f = xopen(name, 'rt') + lines = list(f) + assert len(lines) == 2 + assert lines[1] == 'The second line.\n', name + f.close() + assert f.closed + + +@pytest.mark.parametrize("name", files) +def test_no_context_manager_binary(name): + f = xopen(name, 'rb') + lines = list(f) + assert len(lines) == 2 + assert lines[1] == b'The second line.\n', name + f.close() + assert f.closed + + +@pytest.mark.parametrize("ext", extensions) +def test_nonexisting_file(ext): + with pytest.raises(IOError): + with xopen('this-file-does-not-exist' + ext) as f: + pass + + +@pytest.mark.parametrize("ext", extensions) +def test_write_to_nonexisting_dir(ext): + with pytest.raises(IOError): + with xopen('this/path/does/not/exist/file.txt' + ext, 'w') as f: + pass + + +@pytest.mark.parametrize("ext", append_extensions) +def test_append(ext): + text = "AB".encode("utf-8") + reference = text + text + with temporary_path('truncated.fastq' + ext) as path: + try: + os.unlink(path) + except OSError: + pass + with xopen(path, 'ab') as f: + f.write(text) + with xopen(path, 'ab') as f: + f.write(text) + with xopen(path, 'r') as f: + for appended in f: + pass + try: + reference = reference.decode("utf-8") + except AttributeError: + pass + assert appended == reference + + +@pytest.mark.parametrize("ext", append_extensions) +def test_append_text(ext): + text = "AB" + reference = text + text + with temporary_path('truncated.fastq' + ext) as path: + try: + os.unlink(path) + except OSError: + pass + with xopen(path, 'at') as f: + f.write(text) + with xopen(path, 'at') as f: + f.write(text) + with xopen(path, 'rt') as f: + for appended in f: + pass + assert appended == reference def create_truncated_file(path): - # Random text - random_text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(1024)) - # Make the text a lot bigger in order to ensure that it is larger than the - # pipe buffer size. - random_text *= 1024 # 1MB - with xopen(path, 'w') as f: - f.write(random_text) - with open(path, 'a') as f: - f.truncate(os.stat(path).st_size - 10) + # Random text + random_text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(1024)) + # Make the text a lot bigger in order to ensure that it is larger than the + # pipe buffer size. + random_text *= 1024 # 1MB + with xopen(path, 'w') as f: + f.write(random_text) + with open(path, 'a') as f: + f.truncate(os.stat(path).st_size - 10) class TookTooLongError(Exception): - pass + pass class timeout: - # copied from https://stackoverflow.com/a/22348885/715090 - def __init__(self, seconds=1): - self.seconds = seconds - - def handle_timeout(self, signum, frame): - raise TookTooLongError() - - def __enter__(self): - signal.signal(signal.SIGALRM, self.handle_timeout) - signal.alarm(self.seconds) + # copied from https://stackoverflow.com/a/22348885/715090 + def __init__(self, seconds=1): + self.seconds = seconds + + def handle_timeout(self, signum, frame): + raise TookTooLongError() + + def __enter__(self): + signal.signal(signal.SIGALRM, self.handle_timeout) + signal.alarm(self.seconds) - def __exit__(self, type, value, traceback): - signal.alarm(0) + def __exit__(self, type, value, traceback): + signal.alarm(0) if sys.version_info[:2] != (3, 3): - @raises(EOFError, IOError) - def test_truncated_gz(): - with temporary_path('truncated.gz') as path: - create_truncated_file(path) - with timeout(seconds=2): - f = xopen(path, 'r') - f.read() - f.close() - - - @raises(EOFError, IOError) - def test_truncated_gz_iter(): - with temporary_path('truncated.gz') as path: - create_truncated_file(path) - with timeout(seconds=2): - f = xopen(path, 'r') - for line in f: - pass - f.close() + def test_truncated_gz(): + with temporary_path('truncated.gz') as path: + create_truncated_file(path) + with timeout(seconds=2): + with pytest.raises((EOFError, IOError)): + f = xopen(path, 'r') + f.read() + f.close() + + + def test_truncated_gz_iter(): + with temporary_path('truncated.gz') as path: + create_truncated_file(path) + with timeout(seconds=2): + with pytest.raises((EOFError, IOError)): + f = xopen(path, 'r') + for line in f: + pass + f.close() def test_bare_read_from_gz(): - with xopen('tests/hello.gz', 'rt') as f: - assert f.read() == 'hello' + with xopen('tests/hello.gz', 'rt') as f: + assert f.read() == 'hello' def test_read_piped_gzip(): - with PipedGzipReader('tests/hello.gz', 'rt') as f: - assert f.read() == 'hello' + with PipedGzipReader('tests/hello.gz', 'rt') as f: + assert f.read() == 'hello' + + +def test_write_pigz_threads(tmpdir): + path = str(tmpdir.join('out.gz')) + with xopen(path, mode='w', threads=3) as f: + f.write('hello') + with xopen(path) as f: + assert f.read() == 'hello' + + +def test_write_stdout(): + f = xopen('-', mode='w') + print("Hello", file=f) + f.close() + # ensure stdout is not closed + print("Still there?") + + +def test_write_stdout_contextmanager(): + # Do not close stdout + with xopen('-', mode='w') as f: + print("Hello", file=f) + # ensure stdout is not closed + print("Still there?") + + +if sys.version_info[:2] >= (3, 4): + # pathlib was added in Python 3.4 + from pathlib import Path + + @pytest.mark.parametrize("file", files) + def test_read_pathlib(file): + path = Path(file) + with xopen(path, mode='rt') as f: + assert f.read() == CONTENT + + @pytest.mark.parametrize("file", files) + def test_read_pathlib_binary(file): + path = Path(file) + with xopen(path, mode='rb') as f: + assert f.read() == bytes(CONTENT, 'ascii') + + @pytest.mark.parametrize("ext", extensions) + def test_write_pathlib(ext, tmpdir): + path = Path(str(tmpdir)) / ('hello.txt' + ext) + with xopen(path, mode='wt') as f: + f.write('hello') + with xopen(path, mode='rt') as f: + assert f.read() == 'hello' + + @pytest.mark.parametrize("ext", extensions) + def test_write_pathlib_binary(ext, tmpdir): + path = Path(str(tmpdir)) / ('hello.txt' + ext) + with xopen(path, mode='wb') as f: + f.write(b'hello') + with xopen(path, mode='rb') as f: + assert f.read() == b'hello' diff -Nru python-xopen-0.3.3/tox.ini python-xopen-0.5.0/tox.ini --- python-xopen-0.3.3/tox.ini 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/tox.ini 2019-01-30 12:10:28.000000000 +0000 @@ -1,6 +1,6 @@ [tox] -envlist = py27,py33,py34,py35,py36 +envlist = py27,py34,py35,py36,py37 [testenv] -deps = nose -commands = nosetests -P tests +deps = pytest +commands = pytest diff -Nru python-xopen-0.3.3/.travis.yml python-xopen-0.5.0/.travis.yml --- python-xopen-0.3.3/.travis.yml 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/.travis.yml 2019-01-30 12:10:28.000000000 +0000 @@ -1,17 +1,39 @@ -sudo: false language: python + +dist: xenial + cache: directories: - $HOME/.cache/pip + python: - "2.7" - - "3.3" - "3.4" - "3.5" - "3.6" + - "3.7" install: - pip install . script: - - nosetests -P tests + - python setup.py --version # Detect encoding problems + - python -m pytest + +env: + global: + - TWINE_USERNAME=marcelm + +jobs: + include: + - stage: deploy + services: + - docker + python: "3.6" + install: python3 -m pip install twine 'requests-toolbelt!=0.9.0' + if: tag IS present + script: + - | + python3 setup.py sdist + ls -l dist/ + python3 -m twine upload dist/* diff -Nru python-xopen-0.3.3/xopen.egg-info/dependency_links.txt python-xopen-0.5.0/xopen.egg-info/dependency_links.txt --- python-xopen-0.3.3/xopen.egg-info/dependency_links.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/xopen.egg-info/dependency_links.txt 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1 @@ + diff -Nru python-xopen-0.3.3/xopen.egg-info/PKG-INFO python-xopen-0.5.0/xopen.egg-info/PKG-INFO --- python-xopen-0.3.3/xopen.egg-info/PKG-INFO 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/xopen.egg-info/PKG-INFO 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1,107 @@ +Metadata-Version: 2.1 +Name: xopen +Version: 0.5.0 +Summary: Open compressed files transparently +Home-page: https://github.com/marcelm/xopen/ +Author: Marcel Martin +Author-email: mail@marcelm.net +License: MIT +Description: .. image:: https://travis-ci.org/marcelm/xopen.svg?branch=master + :target: https://travis-ci.org/marcelm/xopen + + .. image:: https://img.shields.io/pypi/v/xopen.svg?branch=master + :target: https://pypi.python.org/pypi/xopen + + ===== + xopen + ===== + + This small Python module provides an ``xopen`` function that works like the + built-in ``open`` function, but can also deal with compressed files. + Supported compression formats are gzip, bzip2 and xz. They are automatically + recognized by their file extensions `.gz`, `.bz2` or `.xz`. + + The focus is on being as efficient as possible on all supported Python versions. + For example, simply using ``gzip.open`` is very slow in older Pythons, and + it is a lot faster to use a ``gzip`` subprocess. For writing to gzip files, + ``xopen`` uses ``pigz`` when available. + + This module has originally been developed as part of the `cutadapt + tool `_ that is used in bioinformatics to + manipulate sequencing data. It has been in successful use within that software + for a few years. + + ``xopen`` is compatible with Python versions 2.7 and 3.4 to 3.7. + + + Usage + ----- + + Open a file for reading:: + + from xopen import xopen + + with xopen('file.txt.xz') as f: + content = f.read() + + Or without context manager:: + + from xopen import xopen + + f = xopen('file.txt.xz') + content = f.read() + f.close() + + Open a file for writing:: + + from xopen import xopen + + with xopen('file.txt.gz', mode='w') as f: + f.write('Hello') + + + Credits + ------- + + The name ``xopen`` was taken from the C function of the same name in the + `utils.h file which is part of BWA `_. + + Kyle Beauchamp has contributed support for appending to files. + + Some ideas were taken from the `canopener project `_. + If you also want to open S3 files, you may want to use that module instead. + + + Changes + ------- + + v0.5.0 + ~~~~~~ + * By default, pigz is now only allowed to use at most four threads. This hopefully reduces + problems some users had with too many threads when opening many files at the same time. + * xopen now accepts pathlib.Path objects. + + + Author + ------ + + Marcel Martin (`@marcelm_ on Twitter `_) + + Links + ----- + + * `Source code `_ + * `Report an issue `_ + * `Project page on PyPI (Python package index) `_ + +Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4 +Provides-Extra: dev diff -Nru python-xopen-0.3.3/xopen.egg-info/requires.txt python-xopen-0.5.0/xopen.egg-info/requires.txt --- python-xopen-0.3.3/xopen.egg-info/requires.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/xopen.egg-info/requires.txt 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1,6 @@ + +[:python_version == "2.7"] +bz2file + +[dev] +pytest diff -Nru python-xopen-0.3.3/xopen.egg-info/SOURCES.txt python-xopen-0.5.0/xopen.egg-info/SOURCES.txt --- python-xopen-0.3.3/xopen.egg-info/SOURCES.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/xopen.egg-info/SOURCES.txt 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1,21 @@ +.editorconfig +.gitignore +.travis.yml +LICENSE +README.rst +pyproject.toml +setup.cfg +setup.py +tox.ini +xopen.py +tests/file.txt +tests/file.txt.bz2 +tests/file.txt.gz +tests/file.txt.xz +tests/hello.gz +tests/test_xopen.py +xopen.egg-info/PKG-INFO +xopen.egg-info/SOURCES.txt +xopen.egg-info/dependency_links.txt +xopen.egg-info/requires.txt +xopen.egg-info/top_level.txt \ No newline at end of file diff -Nru python-xopen-0.3.3/xopen.egg-info/top_level.txt python-xopen-0.5.0/xopen.egg-info/top_level.txt --- python-xopen-0.3.3/xopen.egg-info/top_level.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-xopen-0.5.0/xopen.egg-info/top_level.txt 2019-01-30 12:10:40.000000000 +0000 @@ -0,0 +1 @@ +xopen diff -Nru python-xopen-0.3.3/xopen.py python-xopen-0.5.0/xopen.py --- python-xopen-0.3.3/xopen.py 2018-04-09 18:36:09.000000000 +0000 +++ python-xopen-0.5.0/xopen.py 2019-01-30 12:10:28.000000000 +0000 @@ -9,244 +9,341 @@ import os import time from subprocess import Popen, PIPE +from pkg_resources import get_distribution, DistributionNotFound -__version__ = '0.3.2' + +try: + __version__ = get_distribution(__name__).version +except DistributionNotFound: + # package is not installed + pass _PY3 = sys.version > '3' if not _PY3: - import bz2file as bz2 + import bz2file as bz2 else: - try: - import bz2 - except ImportError: - bz2 = None + try: + import bz2 + except ImportError: + bz2 = None try: - import lzma + import lzma except ImportError: - lzma = None + lzma = None if _PY3: - basestring = str + basestring = str + +try: + import pathlib # Exists in Python 3.4+ +except ImportError: + pathlib = None + +try: + from os import fspath # Exists in Python 3.6+ +except ImportError: + def fspath(path): + if hasattr(path, "__fspath__"): + return path.__fspath__() + # Python 3.4 and 3.5 do not support the file system path protocol + if pathlib is not None and isinstance(path, pathlib.Path): + return str(path) + return path + + +def _available_cpu_count(): + """ + Number of available virtual or physical CPUs on this system + Adapted from http://stackoverflow.com/a/1006301/715090 + """ + try: + return len(os.sched_getaffinity(0)) + except AttributeError: + pass + import re + try: + with open('/proc/self/status') as f: + status = f.read() + m = re.search(r'(?m)^Cpus_allowed:\s*(.*)$', status) + if m: + res = bin(int(m.group(1).replace(',', ''), 16)).count('1') + if res > 0: + return res + except IOError: + pass + try: + import multiprocessing + return multiprocessing.cpu_count() + except (ImportError, NotImplementedError): + return 1 class Closing(object): - """ - Inherit from this class and implement a close() method to offer context - manager functionality. - """ - def __enter__(self): - return self - - def __exit__(self, *exc_info): - self.close() - - def __del__(self): - try: - self.close() - except: - pass + """ + Inherit from this class and implement a close() method to offer context + manager functionality. + """ + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + + def __del__(self): + try: + self.close() + except: + pass class PipedGzipWriter(Closing): - """ - Write gzip-compressed files by running an external gzip or pigz process and - piping into it. On Python 2, this is faster than using gzip.open(). On - Python 3, it allows to run the compression in a separate process and can - therefore also be faster. - """ - - def __init__(self, path, mode='wt'): - if mode not in ('w', 'wt', 'wb', 'a', 'at', 'ab'): - raise ValueError("Mode is '{0}', but it must be 'w', 'wt', 'wb', 'a', 'at' or 'ab'".format(mode)) - self.outfile = open(path, mode) - self.devnull = open(os.devnull, mode) - self.closed = False - self.name = path - - # Setting close_fds to True in the Popen arguments is necessary due to - # . - kwargs = dict(stdin=PIPE, stdout=self.outfile, stderr=self.devnull, close_fds=True) - try: - self.process = Popen(['pigz'], **kwargs) - self.program = 'pigz' - except OSError as e: - # pigz not found, try regular gzip - try: - self.process = Popen(['gzip'], **kwargs) - self.program = 'gzip' - except (IOError, OSError) as e: - self.outfile.close() - self.devnull.close() - raise - except IOError as e: - self.outfile.close() - self.devnull.close() - raise - if _PY3 and 'b' not in mode: - self._file = io.TextIOWrapper(self.process.stdin) - else: - self._file = self.process.stdin - - def write(self, arg): - self._file.write(arg) - - def close(self): - self.closed = True - self._file.close() - retcode = self.process.wait() - self.outfile.close() - self.devnull.close() - if retcode != 0: - raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode)) + """ + Write gzip-compressed files by running an external gzip or pigz process and + piping into it. On Python 2, this is faster than using gzip.open(). On + Python 3, it allows to run the compression in a separate process and can + therefore also be faster. + """ + + def __init__(self, path, mode='wt', compresslevel=6, threads=None): + """ + mode -- one of 'w', 'wt', 'wb', 'a', 'at', 'ab' + compresslevel -- gzip compression level + threads (int) -- number of pigz threads. If this is set to None, a reasonable default is + used. At the moment, this means that the number of available CPU cores is used, capped + at four to avoid creating too many threads. Use 0 to let pigz use all available cores. + """ + if mode not in ('w', 'wt', 'wb', 'a', 'at', 'ab'): + raise ValueError("Mode is '{0}', but it must be 'w', 'wt', 'wb', 'a', 'at' or 'ab'".format(mode)) + + # TODO use a context manager + self.outfile = open(path, mode) + self.devnull = open(os.devnull, mode) + self.closed = False + self.name = path + + kwargs = dict(stdin=PIPE, stdout=self.outfile, stderr=self.devnull) + # Setting close_fds to True in the Popen arguments is necessary due to + # . + # However, close_fds is not supported on Windows. See + # . + if sys.platform != 'win32': + kwargs['close_fds'] = True + + if 'w' in mode and compresslevel != 6: + extra_args = ['-' + str(compresslevel)] + else: + extra_args = [] + + pigz_args = ['pigz'] + if threads is None: + threads = min(_available_cpu_count(), 4) + if threads != 0: + pigz_args += ['-p', str(threads)] + try: + self.process = Popen(pigz_args + extra_args, **kwargs) + self.program = 'pigz' + except OSError: + # pigz not found, try regular gzip + try: + self.process = Popen(['gzip'] + extra_args, **kwargs) + self.program = 'gzip' + except (IOError, OSError): + self.outfile.close() + self.devnull.close() + raise + except IOError: # TODO IOError is the same as OSError on Python 3.3 + self.outfile.close() + self.devnull.close() + raise + if _PY3 and 'b' not in mode: + self._file = io.TextIOWrapper(self.process.stdin) + else: + self._file = self.process.stdin + + def write(self, arg): + self._file.write(arg) + + def close(self): + self.closed = True + self._file.close() + retcode = self.process.wait() + self.outfile.close() + self.devnull.close() + if retcode != 0: + raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode)) class PipedGzipReader(Closing): - def __init__(self, path, mode='r'): - if mode not in ('r', 'rt', 'rb'): - raise ValueError("Mode is '{0}', but it must be 'r', 'rt' or 'rb'".format(mode)) - self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE) - self.name = path - if _PY3 and not 'b' in mode: - self._file = io.TextIOWrapper(self.process.stdout) - else: - self._file = self.process.stdout - if _PY3: - self._stderr = io.TextIOWrapper(self.process.stderr) - else: - self._stderr = self.process.stderr - self.closed = False - # Give gzip a little bit of time to report any errors (such as - # a non-existing file) - time.sleep(0.01) - self._raise_if_error() - - def close(self): - self.closed = True - retcode = self.process.poll() - if retcode is None: - # still running - self.process.terminate() - self._raise_if_error() - - def __iter__(self): - for line in self._file: - yield line - self.process.wait() - self._raise_if_error() - - def _raise_if_error(self): - """ - Raise IOError if process is not running anymore and the - exit code is nonzero. - """ - retcode = self.process.poll() - if retcode is not None and retcode != 0: - message = self._stderr.read().strip() - raise IOError(message) - - def read(self, *args): - data = self._file.read(*args) - if len(args) == 0 or args[0] <= 0: - # wait for process to terminate until we check the exit code - self.process.wait() - self._raise_if_error() - return data + def __init__(self, path, mode='r'): + if mode not in ('r', 'rt', 'rb'): + raise ValueError("Mode is '{0}', but it must be 'r', 'rt' or 'rb'".format(mode)) + self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE) + self.name = path + if _PY3 and 'b' not in mode: + self._file = io.TextIOWrapper(self.process.stdout) + else: + self._file = self.process.stdout + if _PY3: + self._stderr = io.TextIOWrapper(self.process.stderr) + else: + self._stderr = self.process.stderr + self.closed = False + # Give gzip a little bit of time to report any errors (such as + # a non-existing file) + time.sleep(0.01) + self._raise_if_error() + + def close(self): + self.closed = True + retcode = self.process.poll() + if retcode is None: + # still running + self.process.terminate() + self._raise_if_error() + + def __iter__(self): + for line in self._file: + yield line + self.process.wait() + self._raise_if_error() + + def _raise_if_error(self): + """ + Raise IOError if process is not running anymore and the + exit code is nonzero. + """ + retcode = self.process.poll() + if retcode is not None and retcode != 0: + message = self._stderr.read().strip() + raise IOError(message) + + def read(self, *args): + data = self._file.read(*args) + if len(args) == 0 or args[0] <= 0: + # wait for process to terminate until we check the exit code + self.process.wait() + self._raise_if_error() + return data if bz2 is not None: - class ClosingBZ2File(bz2.BZ2File, Closing): - """ - A better BZ2File that supports the context manager protocol. - This is relevant only for Python 2.6. - """ - - -def xopen(filename, mode='r', compresslevel=6): - """ - Replacement for the "open" function that can also open files that have - been compressed with gzip, bzip2 or xz. If the filename is '-', standard - output (mode 'w') or input (mode 'r') is returned. If the filename ends - with .gz, the file is opened with a pipe to the gzip program. If that - does not work, then gzip.open() is used (the gzip module is slower than - the pipe to the gzip program). If the filename ends with .bz2, it's - opened as a bz2.BZ2File. Otherwise, the regular open() is used. - - mode can be: 'rt', 'rb', 'at', 'ab', 'wt', or 'wb' - Instead of 'rt', 'wt' and 'at', 'r', 'w' and 'a' can be used as - abbreviations. - - In Python 2, the 't' and 'b' characters are ignored. - - Append mode ('a', 'at', 'ab') is unavailable with BZ2 compression and - will raise an error. - - compresslevel is the gzip compression level. It is not used for bz2 and xz. - """ - if mode in ('r', 'w', 'a'): - mode += 't' - if mode not in ('rt', 'rb', 'wt', 'wb', 'at', 'ab'): - raise ValueError("mode '{0}' not supported".format(mode)) - if not _PY3: - mode = mode[0] - if not isinstance(filename, basestring): - raise ValueError("the filename must be a string") - - # standard input and standard output handling - if filename == '-': - if not _PY3: - return dict(r=sys.stdin, w=sys.stdout)[mode] - else: - return dict( - r=sys.stdin, - rt=sys.stdin, - rb=sys.stdin.buffer, - w=sys.stdout, - wt=sys.stdout, - wb=sys.stdout.buffer)[mode] - - if filename.endswith('.bz2'): - if bz2 is None: - raise ImportError("Cannot open bz2 files: The bz2 module is not available") - if _PY3: - return bz2.open(filename, mode) - else: - if mode[0] == 'a': - raise ValueError("mode '{0}' not supported with BZ2 compression".format(mode)) - if sys.version_info[:2] <= (2, 6): - return ClosingBZ2File(filename, mode) - else: - return bz2.BZ2File(filename, mode) - elif filename.endswith('.xz'): - if lzma is None: - raise ImportError("Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)") - return lzma.open(filename, mode) - elif filename.endswith('.gz'): - if _PY3 and 'r' in mode: - return gzip.open(filename, mode) - if sys.version_info[:2] == (2, 7): - buffered_reader = io.BufferedReader - buffered_writer = io.BufferedWriter - else: - buffered_reader = lambda x: x - buffered_writer = lambda x: x - if 'r' in mode: - try: - return PipedGzipReader(filename, mode) - except OSError: - # gzip not installed - return buffered_reader(gzip.open(filename, mode)) - else: - try: - return PipedGzipWriter(filename, mode) - except OSError: - return buffered_writer(gzip.open(filename, mode, compresslevel=compresslevel)) - else: - # Python 2.6 and 2.7 have io.open, which we could use to make the returned - # object consistent with the one returned in Python 3, but reading a file - # with io.open() is 100 times slower (!) on Python 2.6, and still about - # three times slower on Python 2.7 (tested with "for _ in io.open(path): pass") - return open(filename, mode) + class ClosingBZ2File(bz2.BZ2File, Closing): + """ + A better BZ2File that supports the context manager protocol. + This is relevant only for Python 2.6. + """ + + +def _open_stdin_or_out(mode): + # Do not return sys.stdin or sys.stdout directly as we want the returned object + # to be closable without closing sys.stdout. + std = dict(r=sys.stdin, w=sys.stdout)[mode[0]] + if not _PY3: + # Enforce str type on Python 2 + # Note that io.open is slower than regular open() on Python 2.7, but + # it appears to be the only API that has a closefd parameter. + mode = mode[0] + 'b' + return io.open(std.fileno(), mode=mode, closefd=False) + + +def _open_bz2(filename, mode): + if bz2 is None: + raise ImportError("Cannot open bz2 files: The bz2 module is not available") + if _PY3: + return bz2.open(filename, mode) + else: + if mode[0] == 'a': + raise ValueError("mode '{0}' not supported with BZ2 compression".format(mode)) + if sys.version_info[:2] <= (2, 6): + return ClosingBZ2File(filename, mode) + else: + return bz2.BZ2File(filename, mode) + + +def _open_xz(filename, mode): + if lzma is None: + raise ImportError( + "Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)") + return lzma.open(filename, mode) + + +def _open_gz(filename, mode, compresslevel, threads): + if _PY3 and 'r' in mode: + return gzip.open(filename, mode) + if sys.version_info[:2] == (2, 7): + buffered_reader = io.BufferedReader + buffered_writer = io.BufferedWriter + else: + buffered_reader = lambda x: x + buffered_writer = lambda x: x + if 'r' in mode: + try: + return PipedGzipReader(filename, mode) + except OSError: + # gzip not installed + return buffered_reader(gzip.open(filename, mode)) + else: + try: + return PipedGzipWriter(filename, mode, compresslevel, threads=threads) + except OSError: + return buffered_writer(gzip.open(filename, mode, compresslevel=compresslevel)) + + +def xopen(filename, mode='r', compresslevel=6, threads=None): + """ + A replacement for the "open" function that can also open files that have + been compressed with gzip, bzip2 or xz. If the filename is '-', standard + output (mode 'w') or input (mode 'r') is returned. + + The file type is determined based on the filename: .gz is gzip, .bz2 is bzip2 and .xz is + xz/lzma. + + When writing a gzip-compressed file, the following methods are tried in order to get the + best speed 1) using a pigz (parallel gzip) subprocess; 2) using a gzip subprocess; + 3) gzip.open. A single gzip subprocess can be faster than gzip.open because it runs in a + separate process. + + Uncompressed files are opened with the regular open(). + + mode can be: 'rt', 'rb', 'at', 'ab', 'wt', or 'wb'. Also, the 't' can be omitted, + so instead of 'rt', 'wt' and 'at', the abbreviations 'r', 'w' and 'a' can be used. + + In Python 2, the 't' and 'b' characters are ignored. + + Append mode ('a', 'at', 'ab') is unavailable with BZ2 compression and + will raise an error. + + compresslevel is the gzip compression level. It is not used for bz2 and xz. + + threads is the number of threads for pigz. If None, then the pigz default is used. + """ + if mode in ('r', 'w', 'a'): + mode += 't' + if mode not in ('rt', 'rb', 'wt', 'wb', 'at', 'ab'): + raise ValueError("mode '{0}' not supported".format(mode)) + if not _PY3: + mode = mode[0] + filename = fspath(filename) + if not isinstance(filename, basestring): + raise ValueError("the filename must be a string") + if compresslevel not in range(1, 10): + raise ValueError("compresslevel must be between 1 and 9") + + if filename == '-': + return _open_stdin_or_out(mode) + elif filename.endswith('.bz2'): + return _open_bz2(filename, mode) + elif filename.endswith('.xz'): + return _open_xz(filename, mode) + elif filename.endswith('.gz'): + return _open_gz(filename, mode, compresslevel, threads) + else: + # Python 2.6 and 2.7 have io.open, which we could use to make the returned + # object consistent with the one returned in Python 3, but reading a file + # with io.open() is 100 times slower (!) on Python 2.6, and still about + # three times slower on Python 2.7 (tested with "for _ in io.open(path): pass") + return open(filename, mode)