diff -Nru pycuda-2017.1.1/aksetup_helper.py pycuda-2018.1.1/aksetup_helper.py --- pycuda-2017.1.1/aksetup_helper.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/aksetup_helper.py 2018-10-31 18:05:29.000000000 +0000 @@ -285,7 +285,7 @@ if value is not None: filevars[key] = value - keys = filevars.keys() + keys = list(filevars.keys()) keys.sort() outf = open(filename, "w") diff -Nru pycuda-2017.1.1/debian/changelog pycuda-2018.1.1/debian/changelog --- pycuda-2017.1.1/debian/changelog 2018-11-03 16:34:53.000000000 +0000 +++ pycuda-2018.1.1/debian/changelog 2018-11-09 14:13:18.000000000 +0000 @@ -1,3 +1,30 @@ +pycuda (2018.1.1-1ubuntu1) disco; urgency=low + + * Merge from Debian unstable. Remaining changes: + - Fix ftbfs due to boost-python soname change. + + -- Gianfranco Costamagna Fri, 09 Nov 2018 15:13:18 +0100 + +pycuda (2018.1.1-1) unstable; urgency=medium + + [ Andreas Beckmann ] + * Put package under maintenance by the Debian NVIDIA Maintainers team, move + Tomasz to Uploaders. + * Switch Vcs-* URLs to salsa.debian.org. + + [ Tomasz Rybak ] + * New upstream release (Closes: #903826). + * Add Rules-Requires-Root to d/control. + * Update d/copyright links to use https protocol. + * Add disclaimer to d/copyright describing why PyCUDA is in contrib. + * Reorder d/control putting Python 3 packages first. + * Remove unnecessary X-Python{,3}-Version fields. + * Update Standards-Version to 4.2.1; no changes necessary. + * Set compatibility level to 11 + * Point python-pycuda-doc.doc-base to main package's doc directory. + + -- Tomasz Rybak Wed, 31 Oct 2018 18:59:56 +0100 + pycuda (2017.1.1-2ubuntu2) disco; urgency=medium * No-change rebuild to build without python3.6 support. 
@@ -231,3 +258,4 @@ as package is using Debian ones -- Tomasz Rybak Sat, 20 Aug 2011 23:02:55 +0200 + diff -Nru pycuda-2017.1.1/debian/compat pycuda-2018.1.1/debian/compat --- pycuda-2017.1.1/debian/compat 2018-03-09 18:07:39.000000000 +0000 +++ pycuda-2018.1.1/debian/compat 2018-11-03 19:49:14.000000000 +0000 @@ -1 +1 @@ -10 +11 diff -Nru pycuda-2017.1.1/debian/control pycuda-2018.1.1/debian/control --- pycuda-2017.1.1/debian/control 2018-08-10 01:38:42.000000000 +0000 +++ pycuda-2018.1.1/debian/control 2018-11-09 14:13:14.000000000 +0000 @@ -1,9 +1,10 @@ Source: pycuda Section: contrib/python Priority: optional -Maintainer: Ubuntu Developers -XSBC-Original-Maintainer: Tomasz Rybak -Build-Depends: debhelper (>= 10), +Maintainer: Debian NVIDIA Maintainers +Uploaders: + Tomasz Rybak , +Build-Depends: debhelper (>= 11), dh-python, python-all-dev, python-all-dbg, @@ -24,30 +25,29 @@ python3-sphinx (>= 1.0.7+dfsg) , python-pytools (>= 2011.5), python3-pytools -Standards-Version: 4.1.3 -X-Python-Version: >= 2.6 -X-Python3-Version: >= 3.3 +Standards-Version: 4.2.1 +Rules-Requires-Root: no Homepage: http://mathema.tician.de/software/pycuda -Vcs-Git: https://anonscm.debian.org/git/collab-maint/python-pycuda.git -Vcs-Browser: https://anonscm.debian.org/gitweb/?p=collab-maint/python-pycuda.git +Vcs-Browser: https://salsa.debian.org/nvidia-team/python-pycuda +Vcs-Git: https://salsa.debian.org/nvidia-team/python-pycuda.git -Package: python-pycuda +Package: python3-pycuda Architecture: amd64 Multi-Arch: no -Depends: ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, +Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, nvidia-cuda-toolkit, - python-appdirs (>= 1.4.0), - python-numpy, - python-decorator (>= 3.2.0), - python-pytools (>= 2011.5) + python3-appdirs (>= 1.4.0), + python3-numpy, + python3-decorator (>= 3.2.0), + python3-pytools Recommends: python-pycuda-doc , - python-mako -Suggests: python-pytest, - python-opengl, - python-matplotlib, - 
python-pycuda-dbg + python3-mako +Suggests: python3-pytest, + python3-opengl, + python3-matplotlib, + python3-pycuda-dbg Replaces: python-pycuda-headers -Description: Python module to access Nvidia‘s CUDA parallel computation API +Description: Python 3 module to access Nvidia‘s CUDA parallel computation API PyCUDA lets you access Nvidia‘s CUDA parallel computation API from Python. Several wrappers of the CUDA API already exist–so what’s so special about PyCUDA? @@ -65,14 +65,16 @@ * Speed. PyCUDA’s base layer is written in C++, so all the niceties above are virtually free. * Helpful Documentation. + . + This package contains Python 3 modules. -Package: python-pycuda-dbg +Package: python3-pycuda-dbg Section: contrib/debug Architecture: amd64 Multi-Arch: no -Depends: python-pycuda (= ${binary:Version}), python-dbg, - ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, -Description: Python module to access Nvidia‘s CUDA API (debug extensions) +Depends: python3-pycuda (= ${binary:Version}), python3-dbg, + ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, +Description: Python 3 module to access Nvidia‘s CUDA API (debug extensions) PyCUDA lets you access Nvidia‘s CUDA parallel computation API from Python. Several wrappers of the CUDA API already exist–so what’s so special about PyCUDA? @@ -91,25 +93,25 @@ above are virtually free. * Helpful Documentation. . - This package contains debug extensions build for the Python debug interpreter. + This package contains debug extensions for the Python 3 debug interpreter. 
-Package: python3-pycuda +Package: python-pycuda Architecture: amd64 Multi-Arch: no -Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, +Depends: ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, nvidia-cuda-toolkit, - python3-appdirs (>= 1.4.0), - python3-numpy, - python3-decorator (>= 3.2.0), - python3-pytools + python-appdirs (>= 1.4.0), + python-numpy, + python-decorator (>= 3.2.0), + python-pytools (>= 2011.5) Recommends: python-pycuda-doc , - python3-mako -Suggests: python3-pytest, - python3-opengl, - python3-matplotlib, - python3-pycuda-dbg + python-mako +Suggests: python-pytest, + python-opengl, + python-matplotlib, + python-pycuda-dbg Replaces: python-pycuda-headers -Description: Python 3 module to access Nvidia‘s CUDA parallel computation API +Description: Python module to access Nvidia‘s CUDA parallel computation API PyCUDA lets you access Nvidia‘s CUDA parallel computation API from Python. Several wrappers of the CUDA API already exist–so what’s so special about PyCUDA? @@ -127,16 +129,14 @@ * Speed. PyCUDA’s base layer is written in C++, so all the niceties above are virtually free. * Helpful Documentation. - . - This package contains Python 3 modules. -Package: python3-pycuda-dbg +Package: python-pycuda-dbg Section: contrib/debug Architecture: amd64 Multi-Arch: no -Depends: python3-pycuda (= ${binary:Version}), python3-dbg, - ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, -Description: Python 3 module to access Nvidia‘s CUDA API (debug extensions) +Depends: python-pycuda (= ${binary:Version}), python-dbg, + ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, +Description: Python module to access Nvidia‘s CUDA API (debug extensions) PyCUDA lets you access Nvidia‘s CUDA parallel computation API from Python. Several wrappers of the CUDA API already exist–so what’s so special about PyCUDA? @@ -155,7 +155,7 @@ above are virtually free. * Helpful Documentation. . 
- This package contains debug extensions for the Python 3 debug interpreter. + This package contains debug extensions built for the Python debug interpreter. Package: python-pycuda-doc Section: contrib/doc diff -Nru pycuda-2017.1.1/debian/copyright pycuda-2018.1.1/debian/copyright --- pycuda-2017.1.1/debian/copyright 2018-03-09 18:07:39.000000000 +0000 +++ pycuda-2018.1.1/debian/copyright 2018-11-03 19:49:14.000000000 +0000 @@ -1,9 +1,13 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: PyCUDA Upstream-Contact: Andreas Klöckner -Source: http://git.tiker.net/trees/pycuda.git - http://pypi.python.org/pypi/pycuda/ - http://pypi.python.org/packages/source/p/pycuda/ +Source: https://git.tiker.net/pycuda.git + https://pypi.python.org/pypi/pycuda/ + https://github.com/inducer/pycuda +Disclaimer: Although PyCUDA itself is free software, it depends on CUDA + which is proprietary NVIDIA software. Package nvidia-cuda-toolkit, + dependency of PyCUDA packages, is in non-free which means that, + according to Policy 2.2.2, PyCUDA needs to be in contrib. 
Files: * Copyright: 2009-2011 Andreas Klöckner diff -Nru pycuda-2017.1.1/debian/python-pycuda-doc.doc-base pycuda-2018.1.1/debian/python-pycuda-doc.doc-base --- pycuda-2017.1.1/debian/python-pycuda-doc.doc-base 2018-03-09 18:07:39.000000000 +0000 +++ pycuda-2018.1.1/debian/python-pycuda-doc.doc-base 2018-11-03 19:49:14.000000000 +0000 @@ -6,5 +6,5 @@ Section: Programming/Python Format: HTML -Index: /usr/share/doc/python-pycuda-doc/html/index.html -Files: /usr/share/doc/python-pycuda-doc/html/*.html +Index: /usr/share/doc/python-pycuda/html/index.html +Files: /usr/share/doc/python-pycuda/html/*.html diff -Nru pycuda-2017.1.1/debian/rules pycuda-2018.1.1/debian/rules --- pycuda-2017.1.1/debian/rules 2018-08-10 01:38:42.000000000 +0000 +++ pycuda-2018.1.1/debian/rules 2018-11-03 19:49:14.000000000 +0000 @@ -57,7 +57,7 @@ endif -GIT_REVISION=v2017.1.1 +GIT_REVISION=v2018.1.1 GIT_SUBMODULES=pycuda/compyte GIT_URL=https://git.tiker.net/trees/pycuda.git diff -Nru pycuda-2017.1.1/doc/source/driver.rst pycuda-2018.1.1/doc/source/driver.rst --- pycuda-2017.1.1/doc/source/driver.rst 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/doc/source/driver.rst 2018-10-31 18:05:29.000000000 +0000 @@ -1986,7 +1986,7 @@ .. versionadded:: 2011.1 -.. function:: stop() +.. function:: stop_profiler() .. versionadded:: 2011.1 diff -Nru pycuda-2017.1.1/doc/source/index.rst pycuda-2018.1.1/doc/source/index.rst --- pycuda-2017.1.1/doc/source/index.rst 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/doc/source/index.rst 2018-10-31 18:05:29.000000000 +0000 @@ -28,7 +28,7 @@ * Helpful Documentation. You're looking at it. 
;) -Here's an example, to given you an impression:: +Here's an example, to give you an impression:: import pycuda.autoinit import pycuda.driver as drv diff -Nru pycuda-2017.1.1/MANIFEST.in pycuda-2018.1.1/MANIFEST.in --- pycuda-2017.1.1/MANIFEST.in 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/MANIFEST.in 2018-10-31 18:05:29.000000000 +0000 @@ -21,5 +21,7 @@ include README.rst include LICENSE +recursive-include pycuda *.pyx + recursive-include bpl-subset/bpl_subset/boost *.h *.hpp *.cpp *.html *.inl *.ipp *.pl *.txt recursive-include bpl-subset/bpl_subset/libs *.h *.hpp *.cpp *.html *.inl *.ipp *.pl *.txt diff -Nru pycuda-2017.1.1/pycuda/characterize.py pycuda-2018.1.1/pycuda/characterize.py --- pycuda-2017.1.1/pycuda/characterize.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/pycuda/characterize.py 2018-10-31 18:05:29.000000000 +0000 @@ -6,7 +6,11 @@ def platform_bits(): - return tuple.__itemsize__ * 8 + import sys + if sys.maxsize > 2**32: + return 64 + else: + return 32 def has_stack(): diff -Nru pycuda-2017.1.1/pycuda/compiler.py pycuda-2018.1.1/pycuda/compiler.py --- pycuda-2017.1.1/pycuda/compiler.py 2017-12-27 21:34:52.000000000 +0000 +++ pycuda-2018.1.1/pycuda/compiler.py 2018-10-31 18:05:29.000000000 +0000 @@ -95,7 +95,7 @@ finally: cache_file.close() - except: + except Exception: pass from tempfile import mkdtemp @@ -221,7 +221,6 @@ cache_dir = False if cache_dir is None: - from os.path import join import appdirs cache_dir = os.path.join(appdirs.user_cache_dir("pycuda", "pycuda"), "compiler-cache-v1") @@ -254,6 +253,7 @@ return compile_plain(source, options, keep, nvcc, cache_dir, target) + class CudaModule(object): def _check_arch(self, arch): if arch is None: @@ -265,7 +265,7 @@ from warnings import warn warn("trying to compile for a compute capability " "higher than selected GPU") - except: + except Exception: pass def _bind_module(self): @@ -393,13 +393,14 @@ libdir = ld_path break + if libdir is None and 
isfile('/usr/lib/x86_64-linux-gnu/libcudadevrt.a'): + libdir = '/usr/lib/x86_64-linux-gnu' + if libdir is None: nvcc_path = _find_nvcc_on_path() if nvcc_path is not None: libdir = join(os.path.dirname(nvcc_path), "..", "lib64") - if libdir is None and isfile('/usr/lib/x86_64-linux-gnu/libcudadevrt.a'): - libdir = '/usr/lib/x86_64-linux-gnu' libptn = 'lib%s.a' if libdir is None: raise RuntimeError('Unable to locate the CUDA SDK installation ' @@ -429,7 +430,7 @@ from os.path import isfile, join libpath = join(self.libdir, self.libptn % libname) if not isfile(libpath): - raise FileNotFoundError('CUDA SDK library file "%s" not found' % libpath) + raise OSError('CUDA SDK library file "%s" not found' % libpath) from pycuda.driver import jit_input_type self.linker.add_file(libpath, jit_input_type.LIBRARY) return self @@ -450,13 +451,15 @@ - source is linked against the CUDA device runtime library cudadevrt - library cudadevrt is statically linked into the generated Module ''' - def __init__(self, source, nvcc="nvcc", options=[], keep=False, + def __init__(self, source, nvcc="nvcc", options=None, keep=False, no_extern_c=False, arch=None, code=None, cache_dir=None, include_dirs=[], cuda_libdir=None): super(DynamicSourceModule, self).__init__(nvcc=nvcc, link_options=None, keep=keep, no_extern_c=no_extern_c, arch=arch, code=code, cache_dir=cache_dir, include_dirs=include_dirs, cuda_libdir=cuda_libdir) + if options is None: + options = DEFAULT_NVCC_FLAGS options = options[:] if '-rdc=true' not in options: options.append('-rdc=true') diff -Nru pycuda-2017.1.1/pycuda/compyte/dtypes.py pycuda-2018.1.1/pycuda/compyte/dtypes.py --- pycuda-2017.1.1/pycuda/compyte/dtypes.py 2017-12-27 21:34:52.000000000 +0000 +++ pycuda-2018.1.1/pycuda/compyte/dtypes.py 2018-10-31 18:05:31.000000000 +0000 @@ -210,7 +210,10 @@ elif name_to_dtype is None: name_to_dtype = NAME_TO_DTYPE.__getitem__ - c_arg = c_arg.replace("const", "").replace("volatile", "") + c_arg = (c_arg + .replace("const", "") + 
.replace("volatile", "") + .replace("__restrict__", "")) # process and remove declarator import re diff -Nru pycuda-2017.1.1/pycuda/elementwise.py pycuda-2018.1.1/pycuda/elementwise.py --- pycuda-2017.1.1/pycuda/elementwise.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/pycuda/elementwise.py 2018-10-31 18:05:29.000000000 +0000 @@ -149,7 +149,7 @@ func = mod.get_function(name) func.prepare("".join(arg.struct_char for arg in arguments)) - return func, arguments + return mod, func, arguments def get_elwise_kernel(arguments, operation, @@ -157,7 +157,7 @@ """Return a L{pycuda.driver.Function} that performs the same scalar operation on one or several vectors. """ - func, arguments = get_elwise_kernel_and_types( + mod, func, arguments = get_elwise_kernel_and_types( arguments, operation, name, keep, options, **kwargs) return func @@ -171,9 +171,13 @@ self.gen_kwargs.update(dict(keep=keep, options=options, name=name, operation=operation, arguments=arguments)) + def get_texref(self, name, use_range=False): + mod, knl, arguments = self.generate_stride_kernel_and_types(use_range=use_range) + return mod.get_texref(name) + @memoize_method def generate_stride_kernel_and_types(self, use_range): - knl, arguments = get_elwise_kernel_and_types(use_range=use_range, + mod, knl, arguments = get_elwise_kernel_and_types(use_range=use_range, **self.gen_kwargs) assert [i for i, arg in enumerate(arguments) @@ -181,7 +185,7 @@ "ElementwiseKernel can only be used with functions that " \ "have at least one vector argument" - return knl, arguments + return mod, knl, arguments def __call__(self, *args, **kwargs): vectors = [] @@ -195,7 +199,7 @@ + ", ".join(six.iterkeys(kwargs))) invocation_args = [] - func, arguments = self.generate_stride_kernel_and_types( + mod, func, arguments = self.generate_stride_kernel_and_types( range_ is not None or slice_ is not None) for arg, arg_descr in zip(args, arguments): diff -Nru pycuda-2017.1.1/pycuda/gpuarray.py pycuda-2018.1.1/pycuda/gpuarray.py 
--- pycuda-2017.1.1/pycuda/gpuarray.py 2017-12-27 21:34:52.000000000 +0000 +++ pycuda-2018.1.1/pycuda/gpuarray.py 2018-10-31 18:05:29.000000000 +0000 @@ -227,7 +227,24 @@ def flags(self): return _ArrayFlags(self) - def set(self, ary, async=False, stream=None): + def set(self, ary, async_=False, stream=None, **kwargs): + # {{{ handle 'async' deprecation + + async_arg = kwargs.pop("async", None) + if async_arg is not None: + if async_ is not None: + raise TypeError("may not specify both 'async' and 'async_'") + async_ = async_arg + + if async_ is None: + async_ = False + + if kwargs: + raise TypeError("extra keyword arguments specified: %s" + % ", ".join(kwargs)) + + # }}} + if ary.size != self.size: raise ValueError("ary and self must be the same size") if ary.shape != self.shape: @@ -240,12 +257,29 @@ raise ValueError("ary and self must have the same dtype") if self.size: - _memcpy_discontig(self, ary, async=async, stream=stream) + _memcpy_discontig(self, ary, async_=async_, stream=stream) def set_async(self, ary, stream=None): - return self.set(ary, async=True, stream=stream) + return self.set(ary, async_=True, stream=stream) + + def get(self, ary=None, pagelocked=False, async_=False, stream=None, **kwargs): + # {{{ handle 'async' deprecation + + async_arg = kwargs.pop("async", None) + if async_arg is not None: + if async_ is not None: + raise TypeError("may not specify both 'async' and 'async_'") + async_ = async_arg + + if async_ is None: + async_ = False + + if kwargs: + raise TypeError("extra keyword arguments specified: %s" + % ", ".join(kwargs)) + + # }}} - def get(self, ary=None, pagelocked=False, async=False, stream=None): if ary is None: if pagelocked: ary = drv.pagelocked_empty(self.shape, self.dtype) @@ -268,11 +302,11 @@ raise TypeError("self and ary must have the same dtype") if self.size: - _memcpy_discontig(ary, self, async=async, stream=stream) + _memcpy_discontig(ary, self, async_=async_, stream=stream) return ary def get_async(self, stream=None, 
ary=None): - return self.get(ary=ary, async=True, stream=stream) + return self.get(ary=ary, async_=True, stream=stream) def copy(self): new = GPUArray(self.shape, self.dtype, self.allocator) @@ -379,9 +413,8 @@ strides = None if dtype is None: dtype = self.dtype - else: - if dtype == self.dtype: - strides = self.strides + if dtype == self.dtype: + strides = self.strides return self.__class__(self.shape, dtype, allocator=self.allocator, strides=strides, order=order) @@ -605,14 +638,10 @@ return result - def __pow__(self, other): - """pow function:: - - example: - array = pow(array) - array = pow(array,4) - array = pow(array,array) - + def _pow(self, other, new): + """ + Do the pow operator. + with new, the user can choose between ipow or just pow """ if isinstance(other, GPUArray): @@ -622,7 +651,10 @@ assert self.shape == other.shape - result = self._new_like_me(_get_common_dtype(self, other)) + if new: + result = self._new_like_me(_get_common_dtype(self, other)) + else: + result = self func = elementwise.get_pow_array_kernel( self.dtype, other.dtype, result.dtype) @@ -637,7 +669,10 @@ raise RuntimeError("only contiguous arrays may " "be used as arguments to this operation") - result = self._new_like_me() + if new: + result = self._new_like_me() + else: + result = self func = elementwise.get_pow_kernel(self.dtype) func.prepared_async_call(self._grid, self._block, None, other, self.gpudata, result.gpudata, @@ -645,6 +680,28 @@ return result + def __pow__(self, other): + """pow function:: + + example: + array = pow(array) + array = pow(array,4) + array = pow(array,array) + + """ + return self._pow(other,new=True) + + def __ipow__(self, other): + """ipow function:: + + example: + array **= 4 + array **= array + + """ + return self._pow(other,new=False) + + def reverse(self, stream=None): """Return this array in reversed order. The array is treated as one-dimensional. 
@@ -834,7 +891,7 @@ array_stride = self.strides[array_axis] - new_shape.append((stop-start-1)//idx_stride+1) + new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1) new_strides.append(idx_stride*array_stride) new_offset += array_stride*start @@ -1171,7 +1228,7 @@ return strides -def _memcpy_discontig(dst, src, async=False, stream=None): +def _memcpy_discontig(dst, src, async_=False, stream=None): """Copy the contents of src into dst. The two arrays should have the same dtype, shape, and order, but @@ -1208,7 +1265,9 @@ dst_strides = [dst.strides[axis] for axis in axes] # copy functions require contiguity in minor axis, so add new axis if needed - if len(shape) == 0 or src_strides[0] != src.dtype.itemsize or dst_strides[0] != dst.dtype.itemsize: + if (len(shape) == 0 + or src_strides[0] != src.dtype.itemsize + or dst_strides[0] != dst.dtype.itemsize): shape[0:0] = [1] src_strides[0:0] = [0] dst_strides[0:0] = [0] @@ -1221,7 +1280,7 @@ if dst_strides[i] < dst_strides[i-1]: raise ValueError("src and dst must have same order") if (src_strides[i-1] * shape[i-1] == src_strides[i] and - dst_strides[i-1] * shape[i-1] == dst_strides[i]): + dst_strides[i-1] * shape[i-1] == dst_strides[i]): shape[i-1:i+1] = [shape[i-1] * shape[i]] del src_strides[i] del dst_strides[i] @@ -1232,8 +1291,9 @@ if len(shape) <= 1: if isinstance(src, GPUArray): if isinstance(dst, GPUArray): - if async: - drv.memcpy_dtod_async(dst.gpudata, src.gpudata, src.nbytes, stream=stream) + if async_: + drv.memcpy_dtod_async( + dst.gpudata, src.gpudata, src.nbytes, stream=stream) else: drv.memcpy_dtod(dst.gpudata, src.gpudata, src.nbytes) else: @@ -1241,14 +1301,15 @@ # having no gaps, but the axes could be transposed # so that the order is neither Fortran or C. # So, we attempt to get a contiguous view of dst. 
- dst = _as_strided(dst, shape=(dst.size,), strides=(dst.dtype.itemsize,)) - if async: + dst = _as_strided( + dst, shape=(dst.size,), strides=(dst.dtype.itemsize,)) + if async_: drv.memcpy_dtoh_async(dst, src.gpudata, stream=stream) else: drv.memcpy_dtoh(dst, src.gpudata) else: src = _as_strided(src, shape=(src.size,), strides=(src.dtype.itemsize,)) - if async: + if async_: drv.memcpy_htod_async(dst.gpudata, src, stream=stream) else: drv.memcpy_htod(dst.gpudata, src) @@ -1259,7 +1320,9 @@ elif len(shape) == 3: copy = drv.Memcpy3D() else: - raise ValueError("more than 2 discontiguous axes not supported %s" % (tuple(sorted(axes)),)) + raise ValueError( + "more than 2 discontiguous axes not supported %s" + % (tuple(sorted(axes)),)) if isinstance(src, GPUArray): copy.set_src_device(src.gpudata) @@ -1278,22 +1341,24 @@ copy.height = shape[1] if len(shape) == 2: - if async: + if async_: copy(stream) else: copy(aligned=True) - else: # len(shape) == 3 + else: # len(shape) == 3 if src_strides[2] % src_strides[1] != 0: - raise RuntimeError("src's major stride must be a multiple of middle stride") + raise RuntimeError( + "src's major stride must be a multiple of middle stride") copy.src_height = src_strides[2] // src_strides[1] if dst_strides[2] % dst_strides[1] != 0: - raise RuntimeError("dst's major stride must be a multiple of middle stride") + raise RuntimeError( + "dst's major stride must be a multiple of middle stride") copy.dst_height = dst_strides[2] // dst_strides[1] copy.depth = shape[2] - if async: + if async_: copy(stream) else: copy() @@ -1500,13 +1565,13 @@ return a.transpose(axes) -def reshape(a, shape): +def reshape(a, *shape, **kwargs): """Gives a new shape to an array without changing its data. .. 
versionadded:: 2015.2 """ - return a.reshape(shape) + return a.reshape(*shape, **kwargs) # }}} diff -Nru pycuda-2017.1.1/pycuda/__init__.py pycuda-2018.1.1/pycuda/__init__.py --- pycuda-2017.1.1/pycuda/__init__.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/pycuda/__init__.py 2018-10-31 18:05:29.000000000 +0000 @@ -1,3 +1,3 @@ -VERSION = (2017, 1, 1) +VERSION = (2018, 1, 1) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS diff -Nru pycuda-2017.1.1/pycuda/scan.py pycuda-2018.1.1/pycuda/scan.py --- pycuda-2017.1.1/pycuda/scan.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/pycuda/scan.py 2018-10-31 18:05:29.000000000 +0000 @@ -345,7 +345,7 @@ class _ScanKernelBase(object): def __init__(self, dtype, scan_expr, neutral=None, - name_prefix="scan", options=[], preamble="", devices=None): + name_prefix="scan", options=None, preamble="", devices=None): if isinstance(self, ExclusiveScanKernel) and neutral is None: raise ValueError("neutral element is required for exclusive scan") diff -Nru pycuda-2017.1.1/pycuda/sparse/pkt_build.py pycuda-2018.1.1/pycuda/sparse/pkt_build.py --- pycuda-2017.1.1/pycuda/sparse/pkt_build.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/pycuda/sparse/pkt_build.py 2018-10-31 18:05:29.000000000 +0000 @@ -12,6 +12,7 @@ packet_start = 0 base_dof_nr = 0 + max_thread_costs = int(max_thread_costs) index_array = np.zeros( max_thread_costs*thread_count, dtype=spmv.packed_index_dtype) data_array = np.zeros( diff -Nru pycuda-2017.1.1/src/cpp/cuda.hpp pycuda-2018.1.1/src/cpp/cuda.hpp --- pycuda-2017.1.1/src/cpp/cuda.hpp 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/src/cpp/cuda.hpp 2018-10-31 18:05:29.000000000 +0000 @@ -1556,7 +1556,7 @@ py::handle<>( #if PY_VERSION_HEX >= 0x03030000 PyMemoryView_FromMemory((char *) (m_devptr + offset), size, - PyBUF_READ | PyBUF_WRITE) + PyBUF_WRITE) #else /* Py2 */ PyBuffer_FromReadWriteMemory((void *) (m_devptr + offset), size) #endif diff 
-Nru pycuda-2017.1.1/src/wrapper/wrap_cudadrv.cpp pycuda-2018.1.1/src/wrapper/wrap_cudadrv.cpp --- pycuda-2017.1.1/src/wrapper/wrap_cudadrv.cpp 2017-12-27 21:34:52.000000000 +0000 +++ pycuda-2018.1.1/src/wrapper/wrap_cudadrv.cpp 2018-10-31 18:05:29.000000000 +0000 @@ -1019,7 +1019,7 @@ #if CUDAPP_CUDA_VERSION >= 3020 .value("MALLOC_HEAP_SIZE", CU_LIMIT_MALLOC_HEAP_SIZE) #endif -#if CUDAPP_CUDA_VERSION >= 3050 +#if CUDAPP_CUDA_VERSION >= 4010 .value("DEV_RUNTIME_SYNC_DEPTH", CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH) .value("DEV_RUNTIME_PENDING_LAUNCH_COUNT", CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT) #endif diff -Nru pycuda-2017.1.1/test/test_cumath.py pycuda-2018.1.1/test/test_cumath.py --- pycuda-2017.1.1/test/test_cumath.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/test/test_cumath.py 2018-10-31 18:05:29.000000000 +0000 @@ -78,9 +78,9 @@ test_sqrt = make_unary_function_test("sqrt", 1e-5, 1, 2e-7) test_sin = make_unary_function_test("sin", -10, 10, 1e-7) - test_sin_c = make_unary_function_test("sin", -3, 3, 2e-6, complex=True) + test_sin_c = make_unary_function_test("sin", -3, 3, 2.1e-6, complex=True) test_cos = make_unary_function_test("cos", -10, 10, 1e-7) - test_cos_c = make_unary_function_test("cos", -3, 3, 2e-6, complex=True) + test_cos_c = make_unary_function_test("cos", -3, 3, 2.1e-6, complex=True) test_asin = make_unary_function_test("asin", -0.9, 0.9, 5e-7) #test_sin_c = make_unary_function_test("sin", -0.9, 0.9, 2e-6, complex=True) test_acos = make_unary_function_test("acos", -0.9, 0.9, 5e-7) @@ -242,5 +242,5 @@ if len(sys.argv) > 1: exec (sys.argv[1]) else: - from py.test.cmdline import main + from pytest import main main([__file__]) diff -Nru pycuda-2017.1.1/test/test_driver.py pycuda-2018.1.1/test/test_driver.py --- pycuda-2017.1.1/test/test_driver.py 2017-12-27 21:34:50.000000000 +0000 +++ pycuda-2018.1.1/test/test_driver.py 2018-10-31 18:05:29.000000000 +0000 @@ -1,10 +1,8 @@ -from __future__ import division -from __future__ import 
absolute_import -from __future__ import print_function +from __future__ import division, absolute_import, print_function import numpy as np import numpy.linalg as la from pycuda.tools import mark_cuda_test, dtype_to_ctype -import pytest +import pytest # noqa from six.moves import range @@ -12,7 +10,7 @@ try: import pycuda # noqa return True - except: + except Exception: return False @@ -98,7 +96,7 @@ a = gpuarray.vec.make_float3(1, 2, 3) dest = np.empty((400), gpuarray.vec.float3) - set_them(drv.Out(dest), a, block=(400,1,1)) + set_them(drv.Out(dest), a, block=(400, 1, 1)) assert (dest == a).all() @mark_cuda_test @@ -905,7 +903,7 @@ drv.memcpy_dtoh(e, e_gpu) drv.memcpy_dtoh(f, f_gpu) - #print(c,d,e,f) + # print(c,d,e,f) a = np.random.randint(10, size=100) b = np.random.randint(10, size=100) @@ -918,6 +916,7 @@ @mark_cuda_test def test_jit_link_module(self): + from pycuda.compiler import DEFAULT_NVCC_FLAGS if drv.Context.get_device().compute_capability() < (3, 5): from pytest import skip skip("need compute capability 3.5 or higher for dynamic parallelism") @@ -936,13 +935,17 @@ from pycuda.compiler import DynamicModule mod = DynamicModule() - mod.add_source(test_outer_cu, nvcc_options=['-rdc=true', '-lcudadevrt']) - mod.add_source(test_inner_cu, nvcc_options=['-rdc=true', '-lcudadevrt']) + mod.add_source( + test_outer_cu, nvcc_options=( + ['-rdc=true', '-lcudadevrt']+DEFAULT_NVCC_FLAGS)) + mod.add_source( + test_inner_cu, nvcc_options=( + ['-rdc=true', '-lcudadevrt']+DEFAULT_NVCC_FLAGS)) mod.add_stdlib('cudadevrt') mod.link() test_kernel = mod.get_function('test_kernel') - test_kernel(grid=(2,1), block=(1,1,1)) + test_kernel(grid=(2, 1), block=(1, 1, 1)) def test_import_pyopencl_before_pycuda(): @@ -959,7 +962,7 @@ import sys if len(sys.argv) > 1: - exec (sys.argv[1]) + exec(sys.argv[1]) else: - from py.test.cmdline import main + from pytest import main main([__file__]) diff -Nru pycuda-2017.1.1/test/test_gpuarray.py pycuda-2018.1.1/test/test_gpuarray.py --- 
pycuda-2017.1.1/test/test_gpuarray.py 2017-12-27 21:34:52.000000000 +0000 +++ pycuda-2018.1.1/test/test_gpuarray.py 2018-10-31 18:05:29.000000000 +0000 @@ -36,6 +36,10 @@ result = (a_gpu**a_gpu).get() assert (np.abs(pow(a, a) - result) < 1e-3).all() + a_gpu **= a_gpu + a_gpu = a_gpu.get() + assert (np.abs(pow(a, a) - a_gpu) < 1e-3).all() + @mark_cuda_test def test_pow_number(self): a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) @@ -44,6 +48,10 @@ result = pow(a_gpu, 2).get() assert (np.abs(a**2 - result) < 1e-3).all() + a_gpu **= 2 + a_gpu = a_gpu.get() + assert (np.abs(a**2 - a_gpu) < 1e-3).all() + @mark_cuda_test def test_numpy_integer_shape(self): gpuarray.empty(np.int32(17), np.float32) @@ -944,6 +952,9 @@ @mark_cuda_test def test_dot_allocator(self): + from pytest import skip + skip("https://github.com/inducer/pycuda/issues/163") + import pycuda.tools pool = pycuda.tools.DeviceMemoryPool() @@ -1142,5 +1153,5 @@ if len(sys.argv) > 1: exec (sys.argv[1]) else: - from py.test.cmdline import main + from pytest import main main([__file__])