diff -Nru python-bitarray-0.8.1/bitarray/_bitarray.c python-bitarray-1.6.3/bitarray/_bitarray.c --- python-bitarray-0.8.1/bitarray/_bitarray.c 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/_bitarray.c 2021-01-21 00:52:57.000000000 +0000 @@ -1,56 +1,26 @@ /* - This file is the C part of the bitarray package. Almost all - functionality is implemented here. + Copyright (c) 2008 - 2021, Ilan Schnell + bitarray is published under the PSF license. + + This file is the C part of the bitarray package. + All functionality of the bitarray object is implemented here. Author: Ilan Schnell */ #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "pythoncapi_compat.h" +#include "bitarray.h" -#if PY_MAJOR_VERSION >= 3 -#define IS_PY3K -#endif +/* block size used when reading / writing blocks of bytes from files */ +#define BLOCKSIZE 65536 #ifdef IS_PY3K -#include "bytesobject.h" -#define PyString_FromStringAndSize PyBytes_FromStringAndSize -#define PyString_FromString PyBytes_FromString -#define PyString_Check PyBytes_Check -#define PyString_Size PyBytes_Size -#define PyString_AsString PyBytes_AsString -#define PyString_ConcatAndDel PyBytes_ConcatAndDel #define Py_TPFLAGS_HAVE_WEAKREFS 0 -#endif - -#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 5 -/* Py_ssize_t was introduced in Python 2.5, substitute long for it */ -typedef long Py_ssize_t; -#define PY_SSIZE_T_MAX LONG_MAX -#define PY_SSIZE_T_MIN LONG_MIN -Py_ssize_t PyNumber_AsSsize_t(PyObject *o, PyObject *exc) -{ - return PyLong_AsLong(o); -} -int PyIndex_Check(PyObject *o) -{ - return 0; -} -#define PY_SSIZE_T_FMT "l" +#define BYTES_SIZE_FMT "y#" #else -/* Python 2.5 and up uses 'n' as the format char for Py_ssize_t */ -#define PY_SSIZE_T_FMT "n" -#endif - -#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 6 -/* backward compatibility with Python 2.5 */ -#define Py_TYPE(ob) (((PyObject *) (ob))->ob_type) -#define Py_SIZE(ob) (((PyVarObject *) (ob))->ob_size) -#endif - -#if PY_MAJOR_VERSION == 2 && 
PY_MINOR_VERSION == 7 -/* (new) buffer protocol */ -#define WITH_BUFFER +#define BYTES_SIZE_FMT "s#" #endif #ifdef STDC_HEADERS @@ -61,146 +31,112 @@ #endif /* HAVE_SYS_TYPES_H */ #endif /* !STDC_HEADERS */ +static int default_endian = ENDIAN_BIG; -typedef long long int idx_t; - -/* throughout: 0 = little endian 1 = big endian */ -#define DEFAULT_ENDIAN 1 - -typedef struct { - PyObject_VAR_HEAD -#ifdef WITH_BUFFER - int ob_exports; /* how many buffer exports */ -#endif - char *ob_item; - Py_ssize_t allocated; /* how many bytes allocated */ - idx_t nbits; /* length og bitarray */ - int endian; /* bit endianness of bitarray */ - PyObject *weakreflist; /* list of weak references */ -} bitarrayobject; - -static PyTypeObject Bitarraytype; - -#define bitarray_Check(obj) PyObject_TypeCheck(obj, &Bitarraytype) - -#define BITS(bytes) (((idx_t) 8) * ((idx_t) (bytes))) - -#define BYTES(bits) (((bits) == 0) ? 0 : (((bits) - 1) / 8 + 1)) - -#define BITMASK(endian, i) (((char) 1) << ((endian) ? (7 - (i)%8) : (i)%8)) - -/* ------------ low level access to bits in bitarrayobject ------------- */ - -#define GETBIT(self, i) \ - ((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i) ? 
1 : 0) +static PyTypeObject Bitarray_Type; -static void -setbit(bitarrayobject *self, idx_t i, int bit) -{ - char *cp, mask; +#define bitarray_Check(obj) PyObject_TypeCheck((obj), &Bitarray_Type) - mask = BITMASK(self->endian, i); - cp = self->ob_item + i / 8; - if (bit) - *cp |= mask; - else - *cp &= ~mask; -} static int -check_overflow(idx_t nbits) +resize(bitarrayobject *self, Py_ssize_t nbits) { - idx_t max_bits; + const Py_ssize_t allocated = self->allocated, size = Py_SIZE(self); + Py_ssize_t newsize; + size_t new_allocated; - assert(nbits >= 0); - if (sizeof(void *) == 4) { /* 32bit machine */ - max_bits = ((idx_t) 1) << 34; /* 2^34 = 16 Gbits*/ - if (nbits > max_bits) { - char buff[256]; - sprintf(buff, "cannot create bitarray of size %lld, " - "max size is %lld", nbits, max_bits); - PyErr_SetString(PyExc_OverflowError, buff); - return -1; - } + assert(allocated >= size && size == BYTES(self->nbits)); + /* ob_item == NULL implies ob_size == allocated == 0 */ + assert(self->ob_item != NULL || (size == 0 && allocated == 0)); + /* allocated == 0 implies size == 0 */ + assert(allocated != 0 || size == 0); + + newsize = BYTES(nbits); + if (nbits < 0 || BITS(newsize) < 0) { + PyErr_Format(PyExc_OverflowError, "bitarray resize %zd", nbits); + return -1; } - return 0; -} -static int -resize(bitarrayobject *self, idx_t nbits) -{ - Py_ssize_t newsize; - size_t _new_size; /* for allocation */ + if (newsize == size) { + /* the buffer size hasn't changed - bypass everything */ + self->nbits = nbits; + return 0; + } - if (check_overflow(nbits) < 0) + if (self->ob_exports > 0) { + PyErr_SetString(PyExc_BufferError, + "cannot resize bitarray that is exporting buffers"); return -1; + } - newsize = (Py_ssize_t) BYTES(nbits); - - /* Bypass realloc() when a previous overallocation is large enough - to accommodate the newsize. If the newsize is 16 smaller than the - current size, then proceed with the realloc() to shrink the list. 
+ /* Bypass reallocation when a allocation is large enough to accommodate + the newsize. If the newsize falls lower than half the allocated size, + then proceed with the reallocation to shrink the bitarray. */ - if (self->allocated >= newsize && - Py_SIZE(self) < newsize + 16 && - self->ob_item != NULL) - { - Py_SIZE(self) = newsize; + if (allocated >= newsize && newsize >= (allocated >> 1)) { + assert(self->ob_item != NULL || newsize == 0); + Py_SET_SIZE(self, newsize); self->nbits = nbits; return 0; } - if (newsize >= Py_SIZE(self) + 65536) - /* Don't overallocate when the size increase is very large. */ - _new_size = newsize; - else + if (newsize == 0) { + PyMem_FREE(self->ob_item); + self->ob_item = NULL; + Py_SET_SIZE(self, 0); + self->allocated = 0; + self->nbits = 0; + return 0; + } + + new_allocated = (size_t) newsize; + if (size == 0 && newsize <= 4) + /* When resizing an empty bitarray, we want at least 4 bytes. */ + new_allocated = 4; + + /* Over-allocate when the (previous) size is non-zero (as we often + extend an empty array on creation) and the size is actually + increasing. */ + else if (size != 0 && newsize > size) /* This over-allocates proportional to the bitarray size, making - room for additional growth. The over-allocation is mild, but is - enough to give linear-time amortized behavior over a long - sequence of appends() in the presence of a poorly-performing - system realloc(). + room for additional growth. The growth pattern is: 0, 4, 8, 16, 25, 34, 44, 54, 65, 77, ... - Note, the pattern starts out the same as for lists but then + The pattern starts out the same as for lists but then grows at a smaller rate so that larger bitarrays only overallocate by about 1/16th -- this is done because bitarrays are assumed - to be memory critical. - */ - _new_size = (newsize >> 4) + (Py_SIZE(self) < 8 ? 3 : 7) + newsize; + to be memory critical. */ + new_allocated += (newsize >> 4) + (newsize < 8 ? 
3 : 7); - self->ob_item = PyMem_Realloc(self->ob_item, _new_size); + assert(new_allocated >= (size_t) newsize); + self->ob_item = PyMem_Realloc(self->ob_item, new_allocated); if (self->ob_item == NULL) { PyErr_NoMemory(); return -1; } - Py_SIZE(self) = newsize; - self->allocated = _new_size; + Py_SET_SIZE(self, newsize); + self->allocated = new_allocated; self->nbits = nbits; return 0; } /* create new bitarray object without initialization of buffer */ static PyObject * -newbitarrayobject(PyTypeObject *type, idx_t nbits, int endian) +newbitarrayobject(PyTypeObject *type, Py_ssize_t nbits, int endian) { + const Py_ssize_t nbytes = BYTES(nbits); bitarrayobject *obj; - Py_ssize_t nbytes; - - if (check_overflow(nbits) < 0) - return NULL; + assert(nbits >= 0); obj = (bitarrayobject *) type->tp_alloc(type, 0); if (obj == NULL) return NULL; - nbytes = (Py_ssize_t) BYTES(nbits); - Py_SIZE(obj) = nbytes; - obj->nbits = nbits; - obj->endian = endian; + Py_SET_SIZE(obj, nbytes); if (nbytes == 0) { obj->ob_item = NULL; } else { - obj->ob_item = PyMem_Malloc((size_t) nbytes); + obj->ob_item = (char *) PyMem_Malloc((size_t) nbytes); if (obj->ob_item == NULL) { PyObject_Del(obj); PyErr_NoMemory(); @@ -208,6 +144,9 @@ } } obj->allocated = nbytes; + obj->nbits = nbits; + obj->endian = endian; + obj->ob_exports = 0; obj->weakreflist = NULL; return (PyObject *) obj; } @@ -226,33 +165,43 @@ /* copy n bits from other (starting at b) onto self (starting at a) */ static void -copy_n(bitarrayobject *self, idx_t a, - bitarrayobject *other, idx_t b, idx_t n) +copy_n(bitarrayobject *self, Py_ssize_t a, + bitarrayobject *other, Py_ssize_t b, Py_ssize_t n) { - idx_t i; + Py_ssize_t i; assert(0 <= n && n <= self->nbits && n <= other->nbits); assert(0 <= a && a <= self->nbits - n); assert(0 <= b && b <= other->nbits - n); + if (n == 0) + return; - /* XXX + /* When the start positions are at byte positions, we can copy whole + bytes using memmove, and copy the remaining few bits individually. 
+ Note that the order of these two operations matters when copying + self to self. */ if (self->endian == other->endian && a % 8 == 0 && b % 8 == 0 && n >= 8) { - Py_ssize_t bytes; - idx_t bits; + const size_t bytes = n / 8; + const Py_ssize_t bits = BITS(bytes); + + assert(bits <= n && n < bits + 8); + if (a <= b) + memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); + + if (n != bits) + copy_n(self, bits + a, other, bits + b, n - bits); + + if (a > b) + memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); - bytes = n / 8; - bits = 8 * bytes; - copy_n(self, bits + a, other, bits + b, n - bits); - memmove(self->ob_item + a / 8, other->ob_item + b / 8, bytes); return; } - */ - /* the different type of looping is only relevant when other and self - are the same object, i.e. when copying a piece of an bitarrayobject - onto itself */ - if (a < b) { + /* The two different types of looping are only relevant when copying + self to self, i.e. when copying a piece of an bitarrayobject onto + itself. */ + if (a <= b) { for (i = 0; i < n; i++) /* loop forward (delete) */ setbit(self, i + a, GETBIT(other, i + b)); } @@ -264,7 +213,7 @@ /* starting at start, delete n bits from self */ static int -delete_n(bitarrayobject *self, idx_t start, idx_t n) +delete_n(bitarrayobject *self, Py_ssize_t start, Py_ssize_t n) { assert(0 <= start && start <= self->nbits); assert(0 <= n && n <= self->nbits - start); @@ -277,7 +226,7 @@ /* starting at start, insert n (uninitialized) bits into self */ static int -insert_n(bitarrayobject *self, idx_t start, idx_t n) +insert_n(bitarrayobject *self, Py_ssize_t start, Py_ssize_t n) { assert(0 <= start && start <= self->nbits); assert(n >= 0); @@ -290,225 +239,157 @@ return 0; } -/* sets ususet bits to 0, i.e. 
the ones in the last byte (if any), - and return the number of bits set -- self->nbits is unchanged */ -static int -setunused(bitarrayobject *self) +static void +invert(bitarrayobject *self) { - idx_t i, n; - int res = 0; + const Py_ssize_t nbytes = Py_SIZE(self); + Py_ssize_t i; - n = BITS(Py_SIZE(self)); - for (i = self->nbits; i < n; i++) { - setbit(self, i, 0); - res++; - } - assert(res < 8); - return res; + for (i = 0; i < nbytes; i++) + self->ob_item[i] = ~self->ob_item[i]; } -/* repeat self n times */ +/* repeat self n times (negative n is treated as 0) */ static int -repeat(bitarrayobject *self, idx_t n) +repeat(bitarrayobject *self, Py_ssize_t n) { - idx_t nbits, i; - - if (n <= 0) { - if (resize(self, 0) < 0) - return -1; - } - if (n > 1) { - nbits = self->nbits; - if (resize(self, nbits * n) < 0) - return -1; - for (i = 1; i < n; i++) - copy_n(self, i * nbits, self, 0, nbits); - } - return 0; -} - + const Py_ssize_t nbits = self->nbits; + Py_ssize_t i; -enum op_type { - OP_and, - OP_or, - OP_xor, -}; + if (nbits == 0 || n == 1) /* nothing to do */ + return 0; -/* perform bitwise operation */ -static int -bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper) -{ - bitarrayobject *other; - Py_ssize_t i; + if (n <= 0) /* clear */ + return resize(self, 0); - if (!bitarray_Check(arg)) { - PyErr_SetString(PyExc_TypeError, - "bitarray object expected for bitwise operation"); + assert(n > 1 && nbits > 0); + if (nbits > PY_SSIZE_T_MAX / n) { + PyErr_Format(PyExc_OverflowError, + "cannot repeat bitarray (of size %zd) %zd times", + nbits, n); return -1; } - other = (bitarrayobject *) arg; - if (self->nbits != other->nbits) { - PyErr_SetString(PyExc_ValueError, - "bitarrays of equal length expected for bitwise operation"); + + if (resize(self, n * nbits) < 0) return -1; - } - setunused(self); - setunused(other); - switch (oper) { - case OP_and: - for (i = 0; i < Py_SIZE(self); i++) - self->ob_item[i] &= other->ob_item[i]; - break; - case OP_or: - for (i = 
0; i < Py_SIZE(self); i++) - self->ob_item[i] |= other->ob_item[i]; - break; - case OP_xor: - for (i = 0; i < Py_SIZE(self); i++) - self->ob_item[i] ^= other->ob_item[i]; - break; - } + + for (i = 1; i < n; i++) + copy_n(self, i * nbits, self, 0, nbits); + return 0; } /* set the bits from start to stop (excluding) in self to val */ static void -setrange(bitarrayobject *self, idx_t start, idx_t stop, int val) +setrange(bitarrayobject *self, Py_ssize_t start, Py_ssize_t stop, int val) { - idx_t i; + Py_ssize_t i; assert(0 <= start && start <= self->nbits); assert(0 <= stop && stop <= self->nbits); - for (i = start; i < stop; i++) - setbit(self, i, val); -} - -static void -invert(bitarrayobject *self) -{ - Py_ssize_t i; - - for (i = 0; i < Py_SIZE(self); i++) - self->ob_item[i] = ~self->ob_item[i]; -} -/* reverse the order of bits in each byte of the buffer */ -static void -bytereverse(bitarrayobject *self) -{ - static char trans[256]; - static int setup = 0; - Py_ssize_t i; - unsigned char c; + if (self->nbits == 0 || start >= stop) + return; - if (!setup) { - /* setup translation table, which maps each byte to it's reversed: - trans = {0, 128, 64, 192, 32, 160, ..., 255} */ - int j, k; - for (k = 0; k < 256; k++) { - trans[k] = 0x00; - for (j = 0; j < 8; j++) - if (1 << (7 - j) & k) - trans[k] |= 1 << j; - } - setup = 1; + if (stop >= start + 8) { + const Py_ssize_t byte_start = BYTES(start); + const Py_ssize_t byte_stop = stop / 8; + + for (i = start; i < BITS(byte_start); i++) + setbit(self, i, val); + memset(self->ob_item + byte_start, val ? 0xff : 0x00, + (size_t) (byte_stop - byte_start)); + for (i = BITS(byte_stop); i < stop; i++) + setbit(self, i, val); } - - setunused(self); - for (i = 0; i < Py_SIZE(self); i++) { - c = self->ob_item[i]; - self->ob_item[i] = trans[c]; + else { + for (i = start; i < stop; i++) + setbit(self, i, val); } } +/* Return number of 1 bits. This function never fails. 
*/ +static Py_ssize_t +count(bitarrayobject *self, int vi, Py_ssize_t start, Py_ssize_t stop) +{ + Py_ssize_t res = 0, i; -static int bitcount_lookup[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, -}; + assert(0 <= start && start <= self->nbits); + assert(0 <= stop && stop <= self->nbits); + assert(0 <= vi && vi <= 1); + assert(BYTES(stop) <= Py_SIZE(self)); -/* returns number of 1 bits */ -static idx_t -count(bitarrayobject *self) -{ - Py_ssize_t i; - idx_t res = 0; - unsigned char c; + if (self->nbits == 0 || start >= stop) + return 0; - setunused(self); - for (i = 0; i < Py_SIZE(self); i++) { - c = self->ob_item[i]; - res += bitcount_lookup[c]; + if (stop >= start + 8) { + const Py_ssize_t byte_start = BYTES(start); + const Py_ssize_t byte_stop = stop / 8; + Py_ssize_t j; + + for (i = start; i < BITS(byte_start); i++) + res += GETBIT(self, i); + for (j = byte_start; j < byte_stop; j++) + res += bitcount_lookup[(unsigned char) self->ob_item[j]]; + for (i = BITS(byte_stop); i < stop; i++) + res += GETBIT(self, i); } - return res; + else { + for (i = start; i < stop; i++) + res += GETBIT(self, i); + } + return vi ? res : stop - start - res; } -/* return index of first occurrence of vi, -1 when x is not in found. 
*/ -static idx_t -findfirst(bitarrayobject *self, int vi, idx_t start, idx_t stop) +/* Return index of first occurrence of vi, -1 when x is not in found. + This function never fails. */ +static Py_ssize_t +findfirst(bitarrayobject *self, int vi, Py_ssize_t start, Py_ssize_t stop) { - Py_ssize_t j; - idx_t i; - char c; + Py_ssize_t i; - if (Py_SIZE(self) == 0) - return -1; - if (start < 0 || start > self->nbits) - start = 0; - if (stop < 0 || stop > self->nbits) - stop = self->nbits; - if (start >= stop) + assert(0 <= start && start <= self->nbits); + assert(0 <= stop && stop <= self->nbits); + assert(0 <= vi && vi <= 1); + assert(BYTES(stop) <= Py_SIZE(self)); + + if (self->nbits == 0 || start >= stop) return -1; - if (stop > start + 8) { + if (stop >= start + 8) { /* seraching for 1 means: break when byte is not 0x00 searching for 0 means: break when byte is not 0xff */ - c = vi ? 0x00 : 0xff; + const char c = vi ? 0x00 : 0xff; /* skip ahead by checking whole bytes */ - for (j = (Py_ssize_t) (start / 8); j < BYTES(stop); j++) - if (c ^ self->ob_item[j]) + for (i = start / 8; i < BYTES(stop); i++) { + if (c ^ self->ob_item[i]) break; - - if (j == Py_SIZE(self)) - j--; - assert(0 <= j && j < Py_SIZE(self)); - - if (start < BITS(j)) - start = BITS(j); + } + if (start < BITS(i)) + start = BITS(i); } /* fine grained search */ - for (i = start; i < stop; i++) + for (i = start; i < stop; i++) { if (GETBIT(self, i) == vi) return i; - + } return -1; } -/* search for the first occurrence bitarray xa (in self), starting at p, - and return its position (-1 when not found) +/* search for the first occurrence of bitarray xa (in self), starting at p, + and return its position (or -1 when not found) */ -static idx_t -search(bitarrayobject *self, bitarrayobject *xa, idx_t p) +static Py_ssize_t +search(bitarrayobject *self, bitarrayobject *xa, Py_ssize_t p) { - idx_t i; + Py_ssize_t i; assert(p >= 0); + if (xa->nbits == 1) /* faster for sparse bitarrays */ + return findfirst(self, 
GETBIT(xa, 0), p, self->nbits); + while (p < self->nbits - xa->nbits + 1) { for (i = 0; i < xa->nbits; i++) if (GETBIT(self, p + i) != GETBIT(xa, i)) @@ -522,9 +403,9 @@ } static int -set_item(bitarrayobject *self, idx_t i, PyObject *v) +set_item(bitarrayobject *self, Py_ssize_t i, PyObject *v) { - long vi; + int vi; assert(0 <= i && i < self->nbits); vi = PyObject_IsTrue(v); @@ -542,43 +423,21 @@ return set_item(self, self->nbits - 1, item); } -static PyObject * -unpack(bitarrayobject *self, char zero, char one) -{ - PyObject *res; - Py_ssize_t i; - char *str; - - if (self->nbits > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, "bitarray too large to unpack"); - return NULL; - } - str = PyMem_Malloc((size_t) self->nbits); - if (str == NULL) { - PyErr_NoMemory(); - return NULL; - } - for (i = 0; i < self->nbits; i++) { - *(str + i) = GETBIT(self, i) ? one : zero; - } - res = PyString_FromStringAndSize(str, (Py_ssize_t) self->nbits); - PyMem_Free((void *) str); - return res; -} - static int extend_bitarray(bitarrayobject *self, bitarrayobject *other) { - idx_t n_sum; + /* We have to store the sizes before we resize, and since + other may be self, we also need to store other->nbits. 
*/ + const Py_ssize_t self_nbits = self->nbits; + const Py_ssize_t other_nbits = other->nbits; - if (other->nbits == 0) + if (other_nbits == 0) return 0; - n_sum = self->nbits + other->nbits; - if (resize(self, n_sum) < 0) + if (resize(self, self_nbits + other_nbits) < 0) return -1; - copy_n(self, n_sum - other->nbits, other, 0, other->nbits); + copy_n(self, self_nbits, other, 0, other_nbits); return 0; } @@ -608,7 +467,7 @@ Py_ssize_t n, i; assert(PyList_Check(list)); - n = PyList_Size(list); + n = PyList_GET_SIZE(list); if (n == 0) return 0; @@ -616,7 +475,7 @@ return -1; for (i = 0; i < n; i++) { - item = PyList_GetItem(list, i); + item = PyList_GET_ITEM(list, i); if (item == NULL) return -1; if (set_item(self, self->nbits - n + i, item) < 0) @@ -632,7 +491,7 @@ Py_ssize_t n, i; assert(PyTuple_Check(tuple)); - n = PyTuple_Size(tuple); + n = PyTuple_GET_SIZE(tuple); if (n == 0) return 0; @@ -640,7 +499,7 @@ return -1; for (i = 0; i < n; i++) { - item = PyTuple_GetItem(tuple, i); + item = PyTuple_GET_ITEM(tuple, i); if (item == NULL) return -1; if (set_item(self, self->nbits - n + i, item) < 0) @@ -649,78 +508,42 @@ return 0; } -/* extend_string(): extend the bitarray from a string, where each whole - characters is converted to a single bit -*/ -enum conv_tp { - STR_01, /* '0' -> 0 '1' -> 1 no other characters allowed */ - STR_RAW, /* 0x00 -> 0 other -> 1 */ -}; - static int -extend_string(bitarrayobject *self, PyObject *string, enum conv_tp conv) +extend_01(bitarrayobject *self, PyObject *bytes) { - Py_ssize_t strlen, i; - char c, *str; + Py_ssize_t nbytes, i; + char c, *data; int vi = 0; - assert(PyString_Check(string)); - strlen = PyString_Size(string); - if (strlen == 0) + assert(PyBytes_Check(bytes)); + nbytes = PyBytes_GET_SIZE(bytes); + if (nbytes == 0) return 0; - if (resize(self, self->nbits + strlen) < 0) + if (resize(self, self->nbits + nbytes) < 0) return -1; - str = PyString_AsString(string); - - for (i = 0; i < strlen; i++) { - c = *(str + i); - /* 
depending on conv, map c to bit */ - switch (conv) { - case STR_01: - switch (c) { - case '0': vi = 0; break; - case '1': vi = 1; break; - default: - PyErr_Format(PyExc_ValueError, - "character must be '0' or '1', found '%c'", c); - return -1; - } - break; - case STR_RAW: - vi = c ? 1 : 0; - break; + data = PyBytes_AsString(bytes); + for (i = 0; i < nbytes; i++) { + c = data[i]; + switch (c) { + case '0': vi = 0; break; + case '1': vi = 1; break; + default: + PyErr_Format(PyExc_ValueError, + "character must be '0' or '1', found '%c'", c); + return -1; } - setbit(self, self->nbits - strlen + i, vi); + setbit(self, self->nbits - nbytes + i, vi); } return 0; } static int -extend_rawstring(bitarrayobject *self, PyObject *string) -{ - Py_ssize_t strlen; - char *str; - - assert(PyString_Check(string) && self->nbits % 8 == 0); - strlen = PyString_Size(string); - if (strlen == 0) - return 0; - - if (resize(self, self->nbits + BITS(strlen)) < 0) - return -1; - - str = PyString_AsString(string); - memcpy(self->ob_item + (Py_SIZE(self) - strlen), str, strlen); - return 0; -} - -static int extend_dispatch(bitarrayobject *self, PyObject *obj) { PyObject *iter; - int ret; + int res; /* dispatch on type */ if (bitarray_Check(obj)) /* bitarray */ @@ -732,47 +555,80 @@ if (PyTuple_Check(obj)) /* tuple */ return extend_tuple(self, obj); - if (PyString_Check(obj)) /* str01 */ - return extend_string(self, obj, STR_01); - + if (PyBytes_Check(obj)) { /* bytes 01 */ + /* This case is used on Python 2. 
However, it should have never + been here for Python 3, as it allows bitarray(b'01101011') */ #ifdef IS_PY3K - if (PyUnicode_Check(obj)) { /* str01 */ - PyObject *string; - string = PyUnicode_AsEncodedString(obj, NULL, NULL); - ret = extend_string(self, string, STR_01); - Py_DECREF(string); - return ret; - } + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "cannot extend from 'bytes', " + "use .pack() or .frombytes() instead", 1) < 0) + return -1; #endif + return extend_01(self, obj); + } + + if (PyUnicode_Check(obj)) { /* (unicode) string 01 */ + PyObject *bytes; + + bytes = PyUnicode_AsEncodedString(obj, NULL, NULL); + if (bytes == NULL) + return -1; + assert(PyBytes_Check(bytes)); + res = extend_01(self, bytes); + Py_DECREF(bytes); /* drop bytes */ + return res; + } if (PyIter_Check(obj)) /* iter */ return extend_iter(self, obj); /* finally, try to get the iterator of the object */ iter = PyObject_GetIter(obj); - if (iter == NULL) { - PyErr_SetString(PyExc_TypeError, "could not extend bitarray"); - return -1; + if (iter) { + res = extend_iter(self, iter); + Py_DECREF(iter); + return res; } - ret = extend_iter(self, iter); - Py_DECREF(iter); - return ret; + PyErr_Format(PyExc_TypeError, + "'%s' object is not iterable", Py_TYPE(obj)->tp_name); + return -1; } -/* --------- helper functions NOT involving bitarrayobjects ------------ */ +static PyObject * +unpack(bitarrayobject *self, char zero, char one, const char *fmt) +{ + PyObject *result; + Py_ssize_t i; + char *str; + + if (self->nbits > PY_SSIZE_T_MAX / 8) { + PyErr_SetString(PyExc_OverflowError, "bitarray too large to unpack"); + return NULL; + } + + str = (char *) PyMem_Malloc((size_t) self->nbits); + if (str == NULL) { + PyErr_NoMemory(); + return NULL; + } + for (i = 0; i < self->nbits; i++) + str[i] = GETBIT(self, i) ? one : zero; + + result = Py_BuildValue(fmt, str, self->nbits); + PyMem_Free((void *) str); + return result; +} -#define ENDIAN_STR(ba) (((ba)->endian) ? 
"big" : "little") +/* --------- helper functions not involving bitarrayobjects ------------ */ #ifdef IS_PY3K -#define IS_INDEX(x) (PyLong_Check(x) || PyIndex_Check(x)) #define IS_INT_OR_BOOL(x) (PyBool_Check(x) || PyLong_Check(x)) -#else -#define IS_INDEX(x) (PyInt_Check(x) || PyLong_Check(x) || PyIndex_Check(x)) +#else /* Py 2 */ #define IS_INT_OR_BOOL(x) (PyBool_Check(x) || PyInt_Check(x) || \ - PyLong_Check(x)) + PyLong_Check(x)) #endif -/* given an PyLong (which must be 0 or 1), or a PyBool, return 0 or 1, +/* given a PyLong (which must be 0 or 1) or a PyBool, return 0 or 1, or -1 on error */ static int IntBool_AsInt(PyObject *v) @@ -780,9 +636,9 @@ long x; if (PyBool_Check(v)) - return PyObject_IsTrue(v); + return v == Py_True; -#ifndef IS_PY3K +#if PY_MAJOR_VERSION == 2 if (PyInt_Check(v)) { x = PyInt_AsLong(v); } @@ -797,128 +653,45 @@ } if (x < 0 || x > 1) { - PyErr_SetString(PyExc_ValueError, - "integer value between 0 and 1 expected"); + PyErr_SetString(PyExc_ValueError, "integer 0 or 1 expected"); return -1; } return (int) x; } -/* Extract a slice index from a PyInt or PyLong or an object with the - nb_index slot defined, and store in *i. - However, this function returns -1 on error and 0 on success. 
- - This is almost _PyEval_SliceIndex() with Py_ssize_t replaced by idx_t -*/ -static int -getIndex(PyObject *v, idx_t *i) +/* Normalize index (which may be negative), such that 0 <= i <= n */ +static void +normalize_index(Py_ssize_t n, Py_ssize_t *i) { - idx_t x; - -#ifndef IS_PY3K - if (PyInt_Check(v)) { - x = PyInt_AS_LONG(v); + if (*i < 0) { + *i += n; + if (*i < 0) + *i = 0; } - else -#endif - if (PyLong_Check(v)) { - x = PyLong_AsLongLong(v); - } - else if (PyIndex_Check(v)) { - x = PyNumber_AsSsize_t(v, NULL); - if (x == -1 && PyErr_Occurred()) - return -1; - } - else { - PyErr_SetString(PyExc_TypeError, "slice indices must be integers or " - "None or have an __index__ method"); - return -1; - } - *i = x; - return 0; -} - -/* this is PySlice_GetIndicesEx() with Py_ssize_t replaced by idx_t */ -static int -slice_GetIndicesEx(PySliceObject *r, idx_t length, - idx_t *start, idx_t *stop, idx_t *step, idx_t *slicelength) -{ - idx_t defstart, defstop; - - if (r->step == Py_None) { - *step = 1; - } - else { - if (getIndex(r->step, step) < 0) - return -1; - if (*step == 0) { - PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); - return -1; - } - } - defstart = *step < 0 ? length - 1 : 0; - defstop = *step < 0 ? -1 : length; - - if (r->start == Py_None) { - *start = defstart; - } - else { - if (getIndex(r->start, start) < 0) - return -1; - if (*start < 0) *start += length; - if (*start < 0) *start = (*step < 0) ? -1 : 0; - if (*start >= length) *start = (*step < 0) ? 
length - 1 : length; - } - - if (r->stop == Py_None) { - *stop = defstop; - } - else { - if (getIndex(r->stop, stop) < 0) - return -1; - if (*stop < 0) *stop += length; - if (*stop < 0) *stop = -1; - if (*stop > length) *stop = length; - } - - if ((*step < 0 && *stop >= *start) || (*step > 0 && *start >= *stop)) { - *slicelength = 0; - } - else if (*step < 0) { - *slicelength = (*stop - *start + 1) / (*step) + 1; - } - else { - *slicelength = (*stop - *start - 1) / (*step) + 1; - } - - return 0; + if (*i > n) + *i = n; } /************************************************************************** - Implementation of API methods + Implementation of bitarray methods **************************************************************************/ static PyObject * bitarray_length(bitarrayobject *self) { - return PyLong_FromLongLong(self->nbits); + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "self.length() has been deprecated since 1.5.1, " + "use len(self) instead", 1) < 0) + return NULL; + + return PyLong_FromSsize_t(self->nbits); } PyDoc_STRVAR(length_doc, "length() -> int\n\ \n\ -Return the length, i.e. number of bits stored in the bitarray.\n\ -This method is preferred over __len__ (used when typing ``len(a)``),\n\ -since __len__ will fail for a bitarray object with 2^31 or more elements\n\ -on a 32bit machine, whereas this method will return the correct value,\n\ -on 32bit and 64bit machines."); - -PyDoc_STRVAR(len_doc, -"__len__() -> int\n\ -\n\ -Return the length, i.e. number of bits stored in the bitarray.\n\ -This method will fail for a bitarray object with 2^31 or more elements\n\ -on a 32bit machine. 
Use bitarray.length() instead."); +Return the length - a.length() is the same as len(a).\n\ +Deprecated since 1.5.1, use len()."); static PyObject * @@ -930,7 +703,8 @@ if (res == NULL) return NULL; - memcpy(((bitarrayobject *) res)->ob_item, self->ob_item, Py_SIZE(self)); + memcpy(((bitarrayobject *) res)->ob_item, self->ob_item, + (size_t) Py_SIZE(self)); return res; } @@ -943,49 +717,59 @@ static PyObject * bitarray_count(bitarrayobject *self, PyObject *args) { - idx_t n1; - long x = 1; + PyObject *x = Py_True; + Py_ssize_t start = 0, stop = self->nbits; + int vi; + + if (!PyArg_ParseTuple(args, "|Onn:count", &x, &start, &stop)) + return NULL; - if (!PyArg_ParseTuple(args, "|i:count", &x)) + vi = PyObject_IsTrue(x); + if (vi < 0) return NULL; - n1 = count(self); - return PyLong_FromLongLong(x ? n1 : (self->nbits - n1)); + normalize_index(self->nbits, &start); + normalize_index(self->nbits, &stop); + + return PyLong_FromSsize_t(count(self, vi, start, stop)); } PyDoc_STRVAR(count_doc, -"count([value]) -> int\n\ +"count(value=True, start=0, stop=, /) -> int\n\ \n\ -Return number of occurrences of value (defaults to True) in the bitarray."); +Count the number of occurrences of bool(value) in the bitarray."); static PyObject * bitarray_index(bitarrayobject *self, PyObject *args) { PyObject *x; - idx_t i, start = 0, stop = -1; - long vi; + Py_ssize_t start = 0, stop = self->nbits, i; + int vi; - if (!PyArg_ParseTuple(args, "O|LL:index", &x, &start, &stop)) + if (!PyArg_ParseTuple(args, "O|nn:index", &x, &start, &stop)) return NULL; vi = PyObject_IsTrue(x); if (vi < 0) return NULL; + normalize_index(self->nbits, &start); + normalize_index(self->nbits, &stop); + i = findfirst(self, vi, start, stop); if (i < 0) { - PyErr_SetString(PyExc_ValueError, "index(x): x not in bitarray"); + PyErr_Format(PyExc_ValueError, "%d is not in bitarray", vi); return NULL; } - return PyLong_FromLongLong(i); + return PyLong_FromSsize_t(i); } PyDoc_STRVAR(index_doc, -"index(value, [start, 
[stop]]) -> int\n\ +"index(value, start=0, stop=, /) -> int\n\ \n\ -Return index of the first occurrence of bool(value) in the bitarray.\n\ -Raises ValueError if the value is not present."); +Return index of the first occurrence of `bool(value)` in the bitarray.\n\ +Raises `ValueError` if the value is not present."); static PyObject * @@ -997,53 +781,23 @@ } PyDoc_STRVAR(extend_doc, -"extend(object)\n\ -\n\ -Append bits to the end of the bitarray. The objects which can be passed\n\ -to this method are the same iterable objects which can given to a bitarray\n\ -object upon initialization."); - - -static PyObject * -bitarray_contains(bitarrayobject *self, PyObject *x) -{ - long res; - - if (IS_INT_OR_BOOL(x)) { - int vi; - - vi = IntBool_AsInt(x); - if (vi < 0) - return NULL; - res = findfirst(self, vi, 0, -1) >= 0; - } - else if (bitarray_Check(x)) { - res = search(self, (bitarrayobject *) x, 0) >= 0; - } - else { - PyErr_SetString(PyExc_TypeError, "bitarray or bool expected"); - return NULL; - } - return PyBool_FromLong(res); -} - -PyDoc_STRVAR(contains_doc, -"__contains__(x) -> bool\n\ +"extend(iterable or string, /)\n\ \n\ -Return True if bitarray contains x, False otherwise.\n\ -The value x may be a boolean (or integer between 0 and 1), or a bitarray."); +Extend bitarray by appending the truth value of each element given\n\ +by iterable. 
If a string is provided, each `0` and `1` are appended\n\ +as bits."); static PyObject * bitarray_search(bitarrayobject *self, PyObject *args) { PyObject *list = NULL; /* list of matching positions to be returned */ - PyObject *x, *item = NULL; + PyObject *item = NULL, *x; Py_ssize_t limit = -1; bitarrayobject *xa; - idx_t p; + Py_ssize_t p; - if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":_search", &x, &limit)) + if (!PyArg_ParseTuple(args, "O|n:search", &x, &limit)) return NULL; if (!bitarray_Check(x)) { @@ -1066,7 +820,7 @@ p = search(self, xa, p); if (p < 0) break; - item = PyLong_FromLongLong(p); + item = PyLong_FromSsize_t(p); p++; if (item == NULL || PyList_Append(list, item) < 0) { Py_XDECREF(item); @@ -1081,10 +835,10 @@ } PyDoc_STRVAR(search_doc, -"search(bitarray, [limit]) -> list\n\ +"search(bitarray, limit=, /) -> list\n\ \n\ -Searches for the given a bitarray in self, and returns the start positions\n\ -where bitarray matches self as a list.\n\ +Searches for the given bitarray in self, and return the list of start\n\ +positions.\n\ The optional argument limits the number of search results to the integer\n\ specified. By default, all search results are returned."); @@ -1095,12 +849,12 @@ PyObject *res, *ptr; ptr = PyLong_FromVoidPtr(self->ob_item), - res = Py_BuildValue("OLsiL", + res = Py_BuildValue("Onsin", ptr, - (idx_t) Py_SIZE(self), + Py_SIZE(self), ENDIAN_STR(self), (int) (BITS(Py_SIZE(self)) - self->nbits), - (idx_t) self->allocated); + self->allocated); Py_DECREF(ptr); return res; } @@ -1109,26 +863,21 @@ "buffer_info() -> tuple\n\ \n\ Return a tuple (address, size, endianness, unused, allocated) giving the\n\ -current memory address, the size (in bytes) used to hold the bitarray's\n\ -contents, the bit endianness as a string, the number of unused bits\n\ -(e.g. 
a bitarray of length 11 will have a buffer size of 2 bytes and\n\ -5 unused bits), and the size (in bytes) of the allocated memory."); +memory address of the bitarray's buffer, the buffer size (in bytes),\n\ +the bit endianness as a string, the number of unused bits within the last\n\ +byte, and the allocated memory for the buffer (in bytes)."); static PyObject * bitarray_endian(bitarrayobject *self) { -#ifdef IS_PY3K - return PyUnicode_FromString(ENDIAN_STR(self)); -#else - return PyString_FromString(ENDIAN_STR(self)); -#endif + return Py_BuildValue("s", ENDIAN_STR(self)); } PyDoc_STRVAR(endian_doc, -"endian() -> string\n\ +"endian() -> str\n\ \n\ -Return the bit endianness as a string (either 'little' or 'big')."); +Return the bit endianness of the bitarray as a string (`little` or `big`)."); static PyObject * @@ -1136,23 +885,19 @@ { if (append_item(self, v) < 0) return NULL; - Py_RETURN_NONE; } PyDoc_STRVAR(append_doc, -"append(item)\n\ +"append(item, /)\n\ \n\ -Append the value bool(item) to the end of the bitarray."); +Append the truth value `bool(item)` to the end of the bitarray."); static PyObject * bitarray_all(bitarrayobject *self) { - if (findfirst(self, 0, 0, -1) >= 0) - Py_RETURN_FALSE; - else - Py_RETURN_TRUE; + return PyBool_FromLong(findfirst(self, 0, 0, self->nbits) == -1); } PyDoc_STRVAR(all_doc, @@ -1164,10 +909,7 @@ static PyObject * bitarray_any(bitarrayobject *self) { - if (findfirst(self, 1, 0, -1) >= 0) - Py_RETURN_TRUE; - else - Py_RETURN_FALSE; + return PyBool_FromLong(findfirst(self, 1, 0, self->nbits) >= 0); } PyDoc_STRVAR(any_doc, @@ -1179,8 +921,9 @@ static PyObject * bitarray_reduce(bitarrayobject *self) { + const Py_ssize_t nbytes = Py_SIZE(self); PyObject *dict, *repr = NULL, *result = NULL; - char *str; + char *data; dict = PyObject_GetAttrString((PyObject *) self, "__dict__"); if (dict == NULL) { @@ -1190,17 +933,17 @@ } /* the first byte indicates the number of unused bits at the end, and the rest of the bytes consist of the raw 
binary data */ - str = PyMem_Malloc(Py_SIZE(self) + 1); - if (str == NULL) { + data = (char *) PyMem_Malloc(nbytes + 1); + if (data == NULL) { PyErr_NoMemory(); goto error; } - str[0] = (char) setunused(self); - memcpy(str + 1, self->ob_item, Py_SIZE(self)); - repr = PyString_FromStringAndSize(str, Py_SIZE(self) + 1); + data[0] = (char) setunused(self); + memcpy(data + 1, self->ob_item, (size_t) nbytes); + repr = PyBytes_FromStringAndSize(data, nbytes + 1); if (repr == NULL) goto error; - PyMem_Free((void *) str); + PyMem_Free((void *) data); result = Py_BuildValue("O(Os)O", Py_TYPE(self), repr, ENDIAN_STR(self), dict); error: @@ -1215,10 +958,11 @@ static PyObject * bitarray_reverse(bitarrayobject *self) { - PyObject *t; /* temp bitarray to store lower half of self */ - idx_t i, m; + const Py_ssize_t m = self->nbits - 1; /* index of last item */ + PyObject *t; /* temp bitarray to store lower half of self */ + Py_ssize_t i; - if (self->nbits < 2) + if (self->nbits < 2) /* nothing needs to be done */ Py_RETURN_NONE; t = newbitarrayobject(Py_TYPE(self), self->nbits / 2, self->endian); @@ -1227,15 +971,13 @@ #define tt ((bitarrayobject *) t) /* copy lower half of array into temporary array */ - memcpy(tt->ob_item, self->ob_item, Py_SIZE(tt)); - - m = self->nbits - 1; + memcpy(tt->ob_item, self->ob_item, (size_t) Py_SIZE(tt)); - /* reverse the upper half onto the lower half. */ + /* reverse upper half onto the lower half. */ for (i = 0; i < tt->nbits; i++) setbit(self, i, GETBIT(self, m - i)); - /* revert the stored away lower half onto the upper half. */ + /* reverse the stored away lower half onto the upper half of self. 
*/ for (i = 0; i < tt->nbits; i++) setbit(self, m - i, GETBIT(tt, i)); #undef tt @@ -1256,38 +998,72 @@ p = setunused(self); self->nbits += p; -#ifdef IS_PY3K return PyLong_FromLong(p); -#else - return PyInt_FromLong(p); -#endif } PyDoc_STRVAR(fill_doc, "fill() -> int\n\ \n\ Adds zeros to the end of the bitarray, such that the length of the bitarray\n\ -is not a multiple of 8. Returns the number of bits added (0..7)."); +will be a multiple of 8. Returns the number of bits added (0..7)."); static PyObject * -bitarray_invert(bitarrayobject *self) +bitarray_invert(bitarrayobject *self, PyObject *args) { - invert(self); + Py_ssize_t i = PY_SSIZE_T_MAX; + + if (!PyArg_ParseTuple(args, "|n:invert", &i)) + return NULL; + + if (i == PY_SSIZE_T_MAX) { /* default - invert all bits */ + invert(self); + Py_RETURN_NONE; + } + + if (i < 0) + i += self->nbits; + + if (i < 0 || i >= self->nbits) { + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + setbit(self, i, 1 - GETBIT(self, i)); Py_RETURN_NONE; } PyDoc_STRVAR(invert_doc, -"invert()\n\ +"invert(index=)\n\ \n\ -Invert all bits in the array (in-place),\n\ -i.e. 
convert each 1-bit into a 0-bit and vice versa."); +Invert all bits in the array (in-place).\n\ +When the optional `index` is given, only invert the single bit at index."); static PyObject * bitarray_bytereverse(bitarrayobject *self) { - bytereverse(self); + static char trans[256]; + static int setup = 0; + Py_ssize_t i; + + if (!setup) { + /* setup translation table, which maps each byte to it's reversed: + trans = {0, 128, 64, 192, 32, 160, ..., 255} */ + int j, k; + + for (k = 0; k < 256; k++) { + trans[k] = 0x00; + for (j = 0; j < 8; j++) + if (1 << (7 - j) & k) + trans[k] |= 1 << j; + } + setup = 1; + } + + setunused(self); + for (i = 0; i < Py_SIZE(self); i++) + self->ob_item[i] = trans[(unsigned char) self->ob_item[i]]; + Py_RETURN_NONE; } @@ -1302,34 +1078,34 @@ static PyObject * bitarray_setall(bitarrayobject *self, PyObject *v) { - long vi; + int vi; vi = PyObject_IsTrue(v); if (vi < 0) return NULL; - memset(self->ob_item, vi ? 0xff : 0x00, Py_SIZE(self)); + memset(self->ob_item, vi ? 
0xff : 0x00, (size_t) Py_SIZE(self)); Py_RETURN_NONE; } PyDoc_STRVAR(setall_doc, -"setall(value)\n\ +"setall(value, /)\n\ \n\ -Set all bits in the bitarray to bool(value)."); +Set all bits in the bitarray to `bool(value)`."); static PyObject * bitarray_sort(bitarrayobject *self, PyObject *args, PyObject *kwds) { - idx_t n, n0, n1; + static char *kwlist[] = {"reverse", NULL}; + const Py_ssize_t n = self->nbits; + Py_ssize_t n0, n1; int reverse = 0; - static char* kwlist[] = {"reverse", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:sort", kwlist, &reverse)) return NULL; - n = self->nbits; - n1 = count(self); + n1 = count(self, 1, 0, n); if (reverse) { setrange(self, 0, n1, 1); @@ -1349,402 +1125,295 @@ Sort the bits in the array (in-place)."); -#ifdef IS_PY3K static PyObject * -bitarray_fromfile(bitarrayobject *self, PyObject *args) +bitarray_tolist(bitarrayobject *self, PyObject *args) { - PyObject *f; - Py_ssize_t newsize, nbytes = -1; - PyObject *reader, *rargs, *result; - size_t nread; - idx_t t, p; + PyObject *list, *item; + Py_ssize_t i; + int as_ints = 0; - if (!PyArg_ParseTuple(args, "O|n:fromfile", &f, &nbytes)) + if (!PyArg_ParseTuple(args, "|i:tolist", &as_ints)) return NULL; - if (nbytes == 0) - Py_RETURN_NONE; - - reader = PyObject_GetAttrString(f, "read"); - if (reader == NULL) - { - PyErr_SetString(PyExc_TypeError, - "first argument must be an open file"); + list = PyList_New(self->nbits); + if (list == NULL) return NULL; - } - rargs = Py_BuildValue("(n)", nbytes); - if (rargs == NULL) { - Py_DECREF(reader); - return NULL; - } - result = PyEval_CallObject(reader, rargs); - if (result != NULL) { - if (!PyBytes_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "first argument must be an open file"); - Py_DECREF(result); - Py_DECREF(rargs); - Py_DECREF(reader); - return NULL; - } - - nread = PyBytes_Size(result); - - t = self->nbits; - p = setunused(self); - self->nbits += p; - - newsize = Py_SIZE(self) + nread; - - if (resize(self, 
BITS(newsize)) < 0) { - Py_DECREF(result); - Py_DECREF(rargs); - Py_DECREF(reader); - return NULL; - } - memcpy(self->ob_item + (Py_SIZE(self) - nread), - PyBytes_AS_STRING(result), nread); - - if (nbytes > 0 && nread < (size_t) nbytes) { - PyErr_SetString(PyExc_EOFError, "not enough items read"); + for (i = 0; i < self->nbits; i++) { + item = as_ints ? PyLong_FromLong(GETBIT(self, i)) : + PyBool_FromLong(GETBIT(self, i)); + if (item == NULL) return NULL; - } - if (delete_n(self, t, p) < 0) + if (PyList_SetItem(list, i, item) < 0) return NULL; - Py_DECREF(result); } + return list; +} + +PyDoc_STRVAR(tolist_doc, +"tolist(as_ints=False, /) -> list\n\ +\n\ +Return a list with the items (False or True) in the bitarray.\n\ +The optional parameter, changes the items in the list to integers (0 or 1).\n\ +Note that the list object being created will require 32 or 64 times more\n\ +memory (depending on the machine architecture) than the bitarray object,\n\ +which may cause a memory error if the bitarray is very large."); - Py_DECREF(rargs); - Py_DECREF(reader); - Py_RETURN_NONE; -} -#else static PyObject * -bitarray_fromfile(bitarrayobject *self, PyObject *args) +bitarray_frombytes(bitarrayobject *self, PyObject *bytes) { - PyObject *f; - FILE *fp; - Py_ssize_t newsize, nbytes = -1; - size_t nread; - idx_t t, p; - long cur; - - if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":fromfile", &f, &nbytes)) - return NULL; + Py_ssize_t nbytes; + Py_ssize_t t, p; - fp = PyFile_AsFile(f); - if (fp == NULL) { - PyErr_SetString(PyExc_TypeError, - "first argument must be an open file"); + if (!PyBytes_Check(bytes)) { + PyErr_SetString(PyExc_TypeError, "bytes expected"); return NULL; } - - /* find number of bytes till EOF */ - if (nbytes < 0) { - if ((cur = ftell(fp)) < 0) - goto EOFerror; - - if (fseek(fp, 0L, SEEK_END) || (nbytes = ftell(fp)) < 0) - goto EOFerror; - - nbytes -= cur; - if (fseek(fp, cur, SEEK_SET)) { - EOFerror: - PyErr_SetString(PyExc_EOFError, "could not find EOF"); 
- return NULL; - } - } + nbytes = PyBytes_GET_SIZE(bytes); if (nbytes == 0) Py_RETURN_NONE; - /* file exists and there are more than zero bytes to read */ + /* Before we extend the raw bytes with the new data, we need to store + the current size and pad the last byte, as our bitarray size might + not be a multiple of 8. After extending, we remove the padding + bits again. + */ t = self->nbits; p = setunused(self); self->nbits += p; + assert(self->nbits % 8 == 0); - newsize = Py_SIZE(self) + nbytes; - if (resize(self, BITS(newsize)) < 0) + if (resize(self, self->nbits + BITS(nbytes)) < 0) return NULL; - nread = fread(self->ob_item + (Py_SIZE(self) - nbytes), 1, nbytes, fp); - if (nread < (size_t) nbytes) { - newsize -= nbytes - nread; - if (resize(self, BITS(newsize)) < 0) - return NULL; - PyErr_SetString(PyExc_EOFError, "not enough items in file"); - return NULL; - } + memcpy(self->ob_item + (Py_SIZE(self) - nbytes), + PyBytes_AsString(bytes), (size_t) nbytes); if (delete_n(self, t, p) < 0) return NULL; Py_RETURN_NONE; } -#endif -PyDoc_STRVAR(fromfile_doc, -"fromfile(f, [n])\n\ +PyDoc_STRVAR(frombytes_doc, +"frombytes(bytes, /)\n\ \n\ -Read n bytes from the file object f and append them to the bitarray\n\ -interpreted as machine values. When n is omitted, as many bytes are\n\ -read until EOF is reached."); - +Extend bitarray with raw bytes. 
That is, each append byte will add eight\n\ +bits to the bitarray."); -#ifdef IS_PY3K -static PyObject * -bitarray_tofile(bitarrayobject *self, PyObject *f) -{ - PyObject *writer, *value, *args, *result; - if (f == NULL) { - PyErr_SetString(PyExc_TypeError, "writeobject with NULL file"); - return NULL; - } - writer = PyObject_GetAttrString(f, "write"); - if (writer == NULL) - return NULL; - setunused(self); - value = PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); - if (value == NULL) { - Py_DECREF(writer); - return NULL; - } - args = PyTuple_Pack(1, value); - if (args == NULL) { - Py_DECREF(value); - Py_DECREF(writer); - return NULL; - } - result = PyEval_CallObject(writer, args); - Py_DECREF(args); - Py_DECREF(value); - Py_DECREF(writer); - if (result == NULL) - { - PyErr_SetString(PyExc_TypeError, "open file expected"); - return NULL; - } - Py_DECREF(result); - Py_RETURN_NONE; -} -#else static PyObject * -bitarray_tofile(bitarrayobject *self, PyObject *f) +bitarray_tobytes(bitarrayobject *self) { - FILE *fp; - - fp = PyFile_AsFile(f); - if (fp == NULL) { - PyErr_SetString(PyExc_TypeError, "open file expected"); - return NULL; - } - if (Py_SIZE(self) == 0) - Py_RETURN_NONE; - setunused(self); - if (fwrite(self->ob_item, 1, Py_SIZE(self), fp) != - (size_t) Py_SIZE(self)) - { - PyErr_SetFromErrno(PyExc_IOError); - clearerr(fp); - return NULL; - } - Py_RETURN_NONE; + return PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); } -#endif -PyDoc_STRVAR(tofile_doc, -"tofile(f)\n\ +PyDoc_STRVAR(tobytes_doc, +"tobytes() -> bytes\n\ \n\ -Write all bits (as machine values) to the file object f.\n\ -When the length of the bitarray is not a multiple of 8,\n\ -the remaining bits (1..7) are set to 0."); +Return the byte representation of the bitarray.\n\ +When the length of the bitarray is not a multiple of 8, the few remaining\n\ +bits (1..7) are considered to be 0."); static PyObject * -bitarray_tolist(bitarrayobject *self) +bitarray_fromfile(bitarrayobject 
*self, PyObject *args) { - PyObject *list; - idx_t i; + PyObject *bytes, *f, *res; + Py_ssize_t nblock, nread = 0, nbytes = -1; + int not_enough_bytes; - list = PyList_New((Py_ssize_t) self->nbits); - if (list == NULL) + if (!PyArg_ParseTuple(args, "O|n:fromfile", &f, &nbytes)) return NULL; - for (i = 0; i < self->nbits; i++) - if (PyList_SetItem(list, (Py_ssize_t) i, - PyBool_FromLong(GETBIT(self, i))) < 0) - return NULL; - return list; -} - -PyDoc_STRVAR(tolist_doc, -"tolist() -> list\n\ -\n\ -Return an ordinary list with the items in the bitarray.\n\ -Note that the list object being created will require 32 or 64 times more\n\ -memory than the bitarray object, which may cause a memory error if the\n\ -bitarray is very large.\n\ -Also note that to extend a bitarray with elements from a list,\n\ -use the extend method."); + if (nbytes < 0) /* read till EOF */ + nbytes = PY_SSIZE_T_MAX; + while (nread < nbytes) { + nblock = Py_MIN(nbytes - nread, BLOCKSIZE); + bytes = PyObject_CallMethod(f, "read", "n", nblock); + if (bytes == NULL) + return NULL; + if (!PyBytes_Check(bytes)) { + Py_DECREF(bytes); + PyErr_SetString(PyExc_TypeError, "read() didn't return bytes"); + return NULL; + } + not_enough_bytes = (PyBytes_GET_SIZE(bytes) < nblock); + nread += PyBytes_GET_SIZE(bytes); + assert(nread >= 0 && nread <= nbytes); -static PyObject * -bitarray_frombytes(bitarrayobject *self, PyObject *string) -{ - idx_t t, p; + res = bitarray_frombytes(self, bytes); + Py_DECREF(bytes); + if (res == NULL) + return NULL; + Py_DECREF(res); /* drop frombytes result */ - if (!PyString_Check(string)) { - PyErr_SetString(PyExc_TypeError, "byte string expected"); - return NULL; + if (not_enough_bytes) { + if (nbytes == PY_SSIZE_T_MAX) /* read till EOF */ + break; + PyErr_SetString(PyExc_EOFError, "not enough bytes to read"); + return NULL; + } } - t = self->nbits; - p = setunused(self); - self->nbits += p; - - if (extend_rawstring(self, string) < 0) - return NULL; - if (delete_n(self, t, p) < 
0) - return NULL; Py_RETURN_NONE; } -PyDoc_STRVAR(frombytes_doc, -"frombytes(bytes)\n\ +PyDoc_STRVAR(fromfile_doc, +"fromfile(f, n=-1, /)\n\ \n\ -Append from a byte string, interpreted as machine values."); +Extend bitarray with up to n bytes read from the file object f.\n\ +When n is omitted or negative, reads all data until EOF.\n\ +When n is provided and positions but exceeds the data available,\n\ +EOFError is raised (but the available data is still read and appended."); static PyObject * -bitarray_tobytes(bitarrayobject *self) +bitarray_tofile(bitarrayobject *self, PyObject *f) { + const Py_ssize_t nbytes = Py_SIZE(self); + Py_ssize_t size, offset; + PyObject *res; + setunused(self); - return PyString_FromStringAndSize(self->ob_item, Py_SIZE(self)); + for (offset = 0; offset < nbytes; offset += BLOCKSIZE) { + size = Py_MIN(nbytes - offset, BLOCKSIZE); + assert(size >= 0 && offset + size <= nbytes); + /* basically: f.write(memoryview(self)[offset:offset + size] */ + res = PyObject_CallMethod(f, "write", BYTES_SIZE_FMT, + self->ob_item + offset, size); + if (res == NULL) + return NULL; + Py_DECREF(res); /* drop write result */ + } + Py_RETURN_NONE; } -PyDoc_STRVAR(tobytes_doc, -"tobytes() -> bytes\n\ +PyDoc_STRVAR(tofile_doc, +"tofile(f, /)\n\ \n\ -Return the byte representation of the bitarray.\n\ -When the length of the bitarray is not a multiple of 8, the few remaining\n\ -bits (1..7) are set to 0."); +Write the byte representation of the bitarray to the file object f.\n\ +When the length of the bitarray is not a multiple of 8,\n\ +the remaining bits (1..7) are set to 0."); static PyObject * bitarray_to01(bitarrayobject *self) { -#ifdef IS_PY3K - PyObject *string, *unpacked; - - unpacked = unpack(self, '0', '1'); - string = PyUnicode_FromEncodedObject(unpacked, NULL, NULL); - Py_DECREF(unpacked); - return string; -#else - return unpack(self, '0', '1'); -#endif + return unpack(self, '0', '1', "s#"); } PyDoc_STRVAR(to01_doc, -"to01() -> string\n\ +"to01() -> 
str\n\ \n\ Return a string containing '0's and '1's, representing the bits in the\n\ -bitarray object.\n\ -Note: To extend a bitarray from a string containing '0's and '1's,\n\ -use the extend method."); +bitarray object."); static PyObject * bitarray_unpack(bitarrayobject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"zero", "one", NULL}; char zero = 0x00, one = 0xff; - static char* kwlist[] = {"zero", "one", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|cc:unpack", kwlist, &zero, &one)) return NULL; - return unpack(self, zero, one); + return unpack(self, zero, one, BYTES_SIZE_FMT); } PyDoc_STRVAR(unpack_doc, "unpack(zero=b'\\x00', one=b'\\xff') -> bytes\n\ \n\ -Return a byte string containing one character for each bit in the bitarray,\n\ -using the specified mapping.\n\ -See also the pack method."); +Return bytes containing one character for each bit in the bitarray,\n\ +using the specified mapping."); static PyObject * -bitarray_pack(bitarrayobject *self, PyObject *string) +bitarray_pack(bitarrayobject *self, PyObject *bytes) { - if (!PyString_Check(string)) { - PyErr_SetString(PyExc_TypeError, "byte string expected"); + Py_ssize_t nbytes, i; + char *data; + + if (!PyBytes_Check(bytes)) { + PyErr_SetString(PyExc_TypeError, "bytes expected"); return NULL; } - if (extend_string(self, string, STR_RAW) < 0) + nbytes = PyBytes_GET_SIZE(bytes); + if (nbytes == 0) + Py_RETURN_NONE; + + if (resize(self, self->nbits + nbytes) < 0) return NULL; + data = PyBytes_AsString(bytes); + for (i = 0; i < nbytes; i++) + setbit(self, self->nbits - nbytes + i, data[i] ? 1 : 0); + Py_RETURN_NONE; } PyDoc_STRVAR(pack_doc, -"pack(bytes)\n\ +"pack(bytes, /)\n\ \n\ -Extend the bitarray from a byte string, where each characters corresponds to\n\ -a single bit. The character b'\\x00' maps to bit 0 and all other characters\n\ -map to bit 1.\n\ +Extend the bitarray from bytes, where each byte corresponds to a single\n\ +bit. 
The byte `b'\\x00'` maps to bit 0 and all other characters map to\n\ +bit 1.\n\ This method, as well as the unpack method, are meant for efficient\n\ transfer of data between bitarray objects to other python objects\n\ -(for example NumPy's ndarray object) which have a different view of memory."); +(for example NumPy's ndarray object) which have a different memory view."); static PyObject * bitarray_repr(bitarrayobject *self) { - PyObject *string; -#ifdef IS_PY3K - PyObject *decoded; -#endif + PyObject *result; + Py_ssize_t i; + char *str; + size_t strsize; - if (self->nbits == 0) { - string = PyString_FromString("bitarray()"); - if (string == NULL) - return NULL; + if (self->nbits == 0) + return Py_BuildValue("s", "bitarray()"); + + strsize = self->nbits + 12; /* 12 is the length of "bitarray('')" */ + if (strsize > PY_SSIZE_T_MAX / 8) { + PyErr_SetString(PyExc_OverflowError, + "bitarray too large to represent"); + return NULL; } - else { - string = PyString_FromString("bitarray(\'"); - if (string == NULL) - return NULL; - PyString_ConcatAndDel(&string, unpack(self, '0', '1')); - PyString_ConcatAndDel(&string, PyString_FromString("\')")); + + str = (char *) PyMem_Malloc(strsize); + if (str == NULL) { + PyErr_NoMemory(); + return NULL; } -#ifdef IS_PY3K - decoded = PyUnicode_FromEncodedObject(string, NULL, NULL); - Py_DECREF(string); - string = decoded; -#endif - return string; + /* add "bitarray('......')" to str */ + strcpy(str, "bitarray('"); /* has length 10 */ + /* don't use strcpy here, as this would add an extra null byte */ + str[strsize - 2] = '\''; + str[strsize - 1] = ')'; + + for (i = 0; i < self->nbits; i++) + str[i + 10] = GETBIT(self, i) ? 
'1' : '0'; + + result = Py_BuildValue("s#", str, (Py_ssize_t) strsize); + PyMem_Free((void *) str); + return result; } static PyObject * bitarray_insert(bitarrayobject *self, PyObject *args) { - idx_t i; + Py_ssize_t i; PyObject *v; - if (!PyArg_ParseTuple(args, "LO:insert", &i, &v)) + if (!PyArg_ParseTuple(args, "nO:insert", &i, &v)) return NULL; - if (i < 0) { - i += self->nbits; - if (i < 0) - i = 0; - } - if (i > self->nbits) - i = self->nbits; + normalize_index(self->nbits, &i); if (insert_n(self, i, 1) < 0) return NULL; @@ -1754,18 +1423,18 @@ } PyDoc_STRVAR(insert_doc, -"insert(i, item)\n\ +"insert(index, value, /)\n\ \n\ -Insert bool(item) into the bitarray before position i."); +Insert `bool(value)` into the bitarray before index."); static PyObject * bitarray_pop(bitarrayobject *self, PyObject *args) { - idx_t i = -1; + Py_ssize_t i = -1; long vi; - if (!PyArg_ParseTuple(args, "|L:pop", &i)) + if (!PyArg_ParseTuple(args, "|n:pop", &i)) return NULL; if (self->nbits == 0) { @@ -1787,25 +1456,25 @@ } PyDoc_STRVAR(pop_doc, -"pop([i]) -> item\n\ +"pop(index=-1, /) -> item\n\ \n\ Return the i-th (default last) element and delete it from the bitarray.\n\ -Raises IndexError if bitarray is empty or index is out of range."); +Raises `IndexError` if bitarray is empty or index is out of range."); static PyObject * bitarray_remove(bitarrayobject *self, PyObject *v) { - idx_t i; - long vi; + Py_ssize_t i; + int vi; vi = PyObject_IsTrue(v); if (vi < 0) return NULL; - i = findfirst(self, vi, 0, -1); + i = findfirst(self, vi, 0, self->nbits); if (i < 0) { - PyErr_SetString(PyExc_ValueError, "remove(x): x not in bitarray"); + PyErr_Format(PyExc_ValueError, "%d not in bitarray", vi); return NULL; } if (delete_n(self, i, 1) < 0) @@ -1814,407 +1483,1000 @@ } PyDoc_STRVAR(remove_doc, -"remove(item)\n\ +"remove(value, /)\n\ +\n\ +Remove the first occurrence of `bool(value)` in the bitarray.\n\ +Raises `ValueError` if item is not present."); + + +static PyObject * 
+bitarray_clear(bitarrayobject *self) +{ + if (resize(self, 0) < 0) + return NULL; + Py_RETURN_NONE; +} + +PyDoc_STRVAR(clear_doc, +"clear()\n\ \n\ -Remove the first occurrence of bool(item) in the bitarray.\n\ -Raises ValueError if item is not present."); +Remove all items from the bitarray."); + + +static PyObject * +bitarray_sizeof(bitarrayobject *self) +{ + Py_ssize_t res; + + res = sizeof(bitarrayobject) + self->allocated; + return PyLong_FromSsize_t(res); +} +PyDoc_STRVAR(sizeof_doc, +"Return the size of the bitarray in memory, in bytes."); + + +/* ----------------------- bitarray_as_sequence ------------------------ */ + +static Py_ssize_t +bitarray_len(bitarrayobject *self) +{ + return self->nbits; +} + +static PyObject * +bitarray_concat(bitarrayobject *self, PyObject *other) +{ + PyObject *res; + + res = bitarray_copy(self); + if (res == NULL) + return NULL; -/* --------- special methods ----------- */ + if (extend_dispatch((bitarrayobject *) res, other) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} static PyObject * -bitarray_getitem(bitarrayobject *self, PyObject *a) +bitarray_repeat(bitarrayobject *self, Py_ssize_t n) { PyObject *res; - idx_t start, stop, step, slicelength, j, i = 0; - if (IS_INDEX(a)) { - if (getIndex(a, &i) < 0) + res = bitarray_copy(self); + if (res == NULL) + return NULL; + + if (repeat((bitarrayobject *) res, n) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} + +static PyObject * +bitarray_item(bitarrayobject *self, Py_ssize_t i) +{ + if (i < 0 || i >= self->nbits) { + PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); + return NULL; + } + return PyBool_FromLong(GETBIT(self, i)); +} + +static int +bitarray_ass_item(bitarrayobject *self, Py_ssize_t i, PyObject *value) +{ + if (i < 0 || i >= self->nbits) { + PyErr_SetString(PyExc_IndexError, + "bitarray assignment index out of range"); + return -1; + } + if (value == NULL) + return delete_n(self, i, 1); + else + return set_item(self, i, 
value); +} + +/* return 1 if 'item' (which can be an int or bitarray) is in self, + 0 otherwise, and -1 on error */ +static int +bitarray_contains(bitarrayobject *self, PyObject *item) +{ + if (IS_INT_OR_BOOL(item)) { + int vi; + + vi = IntBool_AsInt(item); + if (vi < 0) + return -1; + return findfirst(self, vi, 0, self->nbits) >= 0; + } + + if (bitarray_Check(item)) + return search(self, (bitarrayobject *) item, 0) >= 0; + + PyErr_SetString(PyExc_TypeError, "bitarray or bool expected"); + return -1; +} + +static PyObject * +bitarray_inplace_concat(bitarrayobject *self, PyObject *other) +{ + if (extend_dispatch(self, other) < 0) + return NULL; + Py_INCREF(self); + return (PyObject *) self; +} + +static PyObject * +bitarray_inplace_repeat(bitarrayobject *self, Py_ssize_t n) +{ + if (repeat(self, n) < 0) + return NULL; + Py_INCREF(self); + return (PyObject *) self; +} + +static PySequenceMethods bitarray_as_sequence = { + (lenfunc) bitarray_len, /* sq_length */ + (binaryfunc) bitarray_concat, /* sq_concat */ + (ssizeargfunc) bitarray_repeat, /* sq_repeat */ + (ssizeargfunc) bitarray_item, /* sq_item */ + 0, /* sq_slice */ + (ssizeobjargproc) bitarray_ass_item, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc) bitarray_contains, /* sq_contains */ + (binaryfunc) bitarray_inplace_concat, /* sq_inplace_concat */ + (ssizeargfunc) bitarray_inplace_repeat, /* sq_inplace_repeat */ +}; + +/* ----------------------- bitarray_as_mapping ------------------------- */ + +static PyObject * +bitarray_subscr(bitarrayobject *self, PyObject *item) +{ + if (PyIndex_Check(item)) { + Py_ssize_t i; + + i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) return NULL; if (i < 0) i += self->nbits; - if (i < 0 || i >= self->nbits) { - PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); - return NULL; - } - return PyBool_FromLong(GETBIT(self, i)); + return bitarray_item(self, i); } - if (PySlice_Check(a)) { - if 
(slice_GetIndicesEx((PySliceObject *) a, self->nbits, - &start, &stop, &step, &slicelength) < 0) { + + if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelength, i, j; + PyObject *res; + + if (PySlice_GetIndicesEx(item, self->nbits, + &start, &stop, &step, &slicelength) < 0) { return NULL; } res = newbitarrayobject(Py_TYPE(self), slicelength, self->endian); if (res == NULL) return NULL; - for (i = 0, j = start; i < slicelength; i++, j += step) - setbit((bitarrayobject *) res, i, GETBIT(self, j)); - + if (step == 1) { + copy_n((bitarrayobject *) res, 0, self, start, slicelength); + } + else { + for (i = 0, j = start; i < slicelength; i++, j += step) + setbit((bitarrayobject *) res, i, GETBIT(self, j)); + } return res; } PyErr_SetString(PyExc_TypeError, "index or slice expected"); return NULL; } -/* Sets the elements, specified by slice, in self to the value(s) given by v - which is either a bitarray or a boolean. -*/ +/* The following functions (setslice_bitarray, setslice_bool and delslice) + are called from bitarray_ass_subscr. Having this functionality inside + bitarray_ass_subscr would make the function incomprehensibly long. 
*/ + +/* set the elements in self, specified by slice, to bitarray */ static int -setslice(bitarrayobject *self, PySliceObject *slice, PyObject *v) +setslice_bitarray(bitarrayobject *self, PyObject *slice, PyObject *array) { - idx_t start, stop, step, slicelength, j, i = 0; + Py_ssize_t start, stop, step, slicelength, increase, i, j; + int copy_array = 0, res = -1; - if (slice_GetIndicesEx(slice, self->nbits, - &start, &stop, &step, &slicelength) < 0) + assert(PySlice_Check(slice) && bitarray_Check(array)); + if (PySlice_GetIndicesEx(slice, self->nbits, + &start, &stop, &step, &slicelength) < 0) return -1; - if (bitarray_Check(v)) { -#define vv ((bitarrayobject *) v) - if (vv->nbits == slicelength) { - for (i = 0, j = start; i < slicelength; i++, j += step) - setbit(self, j, GETBIT(vv, i)); - return 0; - } - if (step != 1) { - char buff[256]; - sprintf(buff, "attempt to assign sequence of size %lld " - "to extended slice of size %lld", - vv->nbits, (idx_t) slicelength); - PyErr_SetString(PyExc_ValueError, buff); +#define aa ((bitarrayobject *) array) + /* number of bits by which 'self' has to be increased (decreased) */ + increase = aa->nbits - slicelength; + + if (aa == self) { /* covers cases like a[2::] = a and a[::-1] = a */ + array = bitarray_copy(aa); + if (array == NULL) return -1; + copy_array = 1; + } + + if (step == 1) { + if (increase > 0) { /* increase self */ + if (insert_n(self, start, increase) < 0) + goto error; } - /* make self bigger or smaller */ - if (vv->nbits > slicelength) { - if (insert_n(self, start, vv->nbits - slicelength) < 0) - return -1; - } - else { - if (delete_n(self, start, slicelength - vv->nbits) < 0) - return -1; + if (increase < 0) { /* decrease self */ + if (delete_n(self, start, -increase) < 0) + goto error; } /* copy the new values into self */ - copy_n(self, start, vv, 0, vv->nbits); -#undef vv - return 0; + copy_n(self, start, aa, 0, aa->nbits); } - if (IS_INT_OR_BOOL(v)) { - int vi; + else { /* step != 1 */ + if (increase 
!= 0) { + PyErr_Format(PyExc_ValueError, + "attempt to assign sequence of size %zd " + "to extended slice of size %zd", + aa->nbits, slicelength); + goto error; + } + assert(increase == 0); + for (i = 0, j = start; i < slicelength; i++, j += step) + setbit(self, j, GETBIT(aa, i)); + } +#undef aa - vi = IntBool_AsInt(v); - if (vi < 0) - return -1; + res = 0; + error: + if (copy_array) + Py_DECREF(array); + return res; +} + +/* set the elements in self, specified by slice, to bool */ +static int +setslice_bool(bitarrayobject *self, PyObject *slice, PyObject *bool) +{ + Py_ssize_t start, stop, step, slicelength, i, j; + int vi; + + assert(PySlice_Check(slice) && IS_INT_OR_BOOL(bool)); + if (PySlice_GetIndicesEx(slice, self->nbits, + &start, &stop, &step, &slicelength) < 0) + return -1; + + vi = IntBool_AsInt(bool); + if (vi < 0) + return -1; + + if (step == 1) { + setrange(self, start, start + slicelength, vi); + } + else { /* step != 1 */ for (i = 0, j = start; i < slicelength; i++, j += step) setbit(self, j, vi); + } + return 0; +} + +/* delete the elements in self, specified by slice */ +static int +delslice(bitarrayobject *self, PyObject *slice) +{ + Py_ssize_t start, stop, step, slicelength; + + assert(PySlice_Check(slice)); + if (PySlice_GetIndicesEx(slice, self->nbits, + &start, &stop, &step, &slicelength) < 0) + return -1; + + if (slicelength == 0) return 0; + + if (step < 0) { + stop = start + 1; + start = stop + step * (slicelength - 1) - 1; + step = -step; + } + assert(step > 0 && start <= stop && slicelength > 0); + assert(start >= 0 && start < self->nbits); + assert(stop >= 0 && stop <= self->nbits); + + if (step == 1) { + assert(stop - start == slicelength); + return delete_n(self, start, slicelength); } - PyErr_SetString(PyExc_IndexError, - "bitarray or bool expected for slice assignment"); - return -1; + else { + Py_ssize_t i, j; + /* Now step > 1. We set the items not to be removed. 
*/ + for (i = j = start; i < self->nbits; i++) { + if ((i - start) % step != 0 || i >= stop) + setbit(self, j++, GETBIT(self, i)); + } + return resize(self, self->nbits - slicelength); + } +} + +static int +bitarray_ass_subscr(bitarrayobject *self, PyObject* item, PyObject* value) +{ + if (PyIndex_Check(item)) { + Py_ssize_t i; + + i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) + return -1; + if (i < 0) + i += self->nbits; + return bitarray_ass_item(self, i, value); + } + + if (PySlice_Check(item)) { + if (value == NULL) + return delslice(self, item); + + if (bitarray_Check(value)) + return setslice_bitarray(self, item, value); + + if (IS_INT_OR_BOOL(value)) + return setslice_bool(self, item, value); + + PyErr_SetString(PyExc_IndexError, + "bitarray or bool expected for slice assignment"); + return -1; + } + return -1; /* cannot happen */ } +static PyMappingMethods bitarray_as_mapping = { + (lenfunc) bitarray_len, + (binaryfunc) bitarray_subscr, + (objobjargproc) bitarray_ass_subscr, +}; + +/* --------------------------- bitarray_as_number ---------------------- */ + static PyObject * -bitarray_setitem(bitarrayobject *self, PyObject *args) +bitarray_cpinvert(bitarrayobject *self) { - PyObject *a, *v; - idx_t i = 0; + PyObject *result; - if (!PyArg_ParseTuple(args, "OO:__setitem__", &a, &v)) + result = bitarray_copy(self); + if (result == NULL) return NULL; - if (IS_INDEX(a)) { - if (getIndex(a, &i) < 0) - return NULL; - if (i < 0) - i += self->nbits; - if (i < 0 || i >= self->nbits) { - PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); - return NULL; - } - if (set_item(self, i, v) < 0) - return NULL; - Py_RETURN_NONE; + invert((bitarrayobject *) result); + return result; +} + +enum op_type { + OP_and, + OP_or, + OP_xor, +}; + +/* perform bitwise in-place operation */ +static int +bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper) +{ + const Py_ssize_t nbytes = Py_SIZE(self); + bitarrayobject *other; + 
Py_ssize_t i; + + if (!bitarray_Check(arg)) { + PyErr_SetString(PyExc_TypeError, + "bitarray expected for bitwise operation"); + return -1; } - if (PySlice_Check(a)) { - if (setslice(self, (PySliceObject *) a, v) < 0) - return NULL; - Py_RETURN_NONE; + other = (bitarrayobject *) arg; + if (self->nbits != other->nbits || self->endian != other->endian) { + PyErr_SetString(PyExc_ValueError, + "bitarrays of equal length and endianness expected"); + return -1; } - PyErr_SetString(PyExc_TypeError, "index or slice expected"); - return NULL; + setunused(self); + setunused(other); + switch (oper) { + case OP_and: + for (i = 0; i < nbytes; i++) + self->ob_item[i] &= other->ob_item[i]; + break; + case OP_or: + for (i = 0; i < nbytes; i++) + self->ob_item[i] |= other->ob_item[i]; + break; + case OP_xor: + for (i = 0; i < nbytes; i++) + self->ob_item[i] ^= other->ob_item[i]; + break; + default: /* cannot happen */ + return -1; + } + return 0; +} + +#define BITWISE_FUNC(oper) \ +static PyObject * \ +bitarray_ ## oper (bitarrayobject *self, PyObject *other) \ +{ \ + PyObject *res; \ + \ + res = bitarray_copy(self); \ + if (res == NULL) \ + return NULL; \ + if (bitwise((bitarrayobject *) res, other, OP_ ## oper) < 0) { \ + Py_DECREF(res); \ + return NULL; \ + } \ + return res; \ +} + +BITWISE_FUNC(and) /* bitarray_and */ +BITWISE_FUNC(or) /* bitarray_or */ +BITWISE_FUNC(xor) /* bitarray_xor */ + + +#define BITWISE_IFUNC(oper) \ +static PyObject * \ +bitarray_i ## oper (bitarrayobject *self, PyObject *other) \ +{ \ + if (bitwise(self, other, OP_ ## oper) < 0) \ + return NULL; \ + Py_INCREF(self); \ + return (PyObject *) self; \ +} + +BITWISE_IFUNC(and) /* bitarray_iand */ +BITWISE_IFUNC(or) /* bitarray_ior */ +BITWISE_IFUNC(xor) /* bitarray_ixor */ + + +static PyNumberMethods bitarray_as_number = { + 0, /* nb_add */ + 0, /* nb_subtract */ + 0, /* nb_multiply */ +#if PY_MAJOR_VERSION == 2 + 0, /* nb_divide */ +#endif + 0, /* nb_remainder */ + 0, /* nb_divmod */ + 0, /* nb_power */ + 
0, /* nb_negative */ + 0, /* nb_positive */ + 0, /* nb_absolute */ + 0, /* nb_bool (was nb_nonzero) */ + (unaryfunc) bitarray_cpinvert, /* nb_invert */ + 0, /* nb_lshift */ + 0, /* nb_rshift */ + (binaryfunc) bitarray_and, /* nb_and */ + (binaryfunc) bitarray_xor, /* nb_xor */ + (binaryfunc) bitarray_or, /* nb_or */ +#if PY_MAJOR_VERSION == 2 + 0, /* nb_coerce */ +#endif + 0, /* nb_int */ + 0, /* nb_reserved (was nb_long) */ + 0, /* nb_float */ +#if PY_MAJOR_VERSION == 2 + 0, /* nb_oct */ + 0, /* nb_hex */ +#endif + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ +#if PY_MAJOR_VERSION == 2 + 0, /* nb_inplace_divide */ +#endif + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + (binaryfunc) bitarray_iand, /* nb_inplace_and */ + (binaryfunc) bitarray_ixor, /* nb_inplace_xor */ + (binaryfunc) bitarray_ior, /* nb_inplace_or */ + 0, /* nb_floor_divide */ + 0, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ +#if PY_MAJOR_VERSION == 3 + 0, /* nb_index */ +#endif +}; + +/************************************************************************** + variable length encoding and decoding + **************************************************************************/ + +static int +check_codedict(PyObject *codedict) +{ + if (!PyDict_Check(codedict)) { + PyErr_SetString(PyExc_TypeError, "dict expected"); + return -1; + } + if (PyDict_Size(codedict) == 0) { + PyErr_SetString(PyExc_ValueError, "non-empty dict expected"); + return -1; + } + return 0; +} + +static int +check_value(PyObject *value) +{ + if (!bitarray_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "bitarray expected for dict value"); + return -1; + } + if (((bitarrayobject *) value)->nbits == 0) { + PyErr_SetString(PyExc_ValueError, "non-empty bitarray expected"); + return -1; + } + return 0; } static PyObject * -bitarray_delitem(bitarrayobject *self, PyObject *a) 
+bitarray_encode(bitarrayobject *self, PyObject *args) { - idx_t start, stop, step, slicelength, j, i = 0; + PyObject *codedict, *iterable, *iter, *symbol, *value; - if (IS_INDEX(a)) { - if (getIndex(a, &i) < 0) - return NULL; - if (i < 0) - i += self->nbits; - if (i < 0 || i >= self->nbits) { - PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); - return NULL; + if (!PyArg_ParseTuple(args, "OO:encode", &codedict, &iterable)) + return NULL; + + if (check_codedict(codedict) < 0) + return NULL; + + iter = PyObject_GetIter(iterable); + if (iter == NULL) { + PyErr_SetString(PyExc_TypeError, "iterable object expected"); + return NULL; + } + /* extend self with the bitarrays from codedict */ + while ((symbol = PyIter_Next(iter)) != NULL) { + value = PyDict_GetItem(codedict, symbol); + Py_DECREF(symbol); + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, + "symbol not defined in prefix code"); + goto error; } - if (delete_n(self, i, 1) < 0) - return NULL; - Py_RETURN_NONE; + if (check_value(value) < 0 || + extend_bitarray(self, (bitarrayobject *) value) < 0) + goto error; + } + Py_DECREF(iter); + if (PyErr_Occurred()) + return NULL; + Py_RETURN_NONE; +error: + Py_DECREF(iter); + return NULL; +} + +PyDoc_STRVAR(encode_doc, +"encode(code, iterable, /)\n\ +\n\ +Given a prefix code (a dict mapping symbols to bitarrays),\n\ +iterate over the iterable object with symbols, and extend the bitarray\n\ +with the corresponding bitarray for each symbol."); + +/* ----------------------- binary tree (C-level) ----------------------- */ + +/* a node has either children or a symbol, NEVER both */ +typedef struct _bin_node +{ + struct _bin_node *child[2]; + PyObject *symbol; +} binode; + + +static binode * +binode_new(void) +{ + binode *nd; + + nd = (binode *) PyMem_Malloc(sizeof(binode)); + if (nd == NULL) { + PyErr_NoMemory(); + return NULL; } - if (PySlice_Check(a)) { - if (slice_GetIndicesEx((PySliceObject *) a, self->nbits, - &start, &stop, &step, &slicelength) 
< 0) { + nd->child[0] = NULL; + nd->child[1] = NULL; + nd->symbol = NULL; + return nd; +} + +static void +binode_delete(binode *nd) +{ + if (nd == NULL) + return; + + binode_delete(nd->child[0]); + binode_delete(nd->child[1]); + Py_XDECREF(nd->symbol); + PyMem_Free(nd); +} + +/* insert symbol (mapping to ba) into the tree */ +static int +binode_insert_symbol(binode *tree, bitarrayobject *ba, PyObject *symbol) +{ + binode *nd = tree, *prev; + Py_ssize_t i; + int k; + + for (i = 0; i < ba->nbits; i++) { + k = GETBIT(ba, i); + prev = nd; + nd = nd->child[k]; + + if (nd) { + if (nd->symbol) /* we cannot have already a symbol */ + goto ambiguity; + } + else { /* if node does not exist, create new one */ + nd = binode_new(); + if (nd == NULL) + return -1; + prev->child[k] = nd; + } + } + /* the new leaf node cannot already have a symbol or children */ + if (nd->symbol || nd->child[0] || nd->child[1]) + goto ambiguity; + + nd->symbol = symbol; + Py_INCREF(symbol); + return 0; + + ambiguity: + PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); + return -1; +} + +/* return a binary tree from a codedict, which is created by inserting + all symbols mapping to bitarrays */ +static binode * +binode_make_tree(PyObject *codedict) +{ + binode *tree; + PyObject *symbol, *value; + Py_ssize_t pos = 0; + + tree = binode_new(); + if (tree == NULL) + return NULL; + + while (PyDict_Next(codedict, &pos, &symbol, &value)) { + if (check_value(value) < 0 || + binode_insert_symbol(tree, (bitarrayobject *) value, + symbol) < 0) { + binode_delete(tree); return NULL; } - if (slicelength == 0) - Py_RETURN_NONE; + } + /* as we require the codedict to be non-empty the tree cannot be empty */ + assert(tree); + return tree; +} + +/* Traverse using the branches corresponding to bits in `ba`, starting + at *indexp. Return the symbol at the leaf node, or NULL when the end + of the bitarray has been reached. On error, NULL is also returned, + and the appropriate PyErr_SetString is set. 
+*/ +static PyObject * +binode_traverse(binode *tree, bitarrayobject *ba, Py_ssize_t *indexp) +{ + binode *nd = tree; + int k; - if (step < 0) { - stop = start + 1; - start = stop + step * (slicelength - 1) - 1; - step = -step; + while (*indexp < ba->nbits) { + assert(nd); + k = GETBIT(ba, *indexp); + (*indexp)++; + nd = nd->child[k]; + if (nd == NULL) { + PyErr_SetString(PyExc_ValueError, + "prefix code does not match data in bitarray"); + return NULL; } - if (step == 1) { - assert(stop - start == slicelength); - if (delete_n(self, start, slicelength) < 0) - return NULL; - Py_RETURN_NONE; + if (nd->symbol) { /* leaf */ + assert(nd->child[0] == NULL && nd->child[1] == NULL); + return nd->symbol; } - /* this is the only complicated part when step > 1 */ - for (i = j = start; i < self->nbits; i++) - if ((i - start) % step != 0 || i >= stop) { - setbit(self, j, GETBIT(self, i)); - j++; - } - if (resize(self, self->nbits - slicelength) < 0) - return NULL; - Py_RETURN_NONE; } - PyErr_SetString(PyExc_TypeError, "index or slice expected"); + if (nd != tree) + PyErr_SetString(PyExc_ValueError, "decoding not terminated"); + return NULL; } -/* ---------- number methods ---------- */ - -static PyObject * -bitarray_add(bitarrayobject *self, PyObject *other) +/* add the node's symbol to given dict */ +static int +binode_to_dict(binode *nd, PyObject *dict, bitarrayobject *prefix) { - PyObject *res; + bitarrayobject *t; /* prefix of the two child nodes */ + int k, ret; - res = bitarray_copy(self); - if (extend_dispatch((bitarrayobject *) res, other) < 0) { - Py_DECREF(res); - return NULL; + if (nd == NULL) + return 0; + + if (nd->symbol) { + if (PyDict_SetItem(dict, nd->symbol, (PyObject *) prefix) < 0) + return -1; + return 0; } - return res; + + for (k = 0; k < 2; k++) { + t = (bitarrayobject *) bitarray_copy(prefix); + if (t == NULL) + return -1; + resize(t, t->nbits + 1); + setbit(t, t->nbits - 1, k); + ret = binode_to_dict(nd->child[k], dict, t); + Py_DECREF((PyObject *) t); 
+ if (ret < 0) + return -1; + } + return 0; } -static PyObject * -bitarray_iadd(bitarrayobject *self, PyObject *other) +/* return the number of nodes */ +static Py_ssize_t +binode_nodes(binode *nd) { - if (extend_dispatch(self, other) < 0) - return NULL; - Py_INCREF(self); - return (PyObject *) self; + Py_ssize_t res; + + if (nd == NULL) + return 0; + + /* a node cannot have a symbol and children */ + assert(!(nd->symbol && (nd->child[0] || nd->child[1]))); + /* a node must have a symbol or children */ + assert(nd->symbol || nd->child[0] || nd->child[1]); + + res = 1; + res += binode_nodes(nd->child[0]); + res += binode_nodes(nd->child[1]); + return res; } +/******************************** decodetree ******************************/ + +typedef struct { + PyObject_HEAD + binode *tree; +} decodetreeobject; + + static PyObject * -bitarray_mul(bitarrayobject *self, PyObject *v) +decodetree_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - PyObject *res; - idx_t vi = 0; + binode *tree; + PyObject *codedict; + decodetreeobject *self; - if (!IS_INDEX(v)) { - PyErr_SetString(PyExc_TypeError, - "integer value expected for bitarray repetition"); + if (!PyArg_ParseTuple(args, "O:decodetree", &codedict)) return NULL; - } - if (getIndex(v, &vi) < 0) - return NULL; - res = bitarray_copy(self); - if (repeat((bitarrayobject *) res, vi) < 0) { - Py_DECREF(res); + + if (check_codedict(codedict) < 0) return NULL; - } - return res; -} -static PyObject * -bitarray_imul(bitarrayobject *self, PyObject *v) -{ - idx_t vi = 0; + tree = binode_make_tree(codedict); + if (tree == NULL) + return NULL; - if (!IS_INDEX(v)) { - PyErr_SetString(PyExc_TypeError, - "integer value expected for in-place bitarray repetition"); + self = (decodetreeobject *) type->tp_alloc(type, 0); + if (self == NULL) { + binode_delete(tree); return NULL; } - if (getIndex(v, &vi) < 0) - return NULL; - if (repeat(self, vi) < 0) - return NULL; - Py_INCREF(self); + self->tree = tree; + return (PyObject *) self; } 
+/* Return a dict mapping the symbols to bitarrays. This dict is a + reconstruction of the code dict the decodetree was created with. */ static PyObject * -bitarray_cpinvert(bitarrayobject *self) +decodetree_todict(decodetreeobject *self) { - PyObject *res; + PyObject *dict, *prefix; - res = bitarray_copy(self); - invert((bitarrayobject *) res); - return res; -} + dict = PyDict_New(); + if (dict == NULL) + return NULL; -#define BITWISE_FUNC(oper) \ -static PyObject * \ -bitarray_ ## oper (bitarrayobject *self, PyObject *other) \ -{ \ - PyObject *res; \ - \ - res = bitarray_copy(self); \ - if (bitwise((bitarrayobject *) res, other, OP_ ## oper) < 0) { \ - Py_DECREF(res); \ - return NULL; \ - } \ - return res; \ -} + prefix = newbitarrayobject(&Bitarray_Type, 0, default_endian); + if (prefix == NULL) + goto error; -BITWISE_FUNC(and) -BITWISE_FUNC(or) -BITWISE_FUNC(xor) + if (binode_to_dict(self->tree, dict, (bitarrayobject *) prefix) < 0) + goto error; + Py_DECREF(prefix); + return dict; -#define BITWISE_IFUNC(oper) \ -static PyObject * \ -bitarray_i ## oper (bitarrayobject *self, PyObject *other) \ -{ \ - if (bitwise(self, other, OP_ ## oper) < 0) \ - return NULL; \ - Py_INCREF(self); \ - return (PyObject *) self; \ + error: + Py_DECREF(dict); + Py_XDECREF(prefix); + return NULL; } -BITWISE_IFUNC(and) -BITWISE_IFUNC(or) -BITWISE_IFUNC(xor) - -/******************* variable length encoding and decoding ***************/ - +/* Return the number of nodes in the tree (not just symbols) */ static PyObject * -bitarray_encode(bitarrayobject *self, PyObject *args) +decodetree_nodes(decodetreeobject *self) { - PyObject *codedict, *iterable, *iter, *symbol, *bits; - - if (!PyArg_ParseTuple(args, "OO:_encode", &codedict, &iterable)) - return NULL; - - iter = PyObject_GetIter(iterable); - if (iter == NULL) { - PyErr_SetString(PyExc_TypeError, "iterable object expected"); - return NULL; - } - /* extend self with the bitarrays from codedict */ - while ((symbol = PyIter_Next(iter)) 
!= NULL) { - bits = PyDict_GetItem(codedict, symbol); - Py_DECREF(symbol); - if (bits == NULL) { - PyErr_SetString(PyExc_ValueError, "symbol not in prefix code"); - goto error; - } - if (extend_bitarray(self, (bitarrayobject *) bits) < 0) - goto error; - } - Py_DECREF(iter); - if (PyErr_Occurred()) - return NULL; - Py_RETURN_NONE; -error: - Py_DECREF(iter); - return NULL; + return PyLong_FromSsize_t(binode_nodes(self->tree)); } -PyDoc_STRVAR(encode_doc, -"_encode(code, iterable)\n\ -\n\ -like the encode method without code checking"); +static PyObject * +decodetree_sizeof(decodetreeobject *self) +{ + Py_ssize_t res; + res = sizeof(decodetreeobject); + res += sizeof(binode) * binode_nodes(self->tree); + return PyLong_FromSsize_t(res); +} -/* return the leave node resulting from traversing the (binary) tree, - or, when the iteration is finished, NULL -*/ -static PyObject * -tree_traverse(bitarrayobject *self, idx_t *indexp, PyObject *tree) +static void +decodetree_dealloc(decodetreeobject *self) { - PyObject *subtree; - long vi; + binode_delete(self->tree); + Py_TYPE(self)->tp_free((PyObject *) self); +} - if (*indexp == self->nbits) /* stop iterator */ - return NULL; +/* as these methods are only useful for debugging and testing, + they are only documented within this file */ +static PyMethodDef decodetree_methods[] = { + {"nodes", (PyCFunction) decodetree_nodes, METH_NOARGS, 0}, + {"todict", (PyCFunction) decodetree_todict, METH_NOARGS, 0}, + {"__sizeof__", (PyCFunction) decodetree_sizeof, METH_NOARGS, 0}, + {NULL, NULL} /* sentinel */ +}; - vi = GETBIT(self, *indexp); - (*indexp)++; - subtree = PyList_GetItem(tree, vi); +PyDoc_STRVAR(decodetree_doc, +"decodetree(code, /) -> decodetree\n\ +\n\ +Given a prefix code (a dict mapping symbols to bitarrays),\n\ +create a binary tree object to be passed to `.decode()` or `.iterdecode()`."); - if (PyList_Check(subtree) && PyList_Size(subtree) == 2) - return tree_traverse(self, indexp, subtree); - else - return subtree; -} 
+static PyTypeObject DecodeTree_Type = { +#ifdef IS_PY3K + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "bitarray.decodetree", /* tp_name */ + sizeof(decodetreeobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor) decodetree_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number*/ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + PyObject_HashNotImplemented, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + decodetree_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + decodetree_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + decodetree_new, /* tp_new */ + PyObject_Del, /* tp_free */ +}; -#define IS_EMPTY_LIST(x) (PyList_Check(x) && PyList_Size(x) == 0) +#define DecodeTree_Check(op) PyObject_TypeCheck(op, &DecodeTree_Type) + +/* -------------------------- END decodetree --------------------------- */ static PyObject * -bitarray_decode(bitarrayobject *self, PyObject *tree) +bitarray_decode(bitarrayobject *self, PyObject *obj) { - PyObject *symbol, *list; - idx_t index = 0; + binode *tree; + PyObject *list = NULL, *symbol; + Py_ssize_t index = 0; + + if (DecodeTree_Check(obj)) { + tree = ((decodetreeobject *) obj)->tree; + } + else { + if (check_codedict(obj) < 0) + return NULL; + + tree = binode_make_tree(obj); + if (tree == NULL) + goto error; + } list = PyList_New(0); if (list == NULL) - return NULL; - /* traverse binary tree and append symbols to the result list */ 
- while ((symbol = tree_traverse(self, &index, tree)) != NULL) { - if (IS_EMPTY_LIST(symbol)) { - PyErr_SetString(PyExc_ValueError, - "prefix code does not match data in bitarray"); - goto error; - } + goto error; + + while ((symbol = binode_traverse(tree, self, &index))) { if (PyList_Append(list, symbol) < 0) goto error; } + if (PyErr_Occurred()) + goto error; + if (!DecodeTree_Check(obj)) + binode_delete(tree); return list; + error: - Py_DECREF(list); + if (!DecodeTree_Check(obj)) + binode_delete(tree); + Py_XDECREF(list); return NULL; } PyDoc_STRVAR(decode_doc, -"_decode(tree) -> list\n\ +"decode(code, /) -> list\n\ \n\ -Given a tree, decode the content of the bitarray and return the list of\n\ +Given a prefix code (a dict mapping symbols to bitarrays, or `decodetree`\n\ +object), decode the content of the bitarray and return it as a list of\n\ symbols."); -/*********************** (Bitarray) Decode Iterator *********************/ +/*********************** (bitarray) Decode Iterator ***********************/ typedef struct { PyObject_HEAD - bitarrayobject *bao; /* bitarray we're searching in */ - PyObject *tree; /* prefix tree containing symbols */ - idx_t index; /* current index in bitarray */ + bitarrayobject *bao; /* bitarray we're decoding */ + binode *tree; /* prefix tree containing symbols */ + Py_ssize_t index; /* current index in bitarray */ + PyObject *decodetree; /* decodetree or NULL */ } decodeiterobject; static PyTypeObject DecodeIter_Type; #define DecodeIter_Check(op) PyObject_TypeCheck(op, &DecodeIter_Type) -/* create a new initialized bitarray search iterator object */ + +/* create a new initialized bitarray decode iterator object */ static PyObject * -bitarray_iterdecode(bitarrayobject *self, PyObject *tree) +bitarray_iterdecode(bitarrayobject *self, PyObject *obj) { - decodeiterobject *it; /* iterator to be returned */ + decodeiterobject *it; /* iterator to be returned */ + binode *tree; + + if (DecodeTree_Check(obj)) { + tree = 
((decodetreeobject *) obj)->tree; + } + else { + if (check_codedict(obj) < 0) + return NULL; + + tree = binode_make_tree(obj); + if (tree == NULL) + return NULL; + } it = PyObject_GC_New(decodeiterobject, &DecodeIter_Type); if (it == NULL) @@ -2222,18 +2484,20 @@ Py_INCREF(self); it->bao = self; - Py_INCREF(tree); it->tree = tree; it->index = 0; + it->decodetree = DecodeTree_Check(obj) ? obj : NULL; + Py_XINCREF(it->decodetree); PyObject_GC_Track(it); return (PyObject *) it; } PyDoc_STRVAR(iterdecode_doc, -"_iterdecode(tree) -> iterator\n\ +"iterdecode(code, /) -> iterator\n\ \n\ -Given a tree, decode the content of the bitarray and iterate over the\n\ -symbols."); +Given a prefix code (a dict mapping symbols to bitarrays, or `decodetree`\n\ +object), decode the content of the bitarray and return an iterator over\n\ +the symbols."); static PyObject * decodeiter_next(decodeiterobject *it) @@ -2241,14 +2505,9 @@ PyObject *symbol; assert(DecodeIter_Check(it)); - symbol = tree_traverse(it->bao, &(it->index), it->tree); - if (symbol == NULL) /* stop iteration */ - return NULL; - if (IS_EMPTY_LIST(symbol)) { - PyErr_SetString(PyExc_ValueError, - "prefix code does not match data in bitarray"); + symbol = binode_traverse(it->tree, it->bao, &(it->index)); + if (symbol == NULL) /* stop iteration OR error occured */ return NULL; - } Py_INCREF(symbol); return symbol; } @@ -2256,9 +2515,13 @@ static void decodeiter_dealloc(decodeiterobject *it) { + if (it->decodetree) + Py_DECREF(it->decodetree); + else /* when decodeiter was created from dict - free tree */ + binode_delete(it->tree); + PyObject_GC_UnTrack(it); - Py_XDECREF(it->bao); - Py_XDECREF(it->tree); + Py_DECREF(it->bao); PyObject_GC_Del(it); } @@ -2271,12 +2534,12 @@ static PyTypeObject DecodeIter_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&DecodeIter_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ #endif - "bitarraydecodeiterator", /* tp_name */ + 
"bitarray.decodeiterator", /* tp_name */ sizeof(decodeiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ @@ -2306,13 +2569,13 @@ 0, /* tp_methods */ }; -/*********************** (Bitarray) Search Iterator *********************/ +/*********************** (Bitarray) Search Iterator ***********************/ typedef struct { PyObject_HEAD bitarrayobject *bao; /* bitarray we're searching in */ bitarrayobject *xa; /* bitarray being searched for */ - idx_t p; /* current search position */ + Py_ssize_t p; /* current search position */ } searchiterobject; static PyTypeObject SearchIter_Type; @@ -2350,7 +2613,7 @@ } PyDoc_STRVAR(itersearch_doc, -"itersearch(bitarray) -> iterator\n\ +"itersearch(bitarray, /) -> iterator\n\ \n\ Searches for the given a bitarray in self, and return an iterator over\n\ the start positions where bitarray matches self."); @@ -2358,22 +2621,22 @@ static PyObject * searchiter_next(searchiterobject *it) { - idx_t p; + Py_ssize_t p; assert(SearchIter_Check(it)); p = search(it->bao, it->xa, it->p); if (p < 0) /* no more positions -- stop iteration */ return NULL; it->p = p + 1; /* next search position */ - return PyLong_FromLongLong(p); + return PyLong_FromSsize_t(p); } static void searchiter_dealloc(searchiterobject *it) { PyObject_GC_UnTrack(it); - Py_XDECREF(it->bao); - Py_XDECREF(it->xa); + Py_DECREF(it->bao); + Py_DECREF(it->xa); PyObject_GC_Del(it); } @@ -2386,12 +2649,12 @@ static PyTypeObject SearchIter_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&SearchIter_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ #endif - "bitarraysearchiterator", /* tp_name */ + "bitarray.searchiterator", /* tp_name */ sizeof(searchiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ @@ -2421,10 +2684,9 @@ 0, /* tp_methods */ }; -/*************************** Method definitions *************************/ +/*********************** bitarray method definitions **********************/ -static 
PyMethodDef -bitarray_methods[] = { +static PyMethodDef bitarray_methods[] = { {"all", (PyCFunction) bitarray_all, METH_NOARGS, all_doc}, {"any", (PyCFunction) bitarray_any, METH_NOARGS, @@ -2435,15 +2697,17 @@ buffer_info_doc}, {"bytereverse", (PyCFunction) bitarray_bytereverse, METH_NOARGS, bytereverse_doc}, + {"clear", (PyCFunction) bitarray_clear, METH_NOARGS, + clear_doc}, {"copy", (PyCFunction) bitarray_copy, METH_NOARGS, copy_doc}, {"count", (PyCFunction) bitarray_count, METH_VARARGS, count_doc}, - {"_decode", (PyCFunction) bitarray_decode, METH_O, + {"decode", (PyCFunction) bitarray_decode, METH_O, decode_doc}, - {"_iterdecode", (PyCFunction) bitarray_iterdecode, METH_O, + {"iterdecode", (PyCFunction) bitarray_iterdecode, METH_O, iterdecode_doc}, - {"_encode", (PyCFunction) bitarray_encode, METH_VARARGS, + {"encode", (PyCFunction) bitarray_encode, METH_VARARGS, encode_doc}, {"endian", (PyCFunction) bitarray_endian, METH_NOARGS, endian_doc}, @@ -2459,7 +2723,7 @@ index_doc}, {"insert", (PyCFunction) bitarray_insert, METH_VARARGS, insert_doc}, - {"invert", (PyCFunction) bitarray_invert, METH_NOARGS, + {"invert", (PyCFunction) bitarray_invert, METH_VARARGS, invert_doc}, {"length", (PyCFunction) bitarray_length, METH_NOARGS, length_doc}, @@ -2482,7 +2746,7 @@ sort_doc}, {"tofile", (PyCFunction) bitarray_tofile, METH_O, tofile_doc}, - {"tolist", (PyCFunction) bitarray_tolist, METH_NOARGS, + {"tolist", (PyCFunction) bitarray_tolist, METH_VARARGS, tolist_doc}, {"tobytes", (PyCFunction) bitarray_tobytes, METH_NOARGS, tobytes_doc}, @@ -2497,74 +2761,74 @@ copy_doc}, {"__deepcopy__", (PyCFunction) bitarray_copy, METH_O, copy_doc}, - {"__len__", (PyCFunction) bitarray_length, METH_NOARGS, - len_doc}, - {"__contains__", (PyCFunction) bitarray_contains, METH_O, - contains_doc}, {"__reduce__", (PyCFunction) bitarray_reduce, METH_NOARGS, reduce_doc}, - - /* slice methods */ - {"__delitem__", (PyCFunction) bitarray_delitem, METH_O, 0}, - {"__getitem__", (PyCFunction) 
bitarray_getitem, METH_O, 0}, - {"__setitem__", (PyCFunction) bitarray_setitem, METH_VARARGS, 0}, - - /* number methods */ - {"__add__", (PyCFunction) bitarray_add, METH_O, 0}, - {"__iadd__", (PyCFunction) bitarray_iadd, METH_O, 0}, - {"__mul__", (PyCFunction) bitarray_mul, METH_O, 0}, - {"__rmul__", (PyCFunction) bitarray_mul, METH_O, 0}, - {"__imul__", (PyCFunction) bitarray_imul, METH_O, 0}, - {"__and__", (PyCFunction) bitarray_and, METH_O, 0}, - {"__or__", (PyCFunction) bitarray_or, METH_O, 0}, - {"__xor__", (PyCFunction) bitarray_xor, METH_O, 0}, - {"__iand__", (PyCFunction) bitarray_iand, METH_O, 0}, - {"__ior__", (PyCFunction) bitarray_ior, METH_O, 0}, - {"__ixor__", (PyCFunction) bitarray_ixor, METH_O, 0}, - {"__invert__", (PyCFunction) bitarray_cpinvert, METH_NOARGS, 0}, + {"__sizeof__", (PyCFunction) bitarray_sizeof, METH_NOARGS, + sizeof_doc}, {NULL, NULL} /* sentinel */ }; +/* ------------------------ bitarray initialization -------------------- */ + +/* Given a string, return an integer representing the endianness. + If the string is invalid, set a Python exception and return -1. 
*/ +static int +endian_from_string(const char* string) +{ + assert(default_endian == ENDIAN_LITTLE || default_endian == ENDIAN_BIG); + + if (string == NULL) + return default_endian; + + if (strcmp(string, "little") == 0) + return ENDIAN_LITTLE; + + if (strcmp(string, "big") == 0) + return ENDIAN_BIG; + + PyErr_Format(PyExc_ValueError, "bit endianness must be either " + "'little' or 'big', got: '%s'", string); + return -1; +} + static PyObject * bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - PyObject *a; /* to be returned in some cases */ + PyObject *res; /* to be returned in some cases */ PyObject *initial = NULL; char *endian_str = NULL; int endian; - static char* kwlist[] = {"initial", "endian", NULL}; + static char *kwlist[] = {"", "endian", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, - "|Os:bitarray", kwlist, &initial, &endian_str)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Os:bitarray", + kwlist, &initial, &endian_str)) return NULL; - if (endian_str == NULL) { - endian = DEFAULT_ENDIAN; /* use default value */ - } - else if (strcmp(endian_str, "little") == 0) { - endian = 0; - } - else if (strcmp(endian_str, "big") == 0) { - endian = 1; - } - else { - PyErr_SetString(PyExc_ValueError, - "endian must be 'little' or 'big'"); + endian = endian_from_string(endian_str); + if (endian < 0) return NULL; - } /* no arg or None */ if (initial == NULL || initial == Py_None) return newbitarrayobject(type, 0, endian); - /* int, long */ - if (IS_INDEX(initial)) { - idx_t nbits = 0; + /* boolean */ + if (PyBool_Check(initial)) { + PyErr_SetString(PyExc_TypeError, + "cannot create bitarray from boolean"); + return NULL; + } + + /* index (a number) */ + if (PyIndex_Check(initial)) { + Py_ssize_t nbits; - if (getIndex(initial, &nbits) < 0) + nbits = PyNumber_AsSsize_t(initial, PyExc_IndexError); + if (nbits == -1 && PyErr_Occurred()) return NULL; + if (nbits < 0) { PyErr_SetString(PyExc_ValueError, "cannot create bitarray with negative 
length"); @@ -2576,60 +2840,62 @@ /* from bitarray itself */ if (bitarray_Check(initial)) { #define np ((bitarrayobject *) initial) - a = newbitarrayobject(type, np->nbits, - endian_str == NULL ? np->endian : endian); - if (a == NULL) + res = newbitarrayobject(type, np->nbits, + endian_str == NULL ? np->endian : endian); + if (res == NULL) return NULL; - memcpy(((bitarrayobject *) a)->ob_item, np->ob_item, Py_SIZE(np)); + memcpy(((bitarrayobject *) res)->ob_item, np->ob_item, + (size_t) Py_SIZE(np)); #undef np - return a; + return res; } - /* string */ - if (PyString_Check(initial)) { - Py_ssize_t strlen; - char *str; + /* bytes (for pickling) */ + if (PyBytes_Check(initial)) { + Py_ssize_t nbytes; + char *data; - strlen = PyString_Size(initial); - if (strlen == 0) /* empty string */ + nbytes = PyBytes_Size(initial); + if (nbytes == 0) /* no bytes */ return newbitarrayobject(type, 0, endian); - str = PyString_AsString(initial); - if (0 <= str[0] && str[0] < 8) { + data = PyBytes_AsString(initial); + if (0 <= data[0] && data[0] < 8) { /* when the first character is smaller than 8, it indicates the number of unused bits at the end, and rest of the bytes - consist of the raw binary data, this is used for pickling */ - if (strlen == 1 && str[0] > 0) { + consist of the raw binary data */ + if (nbytes == 1 && data[0] > 0) { PyErr_Format(PyExc_ValueError, - "did not expect 0x0%d", (int) str[0]); + "did not expect 0x0%d", (int) data[0]); return NULL; } - a = newbitarrayobject(type, BITS(strlen - 1) - ((idx_t) str[0]), - endian); - if (a == NULL) + res = newbitarrayobject(type, + BITS(nbytes - 1) - ((Py_ssize_t) data[0]), + endian); + if (res == NULL) return NULL; - memcpy(((bitarrayobject *) a)->ob_item, str + 1, strlen - 1); - return a; + memcpy(((bitarrayobject *) res)->ob_item, data + 1, + (size_t) nbytes - 1); + return res; } } /* leave remaining type dispatch to the extend method */ - a = newbitarrayobject(type, 0, endian); - if (a == NULL) + res = 
newbitarrayobject(type, 0, endian); + if (res == NULL) return NULL; - if (extend_dispatch((bitarrayobject *) a, initial) < 0) { - Py_DECREF(a); + if (extend_dispatch((bitarrayobject *) res, initial) < 0) { + Py_DECREF(res); return NULL; } - return a; + return res; } - static PyObject * richcompare(PyObject *v, PyObject *w, int op) { int cmp, vi, wi; - idx_t i, vs, ws; + Py_ssize_t i, vs, ws; if (!bitarray_Check(v) || !bitarray_Check(w)) { Py_INCREF(Py_NotImplemented); @@ -2639,12 +2905,19 @@ #define wa ((bitarrayobject *) w) vs = va->nbits; ws = wa->nbits; - if (vs != ws) { - /* shortcut for EQ/NE: if sizes differ, the bitarrays differ */ - if (op == Py_EQ) - Py_RETURN_FALSE; - if (op == Py_NE) - Py_RETURN_TRUE; + if (op == Py_EQ || op == Py_NE) { + /* shortcuts for EQ/NE */ + if (vs != ws) { + /* if sizes differ, the bitarrays differ */ + return PyBool_FromLong((long) (op == Py_NE)); + } + else if (va->endian == wa->endian) { + /* sizes and endianness are the same - use memcmp() */ + setunused(va); + setunused(wa); + cmp = memcmp(va->ob_item, wa->ob_item, (size_t) Py_SIZE(v)); + return PyBool_FromLong((long) ((cmp == 0) ^ (op == Py_NE))); + } } /* to avoid uninitialized warning for some compilers */ @@ -2688,12 +2961,12 @@ return PyBool_FromLong((long) cmp); } -/************************** Bitarray Iterator **************************/ +/***************************** bitarray iterator **************************/ typedef struct { PyObject_HEAD bitarrayobject *bao; /* bitarray we're iterating over */ - idx_t index; /* current index in bitarray */ + Py_ssize_t index; /* current index in bitarray */ } bitarrayiterobject; static PyTypeObject BitarrayIter_Type; @@ -2701,7 +2974,7 @@ #define BitarrayIter_Check(op) PyObject_TypeCheck(op, &BitarrayIter_Type) /* create a new initialized bitarray iterator object, this object is - returned when calling item(a) */ + returned when calling iter(a) */ static PyObject * bitarray_iter(bitarrayobject *self) { @@ -2737,7 +3010,7 @@ 
bitarrayiter_dealloc(bitarrayiterobject *it) { PyObject_GC_UnTrack(it); - Py_XDECREF(it->bao); + Py_DECREF(it->bao); PyObject_GC_Del(it); } @@ -2750,12 +3023,12 @@ static PyTypeObject BitarrayIter_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&BitarrayIter_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ #endif - "bitarrayiterator", /* tp_name */ + "bitarray.bitarrayiterator", /* tp_name */ sizeof(bitarrayiterobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ @@ -2785,8 +3058,9 @@ 0, /* tp_methods */ }; -/********************* Bitarray Buffer Interface ************************/ -#ifdef WITH_BUFFER +/*********************** bitarray buffer interface ************************/ + +#if PY_MAJOR_VERSION == 2 /* old buffer protocol */ static Py_ssize_t bitarray_buffer_getreadbuf(bitarrayobject *self, Py_ssize_t index, const void **ptr) @@ -2831,6 +3105,8 @@ return Py_SIZE(self); } +#endif + static int bitarray_getbuffer(bitarrayobject *self, Py_buffer *view, int flags) { @@ -2857,24 +3133,52 @@ } static PyBufferProcs bitarray_as_buffer = { +#if PY_MAJOR_VERSION == 2 /* old buffer protocol */ (readbufferproc) bitarray_buffer_getreadbuf, (writebufferproc) bitarray_buffer_getwritebuf, (segcountproc) bitarray_buffer_getsegcount, (charbufferproc) bitarray_buffer_getcharbuf, +#endif (getbufferproc) bitarray_getbuffer, (releasebufferproc) bitarray_releasebuffer, }; -#endif /* WITH_BUFFER */ -/************************** Bitarray Type *******************************/ -static PyTypeObject Bitarraytype = { +/***************************** Bitarray Type ******************************/ + +PyDoc_STRVAR(bitarraytype_doc, +"bitarray(initializer=0, /, endian='big') -> bitarray\n\ +\n\ +Return a new bitarray object whose items are bits initialized from\n\ +the optional initial object, and endianness.\n\ +The initializer may be of the following types:\n\ +\n\ +`int`: Create a bitarray of given integer length. 
The initial values are\n\ +arbitrary. If you want all values to be set, use the .setall() method.\n\ +\n\ +`str`: Create bitarray from a string of `0` and `1`.\n\ +\n\ +`list`, `tuple`, `iterable`: Create bitarray from a sequence, each\n\ +element in the sequence is converted to a bit using its truth value.\n\ +\n\ +`bitarray`: Create bitarray from another bitarray. This is done by\n\ +copying the buffer holding the bitarray data, and is hence very fast.\n\ +\n\ +The optional keyword arguments `endian` specifies the bit endianness of the\n\ +created bitarray object.\n\ +Allowed values are the strings `big` and `little` (default is `big`).\n\ +\n\ +Note that setting the bit endianness only has an effect when accessing the\n\ +machine representation of the bitarray, i.e. when using the methods: tofile,\n\ +fromfile, tobytes, frombytes."); + +static PyTypeObject Bitarray_Type = { #ifdef IS_PY3K - PyVarObject_HEAD_INIT(&Bitarraytype, 0) + PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /* ob_size */ #endif - "bitarray._bitarray", /* tp_name */ + "bitarray.bitarray", /* tp_name */ sizeof(bitarrayobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ @@ -2884,25 +3188,21 @@ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc) bitarray_repr, /* tp_repr */ - 0, /* tp_as_number*/ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ + &bitarray_as_number, /* tp_as_number*/ + &bitarray_as_sequence, /* tp_as_sequence */ + &bitarray_as_mapping, /* tp_as_mapping */ + PyObject_HashNotImplemented, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ -#ifdef WITH_BUFFER &bitarray_as_buffer, /* tp_as_buffer */ -#else - 0, /* tp_as_buffer */ -#endif Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS -#ifdef WITH_BUFFER +#if PY_MAJOR_VERSION == 2 | Py_TPFLAGS_HAVE_NEWBUFFER #endif , /* tp_flags */ - 0, /* tp_doc */ + bitarraytype_doc, /* tp_doc */ 0, /* tp_traverse */ 0, 
/* tp_clear */ richcompare, /* tp_richcompare */ @@ -2923,82 +3223,56 @@ PyObject_Del, /* tp_free */ }; -/*************************** Module functions **********************/ +/***************************** Module functions ***************************/ static PyObject * -bitdiff(PyObject *self, PyObject *args) +get_default_endian(PyObject *module) { - PyObject *a, *b; - Py_ssize_t i; - idx_t res = 0; - unsigned char c; - - if (!PyArg_ParseTuple(args, "OO:bitdiff", &a, &b)) - return NULL; - if (!(bitarray_Check(a) && bitarray_Check(b))) { - PyErr_SetString(PyExc_TypeError, "bitarray object expected"); - return NULL; - } - -#define aa ((bitarrayobject *) a) -#define bb ((bitarrayobject *) b) - if (aa->nbits != bb->nbits) { - PyErr_SetString(PyExc_ValueError, - "bitarrays of equal length expected"); - return NULL; - } - setunused(aa); - setunused(bb); - for (i = 0; i < Py_SIZE(aa); i++) { - c = aa->ob_item[i] ^ bb->ob_item[i]; - res += bitcount_lookup[c]; - } -#undef aa -#undef bb - return PyLong_FromLongLong(res); + return Py_BuildValue("s", + default_endian == ENDIAN_LITTLE ? 
"little" : "big"); } -PyDoc_STRVAR(bitdiff_doc, -"bitdiff(a, b) -> int\n\ +PyDoc_STRVAR(get_default_endian_doc, +"get_default_endian() -> string\n\ \n\ -Return the difference between two bitarrays a and b.\n\ -This is function does the same as (a ^ b).count(), but is more memory\n\ -efficient, as no intermediate bitarray object gets created"); +Return the default endianness for new bitarray objects being created.\n\ +Under normal circumstances, the return value is `big`."); static PyObject * -bits2bytes(PyObject *self, PyObject *v) +set_default_endian(PyObject *module, PyObject *args) { - idx_t n = 0; + char *endian_str; + int tmp; - if (!IS_INDEX(v)) { - PyErr_SetString(PyExc_TypeError, "integer expected"); - return NULL; - } - if (getIndex(v, &n) < 0) + if (!PyArg_ParseTuple(args, "s:_set_default_endian", &endian_str)) return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "positive value expected"); + + /* As endian_from_string might return -1, we have to store its value + in a temporary variable before setting default_endian. 
*/ + tmp = endian_from_string(endian_str); + if (tmp < 0) return NULL; - } - return PyLong_FromLongLong(BYTES(n)); + default_endian = tmp; + + Py_RETURN_NONE; } -PyDoc_STRVAR(bits2bytes_doc, -"bits2bytes(n) -> int\n\ +PyDoc_STRVAR(set_default_endian_doc, +"_set_default_endian(endian, /)\n\ \n\ -Return the number of bytes necessary to store n bits."); +Set the default bit endianness for new bitarray objects being created."); static PyObject * sysinfo(void) { - return Py_BuildValue("iiiiL", + return Py_BuildValue("iiiin", (int) sizeof(void *), (int) sizeof(size_t), (int) sizeof(Py_ssize_t), - (int) sizeof(idx_t), - (idx_t) PY_SSIZE_T_MAX); + (int) sizeof(Py_ssize_t), + PY_SSIZE_T_MAX); } PyDoc_STRVAR(sysinfo_doc, @@ -3007,47 +3281,76 @@ tuple(sizeof(void *),\n\ sizeof(size_t),\n\ sizeof(Py_ssize_t),\n\ - sizeof(idx_t),\n\ + sizeof(Py_ssize_t),\n\ PY_SSIZE_T_MAX)"); static PyMethodDef module_functions[] = { - {"bitdiff", (PyCFunction) bitdiff, METH_VARARGS, bitdiff_doc }, - {"bits2bytes", (PyCFunction) bits2bytes, METH_O, bits2bytes_doc}, + {"get_default_endian", (PyCFunction) get_default_endian, METH_NOARGS, + get_default_endian_doc}, + {"_set_default_endian", (PyCFunction) set_default_endian, METH_VARARGS, + set_default_endian_doc}, {"_sysinfo", (PyCFunction) sysinfo, METH_NOARGS, sysinfo_doc }, {NULL, NULL} /* sentinel */ }; -/*********************** Install Module **************************/ +/******************************* Install Module ***************************/ #ifdef IS_PY3K static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_bitarray", 0, -1, module_functions, }; +#endif + PyMODINIT_FUNC +#ifdef IS_PY3K PyInit__bitarray(void) #else -PyMODINIT_FUNC init_bitarray(void) #endif { PyObject *m; - Py_TYPE(&Bitarraytype) = &PyType_Type; - Py_TYPE(&BitarrayIter_Type) = &PyType_Type; #ifdef IS_PY3K m = PyModule_Create(&moduledef); - if (m == NULL) - return NULL; #else m = Py_InitModule3("_bitarray", module_functions, 0); - if (m == NULL) - return; #endif + if 
(m == NULL) + goto error; + + if (PyType_Ready(&Bitarray_Type) < 0) + goto error; + Py_SET_TYPE(&Bitarray_Type, &PyType_Type); + Py_INCREF((PyObject *) &Bitarray_Type); + PyModule_AddObject(m, "bitarray", (PyObject *) &Bitarray_Type); + + if (PyType_Ready(&DecodeTree_Type) < 0) + goto error; + Py_SET_TYPE(&DecodeTree_Type, &PyType_Type); + Py_INCREF((PyObject *) &DecodeTree_Type); + PyModule_AddObject(m, "decodetree", (PyObject *) &DecodeTree_Type); - Py_INCREF((PyObject *) &Bitarraytype); - PyModule_AddObject(m, "_bitarray", (PyObject *) &Bitarraytype); + if (PyType_Ready(&DecodeIter_Type) < 0) + goto error; + Py_SET_TYPE(&DecodeIter_Type, &PyType_Type); + + if (PyType_Ready(&BitarrayIter_Type) < 0) + goto error; + Py_SET_TYPE(&BitarrayIter_Type, &PyType_Type); + + if (PyType_Ready(&SearchIter_Type) < 0) + goto error; + Py_SET_TYPE(&SearchIter_Type, &PyType_Type); + + PyModule_AddObject(m, "__version__", + Py_BuildValue("s", BITARRAY_VERSION)); #ifdef IS_PY3K return m; + error: + return NULL; +#else + error: + return; #endif } diff -Nru python-bitarray-0.8.1/bitarray/bitarray.h python-bitarray-1.6.3/bitarray/bitarray.h --- python-bitarray-0.8.1/bitarray/bitarray.h 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/bitarray.h 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,109 @@ +/* + Copyright (c) 2008 - 2021, Ilan Schnell + bitarray is published under the PSF license. + + Author: Ilan Schnell +*/ +#define BITARRAY_VERSION "1.6.3" + +#if PY_MAJOR_VERSION >= 3 +#define IS_PY3K +#endif + +#if PY_MAJOR_VERSION == 2 +/* the Py_MIN macro was introduced in Python 3.3 */ +#define Py_MIN(x, y) (((x) > (y)) ? (y) : (x)) +#define PySlice_GetIndicesEx(slice, len, start, stop, step, slicelength) \ + PySlice_GetIndicesEx(((PySliceObject *) slice), \ + (len), (start), (stop), (step), (slicelength)) +#define PyLong_FromLong PyInt_FromLong +#endif + +/* ob_size is the byte count of the buffer, not the number of elements. 
+ The number of elements (bits) is nbits. */ +typedef struct { + PyObject_VAR_HEAD + char *ob_item; /* buffer */ + Py_ssize_t allocated; /* how many bytes allocated */ + Py_ssize_t nbits; /* length of bitarray, i.e. elements */ + int endian; /* bit endianness of bitarray */ + int ob_exports; /* how many buffer exports */ + PyObject *weakreflist; /* list of weak references */ +} bitarrayobject; + +/* --- bit endianness --- */ +#define ENDIAN_LITTLE 0 +#define ENDIAN_BIG 1 + +/* returns the endianness string from bitarrayobject */ +#define ENDIAN_STR(o) ((o)->endian == ENDIAN_LITTLE ? "little" : "big") + +/* number of bits that can be stored in given bytes */ +#define BITS(bytes) ((bytes) << 3) + +/* number of bytes necessary to store given bits */ +#define BYTES(bits) ((bits) == 0 ? 0 : (((bits) - 1) / 8 + 1)) + +#define BITMASK(endian, i) \ + (((char) 1) << ((endian) == ENDIAN_LITTLE ? ((i) % 8) : (7 - (i) % 8))) + +/* ------------ low level access to bits in bitarrayobject ------------- */ + +#ifndef NDEBUG +static inline int GETBIT(bitarrayobject *self, Py_ssize_t i) { + assert(0 <= i && i < self->nbits); + return ((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i) ? 1 : 0); +} +#else +#define GETBIT(self, i) \ + ((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i) ? 
1 : 0) +#endif + +static inline void +setbit(bitarrayobject *self, Py_ssize_t i, int bit) +{ + char *cp, mask; + + assert(0 <= i && i < BITS(Py_SIZE(self))); + mask = BITMASK(self->endian, i); + cp = self->ob_item + i / 8; + if (bit) + *cp |= mask; + else + *cp &= ~mask; +} + +/* sets unused padding bits (within last byte of buffer) to 0, + and return the number of padding bits -- self->nbits is unchanged */ +static inline int +setunused(bitarrayobject *self) +{ + const char mask[16] = { + /* elements 0 and 8 (with value 0x00) are never accessed */ + 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, /* little endian */ + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, /* big endian */ + }; + int res; + + if (self->nbits % 8 == 0) + return 0; + + res = (int) (BITS(Py_SIZE(self)) - self->nbits); + assert(0 < res && res < 8); + /* apply the appropriate mask to the last byte in buffer */ + self->ob_item[Py_SIZE(self) - 1] &= + mask[self->nbits % 8 + (self->endian == ENDIAN_LITTLE ? 0 : 8)]; + + return res; +} + +static unsigned char bitcount_lookup[256] = { + 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8, +}; diff -Nru python-bitarray-0.8.1/bitarray/__init__.py python-bitarray-1.6.3/bitarray/__init__.py --- python-bitarray-0.8.1/bitarray/__init__.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/__init__.py 2021-01-21 00:52:57.000000000 +0000 @@ -4,128 +4,58 @@ Please find a description of this package at: - http://pypi.python.org/pypi/bitarray/ + 
https://github.com/ilanschnell/bitarray Author: Ilan Schnell """ -from bitarray._bitarray import _bitarray, bitdiff, bits2bytes, _sysinfo +from __future__ import absolute_import -__version__ = '0.8.1' +from bitarray._bitarray import (bitarray, decodetree, _sysinfo, + get_default_endian, _set_default_endian, + __version__) -def _tree_insert(tree, sym, ba): - """ - Insert symbol which is mapped to bitarray into tree - """ - v = ba[0] - if len(ba) > 1: - if tree[v] == []: - tree[v] = [[], []] - _tree_insert(tree[v], sym, ba[1:]) - else: - if tree[v] != []: - raise ValueError("prefix code ambiguous") - tree[v] = sym - -def _mk_tree(codedict): - # Generate tree from codedict - tree = [[], []] - for sym, ba in codedict.items(): - _tree_insert(tree, sym, ba) - return tree - -def _check_codedict(codedict): - if not isinstance(codedict, dict): - raise TypeError("dictionary expected") - if len(codedict) == 0: - raise ValueError("prefix code empty") - for k, v in codedict.items(): - if not isinstance(v, bitarray): - raise TypeError("bitarray expected for dictionary value") - if v.length() == 0: - raise ValueError("non-empty bitarray expected") - - -class bitarray(_bitarray): - """bitarray([initial], [endian=string]) - -Return a new bitarray object whose items are bits initialized from -the optional initial, and endianness. -If no object is provided, the bitarray is initialized to have length zero. -The initial object may be of the following types: - -int, long - Create bitarray of length given by the integer. The initial values - in the array are random, because only the memory allocated. - -string - Create bitarray from a string of '0's and '1's. - -list, tuple, iterable - Create bitarray from a sequence, each element in the sequence is - converted to a bit using truth value value. - -bitarray - Create bitarray from another bitarray. This is done by copying the - memory holding the bitarray data, and is hence very fast. 
- -The optional keyword arguments 'endian' specifies the bit endianness of the -created bitarray object. -Allowed values are 'big' and 'little' (default is 'big'). - -Note that setting the bit endianness only has an effect when accessing the -machine representation of the bitarray, i.e. when using the methods: tofile, -fromfile, tobytes, frombytes.""" - - def fromstring(self, string): - """fromstring(string) - -Append from a string, interpreting the string as machine values. -Deprecated since version 0.4.0, use ``frombytes()`` instead.""" - return self.frombytes(string.encode()) - - def tostring(self): - """tostring() -> string - -Return the string representing (machine values) of the bitarray. -When the length of the bitarray is not a multiple of 8, the few remaining -bits (1..7) are set to 0. -Deprecated since version 0.4.0, use ``tobytes()`` instead.""" - return self.tobytes().decode() - - def decode(self, codedict): - """decode(code) -> list - -Given a prefix code (a dict mapping symbols to bitarrays), -decode the content of the bitarray and return the list of symbols.""" - _check_codedict(codedict) - return self._decode(_mk_tree(codedict)) - - def iterdecode(self, codedict): - """iterdecode(code) -> iterator - -Given a prefix code (a dict mapping symbols to bitarrays), -decode the content of the bitarray and iterate over the symbols.""" - _check_codedict(codedict) - return self._iterdecode(_mk_tree(codedict)) - - def encode(self, codedict, iterable): - """encode(code, iterable) - -Given a prefix code (a dict mapping symbols to bitarrays), -iterates over iterable object with symbols, and extends the bitarray -with the corresponding bitarray for each symbols.""" - _check_codedict(codedict) - self._encode(codedict, iterable) +__all__ = ['bitarray', 'frozenbitarray', 'decodetree', '__version__'] - def __int__(self): - raise TypeError("int() argument cannot be a bitarray") - def __long__(self): - raise TypeError("long() argument cannot be a bitarray") +class 
frozenbitarray(bitarray): + """frozenbitarray(initializer=0, /, endian='big') -> frozenbitarray - def __float__(self): - raise TypeError("float() argument cannot be a bitarray") +Return a frozenbitarray object, which is initialized the same way a bitarray +object is initialized. A frozenbitarray is immutable and hashable. +Its contents cannot be altered after it is created; however, it can be used +as a dictionary key. +""" + def __repr__(self): + return 'frozen' + bitarray.__repr__(self) + + def __hash__(self): + "Return hash(self)." + if getattr(self, '_hash', None) is None: + self._hash = hash((len(self), self.tobytes())) + return self._hash + + def __delitem__(self, *args, **kwargs): + "" # no docstring + raise TypeError("'frozenbitarray' is immutable") + + append = bytereverse = clear = extend = encode = fill = __delitem__ + frombytes = fromfile = insert = invert = pack = pop = __delitem__ + remove = reverse = setall = sort = __setitem__ = __delitem__ + __iadd__ = __iand__ = __imul__ = __ior__ = __ixor__ = __delitem__ + + +def bits2bytes(_n): + """bits2bytes(n, /) -> int + +Return the number of bytes necessary to store n bits. +""" + import sys + if not isinstance(_n, (int, long) if sys.version_info[0] == 2 else int): + raise TypeError("integer expected") + if _n < 0: + raise ValueError("non-negative integer expected") + return 0 if _n == 0 else ((_n - 1) // 8 + 1) def test(verbosity=1, repeat=1): diff -Nru python-bitarray-0.8.1/bitarray/pythoncapi_compat.h python-bitarray-1.6.3/bitarray/pythoncapi_compat.h --- python-bitarray-0.8.1/bitarray/pythoncapi_compat.h 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/pythoncapi_compat.h 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,280 @@ +// Header file providing new functions of the Python C API to old Python +// versions. +// +// File distributed under the MIT license. 
+// +// Homepage: +// https://github.com/pythoncapi/pythoncapi_compat +// +// Latest version: +// https://raw.githubusercontent.com/pythoncapi/pythoncapi_compat/master/pythoncapi_compat.h +// +// SPDX-License-Identifier: MIT + +#ifndef PYTHONCAPI_COMPAT +#define PYTHONCAPI_COMPAT + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "frameobject.h" // PyFrameObject, PyFrame_GetBack() + + +// Cast argument to PyObject* type. +#ifndef _PyObject_CAST +# define _PyObject_CAST(op) ((PyObject*)(op)) +#endif + + +// bpo-42262 added Py_NewRef() to Python 3.10.0a3 +#if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_NewRef) +static inline PyObject* _Py_NewRef(PyObject *obj) +{ + Py_INCREF(obj); + return obj; +} +#define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj)) +#endif + + +// bpo-42262 added Py_XNewRef() to Python 3.10.0a3 +#if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_XNewRef) +static inline PyObject* _Py_XNewRef(PyObject *obj) +{ + Py_XINCREF(obj); + return obj; +} +#define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj)) +#endif + + +// bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4 +#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT) +static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) +{ + ob->ob_refcnt = refcnt; +} +#define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT((PyObject*)(ob), refcnt) +#endif + + +// bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4 +#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE) +static inline void +_Py_SET_TYPE(PyObject *ob, PyTypeObject *type) +{ + ob->ob_type = type; +} +#define Py_SET_TYPE(ob, type) _Py_SET_TYPE((PyObject*)(ob), type) +#endif + + +// bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4 +#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE) +static inline void +_Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) +{ + ob->ob_size = size; +} +#define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size) +#endif + + +// bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1 +#if 
PY_VERSION_HEX < 0x030900B1 +static inline PyCodeObject* +PyFrame_GetCode(PyFrameObject *frame) +{ + PyCodeObject *code; + assert(frame != NULL); + code = frame->f_code; + assert(code != NULL); + Py_INCREF(code); + return code; +} +#endif + +static inline PyCodeObject* +_PyFrame_GetCodeBorrow(PyFrameObject *frame) +{ + PyCodeObject *code = PyFrame_GetCode(frame); + Py_DECREF(code); + return code; // borrowed reference +} + + +// bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1 +#if PY_VERSION_HEX < 0x030900B1 +static inline PyFrameObject* +PyFrame_GetBack(PyFrameObject *frame) +{ + PyFrameObject *back; + assert(frame != NULL); + back = frame->f_back; + Py_XINCREF(back); + return back; +} +#endif + +static inline PyFrameObject* +_PyFrame_GetBackBorrow(PyFrameObject *frame) +{ + PyFrameObject *back = PyFrame_GetBack(frame); + Py_XDECREF(back); + return back; // borrowed reference +} + + +// bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5 +#if PY_VERSION_HEX < 0x030900A5 +static inline PyInterpreterState * +PyThreadState_GetInterpreter(PyThreadState *tstate) +{ + assert(tstate != NULL); + return tstate->interp; +} +#endif + + +// bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1 +#if PY_VERSION_HEX < 0x030900B1 +static inline PyFrameObject* +PyThreadState_GetFrame(PyThreadState *tstate) +{ + PyFrameObject *frame; + assert(tstate != NULL); + frame = tstate->frame; + Py_XINCREF(frame); + return frame; +} +#endif + +static inline PyFrameObject* +_PyThreadState_GetFrameBorrow(PyThreadState *tstate) +{ + PyFrameObject *frame = PyThreadState_GetFrame(tstate); + Py_XDECREF(frame); + return frame; // borrowed reference +} + + +// bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5 +#if PY_VERSION_HEX < 0x030900A5 +static inline PyInterpreterState * +PyInterpreterState_Get(void) +{ + PyThreadState *tstate; + PyInterpreterState *interp; + + tstate = PyThreadState_GET(); + if (tstate == NULL) { + Py_FatalError("GIL released (tstate is NULL)"); 
+ } + interp = tstate->interp; + if (interp == NULL) { + Py_FatalError("no current interpreter"); + } + return interp; +} +#endif + + +// bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a6 +#if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6 +static inline uint64_t +PyThreadState_GetID(PyThreadState *tstate) +{ + assert(tstate != NULL); + return tstate->id; +} +#endif + + +// bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1 +#if PY_VERSION_HEX < 0x030900A1 +static inline PyObject* +PyObject_CallNoArgs(PyObject *func) +{ + return PyObject_CallFunctionObjArgs(func, NULL); +} +#endif + + +// bpo-39245 made PyObject_CallOneArg() public (previously called +// _PyObject_CallOneArg) in Python 3.9.0a4 +#if PY_VERSION_HEX < 0x030900A4 +static inline PyObject* +PyObject_CallOneArg(PyObject *func, PyObject *arg) +{ + return PyObject_CallFunctionObjArgs(func, arg, NULL); +} +#endif + + +// bpo-40024 added PyModule_AddType() to Python 3.9.0a5 +#if PY_VERSION_HEX < 0x030900A5 +static inline int +PyModule_AddType(PyObject *module, PyTypeObject *type) +{ + const char *name, *dot; + + if (PyType_Ready(type) < 0) { + return -1; + } + + // inline _PyType_Name() + name = type->tp_name; + assert(name != NULL); + dot = strrchr(name, '.'); + if (dot != NULL) { + name = dot + 1; + } + + Py_INCREF(type); + if (PyModule_AddObject(module, name, (PyObject *)type) < 0) { + Py_DECREF(type); + return -1; + } + + return 0; +} +#endif + + +// bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6. +// bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2. +#if PY_VERSION_HEX < 0x030900A6 +static inline int +PyObject_GC_IsTracked(PyObject* obj) +{ + return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)); +} +#endif + +// bpo-40241 added PyObject_GC_IsFinalized() to Python 3.9.0a6. +// bpo-18112 added _PyGCHead_FINALIZED() to Python 3.4.0 final. 
+#if PY_VERSION_HEX < 0x030900A6 && PY_VERSION_HEX >= 0x030400F0 +static inline int +PyObject_GC_IsFinalized(PyObject *obj) +{ + return (PyObject_IS_GC(obj) && _PyGCHead_FINALIZED((PyGC_Head *)(obj)-1)); +} +#endif + + +// bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4 +#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE) +static inline int +_Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { + return ob->ob_type == type; +} +#define Py_IS_TYPE(ob, type) _Py_IS_TYPE((const PyObject*)(ob), type) +#endif + + +#ifdef __cplusplus +} +#endif +#endif // PYTHONCAPI_COMPAT diff -Nru python-bitarray-0.8.1/bitarray/test_bitarray.py python-bitarray-1.6.3/bitarray/test_bitarray.py --- python-bitarray-0.8.1/bitarray/test_bitarray.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/test_bitarray.py 2021-01-21 00:52:57.000000000 +0000 @@ -3,6 +3,8 @@ Author: Ilan Schnell """ +from __future__ import absolute_import + import os import sys import unittest @@ -12,57 +14,73 @@ is_py3k = bool(sys.version_info[0] == 3) +# imports needed inside tests +import copy +import pickle +import itertools + +try: + import shelve, hashlib +except ImportError: + shelve = hashlib = None + if is_py3k: - from io import StringIO + from io import BytesIO + unicode = str else: - from cStringIO import StringIO + from cStringIO import StringIO as BytesIO + range = xrange -from bitarray import bitarray, bitdiff, bits2bytes, __version__ - +from bitarray import (bitarray, frozenbitarray, bits2bytes, decodetree, + get_default_endian, _set_default_endian, + _sysinfo, __version__) tests = [] -if sys.version_info[:2] < (2, 6): - def next(x): - return x.next() - - -def to_bytes(s): - if is_py3k: - return bytes(s.encode('latin1')) - elif sys.version_info[:2] >= (2, 6): - return bytes(s) - else: - return s - class Util(object): - def randombitarrays(self): - for n in list(range(25)) + [randint(1000, 2000)]: + @staticmethod + def randombitarrays(start=0): + for n in list(range(start, 
25)) + [randint(1000, 2000)]: a = bitarray(endian=['little', 'big'][randint(0, 1)]) a.frombytes(os.urandom(bits2bytes(n))) del a[n:] yield a - def randomlists(self): + @staticmethod + def randomlists(): for n in list(range(25)) + [randint(1000, 2000)]: yield [bool(randint(0, 1)) for d in range(n)] - def rndsliceidx(self, length): + @staticmethod + def rndsliceidx(length): if randint(0, 1): return None else: - return randint(-2 * length, 2 * length - 1) + return randint(-length-5, length+5) - def slicelen(self, r, length): - return getIndicesEx(r, length)[-1] + @staticmethod + def other_endian(endian): + t = {'little': 'big', + 'big': 'little'} + return t[endian] + + @staticmethod + def slicelen(s, length): + assert isinstance(s, slice) + start, stop, step = s.indices(length) + slicelength = (stop - start + (1 if step < 0 else -1)) // step + 1 + if slicelength < 0: + slicelength = 0 + return slicelength def check_obj(self, a): - self.assertEqual(repr(type(a)), "") + self.assertEqual(repr(type(a)), "<%s 'bitarray.bitarray'>" % + ('class' if is_py3k else 'type')) unused = 8 * a.buffer_info()[1] - len(a) - self.assert_(0 <= unused < 8) + self.assertTrue(0 <= unused < 8) self.assertEqual(unused, a.buffer_info()[3]) def assertEQUAL(self, a, b): @@ -72,82 +90,55 @@ self.check_obj(b) def assertStopIteration(self, it): - if is_py3k: - return - self.assertRaises(StopIteration, it.next) - - -def getIndicesEx(r, length): - if not isinstance(r, slice): - raise TypeError("slice object expected") - start = r.start - stop = r.stop - step = r.step - if r.step is None: - step = 1 - else: - if step == 0: - raise ValueError("slice step cannot be zero") - - if step < 0: - defstart = length - 1 - defstop = -1 - else: - defstart = 0 - defstop = length - - if r.start is None: - start = defstart - else: - if start < 0: start += length - if start < 0: start = [0, -1][step < 0] - if start >= length: start = [length, length - 1][step < 0] - - if r.stop is None: - stop = defstop - else: - if 
stop < 0: stop += length - if stop < 0: stop = -1 - if stop > length: stop = length - - if (step < 0 and stop >= length) or (step > 0 and start >= stop): - slicelength = 0 - elif step < 0: - slicelength = (stop - start + 1) / step + 1 - else: - slicelength = (stop - start - 1) / step + 1 - - if slicelength < 0: - slicelength = 0 + self.assertRaises(StopIteration, next, it) - return start, stop, step, slicelength + def assertRaisesMessage(self, excClass, msg, callable, *args, **kwargs): + try: + callable(*args, **kwargs) + except excClass as e: + if msg != str(e): + raise AssertionError("message: %s\n got: %s" % (msg, e)) # --------------------------------------------------------------------------- class TestsModuleFunctions(unittest.TestCase, Util): - def test_bitdiff(self): - a = bitarray('0011') - b = bitarray('0101') - self.assertEqual(bitdiff(a, b), 2) - self.assertRaises(TypeError, bitdiff, a, '') - self.assertRaises(TypeError, bitdiff, '1', b) - self.assertRaises(TypeError, bitdiff, a, 4) - b.append(1) - self.assertRaises(ValueError, bitdiff, a, b) - - for n in list(range(50)) + [randint(1000, 2000)]: + def test_version_string(self): + # the version string is not a function, but test it here anyway + self.assertIsInstance(__version__, str) + + def test_set_default_endian(self): + self.assertRaises(TypeError, _set_default_endian, 0) + self.assertRaises(TypeError, _set_default_endian, 'little', 0) + self.assertRaises(ValueError, _set_default_endian, 'foo') + for default_endian in 'big', 'little', u'big', u'little': + _set_default_endian(default_endian) a = bitarray() - a.frombytes(os.urandom(bits2bytes(n))) - del a[n:] - b = bitarray() - b.frombytes(os.urandom(bits2bytes(n))) - del b[n:] - diff = sum(a[i] ^ b[i] for i in range(n)) - self.assertEqual(bitdiff(a, b), diff) + self.assertEqual(a.endian(), default_endian) + for x in None, 0, 64, '10111', [1, 0]: + a = bitarray(x) + self.assertEqual(a.endian(), default_endian) + + for endian in 'big', 'little': + a = 
bitarray(endian=endian) + self.assertEqual(a.endian(), endian) + + # make sure that calling _set_default_endian wrong does not + # change the default endianness + self.assertRaises(ValueError, _set_default_endian, 'foobar') + self.assertEqual(bitarray().endian(), default_endian) + + def test_get_default_endian(self): + # takes no arguments + self.assertRaises(TypeError, get_default_endian, 'big') + for default_endian in 'big', 'little': + _set_default_endian(default_endian) + endian = get_default_endian() + self.assertEqual(endian, default_endian) + self.assertIsInstance(endian, str) def test_bits2bytes(self): - for arg in ['foo', [], None, {}]: + for arg in 'foo', [], None, {}, 187.0, -4.0: self.assertRaises(TypeError, bits2bytes, arg) self.assertRaises(TypeError, bits2bytes) @@ -157,12 +148,13 @@ self.assertRaises(ValueError, bits2bytes, -924) self.assertEqual(bits2bytes(0), 0) - for n in range(1, 1000): - self.assertEqual(bits2bytes(n), (n - 1) // 8 + 1) + for n in range(1, 100): + m = bits2bytes(n) + self.assertEqual(m, (n - 1) // 8 + 1) + self.assertIsInstance(m, int) for n, m in [(0, 0), (1, 1), (2, 1), (7, 1), (8, 1), (9, 2), (10, 2), (15, 2), (16, 2), (64, 8), (65, 9), - (0, 0), (1, 1), (65, 9), (2**29, 2**26), (2**31, 2**28), (2**32, 2**29), (2**34, 2**31), (2**34+793, 2**31+100), (2**35-8, 2**32-1), (2**62, 2**59), (2**63-8, 2**60-1)]: @@ -181,36 +173,42 @@ self.assertEqual(a.tolist(), []) self.check_obj(a) - def test_endian1(self): + def test_endian(self): a = bitarray(endian='little') - a.fromstring('A') + a.frombytes(b'ABC') self.assertEqual(a.endian(), 'little') + self.assertIsInstance(a.endian(), str) self.check_obj(a) b = bitarray(endian='big') - b.fromstring('A') + b.frombytes(b'ABC') self.assertEqual(b.endian(), 'big') + self.assertIsInstance(a.endian(), str) self.check_obj(b) - self.assertEqual(a.tostring(), b.tostring()) - - def test_endian2(self): - a = bitarray(endian='little') - a.fromstring(' ') - self.assertEqual(a.endian(), 'little') - 
self.check_obj(a) + self.assertNotEqual(a, b) + self.assertEqual(a.tobytes(), b.tobytes()) - b = bitarray(endian='big') - b.fromstring(' ') - self.assertEqual(b.endian(), 'big') - self.check_obj(b) + def test_endian_default(self): + _set_default_endian('big') + a_big = bitarray() + _set_default_endian('little') + a_little = bitarray() + _set_default_endian('big') - self.assertEqual(a.tostring(), b.tostring()) + self.assertEqual(a_big.endian(), 'big') + self.assertEqual(a_little.endian(), 'little') + def test_endian_wrong(self): self.assertRaises(TypeError, bitarray.__new__, bitarray, endian=0) self.assertRaises(ValueError, bitarray.__new__, bitarray, endian='') - self.assertRaises(ValueError, bitarray.__new__, - bitarray, endian='foo') + self.assertRaisesMessage( + ValueError, + "bit endianness must be either 'little' or 'big', got: 'foo'", + bitarray.__new__, bitarray, endian='foo') + self.assertRaisesMessage(TypeError, + "'ellipsis' object is not iterable", + bitarray.__new__, bitarray, Ellipsis) def test_integers(self): for n in range(50): @@ -265,6 +263,17 @@ self.assertEqual(a, bitarray(lst)) self.check_obj(a) + def test_iter3(self): + a = bitarray(itertools.repeat(False, 10)) + self.assertEqual(a, bitarray(10 * '0')) + # Note that the through value of '0' is True: bool('0') -> True + a = bitarray(itertools.repeat('0', 10)) + self.assertEqual(a, bitarray(10 * '1')) + + def test_range(self): + a = bitarray(range(-3, 3)) + self.assertEqual(a, bitarray('111011')) + def test_01(self): a = bitarray('0010111') self.assertEqual(a.tolist(), [0, 0, 1, 0, 1, 1, 1]) @@ -279,50 +288,81 @@ self.assertRaises(ValueError, bitarray.__new__, bitarray, '01012100') - def test_rawbytes(self): # this representation is used for pickling - for s, r in [('\x00', ''), ('\x07\xff', '1'), ('\x03\xff', '11111'), - ('\x01\x87\xda', '10000111' '1101101')]: - self.assertEqual(bitarray(to_bytes(s), endian='big'), + def test_rawbytes(self): # this representation is used for pickling + for s, r 
in [(b'\x00', ''), (b'\x07\xff', '1'), (b'\x03\xff', '11111'), + (b'\x01\x87\xda', '10000111' '1101101')]: + self.assertEqual(bitarray(s, endian='big'), bitarray(r)) for i in range(1, 8): self.assertRaises(ValueError, bitarray.__new__, - bitarray, to_bytes(chr(i))) + bitarray, bytes(bytearray([i]))) - def test_bitarray(self): - for n in range(50): + def test_bitarray_simple(self): + for n in range(10): a = bitarray(n) b = bitarray(a) - self.assert_(a is not b) + self.assertFalse(a is b) self.assertEQUAL(a, b) - for end in ('little', 'big'): - a = bitarray(endian=end) - c = bitarray(a) - self.assertEqual(c.endian(), end) - c = bitarray(a, endian='little') - self.assertEqual(c.endian(), 'little') - c = bitarray(a, endian='big') - self.assertEqual(c.endian(), 'big') - + def test_bitarray_endian(self): + # Test creating a new bitarray with different endianness from an + # existing bitarray. + for endian in 'little', 'big': + a = bitarray(endian=endian) + b = bitarray(a) + self.assertFalse(a is b) + self.assertEQUAL(a, b) - def test_None(self): - self.assertEQUAL(bitarray(), bitarray(0)) - self.assertEQUAL(bitarray(), bitarray(None)) + endian2 = self.other_endian(endian) + c = bitarray(a, endian2) + self.assertEqual(c.endian(), endian2) + self.assertEqual(a, c) # but only because they are empty + + # Even though the byte representation will be the same, + # the bitarrays are not equal. + a = bitarray('11001000' '11110000', endian) + self.assertEqual(len(a) % 8, 0) + c = bitarray(a, endian2) + # This is only equal because the size of the bitarray is a + # multiple of 8, and unused bits are not set (which changes + # the byte representation). 
+ self.assertEqual(a.tobytes(), c.tobytes()) + self.assertNotEqual(a.endian(), c.endian()) + self.assertNotEqual(a, c) + def test_bitarray_endianness(self): + a = bitarray('11100001', endian='little') + b = bitarray(a, endian='big') + self.assertNotEqual(a, b) + self.assertEqual(a.tobytes(), b.tobytes()) - def test_WrongArgs(self): - self.assertRaises(TypeError, bitarray.__new__, bitarray, 'A', 42, 69) + b.bytereverse() + self.assertEqual(a, b) + self.assertNotEqual(a.tobytes(), b.tobytes()) - self.assertRaises(TypeError, bitarray.__new__, bitarray, Ellipsis) - self.assertRaises(TypeError, bitarray.__new__, bitarray, slice(0)) + c = bitarray('11100001', endian='big') + self.assertEqual(a, c) - self.assertRaises(TypeError, bitarray.__new__, bitarray, 2.345) - self.assertRaises(TypeError, bitarray.__new__, bitarray, 4+3j) + def test_create_empty(self): + for x in None, 0, '', list(), tuple(), set(), dict(): + a = bitarray(x) + self.assertEqual(len(a), 0) + self.assertEQUAL(a, bitarray()) - self.assertRaises(TypeError, bitarray.__new__, bitarray, '', 0, 42) + def test_wrong_args(self): + # wrong types + for x in False, True, Ellipsis, slice(0), 0.0, 0 + 0j: + self.assertRaises(TypeError, bitarray.__new__, bitarray, x) + # wrong values + for x in -1, 'A': + self.assertRaises(ValueError, bitarray.__new__, bitarray, x) + # test second (endian) argument + self.assertRaises(TypeError, bitarray.__new__, bitarray, 0, None) + self.assertRaises(TypeError, bitarray.__new__, bitarray, 0, 0) self.assertRaises(ValueError, bitarray.__new__, bitarray, 0, 'foo') - + # too many args + self.assertRaises(TypeError, bitarray.__new__, bitarray, 0, 'big', 0) tests.append(CreateObjectTests) @@ -330,19 +370,11 @@ class ToObjectsTests(unittest.TestCase, Util): - def test_int(self): - a = bitarray() - self.assertRaises(TypeError, int, a) - if not is_py3k: - self.assertRaises(TypeError, long, a) - - def test_float(self): - a = bitarray() - self.assertRaises(TypeError, float, a) - - def 
test_complext(self): + def test_numeric(self): a = bitarray() - self.assertRaises(TypeError, complex, a) + self.assertRaises(Exception, int, a) + self.assertRaises(Exception, float, a) + self.assertRaises(Exception, complex, a) def test_list(self): for a in self.randombitarrays(): @@ -357,55 +389,41 @@ # --------------------------------------------------------------------------- -class MetaDataTests(unittest.TestCase): +class MetaDataTests(unittest.TestCase, Util): def test_buffer_info1(self): - a = bitarray('0000111100001', endian='little') + a = bitarray(13, endian='little') self.assertEqual(a.buffer_info()[1:4], (2, 'little', 3)) a = bitarray() self.assertRaises(TypeError, a.buffer_info, 42) bi = a.buffer_info() - self.assert_(isinstance(bi, tuple)) + self.assertIsInstance(bi, tuple) self.assertEqual(len(bi), 5) - - self.assert_(isinstance(bi[0], int)) - if is_py3k: - self.assert_(isinstance(bi[1], int)) - self.assert_(isinstance(bi[2], str)) - self.assert_(isinstance(bi[3], int)) - if is_py3k: - self.assert_(isinstance(bi[4], int)) + self.assertIsInstance(bi[0], int) def test_buffer_info2(self): - for n in range(50): - bi = bitarray(n).buffer_info() - self.assertEqual(bi[1], bits2bytes(n)) - self.assertEqual(bi[3] + n, 8 * bi[1]) - self.assert_(bi[4] >= bi[1]) - - def test_buffer_info3(self): - a = bitarray(endian='little') - self.assertEqual(a.buffer_info()[2], 'little') - - a = bitarray(endian='big') - self.assertEqual(a.buffer_info()[2], 'big') - + for endian in 'big', 'little': + for n in range(50): + bi = bitarray(n, endian).buffer_info() + self.assertEqual(bi[1], bits2bytes(n)) # bytes + self.assertEqual(bi[2], endian) # endianness + self.assertEqual(bi[3], 8 * bi[1] - n) # unused + self.assertTrue(bi[4] >= bi[1]) # allocated def test_endian(self): - a = bitarray(endian='little') - self.assertEqual(a.endian(), 'little') + for endian in 'big', 'little': + a = bitarray(endian=endian) + self.assertEqual(a.endian(), endian) a = bitarray(endian='big') 
self.assertEqual(a.endian(), 'big') - - def test_length(self): - for n in range(1000): + def test_len(self): + for n in range(100): a = bitarray(n) self.assertEqual(len(a), n) - self.assertEqual(a.length(), n) tests.append(MetaDataTests) @@ -414,71 +432,80 @@ class SliceTests(unittest.TestCase, Util): - def test_getitem1(self): + def test_getitem_1(self): a = bitarray() self.assertRaises(IndexError, a.__getitem__, 0) a.append(True) self.assertEqual(a[0], True) + self.assertEqual(a[-1], True) self.assertRaises(IndexError, a.__getitem__, 1) self.assertRaises(IndexError, a.__getitem__, -2) - a.append(False) self.assertEqual(a[1], False) + self.assertEqual(a[-1], False) self.assertRaises(IndexError, a.__getitem__, 2) self.assertRaises(IndexError, a.__getitem__, -3) - def test_getitem2(self): + def test_getitem_2(self): a = bitarray('1100010') - for i, b in enumerate([True, True, False, False, False, True, False]): + for i, b in enumerate(a): self.assertEqual(a[i], b) - self.assertEqual(a[i-7], b) + self.assertEqual(a[i - 7], b) self.assertRaises(IndexError, a.__getitem__, 7) self.assertRaises(IndexError, a.__getitem__, -8) - def test_getitem3(self): - a = bitarray('0100000100001') + def test_getslice(self): + a = bitarray('01001111' '00001') self.assertEQUAL(a[:], a) - self.assert_(a[:] is not a) - aa = a.tolist() - self.assertEQUAL(a[11:2:-3], bitarray(aa[11:2:-3])) - self.check_obj(a[:]) + self.assertFalse(a[:] is a) + self.assertEqual(a[13:2:-3], bitarray('1010')) + self.assertEqual(a[2:-1:4], bitarray('010')) + self.assertEqual(a[::2], bitarray('0011001')) + self.assertEqual(a[8:], bitarray('00001')) + self.assertEqual(a[7:], bitarray('100001')) + self.assertEqual(a[:8], bitarray('01001111')) + self.assertEqual(a[::-1], bitarray('10000111' '10010')) + self.assertEqual(a[:8:-1], bitarray('1000')) self.assertRaises(ValueError, a.__getitem__, slice(None, None, 0)) self.assertRaises(TypeError, a.__getitem__, (1, 2)) - def test_getitem4(self): - for a in 
self.randombitarrays(): + def test_getslice_random(self): + for a in self.randombitarrays(start=1): aa = a.tolist() la = len(a) - if la == 0: continue for dum in range(10): - step = self.rndsliceidx(la) - if step == 0: step = None - s = slice(self.rndsliceidx(la), - self.rndsliceidx(la), step) + step = self.rndsliceidx(la) or None + s = slice(self.rndsliceidx(la), self.rndsliceidx(la), step) self.assertEQUAL(a[s], bitarray(aa[s], endian=a.endian())) - def test_setitem1(self): - a = bitarray([False]) + def test_setitem_simple(self): + a = bitarray('0') a[0] = 1 - self.assertEqual(a.tolist(), [True]) + self.assertEqual(a, bitarray('1')) a = bitarray(2) a[0] = 0 a[1] = 1 - self.assertEqual(a.tolist(), [False, True]) + self.assertEqual(a, bitarray('01')) a[-1] = 0 a[-2] = 1 - self.assertEqual(a.tolist(), [True, False]) + self.assertEqual(a, bitarray('10')) self.assertRaises(IndexError, a.__setitem__, 2, True) self.assertRaises(IndexError, a.__setitem__, -3, False) - def test_setitem2(self): - for a in self.randombitarrays(): + def test_setitem_simple2(self): + a = bitarray('00000') + a[0] = 1 + a[-2] = 1 + self.assertEqual(a, bitarray('10010')) + self.assertRaises(IndexError, a.__setitem__, 5, 'foo') + self.assertRaises(IndexError, a.__setitem__, -6, 'bar') + + def test_setitem_random(self): + for a in self.randombitarrays(start=1): la = len(a) - if la == 0: - continue i = randint(0, la - 1) aa = a.tolist() ida = id(a) @@ -489,6 +516,9 @@ self.assertEqual(id(a), ida) self.check_obj(a) + def test_setslice_simple(self): + for a in self.randombitarrays(start=1): + la = len(a) b = bitarray(la) b[0:la] = bitarray(a) self.assertEqual(a, b) @@ -503,90 +533,199 @@ b[::-1] = bitarray(a) self.assertEqual(a.tolist()[::-1], b.tolist()) - def test_setitem3(self): - a = bitarray(5 * [False]) - a[0] = 1 - a[-2] = 1 - self.assertEqual(a, bitarray('10010')) - self.assertRaises(IndexError, a.__setitem__, 5, 'foo') - self.assertRaises(IndexError, a.__setitem__, -6, 'bar') + def 
test_setslice_random(self): + for a in self.randombitarrays(start=1): + la = len(a) + for dum in range(10): + step = self.rndsliceidx(la) or None + s = slice(self.rndsliceidx(la), self.rndsliceidx(la), step) + lb = randint(0, 10) if step is None else self.slicelen(s, la) + b = bitarray(lb) + c = bitarray(a) + c[s] = b + self.check_obj(c) + cc = a.tolist() + cc[s] = b.tolist() + self.assertEqual(c, bitarray(cc)) - def test_setitem4(self): + def test_setslice_self_random(self): for a in self.randombitarrays(): - la = len(a) - if la == 0: continue - for dum in range(3): - step = self.rndsliceidx(la) - if step == 0: step = None - s = slice(self.rndsliceidx(la), - self.rndsliceidx(la), step) - for b in self.randombitarrays(): - if len(b) == self.slicelen(s, len(a)) or step is None: - c = bitarray(a) - d = c - c[s] = b - self.assert_(c is d) - self.check_obj(c) - cc = a.tolist() - cc[s] = b.tolist() - self.assertEqual(c, bitarray(cc)) + for step in -1, 1: + s = slice(None, None, step) + aa = a.tolist() + a[s] = a + aa[s] = aa + self.assertEqual(a, bitarray(aa)) + + def test_setslice_self(self): + a = bitarray('1100111') + a[::-1] = a + self.assertEqual(a, bitarray('1110011')) + a[4:] = a + self.assertEqual(a, bitarray('11101110011')) + a[:-5] = a + self.assertEqual(a, bitarray('1110111001110011')) + + a = bitarray('01001') + a[:-1] = a + self.assertEqual(a, bitarray('010011')) + a[2::] = a + self.assertEqual(a, bitarray('01010011')) + a[2:-2:1] = a + self.assertEqual(a, bitarray('010101001111')) + + a = bitarray('011') + a[2:2] = a + self.assertEqual(a, bitarray('010111')) + a[:] = a + self.assertEqual(a, bitarray('010111')) + + def test_setslice_to_bitarray(self): + a = bitarray('11111111' '1111') + a[2:6] = bitarray('0010') + self.assertEqual(a, bitarray('11001011' '1111')) + a.setall(0) + a[::2] = bitarray('111001') + self.assertEqual(a, bitarray('10101000' '0010')) + a.setall(0) + a[3:] = bitarray('111') + self.assertEqual(a, bitarray('000111')) + + a = bitarray(12) 
+ a.setall(0) + a[1:11:2] = bitarray('11101') + self.assertEqual(a, bitarray('01010100' '0100')) + + a = bitarray(12) + a.setall(0) + a[:-6:-1] = bitarray('10111') + self.assertEqual(a, bitarray('00000001' '1101')) + def test_setslice_to_bitarray_2(self): + a = bitarray('1111') + a[3:3] = bitarray('000') # insert + self.assertEqual(a, bitarray('1110001')) + a[2:5] = bitarray() # remove + self.assertEqual(a, bitarray('1101')) + + a = bitarray('1111') + a[1:3] = bitarray('0000') + self.assertEqual(a, bitarray('100001')) + a[:] = bitarray('010') # replace all values + self.assertEqual(a, bitarray('010')) + + # assign slice to bitarray with different length + a = bitarray('111111') + a[3:4] = bitarray('00') + self.assertEqual(a, bitarray('1110011')) + a[2:5] = bitarray('0') # remove + self.assertEqual(a, bitarray('11011')) def test_setslice_to_bool(self): a = bitarray('11111111') a[::2] = False self.assertEqual(a, bitarray('01010101')) - a[4::] = True + a[4::] = True # ^^^^ self.assertEqual(a, bitarray('01011111')) - a[-2:] = False + a[-2:] = False # ^^ self.assertEqual(a, bitarray('01011100')) - a[:2:] = True + a[:2:] = True # ^^ self.assertEqual(a, bitarray('11011100')) - a[:] = True + a[:] = True # ^^^^^^^^ self.assertEqual(a, bitarray('11111111')) + a[2:5] = False # ^^^ + self.assertEqual(a, bitarray('11000111')) + a[1::3] = False # ^ ^ ^ + self.assertEqual(a, bitarray('10000110')) + a[1:6:2] = True # ^ ^ ^ + self.assertEqual(a, bitarray('11010110')) + a[3:3] = False # zero slicelength + self.assertEqual(a, bitarray('11010110')) def test_setslice_to_int(self): a = bitarray('11111111') - a[::2] = 0 + a[::2] = 0 # ^ ^ ^ ^ self.assertEqual(a, bitarray('01010101')) - a[4::] = 1 + a[4::] = 1 # ^^^^ self.assertEqual(a, bitarray('01011111')) a.__setitem__(slice(-2, None, None), 0) self.assertEqual(a, bitarray('01011100')) + self.assertRaises(ValueError, a.__setitem__, slice(None, None, 2), 3) + self.assertRaises(ValueError, a.__setitem__, slice(None, 2, None), -1) - 
self.assertRaises(ValueError, a.__setitem__, - slice(None, None, 2), 3) - self.assertRaises(ValueError, a.__setitem__, - slice(None, 2, None), -1) + def test_setslice_to_invalid(self): + a = bitarray('11111111') + s = slice(2, 6, None) + self.assertRaises(IndexError, a.__setitem__, s, 1.2) + self.assertRaises(IndexError, a.__setitem__, s, None) + self.assertRaises(IndexError, a.__setitem__, s, "0110") + a[s] = False + self.assertEqual(a, bitarray('11000011')) + # step != 1 and slicelen != length of assigned bitarray + self.assertRaisesMessage( + ValueError, + "attempt to assign sequence of size 3 to extended slice of size 4", + a.__setitem__, slice(None, None, 2), bitarray('000')) + self.assertRaisesMessage( + ValueError, + "attempt to assign sequence of size 3 to extended slice of size 2", + a.__setitem__, slice(None, None, 4), bitarray('000')) + self.assertRaisesMessage( + ValueError, + "attempt to assign sequence of size 7 to extended slice of size 8", + a.__setitem__, slice(None, None, -1), bitarray('0001000')) + self.assertEqual(a, bitarray('11000011')) + def test_sieve(self): # Sieve of Eratosthenes + a = bitarray(50) + a.setall(1) + for i in range(2, 8): + if a[i]: + a[i*i::i] = 0 + primes = [i for i in range(2, 50) if a[i]] + self.assertEqual(primes, [2, 3, 5, 7, 11, 13, 17, 19, + 23, 29, 31, 37, 41, 43, 47]) - def test_delitem1(self): + def test_delitem(self): a = bitarray('100110') del a[1] self.assertEqual(len(a), 5) - del a[3] - del a[-2] + del a[3], a[-2] self.assertEqual(a, bitarray('100')) self.assertRaises(IndexError, a.__delitem__, 3) self.assertRaises(IndexError, a.__delitem__, -4) - def test_delitem2(self): + def test_delslice(self): + a = bitarray('10101100' '10110') + del a[3:9] # ^^^^^ ^ + self.assertEqual(a, bitarray('1010110')) + del a[::3] # ^ ^ ^ + self.assertEqual(a, bitarray('0111')) + a = bitarray('10101100' '101101111') + del a[5:-3:3] # ^ ^ ^ + self.assertEqual(a, bitarray('1010100' '0101111')) + a = bitarray('10101100' '1011011') + 
del a[:-9:-2] # ^ ^ ^ ^ + self.assertEqual(a, bitarray('10101100' '011')) + del a[3:3] # zero slicelength + self.assertEqual(a, bitarray('10101100' '011')) + self.assertRaises(ValueError, a.__delitem__, slice(None, None, 0)) + self.assertEqual(len(a), 11) + del a[:] + self.assertEqual(a, bitarray()) + + def test_delslice_random(self): for a in self.randombitarrays(): la = len(a) - if la == 0: continue for dum in range(10): - step = self.rndsliceidx(la) - if step == 0: step = None - s = slice(self.rndsliceidx(la), - self.rndsliceidx(la), step) - c = bitarray(a) - d = c + step = self.rndsliceidx(la) or None + s = slice(self.rndsliceidx(la), self.rndsliceidx(la), step) + c = a.copy() del c[s] - self.assert_(c is d) self.check_obj(c) - cc = a.tolist() - del cc[s] - self.assertEQUAL(c, bitarray(cc, endian=c.endian())) + c_lst = a.tolist() + del c_lst[s] + self.assertEQUAL(c, bitarray(c_lst, endian=c.endian())) tests.append(SliceTests) @@ -597,7 +736,7 @@ def test_instancecheck(self): a = bitarray('011') - self.assertTrue(isinstance(a, bitarray)) + self.assertIsInstance(a, bitarray) self.assertFalse(isinstance(a, str)) def test_booleanness(self): @@ -605,6 +744,15 @@ self.assertEqual(bool(bitarray('0')), True) self.assertEqual(bool(bitarray('1')), True) + def test_to01(self): + a = bitarray() + self.assertEqual(a.to01(), '') + self.assertIsInstance(a.to01(), str) + + a = bitarray('101') + self.assertEqual(a.to01(), '101') + self.assertIsInstance(a.to01(), str) + def test_iterate(self): for lst in self.randomlists(): acc = [] @@ -632,10 +780,25 @@ b = bitarray('01010111000') self.assertEqual(a, b) - def test_compare(self): + def test_compare_eq_ne(self): + self.assertTrue(bitarray(0, 'big') == bitarray(0, 'little')) + self.assertFalse(bitarray(0, 'big') != bitarray(0, 'little')) + + for n in range(1, 20): + a = bitarray(n, 'little') + a.setall(1) + for endian in 'little', 'big': + b = bitarray(n, endian) + b.setall(1) + self.assertTrue(a == b) + self.assertFalse(a != b) + 
b[n - 1] = not b[n - 1] # flip last bit + self.assertTrue(a != b) + self.assertFalse(a == b) + + def test_compare_random(self): for a in self.randombitarrays(): aa = a.tolist() - for b in self.randombitarrays(): bb = b.tolist() self.assertEqual(a == b, aa == bb) @@ -657,28 +820,26 @@ def __getitem__(self, i): return bitarray.__getitem__(self, i - self.offset) - for a in self.randombitarrays(): - if len(a) == 0: - continue + for a in self.randombitarrays(start=0): b = ExaggeratingBitarray(a, 1234) for i in range(len(a)): - self.assertEqual(a[i], b[i+1234]) + self.assertEqual(a[i], b[i + 1234]) def test_endianness1(self): a = bitarray(endian='little') - a.frombytes(to_bytes('\x01')) + a.frombytes(b'\x01') self.assertEqual(a.to01(), '10000000') b = bitarray(endian='little') - b.frombytes(to_bytes('\x80')) + b.frombytes(b'\x80') self.assertEqual(b.to01(), '00000001') c = bitarray(endian='big') - c.frombytes(to_bytes('\x80')) + c.frombytes(b'\x80') self.assertEqual(c.to01(), '10000000') d = bitarray(endian='big') - d.frombytes(to_bytes('\x01')) + d.frombytes(b'\x01') self.assertEqual(d.to01(), '00000001') self.assertEqual(a, c) @@ -688,29 +849,29 @@ a = bitarray(8, endian='little') a.setall(False) a[0] = True - self.assertEqual(a.tobytes(), to_bytes('\x01')) + self.assertEqual(a.tobytes(), b'\x01') a[1] = True - self.assertEqual(a.tobytes(), to_bytes('\x03')) - a.frombytes(to_bytes(' ')) - self.assertEqual(a.tobytes(), to_bytes('\x03 ')) + self.assertEqual(a.tobytes(), b'\x03') + a.frombytes(b' ') + self.assertEqual(a.tobytes(), b'\x03 ') self.assertEqual(a.to01(), '1100000000000100') def test_endianness3(self): a = bitarray(8, endian='big') a.setall(False) a[7] = True - self.assertEqual(a.tobytes(), to_bytes('\x01')) + self.assertEqual(a.tobytes(), b'\x01') a[6] = True - self.assertEqual(a.tobytes(), to_bytes('\x03')) - a.frombytes(to_bytes(' ')) - self.assertEqual(a.tobytes(), to_bytes('\x03 ')) + self.assertEqual(a.tobytes(), b'\x03') + a.frombytes(b' ') + 
self.assertEqual(a.tobytes(), b'\x03 ') self.assertEqual(a.to01(), '0000001100100000') def test_endianness4(self): a = bitarray('00100000', endian='big') - self.assertEqual(a.tobytes(), to_bytes(' ')) + self.assertEqual(a.tobytes(), b' ') b = bitarray('00000100', endian='little') - self.assertEqual(b.tobytes(), to_bytes(' ')) + self.assertEqual(b.tobytes(), b' ') self.assertNotEqual(a, b) def test_endianness5(self): @@ -720,35 +881,51 @@ self.assertEqual(a.tobytes(), b.tobytes()) def test_pickle(self): - from pickle import loads, dumps - for v in range(3): - for a in self.randombitarrays(): - b = loads(dumps(a, v)) - self.assert_(b is not a) - self.assertEQUAL(a, b) - - def test_cPickle(self): - if is_py3k: - return - for v in range(3): - from cPickle import loads, dumps - for a in self.randombitarrays(): - b = loads(dumps(a, v)) - self.assert_(b is not a) - self.assertEQUAL(a, b) + for a in self.randombitarrays(): + b = pickle.loads(pickle.dumps(a)) + self.assertFalse(b is a) + self.assertEQUAL(a, b) def test_overflow(self): - from platform import architecture - - if architecture()[0] == '64bit': + if _sysinfo()[0] == 8: return - self.assertRaises(OverflowError, bitarray.__new__, - bitarray, 2**34 + 1) + a = bitarray(2**31 - 1); + self.assertRaises(OverflowError, bitarray.append, a, True) + self.assertRaises(IndexError, bitarray.__new__, bitarray, 2**31) a = bitarray(10 ** 6) self.assertRaises(OverflowError, a.__imul__, 17180) + def test_unicode_create(self): + a = bitarray(unicode()) + self.assertEqual(a, bitarray()) + + a = bitarray(unicode('111001')) + self.assertEqual(a, bitarray('111001')) + + for a in self.randombitarrays(): + b = bitarray(unicode(a.to01())) + self.assertEqual(a, b) + + def test_unicode_extend(self): + a = bitarray() + a.extend(unicode()) + self.assertEqual(a, bitarray()) + + a = bitarray() + a.extend(unicode('001011')) + self.assertEqual(a, bitarray('001011')) + + for a in self.randombitarrays(): + b = bitarray() + 
b.extend(unicode(a.to01())) + self.assertEqual(a, b) + + def test_unhashable(self): + a = bitarray() + self.assertRaises(TypeError, hash, a) + self.assertRaises(TypeError, dict, [(a, 'foo')]) tests.append(MiscTests) @@ -759,57 +936,52 @@ def test_all(self): a = bitarray() self.assertTrue(a.all()) - - if sys.version_info[:2] < (2, 5): - return + for s, r in ('0', False), ('1', True), ('01', False): + self.assertEqual(bitarray(s).all(), r) for a in self.randombitarrays(): - self.assertEqual(all(a), a.all()) + self.assertEqual(all(a), a.all()) self.assertEqual(all(a.tolist()), a.all()) - def test_any(self): a = bitarray() self.assertFalse(a.any()) - - if sys.version_info[:2] < (2, 5): - return + for s, r in ('0', False), ('1', True), ('01', True): + self.assertEqual(bitarray(s).any(), r) for a in self.randombitarrays(): - self.assertEqual(any(a), a.any()) + self.assertEqual(any(a), a.any()) self.assertEqual(any(a.tolist()), a.any()) - def test_repr(self): - a = bitarray() - self.assertEqual(repr(a), "bitarray()") - - a = bitarray('10111') - self.assertEqual(repr(a), "bitarray('10111')") + r = repr(bitarray()) + self.assertEqual(r, "bitarray()") + self.assertIsInstance(r, str) + + r = repr(bitarray('10111')) + self.assertEqual(r, "bitarray('10111')") + self.assertIsInstance(r, str) for a in self.randombitarrays(): b = eval(repr(a)) - self.assert_(b is not a) + self.assertFalse(b is a) self.assertEqual(a, b) self.check_obj(b) - def test_copy(self): - import copy for a in self.randombitarrays(): b = a.copy() - self.assert_(b is not a) + self.assertFalse(b is a) self.assertEQUAL(a, b) b = copy.copy(a) - self.assert_(b is not a) + self.assertFalse(b is a) self.assertEQUAL(a, b) b = copy.deepcopy(a) - self.assert_(b is not a) + self.assertFalse(b is a) self.assertEQUAL(a, b) - def assertReallyEqual(self, a, b): # assertEqual first, because it will have a good message if the # assertion fails. 
@@ -846,13 +1018,31 @@ self.assertReallyNotEqual(bitarray(''), bitarray('0')) self.assertReallyNotEqual(bitarray('0'), bitarray('1')) + def test_equality_random(self): + for a in self.randombitarrays(start=1): + b = a.copy() + self.assertReallyEqual(a, b) + n = len(a) + b.invert(n - 1) # flip last bit + self.assertReallyNotEqual(a, b) + + def test_sizeof(self): + a = bitarray() + size = sys.getsizeof(a) + self.assertEqual(size, a.__sizeof__()) + self.assertIsInstance(size, int if is_py3k else (int, long)) + self.assertTrue(size < 200) + a = bitarray(8000) + self.assertTrue(sys.getsizeof(a) > 1000) + + tests.append(SpecialMethodTests) # --------------------------------------------------------------------------- -class NumberTests(unittest.TestCase, Util): +class SequenceMethodsTests(unittest.TestCase, Util): - def test_add(self): + def test_concat(self): c = bitarray('001') + bitarray('110') self.assertEQUAL(c, bitarray('001110')) @@ -871,11 +1061,12 @@ a = bitarray() self.assertRaises(TypeError, a.__add__, 42) - - def test_iadd(self): + def test_inplace_concat(self): c = bitarray('001') c += bitarray('110') - self.assertEQUAL(c, bitarray('001110')) + self.assertEqual(c, bitarray('001110')) + c += '111' + self.assertEqual(c, bitarray('001110111')) for a in self.randombitarrays(): for b in self.randombitarrays(): @@ -883,7 +1074,7 @@ d = c d += b self.assertEqual(d, a + b) - self.assert_(c is d) + self.assertTrue(c is d) self.assertEQUAL(c, d) self.assertEqual(d.endian(), a.endian()) self.check_obj(d) @@ -891,10 +1082,12 @@ a = bitarray() self.assertRaises(TypeError, a.__iadd__, 42) - - def test_mul(self): - c = 0 * bitarray('1001111') - self.assertEQUAL(c, bitarray()) + def test_repeat(self): + for c in [0 * bitarray(), + 0 * bitarray('1001111'), + -1 * bitarray('100110'), + 11 * bitarray()]: + self.assertEQUAL(c, bitarray()) c = 3 * bitarray('001') self.assertEQUAL(c, bitarray('001001001')) @@ -904,7 +1097,7 @@ for a in self.randombitarrays(): b = a.copy() - for n 
in range(-10, 20): + for n in range(-3, 5): c = a * n self.assertEQUAL(c, bitarray(n * a.tolist(), endian=a.endian())) @@ -916,8 +1109,7 @@ a = bitarray() self.assertRaises(TypeError, a.__mul__, None) - - def test_imul(self): + def test_inplace_repeat(self): c = bitarray('1101110011') idc = id(c) c *= 0 @@ -929,7 +1121,7 @@ self.assertEQUAL(c, bitarray('110110110')) for a in self.randombitarrays(): - for n in range(-10, 10): + for n in range(-3, 5): b = a.copy() idb = id(b) b *= n @@ -940,12 +1132,66 @@ a = bitarray() self.assertRaises(TypeError, a.__imul__, None) + def test_contains_simple(self): + a = bitarray() + self.assertFalse(False in a) + self.assertFalse(True in a) + self.assertTrue(bitarray() in a) + a.append(True) + self.assertTrue(True in a) + self.assertFalse(False in a) + a = bitarray([False]) + self.assertTrue(False in a) + self.assertFalse(True in a) + a.append(True) + self.assertTrue(0 in a) + self.assertTrue(1 in a) + if not is_py3k: + self.assertTrue(long(0) in a) + self.assertTrue(long(1) in a) + + def test_contains_errors(self): + a = bitarray() + self.assertEqual(a.__contains__(1), False) + a.append(1) + self.assertEqual(a.__contains__(1), True) + a = bitarray('0011') + self.assertEqual(a.__contains__(bitarray('01')), True) + self.assertEqual(a.__contains__(bitarray('10')), False) + self.assertRaises(TypeError, a.__contains__, 'asdf') + self.assertRaises(ValueError, a.__contains__, 2) + self.assertRaises(ValueError, a.__contains__, -1) + if not is_py3k: + self.assertRaises(ValueError, a.__contains__, long(2)) + + def test_contains_range(self): + for n in range(2, 50): + a = bitarray(n) + a.setall(0) + self.assertTrue(False in a) + self.assertFalse(True in a) + a[randint(0, n - 1)] = 1 + self.assertTrue(True in a) + self.assertTrue(False in a) + a.setall(1) + self.assertTrue(True in a) + self.assertFalse(False in a) + a[randint(0, n - 1)] = 0 + self.assertTrue(True in a) + self.assertTrue(False in a) + + def test_contains_explicit(self): + a = 
bitarray('011010000001') + for s, r in [('', True), ('1', True), ('11', True), ('111', False), + ('011', True), ('0001', True), ('00011', False)]: + self.assertEqual(bitarray(s) in a, r) + -tests.append(NumberTests) +tests.append(SequenceMethodsTests) # --------------------------------------------------------------------------- -class BitwiseTests(unittest.TestCase, Util): +class NumberMethodsTests(unittest.TestCase, Util): def test_misc(self): for a in self.randombitarrays(): @@ -958,15 +1204,27 @@ b &= d self.assertEqual(~b, a) + def test_size_and_endianness(self): + a = bitarray('11001') + b = bitarray('100111') + self.assertRaises(ValueError, a.__and__, b) + for x in a.__or__, a.__xor__, a.__iand__, a.__ior__, a.__ixor__: + self.assertRaises(ValueError, x, b) + a = bitarray('11001', 'big') + b = bitarray('10011', 'little') + self.assertRaises(ValueError, a.__and__, b) + for x in a.__or__, a.__xor__, a.__iand__, a.__ior__, a.__ixor__: + self.assertRaises(ValueError, x, b) + def test_and(self): a = bitarray('11001') b = bitarray('10011') self.assertEQUAL(a & b, bitarray('10001')) b = bitarray('1001') - self.assertRaises(ValueError, a.__and__, b) # not same length - - self.assertRaises(TypeError, a.__and__, 42) + self.assertRaises(ValueError, a.__and__, b) # not same length + if is_py3k: # XXX: note sure why this is failing on Py27 + self.assertRaises(TypeError, a.__and__, 42) def test_iand(self): a = bitarray('110010110') @@ -978,12 +1236,19 @@ def test_or(self): a = bitarray('11001') b = bitarray('10011') + aa = a.copy() + bb = b.copy() self.assertEQUAL(a | b, bitarray('11011')) + self.assertEQUAL(a, aa) + self.assertEQUAL(b, bb) def test_ior(self): - a = bitarray('110010110') - a |= bitarray('100110011') + a = bitarray('110010110') + b = bitarray('100110011') + bb = b.copy() + a |= b self.assertEQUAL(a, bitarray('110110111')) + self.assertEQUAL(b, bb) def test_xor(self): a = bitarray('11001') @@ -996,100 +1261,24 @@ self.assertEQUAL(a, bitarray('010100101')) 
def test_invert(self): - a = bitarray() - a.invert() - self.assertEQUAL(a, bitarray()) - - a = bitarray('11011') - a.invert() - self.assertEQUAL(a, bitarray('00100')) - a = bitarray('11011') b = ~a self.assertEQUAL(b, bitarray('00100')) self.assertEQUAL(a, bitarray('11011')) - self.assert_(a is not b) + self.assertFalse(a is b) for a in self.randombitarrays(): - aa = a.tolist() b = bitarray(a) b.invert() for i in range(len(a)): - self.assertEqual(b[i], not aa[i]) + self.assertEqual(b[i], not a[i]) self.check_obj(b) - c = ~a - self.assert_(c is not a) - self.assertEQUAL(a, bitarray(aa, endian=a.endian())) - - for i in range(len(a)): - self.assertEqual(c[i], not aa[i]) - - self.check_obj(b) - - -tests.append(BitwiseTests) - -# --------------------------------------------------------------------------- - -class SequenceTests(unittest.TestCase, Util): - - def test_contains1(self): - a = bitarray() - self.assert_(False not in a) - self.assert_(True not in a) - self.assert_(bitarray() in a) - a.append(True) - self.assert_(True in a) - self.assert_(False not in a) - a = bitarray([False]) - self.assert_(False in a) - self.assert_(True not in a) - a.append(True) - self.assert_(0 in a) - self.assert_(1 in a) - if not is_py3k: - self.assert_(long(0) in a) - self.assert_(long(1) in a) - - def test_contains2(self): - a = bitarray() - self.assertEqual(a.__contains__(1), False) - a.append(1) - self.assertEqual(a.__contains__(1), True) - a = bitarray('0011') - self.assertEqual(a.__contains__(bitarray('01')), True) - self.assertEqual(a.__contains__(bitarray('10')), False) - self.assertRaises(TypeError, a.__contains__, 'asdf') - self.assertRaises(ValueError, a.__contains__, 2) - self.assertRaises(ValueError, a.__contains__, -1) - if not is_py3k: - self.assertRaises(ValueError, a.__contains__, long(2)) + self.assertEQUAL(c, b) + self.check_obj(c) - def test_contains3(self): - for n in range(2, 100): - a = bitarray(n) - a.setall(0) - self.assert_(False in a) - self.assert_(True not in 
a) - a[randint(0, n - 1)] = 1 - self.assert_(True in a) - self.assert_(False in a) - a.setall(1) - self.assert_(True in a) - self.assert_(False not in a) - a[randint(0, n - 1)] = 0 - self.assert_(True in a) - self.assert_(False in a) - def test_contains4(self): - a = bitarray('011010000001') - for s, r in [('', True), ('1', True), ('11', True), ('111', False), - ('011', True), ('0001', True), ('00011', False)]: - self.assertEqual(bitarray(s) in a, r) - - -tests.append(SequenceTests) +tests.append(NumberMethodsTests) # --------------------------------------------------------------------------- @@ -1189,10 +1378,6 @@ self.check_obj(c) def test_iterator2(self): - try: - import itertools - except ImportError: - return a = bitarray() a.extend(itertools.repeat(True, 23)) self.assertEqual(a, bitarray(23 * '1')) @@ -1211,6 +1396,24 @@ self.assertEqual(c.tolist(), a + b) self.check_obj(c) + def test_extend_self(self): + a = bitarray() + a.extend(a) + self.assertEqual(a, bitarray()) + + a = bitarray('1') + a.extend(a) + self.assertEqual(a, bitarray('11')) + + a = bitarray('110') + a.extend(a) + self.assertEqual(a, bitarray('110110')) + + for a in self.randombitarrays(): + b = bitarray(a) + a.extend(a) + self.assertEqual(a, b + b) + tests.append(ExtendTests) @@ -1218,29 +1421,36 @@ class MethodTests(unittest.TestCase, Util): - def test_append(self): + def test_append_simple(self): a = bitarray() a.append(True) a.append(False) a.append(False) self.assertEQUAL(a, bitarray('100')) + a.append(0) + a.append(1) + a.append(2) + a.append(None) + a.append('') + a.append('a') + self.assertEQUAL(a, bitarray('100011001')) + def test_append_random(self): for a in self.randombitarrays(): aa = a.tolist() b = a b.append(1) - self.assert_(a is b) + self.assertTrue(a is b) self.check_obj(b) - self.assertEQUAL(b, bitarray(aa+[1], endian=a.endian())) + self.assertEQUAL(b, bitarray(aa + [1], endian=a.endian())) b.append('') - self.assertEQUAL(b, bitarray(aa+[1, 0], endian=a.endian())) - + 
self.assertEQUAL(b, bitarray(aa + [1, 0], endian=a.endian())) def test_insert(self): a = bitarray() b = a a.insert(0, True) - self.assert_(a is b) + self.assertTrue(a is b) self.assertEqual(a, bitarray('1')) self.assertRaises(TypeError, a.insert) self.assertRaises(TypeError, a.insert, None) @@ -1255,16 +1465,26 @@ self.assertEqual(a.tolist(), aa) self.check_obj(a) - - def test_index(self): + def test_index1(self): a = bitarray() for i in (True, False, 1, 0): self.assertRaises(ValueError, a.index, i) a = bitarray(100 * [False]) self.assertRaises(ValueError, a.index, True) + self.assertRaises(TypeError, a.index) + self.assertRaises(TypeError, a.index, 1, 'a') + self.assertRaises(TypeError, a.index, 1, 0, 'a') + self.assertRaises(TypeError, a.index, 1, 0, 100, 1) a[20] = a[27] = 1 self.assertEqual(a.index(42), 20) + self.assertEqual(a.index(1, 21), 27) + self.assertEqual(a.index(1, 27), 27) + self.assertEqual(a.index(1, -73), 27) + self.assertRaises(ValueError, a.index, 1, 5, 17) + self.assertRaises(ValueError, a.index, 1, 5, -83) + self.assertRaises(ValueError, a.index, 1, 21, 27) + self.assertRaises(ValueError, a.index, 1, 28) self.assertEqual(a.index(0), 0) a = bitarray(200 * [True]) @@ -1273,6 +1493,7 @@ self.assertEqual(a.index(False), 173) self.assertEqual(a.index(True), 0) + def test_index2(self): for n in range(50): for m in range(n): a = bitarray(n) @@ -1286,7 +1507,7 @@ a[m] = 0 self.assertEqual(a.index(0), m) - def test_index2(self): + def test_index3(self): a = bitarray('00001000' '00000000' '0010000') self.assertEqual(a.index(1), 4) self.assertEqual(a.index(1, 1), 4) @@ -1295,7 +1516,16 @@ self.assertRaises(ValueError, a.index, 1, 5, 18) self.assertRaises(ValueError, a.index, 1, 19) - def test_index3(self): + def test_index4(self): + a = bitarray('11110111' '11111111' '1101111') + self.assertEqual(a.index(0), 4) + self.assertEqual(a.index(0, 1), 4) + self.assertEqual(a.index(1, 4), 5) + self.assertEqual(a.index(0, 5), 18) + self.assertRaises(ValueError, 
a.index, 0, 5, 18) + self.assertRaises(ValueError, a.index, 0, 19) + + def test_index5(self): a = bitarray(2000) a.setall(0) for _ in range(3): @@ -1314,26 +1544,90 @@ res2 = None self.assertEqual(res1, res2) + def test_index6(self): + for n in range(1, 50): + a = bitarray(n) + i = randint(0, 1) + a.setall(i) + for unused in range(randint(1, 4)): + a[randint(0, n - 1)] = 1 - i + aa = a.tolist() + for unused in range(100): + start = randint(-50, n + 50) + stop = randint(-50, n + 50) + try: + res1 = a.index(1 - i, start, stop) + except ValueError: + res1 = None + try: + res2 = aa.index(1 - i, start, stop) + except ValueError: + res2 = None + self.assertEqual(res1, res2) + - def test_count(self): + def test_count_basic(self): a = bitarray('10011') self.assertEqual(a.count(), 3) self.assertEqual(a.count(True), 3) self.assertEqual(a.count(False), 2) self.assertEqual(a.count(1), 3) self.assertEqual(a.count(0), 2) - self.assertRaises(TypeError, a.count, 'A') + self.assertEqual(a.count(None), 2) + self.assertEqual(a.count(''), 2) + self.assertEqual(a.count('A'), 3) + self.assertRaises(TypeError, a.count, 0, 'A') + self.assertRaises(TypeError, a.count, 0, 0, 'A') + + def test_count_byte(self): + + def count(n): # count 1 bits in number + cnt = 0 + while n: + cnt += n & 1 + n >>= 1 + return cnt - for i in range(0, 256): + for i in range(256): a = bitarray() - a.frombytes(to_bytes(chr(i))) - self.assertEqual(a.count(), a.to01().count('1')) - - for a in self.randombitarrays(): - self.assertEqual(a.count(), a.count(1)) - self.assertEqual(a.count(1), a.to01().count('1')) - self.assertEqual(a.count(0), a.to01().count('0')) - + a.frombytes(bytes(bytearray([i]))) + self.assertEqual(len(a), 8) + self.assertEqual(a.count(), count(i)) + self.assertEqual(a.count(), bin(i)[2:].count('1')) + + def test_count_whole_range(self): + for a in self.randombitarrays(): + s = a.to01() + self.assertEqual(a.count(1), s.count('1')) + self.assertEqual(a.count(0), s.count('0')) + + def 
test_count_allones(self): + N = 37 + a = bitarray(N) + a.setall(1) + for i in range(N): + for j in range(i, N): + self.assertEqual(a.count(1, i, j), j - i) + + def test_count_explicit(self): + for endian in 'big', 'little': + a = bitarray('01001100' '01110011' '01', endian) + self.assertEqual(a.count(), 9) + self.assertEqual(a.count(0, 12), 3) + self.assertEqual(a.count(1, -5), 3) + self.assertEqual(a.count(1, 2, 17), 7) + self.assertEqual(a.count(1, 6, 11), 2) + self.assertEqual(a.count(0, 7, -3), 4) + self.assertEqual(a.count(1, 1, -1), 8) + self.assertEqual(a.count(1, 17, 14), 0) + + def test_count_random(self): + for a in self.randombitarrays(): + s = a.to01() + i = randint(-3, len(a) + 1) + j = randint(-3, len(a) + 1) + self.assertEqual(a.count(1, i, j), s[i:j].count('1')) + self.assertEqual(a.count(0, i, j), s[i:j].count('0')) def test_search(self): a = bitarray('') @@ -1380,7 +1674,7 @@ def test_search3(self): a = bitarray('10010101110011111001011') for s, res in [('011', [6, 11, 20]), - ('111', [7, 12, 13, 14]), # note the overlap + ('111', [7, 12, 13, 14]), # note the overlap ('1011', [5, 19]), ('100', [0, 9, 16])]: b = bitarray(s) @@ -1388,40 +1682,80 @@ self.assertEqual(list(a.itersearch(b)), res) self.assertEqual([p for p in a.itersearch(b)], res) + def test_search4(self): + for a in self.randombitarrays(): + aa = a.to01() + for sub in '0', '1', '01', '01', '11', '101', '1111111': + sr = a.search(bitarray(sub), 1) + try: + p = sr[0] + except IndexError: + p = -1 + self.assertEqual(p, aa.find(sub)) - def test_fill(self): - a = bitarray('') - self.assertEqual(a.fill(), 0) - self.assertEqual(len(a), 0) + def test_search_type(self): + a = bitarray('10011') + it = a.itersearch(bitarray('1')) + self.assertIsInstance(type(it), type) - a = bitarray('101') - self.assertEqual(a.fill(), 5) - self.assertEQUAL(a, bitarray('10100000')) - self.assertEqual(a.fill(), 0) - self.assertEQUAL(a, bitarray('10100000')) + def test_fill_simple(self): + for endian in 'little', 
'big': + a = bitarray(endian=endian) + self.assertEqual(a.fill(), 0) + self.assertEqual(len(a), 0) + + a = bitarray('101', endian) + self.assertEqual(a.fill(), 5) + self.assertEqual(a, bitarray('10100000')) + self.assertEqual(a.fill(), 0) + self.assertEqual(a, bitarray('10100000')) + def test_fill_random(self): for a in self.randombitarrays(): - aa = a.tolist() - la = len(a) - b = a - self.assert_(0 <= b.fill() < 8) + b = a.copy() + res = b.fill() + self.assertTrue(0 <= res < 8) self.assertEqual(b.endian(), a.endian()) - bb = b.tolist() - lb = len(b) - self.assert_(a is b) self.check_obj(b) - if la % 8 == 0: - self.assertEqual(bb, aa) - self.assertEqual(lb, la) + if len(a) % 8 == 0: + self.assertEqual(b, a) else: - self.assert_(lb % 8 == 0) - self.assertNotEqual(bb, aa) - self.assertEqual(bb[:la], aa) - self.assertEqual(b[la:], (lb-la)*bitarray('0')) - self.assert_(0 < lb-la < 8) + self.assertTrue(len(b) % 8 == 0) + self.assertNotEqual(b, a) + self.assertEqual(b[:len(a)], a) + self.assertEqual(b[len(a):], + (len(b) - len(a)) * bitarray('0')) + + def test_invert_simple(self): + a = bitarray() + a.invert() + self.assertEQUAL(a, bitarray()) + + a = bitarray('11011') + a.invert() + self.assertEQUAL(a, bitarray('00100')) + a.invert(2) + self.assertEQUAL(a, bitarray('00000')) + a.invert(-1) + self.assertEQUAL(a, bitarray('00001')) + + def test_invert_errors(self): + a = bitarray(5) + self.assertRaises(IndexError, a.invert, 5) + self.assertRaises(IndexError, a.invert, -6) + self.assertRaises(TypeError, a.invert, "A") + self.assertRaises(TypeError, a.invert, 0, 1) + def test_invert_random(self): + for a in self.randombitarrays(start=1): + b = a.copy() + c = a.copy() + i = randint(0, len(a) - 1) + b.invert(i) + c[i] = not c[i] + self.assertEQUAL(b, c) - def test_sort(self): + def test_sort_simple(self): a = bitarray('1101000') a.sort() self.assertEqual(a, bitarray('0000111')) @@ -1438,17 +1772,14 @@ self.assertRaises(TypeError, a.sort, 'A') - for a in 
self.randombitarrays(): - ida = id(a) - rev = randint(0, 1) - a.sort(rev) - self.assertEqual(a, bitarray(sorted(a.tolist(), reverse=rev))) - self.assertEqual(id(a), ida) - - - def test_reverse(self): - self.assertRaises(TypeError, bitarray().reverse, 42) + def test_sort_random(self): + for rev in 0, 1: + for a in self.randombitarrays(): + b = a.tolist() + a.sort(rev) + self.assertEqual(a, bitarray(sorted(b, reverse=rev))) + def test_reverse_simple(self): for x, y in [('', ''), ('1', '1'), ('10', '01'), ('001', '100'), ('1110', '0111'), ('11100', '00111'), ('011000', '000110'), ('1101100', '0011011'), @@ -1460,13 +1791,14 @@ a.reverse() self.assertEQUAL(a, bitarray(y)) + self.assertRaises(TypeError, bitarray().reverse, 42) + + def test_reverse_random(self): for a in self.randombitarrays(): - aa = a.tolist() - ida = id(a) + b = a.copy() a.reverse() - self.assertEqual(ida, id(a)) - self.assertEQUAL(a, bitarray(aa[::-1], endian=a.endian())) - + self.assertEQUAL(a, bitarray(b.tolist()[::-1], endian=a.endian())) + self.assertEQUAL(a, b[::-1]) def test_tolist(self): a = bitarray() @@ -1474,23 +1806,17 @@ a = bitarray('110') self.assertEqual(a.tolist(), [True, True, False]) + self.assertEqual(a.tolist(True), [1, 1, 0]) + + for as_ints in 0, 1: + for elt in a.tolist(as_ints): + self.assertIsInstance(elt, int if as_ints else bool) for lst in self.randomlists(): a = bitarray(lst) self.assertEqual(a.tolist(), lst) - def test_remove(self): - a = bitarray() - for i in (True, False, 1, 0): - self.assertRaises(ValueError, a.remove, i) - - a = bitarray(21) - a.setall(0) - self.assertRaises(ValueError, a.remove, 1) - a.setall(1) - self.assertRaises(ValueError, a.remove, 0) - a = bitarray('1010110') for val, res in [(False, '110110'), (True, '10110'), (1, '0110'), (1, '010'), (0, '10'), @@ -1501,11 +1827,21 @@ a = bitarray('0010011') b = a b.remove('1') - self.assert_(b is a) + self.assertTrue(b is a) self.assertEQUAL(b, bitarray('000011')) + def test_remove_errors(self): + a = 
bitarray() + for i in (True, False, 1, 0): + self.assertRaises(ValueError, a.remove, i) + + a = bitarray(21) + a.setall(0) + self.assertRaises(ValueError, a.remove, 1) + a.setall(1) + self.assertRaises(ValueError, a.remove, 0) - def test_pop(self): + def test_pop_simple(self): for x, n, r, y in [('1', 0, True, ''), ('0', -1, False, ''), ('0011100', 3, True, '001100')]: @@ -1518,9 +1854,10 @@ self.assertEqual(a.pop(), False) self.assertRaises(IndexError, a.pop) + def test_pop_random(self): for a in self.randombitarrays(): self.assertRaises(IndexError, a.pop, len(a)) - self.assertRaises(IndexError, a.pop, -len(a)-1) + self.assertRaises(IndexError, a.pop, -len(a) - 1) if len(a) == 0: continue aa = a.tolist() @@ -1529,9 +1866,7 @@ self.check_obj(a) self.assertEqual(a.endian(), enda) - for a in self.randombitarrays(): - if len(a) == 0: - continue + for a in self.randombitarrays(start=1): n = randint(-len(a), len(a)-1) aa = a.tolist() self.assertEqual(a.pop(n), aa[n]) @@ -1539,22 +1874,39 @@ self.assertEqual(a, bitarray(aa)) self.check_obj(a) + def test_clear(self): + for a in self.randombitarrays(): + ida = id(a) + endian = a.endian() + a.clear() + self.assertEqual(a, bitarray()) + self.assertEqual(id(a), ida) + self.assertEqual(a.endian(), endian) + self.assertEqual(len(a), 0) def test_setall(self): a = bitarray(5) a.setall(True) self.assertEQUAL(a, bitarray('11111')) + a.setall(False) + self.assertEQUAL(a, bitarray('00000')) + + def test_setall_empty(self): + a = bitarray() + for v in 0, 1: + a.setall(v) + self.assertEQUAL(a, bitarray()) + def test_setall_random(self): for a in self.randombitarrays(): val = randint(0, 1) b = a b.setall(val) self.assertEqual(b, bitarray(len(b) * [val])) - self.assert_(a is b) + self.assertTrue(a is b) self.check_obj(b) - - def test_bytereverse(self): + def test_bytereverse_explicit(self): for x, y in [('', ''), ('1', '0'), ('1011', '0000'), @@ -1567,14 +1919,13 @@ a.bytereverse() self.assertEqual(a, bitarray(y)) + def 
test_bytereverse_byte(self): for i in range(256): a = bitarray() - a.frombytes(to_bytes(chr(i))) - aa = a.tolist() - b = a + a.frombytes(bytes(bytearray([i]))) + b = a.copy() b.bytereverse() - self.assertEqual(b, bitarray(aa[::-1])) - self.assert_(a is b) + self.assertEqual(b, a[::-1]) self.check_obj(b) @@ -1582,108 +1933,128 @@ # --------------------------------------------------------------------------- -class StringTests(unittest.TestCase, Util): +class BytesTests(unittest.TestCase, Util): def randombytes(self): for n in range(1, 20): - yield to_bytes(''.join(chr(randint(0, 255)) - for x in range(n))) + yield os.urandom(n) - def test_frombytes(self): + def test_frombytes_simple(self): a = bitarray(endian='big') - a.frombytes(to_bytes('A')) + a.frombytes(b'A') self.assertEqual(a, bitarray('01000001')) b = a - b.frombytes(to_bytes('BC')) + b.frombytes(b'BC') self.assertEQUAL(b, bitarray('01000001' '01000010' '01000011', endian='big')) - self.assert_(b is a) + self.assertTrue(b is a) - for b in self.randombitarrays(): - c = b.__copy__() - b.frombytes(to_bytes('')) - self.assertEQUAL(b, c) + def test_frombytes_empty(self): + for a in self.randombitarrays(): + b = a.copy() + a.frombytes(b'') + self.assertEQUAL(a, b) + self.assertFalse(a is b) + + def test_frombytes_errors(self): + a = bitarray() + self.assertRaises(TypeError, a.frombytes) + self.assertRaises(TypeError, a.frombytes, b'', b'') + self.assertRaises(TypeError, a.frombytes, 1) + def test_frombytes_random(self): for b in self.randombitarrays(): for s in self.randombytes(): a = bitarray(endian=b.endian()) a.frombytes(s) - c = b.__copy__() + c = b.copy() b.frombytes(s) self.assertEQUAL(b[-len(a):], a) self.assertEQUAL(b[:-len(a)], c) - self.assertEQUAL(c + a, b) + self.assertEQUAL(b, c + a) - def test_tobytes(self): + def test_tobytes_empty(self): a = bitarray() - self.assertEqual(a.tobytes(), to_bytes('')) + self.assertEqual(a.tobytes(), b'') + def test_tobytes_endian(self): for end in ('big', 'little'): a = 
bitarray(endian=end) - a.frombytes(to_bytes('foo')) - self.assertEqual(a.tobytes(), to_bytes('foo')) + a.frombytes(b'foo') + self.assertEqual(a.tobytes(), b'foo') for s in self.randombytes(): a = bitarray(endian=end) a.frombytes(s) self.assertEqual(a.tobytes(), s) - for n, s in [(1, '\x01'), (2, '\x03'), (3, '\x07'), (4, '\x0f'), - (5, '\x1f'), (6, '\x3f'), (7, '\x7f'), (8, '\xff'), - (12, '\xff\x0f'), (15, '\xff\x7f'), (16, '\xff\xff'), - (17, '\xff\xff\x01'), (24, '\xff\xff\xff')]: + def test_tobytes_explicit_ones(self): + for n, s in [(1, b'\x01'), (2, b'\x03'), (3, b'\x07'), (4, b'\x0f'), + (5, b'\x1f'), (6, b'\x3f'), (7, b'\x7f'), (8, b'\xff'), + (12, b'\xff\x0f'), (15, b'\xff\x7f'), (16, b'\xff\xff'), + (17, b'\xff\xff\x01'), (24, b'\xff\xff\xff')]: a = bitarray(n, endian='little') a.setall(1) - self.assertEqual(a.tobytes(), to_bytes(s)) + self.assertEqual(a.tobytes(), s) - def test_unpack(self): + def test_unpack_simple(self): a = bitarray('01') - self.assertEqual(a.unpack(), to_bytes('\x00\xff')) - self.assertEqual(a.unpack(to_bytes('A')), to_bytes('A\xff')) - self.assertEqual(a.unpack(to_bytes('0'), to_bytes('1')), - to_bytes('01')) - self.assertEqual(a.unpack(one=to_bytes('\x01')), - to_bytes('\x00\x01')) - self.assertEqual(a.unpack(zero=to_bytes('A')), - to_bytes('A\xff')) - self.assertEqual(a.unpack(one=to_bytes('t'), zero=to_bytes('f')), - to_bytes('ft')) - - self.assertRaises(TypeError, a.unpack, - to_bytes('a'), zero=to_bytes('b')) - self.assertRaises(TypeError, a.unpack, foo=to_bytes('b')) - - for a in self.randombitarrays(): - self.assertEqual(a.unpack(to_bytes('0'), to_bytes('1')), - to_bytes(a.to01())) - + self.assertIsInstance(a.unpack(), bytes) + self.assertEqual(a.unpack(), b'\x00\xff') + self.assertEqual(a.unpack(b'A'), b'A\xff') + self.assertEqual(a.unpack(b'0', b'1'), b'01') + self.assertEqual(a.unpack(one=b'\x01'), b'\x00\x01') + self.assertEqual(a.unpack(zero=b'A'), b'A\xff') + self.assertEqual(a.unpack(one=b't', zero=b'f'), b'ft') + + 
def test_unpack_random(self): + for a in self.randombitarrays(): + self.assertEqual(a.unpack(b'0', b'1'), + a.to01().encode()) + # round trip b = bitarray() b.pack(a.unpack()) self.assertEqual(b, a) - + # round trip with invert b = bitarray() - b.pack(a.unpack(to_bytes('\x01'), to_bytes('\x00'))) + b.pack(a.unpack(b'\x01', b'\x00')) b.invert() self.assertEqual(b, a) + def test_unpack_errors(self): + a = bitarray('01') + self.assertRaises(TypeError, a.unpack, b'') + self.assertRaises(TypeError, a.unpack, b'0', b'') + self.assertRaises(TypeError, a.unpack, b'a', zero=b'b') + self.assertRaises(TypeError, a.unpack, foo=b'b') + self.assertRaises(TypeError, a.unpack, one=b'aa', zero=b'b') + if is_py3k: + self.assertRaises(TypeError, a.unpack, '0') + self.assertRaises(TypeError, a.unpack, one='a') + self.assertRaises(TypeError, a.unpack, b'0', '1') + + def test_pack_simple(self): + for endian in 'little', 'big': + _set_default_endian(endian) + a = bitarray() + a.pack(b'\x00') + self.assertEqual(a, bitarray('0')) + a.pack(b'\xff') + self.assertEqual(a, bitarray('01')) + a.pack(b'\x01\x00\x7a') + self.assertEqual(a, bitarray('01101')) - def test_pack(self): - a = bitarray() - a.pack(to_bytes('\x00')) - self.assertEqual(a, bitarray('0')) - a.pack(to_bytes('\xff')) - self.assertEqual(a, bitarray('01')) - a.pack(to_bytes('\x01\x00\x7a')) - self.assertEqual(a, bitarray('01101')) - + def test_pack_random(self): a = bitarray() for n in range(256): - a.pack(to_bytes(chr(n))) + a.pack(bytes(bytearray([n]))) self.assertEqual(a, bitarray('0' + 255 * '1')) + def test_pack_errors(self): + a = bitarray() self.assertRaises(TypeError, a.pack, 0) if is_py3k: self.assertRaises(TypeError, a.pack, '1') @@ -1691,7 +2062,7 @@ self.assertRaises(TypeError, a.pack, bitarray()) -tests.append(StringTests) +tests.append(BytesTests) # --------------------------------------------------------------------------- @@ -1704,37 +2075,26 @@ def tearDown(self): shutil.rmtree(self.tmpdir) + def read_file(self): 
+ with open(self.tmpfname, 'rb') as fi: + return fi.read() - def test_pickle(self): - from pickle import load, dump + def assertFileSize(self, size): + self.assertEqual(os.path.getsize(self.tmpfname), size) - for v in range(3): - for a in self.randombitarrays(): - fo = open(self.tmpfname, 'wb') - dump(a, fo, v) - fo.close() - b = load(open(self.tmpfname, 'rb')) - self.assert_(b is not a) - self.assertEQUAL(a, b) - def test_cPickle(self): - if is_py3k: - return - from cPickle import load, dump - - for v in range(3): - for a in self.randombitarrays(): - fo = open(self.tmpfname, 'wb') - dump(a, fo, v) - fo.close() - b = load(open(self.tmpfname, 'rb')) - self.assert_(b is not a) - self.assertEQUAL(a, b) + def test_pickle(self): + for a in self.randombitarrays(): + with open(self.tmpfname, 'wb') as fo: + pickle.dump(a, fo) + with open(self.tmpfname, 'rb') as fi: + b = pickle.load(fi) + self.assertFalse(b is a) + self.assertEQUAL(a, b) def test_shelve(self): - if sys.version_info[:2] < (2, 5): + if not shelve or hasattr(sys, 'gettotalrefcount'): return - import shelve, hashlib d = shelve.open(self.tmpfname) stored = [] @@ -1752,229 +2112,368 @@ d.close() - def test_fromfile_wrong_args(self): - b = bitarray() - self.assertRaises(TypeError, b.fromfile) - self.assertRaises(TypeError, b.fromfile, StringIO()) # file not open - self.assertRaises(TypeError, b.fromfile, 42) - self.assertRaises(TypeError, b.fromfile, 'bar') - - - def test_from_empty_file(self): - fo = open(self.tmpfname, 'wb') - fo.close() + def test_fromfile_empty(self): + with open(self.tmpfname, 'wb') as fo: + pass + self.assertFileSize(0) a = bitarray() - a.fromfile(open(self.tmpfname, 'rb')) + with open(self.tmpfname, 'rb') as fi: + a.fromfile(fi) self.assertEqual(a, bitarray()) + def test_fromfile_Foo(self): + with open(self.tmpfname, 'wb') as fo: + fo.write(b'Foo') + self.assertFileSize(3) - def test_from_large_file(self): - N = 100000 + a = bitarray(endian='big') + with open(self.tmpfname, 'rb') as fi: + 
a.fromfile(fi) + self.assertEqual(a, bitarray('01000110' '01101111' '01101111')) - fo = open(self.tmpfname, 'wb') - fo.write(N * to_bytes('X')) - fo.close() + a = bitarray(endian='little') + with open(self.tmpfname, 'rb') as fi: + a.fromfile(fi) + self.assertEqual(a, bitarray('01100010' '11110110' '11110110')) + def test_fromfile_wrong_args(self): a = bitarray() - a.fromfile(open(self.tmpfname, 'rb')) - self.assertEqual(len(a), 8 * N) - self.assertEqual(a.buffer_info()[1], N) - # make sure there is no over-allocation - self.assertEqual(a.buffer_info()[4], N) + self.assertRaises(TypeError, a.fromfile) + #self.assertRaises(TypeError, a.fromfile, StringIO()) # file not open + self.assertRaises(Exception, a.fromfile, 42) + self.assertRaises(Exception, a.fromfile, 'bar') + + with open(self.tmpfname, 'wb') as fo: + pass + with open(self.tmpfname, 'rb') as fi: + self.assertRaises(TypeError, a.fromfile, fi, None) + + def test_fromfile_erros(self): + with open(self.tmpfname, 'wb') as fo: + fo.write(b'0123456789') + self.assertFileSize(10) + a = bitarray() + with open(self.tmpfname, 'wb') as fi: + self.assertRaises(Exception, a.fromfile, fi) - def test_fromfile_Foo(self): - fo = open(self.tmpfname, 'wb') - fo.write(to_bytes('Foo\n')) - fo.close() + if is_py3k: + with open(self.tmpfname, 'r') as fi: + self.assertRaises(TypeError, a.fromfile, fi) - a = bitarray(endian='big') - a.fromfile(open(self.tmpfname, 'rb')) - self.assertEqual(a, bitarray('01000110011011110110111100001010')) + def test_from_large_files(self): + for N in range(65534, 65538): + data = os.urandom(N) + with open(self.tmpfname, 'wb') as fo: + fo.write(data) - a = bitarray(endian='little') - a.fromfile(open(self.tmpfname, 'rb')) - self.assertEqual(a, bitarray('01100010111101101111011001010000')) + a = bitarray() + with open(self.tmpfname, 'rb') as fi: + a.fromfile(fi) + self.assertEqual(len(a), 8 * N) + self.assertEqual(a.buffer_info()[1], N) + self.assertEqual(a.tobytes(), data) + + def 
test_fromfile_extend_existing(self): + with open(self.tmpfname, 'wb') as fo: + fo.write(b'Foo') + foo_le = '011000101111011011110110' a = bitarray('1', endian='little') - a.fromfile(open(self.tmpfname, 'rb')) - self.assertEqual(a, bitarray('101100010111101101111011001010000')) + with open(self.tmpfname, 'rb') as fi: + a.fromfile(fi) + + self.assertEqual(a, bitarray('1' + foo_le)) for n in range(20): a = bitarray(n, endian='little') a.setall(1) - a.fromfile(open(self.tmpfname, 'rb')) - self.assertEqual(a, - n*bitarray('1') + - bitarray('01100010111101101111011001010000')) - + with open(self.tmpfname, 'rb') as fi: + a.fromfile(fi) + self.assertEqual(a, bitarray(n * '1' + foo_le)) def test_fromfile_n(self): a = bitarray() - a.fromstring('ABCDEFGHIJ') - fo = open(self.tmpfname, 'wb') - a.tofile(fo) - fo.close() - - b = bitarray() - f = open(self.tmpfname, 'rb') - b.fromfile(f, 1); self.assertEqual(b.tostring(), 'A') - f.read(1) - b = bitarray() - b.fromfile(f, 2); self.assertEqual(b.tostring(), 'CD') - b.fromfile(f, 1); self.assertEqual(b.tostring(), 'CDE') - b.fromfile(f, 0); self.assertEqual(b.tostring(), 'CDE') - b.fromfile(f); self.assertEqual(b.tostring(), 'CDEFGHIJ') - b.fromfile(f); self.assertEqual(b.tostring(), 'CDEFGHIJ') - f.close() - - b = bitarray() - f = open(self.tmpfname, 'rb') - f.read(1); - self.assertRaises(EOFError, b.fromfile, f, 10) - f.close() - self.assertEqual(b.tostring(), 'BCDEFGHIJ') - - b = bitarray() - f = open(self.tmpfname, 'rb') - b.fromfile(f); - self.assertEqual(b.tostring(), 'ABCDEFGHIJ') - self.assertRaises(EOFError, b.fromfile, f, 1) - f.close() - - - def test_tofile(self): - a = bitarray() - f = open(self.tmpfname, 'wb') - a.tofile(f) - f.close() - - fi = open(self.tmpfname, 'rb') - self.assertEqual(fi.read(), to_bytes('')) - fi.close() - - a = bitarray('01000110011011110110111100001010', endian='big') - f = open(self.tmpfname, 'wb') - a.tofile(f) - f.close() - - fi = open(self.tmpfname, 'rb') - self.assertEqual(fi.read(), 
to_bytes('Foo\n')) - fi.close() - - for a in self.randombitarrays(): - b = bitarray(a, endian='big') - fo = open(self.tmpfname, 'wb') - b.tofile(fo) - fo.close() - - s = open(self.tmpfname, 'rb').read() - self.assertEqual(len(s), a.buffer_info()[1]) - - for n in range(3): - a.fromstring(n * 'A') - self.assertRaises(TypeError, a.tofile) - self.assertRaises(TypeError, a.tofile, StringIO()) + a.frombytes(b'ABCDEFGHIJ') + with open(self.tmpfname, 'wb') as fo: + a.tofile(fo) + self.assertFileSize(10) + + with open(self.tmpfname, 'rb') as f: + a = bitarray() + a.fromfile(f, 0); self.assertEqual(a.tobytes(), b'') + a.fromfile(f, 1); self.assertEqual(a.tobytes(), b'A') + f.read(1) # skip B + a.fromfile(f, 1); self.assertEqual(a.tobytes(), b'AC') + a = bitarray() + a.fromfile(f, 2); self.assertEqual(a.tobytes(), b'DE') + a.fromfile(f, 1); self.assertEqual(a.tobytes(), b'DEF') + a.fromfile(f, 0); self.assertEqual(a.tobytes(), b'DEF') + a.fromfile(f); self.assertEqual(a.tobytes(), b'DEFGHIJ') + a.fromfile(f); self.assertEqual(a.tobytes(), b'DEFGHIJ') + + a = bitarray() + with open(self.tmpfname, 'rb') as f: + f.read(1) + self.assertRaises(EOFError, a.fromfile, f, 10) + # check that although we received an EOFError, the bytes were read + self.assertEqual(a.tobytes(), b'BCDEFGHIJ') + + a = bitarray() + with open(self.tmpfname, 'rb') as f: + # negative values - like ommiting the argument + a.fromfile(f, -1) + self.assertEqual(a.tobytes(), b'ABCDEFGHIJ') + self.assertRaises(EOFError, a.fromfile, f, 1) + + def test_fromfile_BytesIO(self): + f = BytesIO(b'somedata') + a = bitarray() + a.fromfile(f, 4) + self.assertEqual(len(a), 32) + self.assertEqual(a.tobytes(), b'some') + a.fromfile(f) + self.assertEqual(len(a), 64) + self.assertEqual(a.tobytes(), b'somedata') + + def test_tofile_empty(self): + a = bitarray() + with open(self.tmpfname, 'wb') as f: + a.tofile(f) + + self.assertFileSize(0) + + def test_tofile_Foo(self): + a = bitarray('0100011' '001101111' '01101111', endian='big') 
+ b = a.copy() + with open(self.tmpfname, 'wb') as f: + a.tofile(f) + self.assertEQUAL(a, b) + + self.assertFileSize(3) + self.assertEqual(self.read_file(), b'Foo') - f = open(self.tmpfname, 'wb') + def test_tofile_random(self): + for a in self.randombitarrays(): + with open(self.tmpfname, 'wb') as fo: + a.tofile(fo) + n = bits2bytes(len(a)) + self.assertFileSize(n) + raw = self.read_file() + self.assertEqual(len(raw), n) + self.assertEqual(raw, a.tobytes()) + + def test_tofile_errors(self): + n = 100 + a = bitarray(8 * n) + self.assertRaises(TypeError, a.tofile) + + with open(self.tmpfname, 'wb') as f: + a.tofile(f) + self.assertFileSize(n) + # write to closed file + self.assertRaises(ValueError, a.tofile, f) + + if is_py3k: + with open(self.tmpfname, 'w') as f: + self.assertRaises(TypeError, a.tofile, f) + + with open(self.tmpfname, 'rb') as f: + self.assertRaises(Exception, a.tofile, f) + + def test_tofile_large(self): + n = 100 * 1000 + a = bitarray(8 * n) + a.setall(0) + a[2::37] = 1 + with open(self.tmpfname, 'wb') as f: a.tofile(f) - f.close() - self.assertRaises(TypeError, a.tofile, f) + self.assertFileSize(n) + raw = self.read_file() + self.assertEqual(len(raw), n) + self.assertEqual(raw, a.tobytes()) + + def test_tofile_ones(self): for n in range(20): a = n * bitarray('1', endian='little') - fo = open(self.tmpfname, 'wb') - a.tofile(fo) - fo.close() + with open(self.tmpfname, 'wb') as fo: + a.tofile(fo) - s = open(self.tmpfname, 'rb').read() - self.assertEqual(len(s), a.buffer_info()[1]) - - b = a.__copy__() - b.fill() + raw = self.read_file() + self.assertEqual(len(raw), bits2bytes(len(a))) + # when we the the unused bits in a, we can compare + a.fill() + b = bitarray(endian='little') + b.frombytes(raw) + self.assertEqual(a, b) - c = bitarray(endian='little') - c.frombytes(s) - self.assertEqual(c, b) + def test_tofile_BytesIO(self): + for n in list(range(10)) + list(range(65534, 65538)): + data = os.urandom(n) + a = bitarray(0, 'big') + a.frombytes(data) 
+ self.assertEqual(len(a), 8 * n) + f = BytesIO() + a.tofile(f) + self.assertEqual(f.getvalue(), data) tests.append(FileTests) -# --------------------------------------------------------------------------- +# ----------------------------- Decode Tree --------------------------------- -class PrefixCodeTests(unittest.TestCase, Util): +alpabet_code = { + ' ': bitarray('001'), '.': bitarray('0101010'), + 'a': bitarray('0110'), 'b': bitarray('0001100'), + 'c': bitarray('000011'), 'd': bitarray('01011'), + 'e': bitarray('111'), 'f': bitarray('010100'), + 'g': bitarray('101000'), 'h': bitarray('00000'), + 'i': bitarray('1011'), 'j': bitarray('0111101111'), + 'k': bitarray('00011010'), 'l': bitarray('01110'), + 'm': bitarray('000111'), 'n': bitarray('1001'), + 'o': bitarray('1000'), 'p': bitarray('101001'), + 'q': bitarray('00001001101'), 'r': bitarray('1101'), + 's': bitarray('1100'), 't': bitarray('0100'), + 'u': bitarray('000100'), 'v': bitarray('0111100'), + 'w': bitarray('011111'), 'x': bitarray('0000100011'), + 'y': bitarray('101010'), 'z': bitarray('00011011110') +} + +class DecodeTreeTests(unittest.TestCase): + + def test_create(self): + dt = decodetree(alpabet_code) + self.assertEqual(repr(type(dt)), "<%s 'bitarray.decodetree'>" % + ('class' if is_py3k else 'type')) + self.assertRaises(TypeError, decodetree, None) + self.assertRaises(TypeError, decodetree, 'foo') + d = dict(alpabet_code) + d['-'] = bitarray() + self.assertRaises(ValueError, decodetree, d) + + def test_sizeof(self): + dt = decodetree({'.': bitarray('1')}) + self.assertTrue(0 < sys.getsizeof(dt) < 100) - def test_encode_errors(self): - a = bitarray() - self.assertRaises(TypeError, a.encode, 0, '') - self.assertRaises(ValueError, a.encode, {}, '') - self.assertRaises(TypeError, a.encode, {'a':42}, '') - self.assertRaises(ValueError, a.encode, {'a': bitarray()}, '') - # 42 not iterable - self.assertRaises(TypeError, a.encode, {'a': bitarray('0')}, 42) - self.assertEqual(len(a), 0) + dt = 
decodetree({'a': bitarray(20 * '0')}) + self.assertTrue(sys.getsizeof(dt) > 200) + + def test_nodes(self): + for n in range(1, 20): + dt = decodetree({'a': bitarray(n * '0')}) + self.assertEqual(dt.nodes(), n + 1) + + dt = decodetree({'I': bitarray('1'), 'l': bitarray('01'), + 'a': bitarray('001'), 'n': bitarray('000')}) + self.assertEqual(dt.nodes(), 7) + dt = decodetree(alpabet_code) + self.assertEqual(dt.nodes(), 70) + + def test_todict(self): + t = decodetree(alpabet_code) + d = t.todict() + self.assertEqual(d, alpabet_code) + + def test_decode(self): + t = decodetree(alpabet_code) + a = bitarray('10110111001101001') + self.assertEqual(a.decode(t), ['i', 'l', 'a', 'n']) + self.assertEqual(''.join(a.iterdecode(t)), 'ilan') + a = bitarray() + self.assertEqual(a.decode(t), []) + self.assertEqual(''.join(a.iterdecode(t)), '') + + def test_large(self): + d = {i: bitarray((1 << j) & i for j in range(10)) + for i in range(1024)} + t = decodetree(d) + self.assertEqual(t.todict(), d) + self.assertEqual(t.nodes(), 2047) + self.assertTrue(sys.getsizeof(t) > 10000) + +tests.append(DecodeTreeTests) + +# ------------------ variable length encoding and decoding ------------------ + +class PrefixCodeTests(unittest.TestCase, Util): def test_encode_string(self): a = bitarray() - d = {'a': bitarray('0')} - a.encode(d, '') + a.encode(alpabet_code, '') self.assertEqual(a, bitarray()) - a.encode(d, 'a') - self.assertEqual(a, bitarray('0')) - self.assertEqual(d, {'a': bitarray('0')}) + a.encode(alpabet_code, 'a') + self.assertEqual(a, bitarray('0110')) def test_encode_list(self): a = bitarray() - d = {'a':bitarray('0')} - a.encode(d, []) + a.encode(alpabet_code, []) self.assertEqual(a, bitarray()) - a.encode(d, ['a']) - self.assertEqual(a, bitarray('0')) - self.assertEqual(d, {'a': bitarray('0')}) + a.encode(alpabet_code, ['e']) + self.assertEqual(a, bitarray('111')) def test_encode_iter(self): a = bitarray() - d = {'a': bitarray('0'), 'b': bitarray('1')} - a.encode(d, iter('abba')) 
+ d = {0: bitarray('0'), 1: bitarray('1')} + a.encode(d, iter([0, 1, 1, 0])) self.assertEqual(a, bitarray('0110')) def foo(): - for c in 'bbaabb': + for c in 1, 1, 0, 0, 1, 1: yield c a.encode(d, foo()) - self.assertEqual(a, bitarray('0110110011')) - self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('1')}) + a.encode(d, range(2)) + self.assertEqual(a, bitarray('011011001101')) + self.assertEqual(d, {0: bitarray('0'), 1: bitarray('1')}) + + def test_encode_symbol_not_in_code(self): + d = {None : bitarray('0'), + 0 : bitarray('10'), + 'A' : bitarray('11')} + a = bitarray() + a.encode(d, ['A', None, 0]) + self.assertEqual(a, bitarray('11010')) + self.assertRaises(ValueError, a.encode, d, [1, 2]) + self.assertRaises(ValueError, a.encode, d, 'ABCD') - def test_encode(self): - d = {'I': bitarray('1'), - 'l': bitarray('01'), - 'a': bitarray('001'), - 'n': bitarray('000')} + def test_encode_not_iterable(self): + d = {'a': bitarray('0'), 'b': bitarray('1')} a = bitarray() - a.encode(d, 'Ilan') - self.assertEqual(a, bitarray('101001000')) - a.encode(d, 'a') - self.assertEqual(a, bitarray('101001000001')) - self.assertEqual(d, {'I': bitarray('1'), 'l': bitarray('01'), - 'a': bitarray('001'), 'n': bitarray('000')}) - self.assertRaises(ValueError, a.encode, d, 'arvin') - + a.encode(d, 'abba') + self.assertRaises(TypeError, a.encode, d, 42) + self.assertRaises(TypeError, a.encode, d, 1.3) + self.assertRaises(TypeError, a.encode, d, None) + self.assertEqual(a, bitarray('0110')) - def test_decode_check_codedict(self): + def test_check_codedict_encode(self): a = bitarray() + self.assertRaises(TypeError, a.encode, None, '') + self.assertRaises(ValueError, a.encode, {}, '') + self.assertRaises(TypeError, a.encode, {'a': 'b'}, 'a') + self.assertRaises(ValueError, a.encode, {'a': bitarray()}, 'a') + self.assertEqual(len(a), 0) + + def test_check_codedict_decode(self): + a = bitarray('101') self.assertRaises(TypeError, a.decode, 0) self.assertRaises(ValueError, a.decode, {}) - # 42 
not iterable - self.assertRaises(TypeError, a.decode, {'a':42}) - self.assertRaises(ValueError, a.decode, {'a':bitarray()}) + self.assertRaises(TypeError, a.decode, {'a': 42}) + self.assertRaises(ValueError, a.decode, {'a': bitarray()}) + self.assertEqual(a, bitarray('101')) + + def test_check_codedict_iterdecode(self): + a = bitarray('1100101') + self.assertRaises(TypeError, a.iterdecode, 0) + self.assertRaises(ValueError, a.iterdecode, {}) + self.assertRaises(TypeError, a.iterdecode, {'a': []}) + self.assertRaises(ValueError, a.iterdecode, {'a': bitarray()}) + self.assertEqual(a, bitarray('1100101')) def test_decode_simple(self): - d = {'I': bitarray('1'), - 'l': bitarray('01'), - 'a': bitarray('001'), - 'n': bitarray('000')} + d = {'I': bitarray('1'), 'l': bitarray('01'), + 'a': bitarray('001'), 'n': bitarray('000')} dcopy = dict(d) a = bitarray('101001000') self.assertEqual(a.decode(d), ['I', 'l', 'a', 'n']) @@ -1982,16 +2481,23 @@ self.assertEqual(a, bitarray('101001000')) def test_iterdecode_simple(self): - d = {'I': bitarray('1'), - 'l': bitarray('01'), - 'a': bitarray('001'), - 'n': bitarray('000')} + d = {'I': bitarray('1'), 'l': bitarray('01'), + 'a': bitarray('001'), 'n': bitarray('000')} dcopy = dict(d) a = bitarray('101001000') self.assertEqual(list(a.iterdecode(d)), ['I', 'l', 'a', 'n']) self.assertEqual(d, dcopy) self.assertEqual(a, bitarray('101001000')) + def test_iterdecode_remove_tree(self): + d = {'I': bitarray('1'), 'l': bitarray('01'), + 'a': bitarray('001'), 'n': bitarray('000')} + t = decodetree(d) + a = bitarray('101001000') + it = a.iterdecode(t) + del t + self.assertEqual(''.join(it), "Ilan") + def test_decode_empty(self): d = {'a': bitarray('1')} a = bitarray() @@ -2002,19 +2508,48 @@ self.assertEqual(d, {'a': bitarray('1')}) self.assertEqual(len(a), 0) + def test_decode_no_term(self): + d = {'a': bitarray('0'), 'b': bitarray('111')} + a = bitarray('011') + msg = "decoding not terminated" + self.assertRaisesMessage(ValueError, msg, 
a.decode, d) + self.assertRaisesMessage(ValueError, msg, a.iterdecode, d) + t = decodetree(d) + self.assertRaisesMessage(ValueError, msg, a.decode, t) + self.assertRaisesMessage(ValueError, msg, a.iterdecode, t) + + self.assertEqual(a, bitarray('011')) + self.assertEqual(d, {'a': bitarray('0'), 'b': bitarray('111')}) + self.assertEqual(t.todict(), d) + def test_decode_buggybitarray(self): d = {'a': bitarray('0')} a = bitarray('1') - self.assertRaises(ValueError, a.decode, d) + msg = "prefix code does not match data in bitarray" + self.assertRaisesMessage(ValueError, msg, a.decode, d) + self.assertRaisesMessage(ValueError, msg, a.iterdecode, d) + t = decodetree(d) + self.assertRaisesMessage(ValueError, msg, a.decode, t) + self.assertRaisesMessage(ValueError, msg, a.iterdecode, t) + self.assertEqual(a, bitarray('1')) self.assertEqual(d, {'a': bitarray('0')}) + self.assertEqual(t.todict(), d) + + def test_iterdecode_no_term(self): + d = {'a': bitarray('0'), 'b': bitarray('111')} + a = bitarray('011') + it = a.iterdecode(d) + self.assertEqual(next(it), 'a') + self.assertRaisesMessage(ValueError, "decoding not terminated", + next, it) + self.assertEqual(a, bitarray('011')) def test_iterdecode_buggybitarray(self): d = {'a': bitarray('0')} a = bitarray('1') it = a.iterdecode(d) - if not is_py3k: - self.assertRaises(ValueError, it.next) + self.assertRaises(ValueError, next, it) self.assertEqual(a, bitarray('1')) self.assertEqual(d, {'a': bitarray('0')}) @@ -2022,27 +2557,40 @@ d = {'a': bitarray('00'), 'b': bitarray('01')} a = bitarray('1') self.assertRaises(ValueError, a.decode, d) + t = decodetree(d) + self.assertRaises(ValueError, a.decode, t) + self.assertEqual(a, bitarray('1')) + self.assertEqual(d, {'a': bitarray('00'), 'b': bitarray('01')}) + self.assertEqual(t.todict(), d) def test_iterdecode_buggybitarray2(self): d = {'a': bitarray('00'), 'b': bitarray('01')} a = bitarray('1') it = a.iterdecode(d) - if not is_py3k: - self.assertRaises(ValueError, it.next) + 
self.assertRaises(ValueError, next, it) self.assertEqual(a, bitarray('1')) - def test_decode_ambiguous_code(self): - d = {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')} - a = bitarray() - self.assertRaises(ValueError, a.decode, d) - self.assertRaises(ValueError, a.iterdecode, d) + t = decodetree(d) + it = a.iterdecode(t) + self.assertRaises(ValueError, next, it) - def test_decode_ambiguous2(self): - d = {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')} - a = bitarray() - self.assertRaises(ValueError, a.decode, d) - self.assertRaises(ValueError, a.iterdecode, d) + self.assertEqual(a, bitarray('1')) + self.assertEqual(d, {'a': bitarray('00'), 'b': bitarray('01')}) + self.assertEqual(t.todict(), d) + + def test_decode_ambiguous_code(self): + for d in [ + {'a': bitarray('0'), 'b': bitarray('0'), 'c': bitarray('1')}, + {'a': bitarray('01'), 'b': bitarray('01'), 'c': bitarray('1')}, + {'a': bitarray('0'), 'b': bitarray('01')}, + {'a': bitarray('0'), 'b': bitarray('11'), 'c': bitarray('111')}, + ]: + a = bitarray() + msg = "prefix code ambiguous" + self.assertRaisesMessage(ValueError, msg, a.decode, d) + self.assertRaisesMessage(ValueError, msg, a.iterdecode, d) + self.assertRaisesMessage(ValueError, msg, decodetree, d) def test_miscitems(self): d = {None : bitarray('00'), @@ -2064,90 +2612,175 @@ self.assertStopIteration(it) def test_real_example(self): - code = {' ' : bitarray('001'), - '.' 
: bitarray('0101010'), - 'a' : bitarray('0110'), - 'b' : bitarray('0001100'), - 'c' : bitarray('000011'), - 'd' : bitarray('01011'), - 'e' : bitarray('111'), - 'f' : bitarray('010100'), - 'g' : bitarray('101000'), - 'h' : bitarray('00000'), - 'i' : bitarray('1011'), - 'j' : bitarray('0111101111'), - 'k' : bitarray('00011010'), - 'l' : bitarray('01110'), - 'm' : bitarray('000111'), - 'n' : bitarray('1001'), - 'o' : bitarray('1000'), - 'p' : bitarray('101001'), - 'q' : bitarray('00001001101'), - 'r' : bitarray('1101'), - 's' : bitarray('1100'), - 't' : bitarray('0100'), - 'u' : bitarray('000100'), - 'v' : bitarray('0111100'), - 'w' : bitarray('011111'), - 'x' : bitarray('0000100011'), - 'y' : bitarray('101010'), - 'z' : bitarray('00011011110')} a = bitarray() message = 'the quick brown fox jumps over the lazy dog.' - a.encode(code, message) + a.encode(alpabet_code, message) self.assertEqual(a, bitarray('01000000011100100001001101000100101100' '00110001101000100011001101100001111110010010101001000000010001100' '10111101111000100000111101001110000110000111100111110100101000000' '0111001011100110000110111101010100010101110001010000101010')) - self.assertEqual(''.join(a.decode(code)), message) - self.assertEqual(''.join(a.iterdecode(code)), message) - + self.assertEqual(''.join(a.decode(alpabet_code)), message) + self.assertEqual(''.join(a.iterdecode(alpabet_code)), message) + t = decodetree(alpabet_code) + self.assertEqual(''.join(a.decode(t)), message) + self.assertEqual(''.join(a.iterdecode(t)), message) tests.append(PrefixCodeTests) -# -------------- Buffer Interface (Python 2.7 only for now) ---------------- +# -------------------------- Buffer Interface ------------------------------- class BufferInterfaceTests(unittest.TestCase): - def test_read1(self): + def test_read_simple(self): a = bitarray('01000001' '01000010' '01000011', endian='big') v = memoryview(a) self.assertEqual(len(v), 3) - self.assertEqual(v[0], 'A') - self.assertEqual(v[:].tobytes(), 'ABC') + 
self.assertEqual(v[0], 65 if is_py3k else 'A') + self.assertEqual(v.tobytes(), b'ABC') a[13] = 1 - self.assertEqual(v[:].tobytes(), 'AFC') + self.assertEqual(v.tobytes(), b'AFC') + + def test_read_random(self): + a = bitarray() + a.frombytes(os.urandom(100)) + v = memoryview(a) + self.assertEqual(len(v), 100) + b = a[34 * 8 : 67 * 8] + self.assertEqual(v[34:67].tobytes(), b.tobytes()) + self.assertEqual(v.tobytes(), a.tobytes()) - def test_read2(self): - a = bitarray([randint(0, 1) for d in range(8000)]) + def test_resize(self): + a = bitarray('01000001' '01000010' '01000011', endian='big') v = memoryview(a) - self.assertEqual(len(v), 1000) - b = a[345 * 8 : 657 * 8] - self.assertEqual(v[345:657].tobytes(), b.tobytes()) - self.assertEqual(v[:].tobytes(), a.tobytes()) + self.assertRaises(BufferError, a.append, 1) + self.assertRaises(BufferError, a.clear) + self.assertRaises(BufferError, a.__delitem__, slice(0, 8)) + self.assertEqual(v.tobytes(), a.tobytes()) def test_write(self): - a = bitarray(800000) + a = bitarray(8000) a.setall(0) v = memoryview(a) self.assertFalse(v.readonly) - v[50000] = '\xff' - self.assertEqual(a[399999:400009], bitarray('0111111110')) - a[400003] = 0 - self.assertEqual(a[399999:400009], bitarray('0111011110')) - v[30001:30004] = 'ABC' - self.assertEqual(a[240000:240040].tobytes(), '\x00ABC\x00') + v[500] = 255 if is_py3k else '\xff' + self.assertEqual(a[3999:4009], bitarray('0111111110')) + a[4003] = 0 + self.assertEqual(a[3999:4009], bitarray('0111011110')) + v[301:304] = b'ABC' + self.assertEqual(a[300 * 8 : 305 * 8].tobytes(), b'\x00ABC\x00') + + def test_write_py3(self): + if not is_py3k: + return + a = bitarray(40) + a.setall(0) + m = memoryview(a) + v = m[1:4] + v[0] = 65 + v[1] = 66 + v[2] = 67 + self.assertEqual(a.tobytes(), b'\x00ABC\x00') + + +tests.append(BufferInterfaceTests) + +# --------------------------------------------------------------------------- + +class TestsFrozenbitarray(unittest.TestCase, Util): + + def 
test_init(self): + a = frozenbitarray('110') + self.assertEqual(a, bitarray('110')) + self.assertEqual(a.to01(), '110') + for endian in 'big', 'little': + a = frozenbitarray(0, endian) + self.assertEqual(a.endian(), endian) + + def test_methods(self): + # test a few methods which do not raise the TypeError + a = frozenbitarray('1101100') + self.assertEqual(a[2], 0) + self.assertEqual(a[:4].to01(), '1101') + self.assertEqual(a.count(), 4) + self.assertEqual(a.index(0), 2) + b = a.copy() + self.assertEqual(b, a) + self.assertEqual(repr(type(b)), "") + self.assertEqual(len(b), 7) + self.assertEqual(b.all(), False) + self.assertEqual(b.any(), True) + + def test_init_from_bitarray(self): + for a in self.randombitarrays(): + b = frozenbitarray(a) + self.assertFalse(b is a) + self.assertEqual(b, a) + self.assertEqual(b.endian(), a.endian()) + c = frozenbitarray(b) + self.assertEqual(c, b) + self.assertFalse(c is b) + self.assertEqual(c.endian(), a.endian()) + self.assertEqual(hash(c), hash(b)) + + def test_repr(self): + a = frozenbitarray() + self.assertEqual(repr(a), "frozenbitarray()") + self.assertEqual(str(a), "frozenbitarray()") + a = frozenbitarray('10111') + self.assertEqual(repr(a), "frozenbitarray('10111')") + self.assertEqual(str(a), "frozenbitarray('10111')") + + def test_immutable(self): + a = frozenbitarray('111') + self.assertRaises(TypeError, a.append, True) + self.assertRaises(TypeError, a.clear) + self.assertRaises(TypeError, a.__delitem__, 0) + self.assertRaises(TypeError, a.__setitem__, 0, 0) + + def test_dictkey(self): + a = frozenbitarray('01') + b = frozenbitarray('1001') + d = {a: 123, b: 345} + self.assertEqual(d[frozenbitarray('01')], 123) + self.assertEqual(d[frozenbitarray(b)], 345) + + def test_dictkey2(self): # taken slightly modified from issue #74 + a1 = frozenbitarray([True, False]) + a2 = frozenbitarray([False, False]) + dct = {a1: "one", a2: "two"} + a3 = frozenbitarray([True, False]) + self.assertEqual(a3, a1) + self.assertEqual(dct[a3], 
'one') + + def test_mix(self): + a = bitarray('110') + b = frozenbitarray('0011') + self.assertEqual(a + b, bitarray('1100011')) + a.extend(b) + self.assertEqual(a, bitarray('1100011')) + + def test_pickle(self): + for a in self.randombitarrays(): + f = frozenbitarray(a) + g = pickle.loads(pickle.dumps(f)) + self.assertEqual(f, g) + self.assertEqual(f.endian(), g.endian()) + self.assertTrue(str(g).startswith('frozenbitarray')) + -if sys.version_info[:2] == (2, 7): - tests.append(BufferInterfaceTests) +tests.append(TestsFrozenbitarray) # --------------------------------------------------------------------------- def run(verbosity=1, repeat=1): - print('bitarray is installed in: ' + os.path.dirname(__file__)) - print('bitarray version: ' + __version__) - print(sys.version) + import bitarray.test_util as btu + tests.extend(btu.tests) + print('bitarray is installed in: %s' % os.path.dirname(__file__)) + print('bitarray version: %s' % __version__) + print('sys.version: %s' % sys.version) + print('sys.prefix: %s' % sys.prefix) + print('pointer size: %d bit' % (8 * _sysinfo()[0])) suite = unittest.TestSuite() for cls in tests: for _ in range(repeat): diff -Nru python-bitarray-0.8.1/bitarray/test_util.py python-bitarray-1.6.3/bitarray/test_util.py --- python-bitarray-0.8.1/bitarray/test_util.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/test_util.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,839 @@ +""" +Tests for bitarray.util module +""" +from __future__ import absolute_import + +import os +import sys +import unittest +from string import hexdigits +from random import choice, randint, random +try: + from collections import Counter +except ImportError: + pass + +from bitarray import (bitarray, frozenbitarray, bits2bytes, decodetree, + get_default_endian, _set_default_endian) +from bitarray.test_bitarray import Util + +from bitarray.util import (zeros, make_endian, rindex, strip, count_n, + count_and, count_or, count_xor, subset, + ba2hex, 
hex2ba, ba2int, int2ba, huffman_code) + +if sys.version_info[0] == 3: + unicode = str + +tests = [] + +# --------------------------------------------------------------------------- + +class TestsZeros(unittest.TestCase): + + def test_1(self): + for default_endian in 'big', 'little': + _set_default_endian(default_endian) + a = zeros(0) + self.assertEqual(a, bitarray()) + self.assertEqual(a.endian(), default_endian) + + b = zeros(0, endian=None) + self.assertEqual(b.endian(), default_endian) + + for n in range(100): + a = zeros(n) + self.assertEqual(a, bitarray(n * '0')) + + for endian in 'big', 'little': + a = zeros(3, endian) + self.assertEqual(a, bitarray('000')) + self.assertEqual(a.endian(), endian) + + def test_wrong_args(self): + self.assertRaises(TypeError, zeros) # no argument + self.assertRaises(TypeError, zeros, '') + self.assertRaises(TypeError, zeros, bitarray()) + self.assertRaises(TypeError, zeros, []) + self.assertRaises(TypeError, zeros, 1.0) + self.assertRaises(ValueError, zeros, -1) + + self.assertRaises(TypeError, zeros, 0, 1) # endian not string + self.assertRaises(ValueError, zeros, 0, 'foo') # endian wrong string + +tests.append(TestsZeros) + +# --------------------------------------------------------------------------- + +class TestsMakeEndian(unittest.TestCase, Util): + + def test_simple(self): + a = bitarray('1110001', endian='big') + b = make_endian(a, 'big') + self.assertTrue(b is a) + c = make_endian(a, 'little') + self.assertTrue(c == a) + self.assertEqual(c.endian(), 'little') + + # wrong arguments + self.assertRaises(TypeError, make_endian, '', 'big') + self.assertRaises(TypeError, make_endian, bitarray(), 1) + self.assertRaises(ValueError, make_endian, bitarray(), 'foo') + + def test_empty(self): + a = bitarray(endian='little') + b = make_endian(a, 'big') + self.assertTrue(b == a) + self.assertEqual(len(b), 0) + self.assertEqual(b.endian(), 'big') + + def test_from_frozen(self): + a = frozenbitarray('1101111', 'big') + b = 
make_endian(a, 'big') + self.assertTrue(b is a) + c = make_endian(a, 'little') + self.assertTrue(c == a) + self.assertEqual(c.endian(), 'little') + + def test_random(self): + for a in self.randombitarrays(): + aa = a.copy() + for endian in 'big', 'little': + b = make_endian(a, endian) + self.assertEqual(a, b) + self.assertEqual(b.endian(), endian) + if a.endian() == endian: + self.assertTrue(b is a) + self.assertEQUAL(a, aa) + +tests.append(TestsMakeEndian) + +# --------------------------------------------------------------------------- + +class TestsRindex(unittest.TestCase, Util): + + def test_simple(self): + self.assertRaises(TypeError, rindex) + self.assertRaises(TypeError, rindex, None) + self.assertRaises(TypeError, rindex, bitarray(), 1, 2) + for endian in 'big', 'little': + a = bitarray('00010110000', endian) + self.assertEqual(rindex(a), 6) + self.assertEqual(rindex(a, 1), 6) + self.assertEqual(rindex(a, 'A'), 6) + self.assertEqual(rindex(a, True), 6) + + a = bitarray('00010110111', endian) + self.assertEqual(rindex(a, 0), 7) + self.assertEqual(rindex(a, None), 7) + self.assertEqual(rindex(a, False), 7) + + a = frozenbitarray('00010110111', endian) + self.assertEqual(rindex(a, 0), 7) + self.assertEqual(rindex(a, None), 7) + self.assertEqual(rindex(a, False), 7) + + for v in 0, 1: + self.assertRaises(ValueError, rindex, + bitarray(0, endian), v) + self.assertRaises(ValueError, rindex, + bitarray('000', endian), 1) + self.assertRaises(ValueError, rindex, + bitarray('11111', endian), 0) + + def test_random(self): + for a in self.randombitarrays(): + v = randint(0, 1) + try: + i = rindex(a, v) + except ValueError: + i = None + s = a.to01() + try: + j = s.rindex(str(v)) + except ValueError: + j = None + self.assertEqual(i, j) + + def test_3(self): + for _ in range(100): + n = randint(1, 100000) + v = randint(0, 1) + a = bitarray(n) + a.setall(1 - v) + lst = [randint(0, n - 1) for _ in range(100)] + for i in lst: + a[i] = v + self.assertEqual(rindex(a, v), 
max(lst)) + + def test_one_set(self): + for _ in range(100): + N = randint(1, 10000) + a = bitarray(N) + a.setall(0) + a[randint(0, N - 1)] = 1 + self.assertEqual(rindex(a), a.index(1)) + +tests.append(TestsRindex) + +# --------------------------------------------------------------------------- + +class TestsStrip(unittest.TestCase, Util): + + def test_simple(self): + self.assertRaises(TypeError, strip, '0110') + self.assertRaises(TypeError, strip, bitarray(), 123) + self.assertRaises(ValueError, strip, bitarray(), 'up') + for default_endian in 'big', 'little': + _set_default_endian(default_endian) + a = bitarray('00010110000') + self.assertEQUAL(strip(a), bitarray('0001011')) + self.assertEQUAL(strip(a, 'left'), bitarray('10110000')) + self.assertEQUAL(strip(a, 'both'), bitarray('1011')) + b = frozenbitarray('00010110000') + self.assertEqual(strip(b, 'both'), bitarray('1011')) + + for mode in 'left', 'right', 'both': + self.assertEqual(strip(bitarray('000'), mode), bitarray()) + self.assertEqual(strip(bitarray(), mode), bitarray()) + + def test_random(self): + for a in self.randombitarrays(): + b = a.copy() + s = a.to01() + self.assertEqual(strip(a, 'left'), bitarray(s.lstrip('0'))) + self.assertEqual(strip(a, 'right'), bitarray(s.rstrip('0'))) + self.assertEqual(strip(a, 'both'), bitarray(s.strip('0'))) + self.assertEQUAL(a, b) + + def test_one_set(self): + for _ in range(100): + N = randint(1, 10000) + a = bitarray(N) + a.setall(0) + a[randint(0, N - 1)] = 1 + self.assertEqual(strip(a, 'both'), bitarray('1')) + +tests.append(TestsStrip) + +# --------------------------------------------------------------------------- + +class TestsCount_N(unittest.TestCase, Util): + + @staticmethod + def count_n(a, n): + "return the index i for which a[:i].count() == n" + i, j = n, a.count(1, 0, n) + while j < n: + if a[i]: + j += 1 + i += 1 + return i + + def check_result(self, a, n, i): + self.assertEqual(a.count(1, 0, i), n) + if i: + self.assertTrue(a[i - 1]) + + def 
test_simple(self): + a = bitarray('111110111110111110111110011110111110111110111000') + b = a.copy() + self.assertEqual(len(a), 48) + self.assertEqual(a.count(), 37) + self.assertRaises(TypeError, count_n, '', 0) + self.assertEqual(count_n(a, 0), 0) + self.assertEqual(count_n(a, 20), 23) + self.assertEqual(count_n(a, 37), 45) + self.assertRaisesMessage(ValueError, "non-negative integer expected", + count_n, a, -1) # n < 0 + self.assertRaisesMessage(ValueError, "n larger than bitarray size", + count_n, a, 49) # n > len(a) + self.assertRaisesMessage(ValueError, "n exceeds total count", + count_n, a, 38) # n > a.count() + self.assertRaises(TypeError, count_n, a, "7") + for n in range(0, 37): + i = count_n(a, n) + self.check_result(a, n, i) + self.assertEqual(a[:i].count(), n) + self.assertEqual(i, self.count_n(a, n)) + self.assertEQUAL(a, b) + + def test_frozen(self): + a = frozenbitarray('001111101111101111101111100111100') + self.assertEqual(len(a), 33) + self.assertEqual(a.count(), 24) + self.assertRaises(TypeError, count_n, '', 0) + self.assertEqual(count_n(a, 0), 0) + self.assertEqual(count_n(a, 10), 13) + self.assertEqual(count_n(a, 24), 31) + self.assertRaises(ValueError, count_n, a, -1) # n < 0 + self.assertRaises(ValueError, count_n, a, 25) # n > a.count() + self.assertRaises(ValueError, count_n, a, 34) # n > len(a) + self.assertRaises(TypeError, count_n, a, "7") + + def test_large(self): + for N in list(range(100)) + [1000, 10000, 100000]: + a = bitarray(N) + v = randint(0, 1) + a.setall(v - 1) + for _ in range(randint(0, min(N, 100))): + a[randint(0, N - 1)] = v + n = randint(0, a.count()) + self.check_result(a, n, count_n(a, n)) + # check for total count + tc = a.count() + self.assertTrue(count_n(a, tc) <= N) + self.assertRaises(ValueError, count_n, a, tc + 1) + + def test_one_set(self): + N = 100000 + for _ in range(10): + a = bitarray(N) + a.setall(0) + self.assertEqual(count_n(a, 0), 0) + self.assertRaises(ValueError, count_n, a, 1) + i = randint(0, N - 
1) + a[i] = 1 + self.assertEqual(count_n(a, 1), i + 1) + self.assertRaises(ValueError, count_n, a, 2) + + def test_random(self): + for a in self.randombitarrays(): + n = a.count() // 2 + i = count_n(a, n) + self.check_result(a, n, i) + +tests.append(TestsCount_N) + +# --------------------------------------------------------------------------- + +class TestsBitwiseCount(unittest.TestCase, Util): + + def test_count_byte(self): + ones = bitarray(8) + ones.setall(1) + zeros = bitarray(8) + zeros.setall(0) + for i in range(0, 256): + a = bitarray() + a.frombytes(bytes(bytearray([i]))) + cnt = a.count() + self.assertEqual(count_and(a, zeros), 0) + self.assertEqual(count_and(a, ones), cnt) + self.assertEqual(count_and(a, a), cnt) + self.assertEqual(count_or(a, zeros), cnt) + self.assertEqual(count_or(a, ones), 8) + self.assertEqual(count_or(a, a), cnt) + self.assertEqual(count_xor(a, zeros), cnt) + self.assertEqual(count_xor(a, ones), 8 - cnt) + self.assertEqual(count_xor(a, a), 0) + + def test_bit_count1(self): + a = bitarray('001111') + aa = a.copy() + b = bitarray('010011') + bb = b.copy() + self.assertEqual(count_and(a, b), 2) + self.assertEqual(count_or(a, b), 5) + self.assertEqual(count_xor(a, b), 3) + for f in count_and, count_or, count_xor: + # not two arguments + self.assertRaises(TypeError, f) + self.assertRaises(TypeError, f, a) + self.assertRaises(TypeError, f, a, b, 3) + # wrong argument types + self.assertRaises(TypeError, f, a, '') + self.assertRaises(TypeError, f, '1', b) + self.assertRaises(TypeError, f, a, 4) + self.assertEQUAL(a, aa) + self.assertEQUAL(b, bb) + + b.append(1) + for f in count_and, count_or, count_xor: + self.assertRaises(ValueError, f, a, b) + self.assertRaises(ValueError, f, + bitarray('110', 'big'), + bitarray('101', 'little')) + + def test_bit_count_frozen(self): + a = frozenbitarray('001111') + b = frozenbitarray('010011') + self.assertEqual(count_and(a, b), 2) + self.assertEqual(count_or(a, b), 5) + self.assertEqual(count_xor(a, b), 
3) + + def test_bit_count2(self): + for n in list(range(50)) + [randint(1000, 2000)]: + a = bitarray() + a.frombytes(os.urandom(bits2bytes(n))) + del a[n:] + b = bitarray() + b.frombytes(os.urandom(bits2bytes(n))) + del b[n:] + self.assertEqual(count_and(a, b), (a & b).count()) + self.assertEqual(count_or(a, b), (a | b).count()) + self.assertEqual(count_xor(a, b), (a ^ b).count()) + +tests.append(TestsBitwiseCount) + +# --------------------------------------------------------------------------- + +class TestsSubset(unittest.TestCase, Util): + + def test_basic(self): + a = frozenbitarray('0101') + b = bitarray('0111') + self.assertTrue(subset(a, b)) + self.assertFalse(subset(b, a)) + self.assertRaises(TypeError, subset) + self.assertRaises(TypeError, subset, a, '') + self.assertRaises(TypeError, subset, '1', b) + self.assertRaises(TypeError, subset, a, 4) + b.append(1) + self.assertRaises(ValueError, subset, a, b) + + def subset_simple(self, a, b): + return (a & b).count() == a.count() + + def test_True(self): + for a, b in [('', ''), ('0', '1'), ('0', '0'), ('1', '1'), + ('000', '111'), ('0101', '0111'), + ('000010111', '010011111')]: + a, b = bitarray(a), bitarray(b) + self.assertTrue(subset(a, b) is True) + self.assertTrue(self.subset_simple(a, b) is True) + + def test_False(self): + for a, b in [('1', '0'), ('1101', '0111'), + ('0000101111', '0100111011')]: + a, b = bitarray(a), bitarray(b) + self.assertTrue(subset(a, b) is False) + self.assertTrue(self.subset_simple(a, b) is False) + + def test_random(self): + for a in self.randombitarrays(start=1): + b = a.copy() + # we set one random bit in b to 1, so a is always a subset of b + b[randint(0, len(a) - 1)] = 1 + self.assertTrue(subset(a, b)) + # but b in not always a subset of a + self.assertEqual(subset(b, a), self.subset_simple(b, a)) + # we set all bits in a, which ensures that b is a subset of a + a.setall(1) + self.assertTrue(subset(b, a)) + +tests.append(TestsSubset) + +# 
--------------------------------------------------------------------------- + +CODEDICT = {'little': {}, 'big': { + '0': bitarray('0000'), '1': bitarray('0001'), + '2': bitarray('0010'), '3': bitarray('0011'), + '4': bitarray('0100'), '5': bitarray('0101'), + '6': bitarray('0110'), '7': bitarray('0111'), + '8': bitarray('1000'), '9': bitarray('1001'), + 'a': bitarray('1010'), 'b': bitarray('1011'), + 'c': bitarray('1100'), 'd': bitarray('1101'), + 'e': bitarray('1110'), 'f': bitarray('1111'), +}} +for k, v in CODEDICT['big'].items(): + CODEDICT['little'][k] = v[::-1] + + +class TestsHexlify(unittest.TestCase, Util): + + def test_swap_hilo_bytes(self): + from bitarray._util import _swap_hilo_bytes + + self.assertEqual(len(_swap_hilo_bytes), 256) + for i in range(256): + byte = bytes(bytearray([i])) + a = bitarray() + a.frombytes(byte) + self.assertEqual(len(a), 8) + + b = a[4:8] + a[0:4] + self.assertEqual(b.tobytes(), + byte.translate(_swap_hilo_bytes)) + # with just _swap_hilo_bytes[i] we'd get an integer on Py3 + self.assertEqual(b.tobytes(), _swap_hilo_bytes[i:i + 1]) + + def test_ba2hex(self): + self.assertEqual(ba2hex(bitarray(0, 'big')), '') + self.assertEqual(ba2hex(bitarray('1110', 'big')), 'e') + self.assertEqual(ba2hex(bitarray('1110', 'little')), '7') + self.assertEqual(ba2hex(bitarray('00000001', 'big')), '01') + self.assertEqual(ba2hex(bitarray('10000000', 'big')), '80') + self.assertEqual(ba2hex(bitarray('00000001', 'little')), '08') + self.assertEqual(ba2hex(bitarray('10000000', 'little')), '10') + self.assertEqual(ba2hex(frozenbitarray('11000111', 'big')), 'c7') + # length not multiple of 4 + self.assertRaises(ValueError, ba2hex, bitarray('10')) + self.assertRaises(TypeError, ba2hex, '101') + + c = ba2hex(bitarray('1101', 'big')) + self.assertIsInstance(c, str) + + for n in range(7): + a = bitarray(n * '1111', 'big') + b = a.copy() + self.assertEqual(ba2hex(a), n * 'f') + # ensure original object wasn't altered + self.assertEQUAL(a, b) + + def 
test_hex2ba(self): + _set_default_endian('big') + self.assertEqual(hex2ba(''), bitarray()) + for c in 'e', 'E', b'e', b'E', unicode('e'), unicode('E'): + a = hex2ba(c) + self.assertEqual(a.to01(), '1110') + self.assertEqual(a.endian(), 'big') + self.assertEQUAL(hex2ba('01'), bitarray('00000001', 'big')) + self.assertEQUAL(hex2ba('08', 'little'), + bitarray('00000001', 'little')) + self.assertRaises(Exception, hex2ba, '01a7x89') + self.assertRaises(TypeError, hex2ba, 0) + + @staticmethod + def hex2ba(s, endian=None): + a = bitarray(0, endian or get_default_endian()) + a.encode(CODEDICT[a.endian()], s) + return a + + @staticmethod + def ba2hex(a): + return ''.join(a.iterdecode(CODEDICT[a.endian()])) + + def test_explicit(self): + data = [ # little big little big + ('', '', ''), + ('0000', '0', '0'), ('0001', '8', '1'), + ('1000', '1', '8'), ('1001', '9', '9'), + ('0100', '2', '4'), ('0101', 'a', '5'), + ('1100', '3', 'c'), ('1101', 'b', 'd'), + ('0010', '4', '2'), ('0011', 'c', '3'), + ('1010', '5', 'a'), ('1011', 'd', 'b'), + ('0110', '6', '6'), ('0111', 'e', '7'), + ('1110', '7', 'e'), ('1111', 'f', 'f'), + ('10001100', '13', '8c'), + ('100011001110', '137', '8ce'), + ('1000110011101111', '137f', '8cef'), + ('10001100111011110100', '137f2', '8cef4'), + ] + for bs, hex_le, hex_be in data: + a_be = bitarray(bs, 'big') + a_le = bitarray(bs, 'little') + self.assertEQUAL(hex2ba(hex_be, 'big'), a_be) + self.assertEQUAL(hex2ba(hex_le, 'little'), a_le) + self.assertEqual(ba2hex(a_be), hex_be) + self.assertEqual(ba2hex(a_le), hex_le) + # test simple encode / decode implementation + self.assertEQUAL(self.hex2ba(hex_be, 'big'), a_be) + self.assertEQUAL(self.hex2ba(hex_le, 'little'), a_le) + self.assertEqual(self.ba2hex(a_be), hex_be) + self.assertEqual(self.ba2hex(a_le), hex_le) + + def test_round_trip(self): + for i in range(100): + s = ''.join(choice(hexdigits) for _ in range(randint(0, 1000))) + for default_endian in 'big', 'little': + _set_default_endian(default_endian) + 
a = hex2ba(s) + self.assertEqual(len(a) % 4, 0) + self.assertEqual(a.endian(), default_endian) + t = ba2hex(a) + self.assertEqual(t, s.lower()) + b = hex2ba(t, default_endian) + self.assertEQUAL(a, b) + # test simple encode / decode implementation + self.assertEQUAL(a, self.hex2ba(t)) + self.assertEqual(t, self.ba2hex(a)) + + +tests.append(TestsHexlify) + +# --------------------------------------------------------------------------- + +class TestsIntegerization(unittest.TestCase, Util): + + def test_ba2int(self): + self.assertEqual(ba2int(bitarray('0')), 0) + self.assertEqual(ba2int(bitarray('1')), 1) + self.assertEqual(ba2int(bitarray('00101', 'big')), 5) + self.assertEqual(ba2int(bitarray('00101', 'little')), 20) + self.assertEqual(ba2int(frozenbitarray('11')), 3) + self.assertRaises(ValueError, ba2int, bitarray()) + self.assertRaises(ValueError, ba2int, frozenbitarray()) + self.assertRaises(TypeError, ba2int, '101') + a = bitarray('111') + b = a.copy() + self.assertEqual(ba2int(a), 7) + # ensure original object wasn't altered + self.assertEQUAL(a, b) + + def test_int2ba(self): + self.assertEqual(int2ba(0), bitarray('0')) + self.assertEqual(int2ba(1), bitarray('1')) + self.assertEqual(int2ba(5), bitarray('101')) + self.assertEQUAL(int2ba(6, endian='big'), bitarray('110', 'big')) + self.assertEQUAL(int2ba(6, endian='little'), + bitarray('011', 'little')) + self.assertRaises(TypeError, int2ba, 1.0) + self.assertRaises(TypeError, int2ba, 1, 3.0) + self.assertRaises(ValueError, int2ba, 1, 0) + self.assertRaises(TypeError, int2ba, 1, 10, 123) + self.assertRaises(ValueError, int2ba, 1, 10, 'asd') + # signed integer requires length + self.assertRaises(TypeError, int2ba, 100, signed=True) + + def test_signed(self): + for s, i in [ + ('0', 0), + ('1', -1), + ('00', 0), + ('10', 1), + ('01', -2), + ('11', -1), + ('000', 0), + ('100', 1), + ('010', 2), + ('110', 3), + ('001', -4), + ('101', -3), + ('011', -2), + ('111', -1), + ('00000', 0), + ('11110', 15), + ('00001', 
-16), + ('11111', -1), + ('000000000', 0), + ('111111110', 255), + ('000000001', -256), + ('111111111', -1), + ('0000000000000000000000', 0), + ('1001000011000000100010', 9 + 3 * 256 + 17 * 2 ** 16), + ('1111111111111111111110', 2 ** 21 - 1), + ('0000000000000000000001', -2 ** 21), + ('1001000011000000100011', -2 ** 21 + + (9 + 3 * 256 + 17 * 2 ** 16)), + ('1111111111111111111111', -1), + ]: + self.assertEqual(ba2int(bitarray(s, 'little'), signed=1), i) + self.assertEqual(ba2int(bitarray(s[::-1], 'big'), signed=1), i) + + self.assertEQUAL(int2ba(i, len(s), 'little', signed=1), + bitarray(s, 'little')) + self.assertEQUAL(int2ba(i, len(s), 'big', signed=1), + bitarray(s[::-1], 'big')) + + def test_int2ba_overflow(self): + self.assertRaises(OverflowError, int2ba, -1) + self.assertRaises(OverflowError, int2ba, -1, 4) + + self.assertRaises(OverflowError, int2ba, 128, 7) + self.assertRaises(OverflowError, int2ba, 64, 7, signed=1) + self.assertRaises(OverflowError, int2ba, -65, 7, signed=1) + + for n in range(1, 20): + self.assertRaises(OverflowError, int2ba, 2 ** n, n) + self.assertRaises(OverflowError, int2ba, 2 ** (n - 1), n, + signed=1) + self.assertRaises(OverflowError, int2ba, -2 ** (n - 1) - 1, n, + signed=1) + + def test_int2ba_length(self): + self.assertRaises(TypeError, int2ba, 0, 1.0) + self.assertRaises(ValueError, int2ba, 0, 0) + self.assertEqual(int2ba(5, length=6, endian='big'), + bitarray('000101')) + for n in range(1, 100): + ab = int2ba(1, n, 'big') + al = int2ba(1, n, 'little') + self.assertEqual(ab.endian(), 'big') + self.assertEqual(al.endian(), 'little') + self.assertEqual(len(ab), n), + self.assertEqual(len(al), n) + self.assertEqual(ab, bitarray((n - 1) * '0') + bitarray('1')) + self.assertEqual(al, bitarray('1') + bitarray((n - 1) * '0')) + + ab = int2ba(0, n, 'big') + al = int2ba(0, n, 'little') + self.assertEqual(len(ab), n) + self.assertEqual(len(al), n) + self.assertEqual(ab, bitarray(n * '0', 'big')) + self.assertEqual(al, bitarray(n * '0', 
'little')) + + self.assertEqual(int2ba(2 ** n - 1), bitarray(n * '1')) + self.assertEqual(int2ba(2 ** n - 1, endian='little'), + bitarray(n * '1')) + for endian in 'big', 'little': + self.assertEqual(int2ba(-1, n, endian, signed=True), + bitarray(n * '1')) + + def test_explicit(self): + _set_default_endian('big') + for i, sa in [( 0, '0'), (1, '1'), + ( 2, '10'), (3, '11'), + (25, '11001'), (265, '100001001'), + (3691038, '1110000101001000011110')]: + ab = bitarray(sa, 'big') + al = bitarray(sa[::-1], 'little') + self.assertEQUAL(int2ba(i), ab) + self.assertEQUAL(int2ba(i, endian='big'), ab) + self.assertEQUAL(int2ba(i, endian='little'), al) + self.assertEqual(ba2int(ab), ba2int(al), i) + + def check_round_trip(self, i): + for endian in 'big', 'little': + a = int2ba(i, endian=endian) + self.assertEqual(a.endian(), endian) + self.assertTrue(len(a) > 0) + # ensure we have no leading zeros + if a.endian == 'big': + self.assertTrue(len(a) == 1 or a.index(1) == 0) + self.assertEqual(ba2int(a), i) + if i > 0: + self.assertEqual(i.bit_length(), len(a)) + # add a few trailing / leading zeros to bitarray + if endian == 'big': + a = zeros(randint(0, 3), endian) + a + else: + a = a + zeros(randint(0, 3), endian) + self.assertEqual(a.endian(), endian) + self.assertEqual(ba2int(a), i) + + def test_many(self): + for i in range(100): + self.check_round_trip(i) + self.check_round_trip(randint(0, 10 ** randint(3, 300))) + + @staticmethod + def twos_complement(i, num_bits): + # https://en.wikipedia.org/wiki/Two%27s_complement + mask = 2 ** (num_bits - 1) + return -(i & mask) + (i & ~mask) + + def test_random_signed(self): + for a in self.randombitarrays(start=1): + i = ba2int(a, signed=True) + b = int2ba(i, len(a), a.endian(), signed=True) + self.assertEQUAL(a, b) + + j = ba2int(a, signed=False) # unsigned + if i >= 0: + self.assertEqual(i, j) + + self.assertEqual(i, self.twos_complement(j, len(a))) + + +tests.append(TestsIntegerization) + +# 
--------------------------------------------------------------------------- + +class TestsHuffman(unittest.TestCase): + + def test_simple(self): + freq = {0: 10, 'as': 2, None: 1.6} + code = huffman_code(freq) + self.assertEqual(len(code), 3) + self.assertEqual(len(code[0]), 1) + self.assertEqual(len(code['as']), 2) + self.assertEqual(len(code[None]), 2) + + def test_tiny(self): + code = huffman_code({0: 0}) + self.assertEqual(len(code), 1) + self.assertEqual(code, {0: bitarray()}) + + code = huffman_code({0: 0, 1: 0}) + self.assertEqual(len(code), 2) + for i in range(2): + self.assertEqual(len(code[i]), 1) + + def test_endianness(self): + freq = {'A': 10, 'B': 2, 'C': 5} + for endian in 'big', 'little': + code = huffman_code(freq, endian) + self.assertEqual(len(code), 3) + for v in code.values(): + self.assertEqual(v.endian(), endian) + + def test_wrong_arg(self): + self.assertRaises(TypeError, huffman_code, [('a', 1)]) + self.assertRaises(TypeError, huffman_code, 123) + self.assertRaises(TypeError, huffman_code, None) + # cannot compare 'a' with 1 + self.assertRaises(TypeError, huffman_code, {'A': 'a', 'B': 1}) + self.assertRaises(ValueError, huffman_code, {}) + + def check_tree(self, code): + n = len(code) + tree = decodetree(code) + self.assertEqual(tree.todict(), code) + # ensure tree has 2n-1 nodes (n symbol nodes and n-1 internal nodes) + self.assertEqual(tree.nodes(), 2 * n - 1) + + def test_balanced(self): + n = 6 + freq = {} + for i in range(2 ** n): + freq[i] = 1 + code = huffman_code(freq) + self.assertEqual(len(code), 2 ** n) + self.assertTrue(all(len(v) == n for v in code.values())) + self.check_tree(code) + + def test_unbalanced(self): + N = 27 + freq = {} + for i in range(N): + freq[i] = 2 ** i + code = huffman_code(freq) + self.assertEqual(len(code), N) + for i in range(N): + self.assertEqual(len(code[i]), N - (1 if i <= 1 else i)) + self.check_tree(code) + + def test_counter(self): + message = 'the quick brown fox jumps over the lazy dog.' 
+ code = huffman_code(Counter(message)) + a = bitarray() + a.encode(code, message) + self.assertEqual(''.join(a.decode(code)), message) + self.check_tree(code) + + def test_random_list(self): + plain = [randint(0, 100) for _ in range(500)] + code = huffman_code(Counter(plain)) + a = bitarray() + a.encode(code, plain) + self.assertEqual(a.decode(code), plain) + self.check_tree(code) + + def test_random_freq(self): + N = randint(2, 1000) + # create Huffman code for N symbols + code = huffman_code({i: random() for i in range(N)}) + self.check_tree(code) + +tests.append(TestsHuffman) + +# --------------------------------------------------------------------------- + +def run(verbosity=1): + import os + import bitarray + + print('bitarray is installed in: %s' % os.path.dirname(bitarray.__file__)) + print('bitarray version: %s' % bitarray.__version__) + print('Python version: %s' % sys.version) + + suite = unittest.TestSuite() + for cls in tests: + suite.addTest(unittest.makeSuite(cls)) + + runner = unittest.TextTestRunner(verbosity=verbosity) + return runner.run(suite) + + +if __name__ == '__main__': + run() diff -Nru python-bitarray-0.8.1/bitarray/_util.c python-bitarray-1.6.3/bitarray/_util.c --- python-bitarray-0.8.1/bitarray/_util.c 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/_util.c 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,360 @@ +/* + Copyright (c) 2019 - 2021, Ilan Schnell + bitarray is published under the PSF license. + + This file contains the C implementation of some useful utility functions. + + Author: Ilan Schnell +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "bitarray.h" + +/* set using the Python module function _set_bato() */ +static PyObject *bitarray_type_obj = NULL; + +/* Return 0 if obj is bitarray. If not, returns -1 and sets an exception. 
*/ +static int +ensure_bitarray(PyObject *obj) +{ + int t; + + if (bitarray_type_obj == NULL) + Py_FatalError("bitarray_type_obj missing"); + t = PyObject_IsInstance(obj, bitarray_type_obj); + if (t < 0) + return -1; + if (t == 0) { + PyErr_SetString(PyExc_TypeError, "bitarray expected"); + return -1; + } + return 0; +} + +/************* start of actual functionality in this module ***************/ + +/* return the smallest index i for which a.count(1, 0, i) == n, or when + n exceeds the total count return -1 */ +static Py_ssize_t +count_to_n(bitarrayobject *a, Py_ssize_t n) +{ + Py_ssize_t i = 0; /* index */ + Py_ssize_t j = 0; /* total count up to index */ + Py_ssize_t block_start, block_stop, k, m; + unsigned char c; + + if (n == 0) + return 0; + +#define BLOCK_BITS 8192 + /* by counting big blocks we save comparisons */ + while (i + BLOCK_BITS < a->nbits) { + m = 0; + assert(i % 8 == 0); + block_start = i / 8; + block_stop = block_start + (BLOCK_BITS / 8); + for (k = block_start; k < block_stop; k++) { + assert(k < Py_SIZE(a)); + c = a->ob_item[k]; + m += bitcount_lookup[c]; + } + if (j + m >= n) + break; + j += m; + i += BLOCK_BITS; + } +#undef BLOCK_BITS + + while (i + 8 < a->nbits) { + k = i / 8; + assert(k < Py_SIZE(a)); + c = a->ob_item[k]; + m = bitcount_lookup[c]; + if (j + m >= n) + break; + j += m; + i += 8; + } + + while (j < n && i < a->nbits ) { + j += GETBIT(a, i); + i++; + } + if (j < n) + return -1; + + return i; +} + +/* return index of last occurrence of vi, or -1 when vi is not found. */ +static Py_ssize_t +find_last(bitarrayobject *a, int vi) +{ + Py_ssize_t i, j; + char c; + + if (a->nbits == 0) + return -1; + + /* search within top byte */ + for (i = a->nbits - 1; i >= BITS(a->nbits / 8); i--) + if (GETBIT(a, i) == vi) + return i; + + if (i < 0) /* not found within top byte */ + return -1; + assert((i + 1) % 8 == 0); + + /* searching for 1 means: break when byte is not 0x00 + searching for 0 means: break when byte is not 0xff */ + c = vi ?
0x00 : 0xff; + + /* skip ahead by checking whole bytes */ + for (j = BYTES(i) - 1; j >= 0; j--) + if (c ^ a->ob_item[j]) + break; + + if (j < 0) /* not found within bytes */ + return -1; + + /* search within byte found */ + for (i = BITS(j + 1) - 1; i >= BITS(j); i--) + if (GETBIT(a, i) == vi) + return i; + + return -1; +} + +/* translation table which swaps the 4 highest with the 4 lowest bits in + each byte - to be used as argument of bytes.translate() */ +static PyObject * +make_swap_hilo_bytes(void) +{ + char bytes[256]; + int i; + + for (i = 0; i < 256; i++) + bytes[i] = (char) (((i & 0x0f) << 4) ^ (i >> 4)); + + return PyBytes_FromStringAndSize(bytes, 256); +} + +/****************************** Module functions **************************/ + +static PyObject * +count_n(PyObject *module, PyObject *args) +{ + PyObject *a; + Py_ssize_t n, i; + + if (!PyArg_ParseTuple(args, "On:count_n", &a, &n)) + return NULL; + + if (ensure_bitarray(a) < 0) + return NULL; + + if (n < 0) { + PyErr_SetString(PyExc_ValueError, "non-negative integer expected"); + return NULL; + } +#define aa ((bitarrayobject *) a) + if (n > aa->nbits) { + PyErr_SetString(PyExc_ValueError, "n larger than bitarray size"); + return NULL; + } + i = count_to_n(aa, n); /* do actual work here */ +#undef aa + if (i < 0) { + PyErr_SetString(PyExc_ValueError, "n exceeds total count"); + return NULL; + } + return PyLong_FromSsize_t(i); +} + +PyDoc_STRVAR(count_n_doc, +"count_n(a, n, /) -> int\n\ +\n\ +Find the smallest index `i` for which `a[:i].count() == n`.\n\ +Raises `ValueError`, when n exceeds total count (`a.count()`)."); + + +static PyObject * +r_index(PyObject *module, PyObject *args) +{ + PyObject *x = Py_True, *a; + Py_ssize_t i; + int vi; + + if (!PyArg_ParseTuple(args, "O|O:rindex", &a, &x)) + return NULL; + + if (ensure_bitarray(a) < 0) + return NULL; + + vi = PyObject_IsTrue(x); + if (vi < 0) + return NULL; + + i = find_last((bitarrayobject *) a, vi); + if (i < 0) { + 
PyErr_Format(PyExc_ValueError, "%d not in bitarray", vi); + return NULL; + } + return PyLong_FromSsize_t(i); +} + +PyDoc_STRVAR(rindex_doc, +"rindex(bitarray, value=True, /) -> int\n\ +\n\ +Return the rightmost index of `bool(value)` in bitarray.\n\ +Raises `ValueError` if the value is not present."); + + +enum kernel_type { + KERN_cand, /* count bitwise and -> int */ + KERN_cor, /* count bitwise or -> int */ + KERN_cxor, /* count bitwise xor -> int */ + KERN_subset, /* is subset -> bool */ +}; + +static PyObject * +two_bitarray_func(PyObject *args, enum kernel_type kern, char *format) +{ + Py_ssize_t res = 0, nbytes, i; + PyObject *a, *b; + unsigned char c; + + if (!PyArg_ParseTuple(args, format, &a, &b)) + return NULL; + if (ensure_bitarray(a) < 0 || ensure_bitarray(b) < 0) + return NULL; + +#define aa ((bitarrayobject *) a) +#define bb ((bitarrayobject *) b) + if (aa->nbits != bb->nbits || aa->endian != bb->endian) { + PyErr_SetString(PyExc_ValueError, + "bitarrays of equal length and endianness expected"); + return NULL; + } + setunused(aa); + setunused(bb); + assert(Py_SIZE(a) == Py_SIZE(b)); + nbytes = Py_SIZE(a); + + switch (kern) { + case KERN_cand: + for (i = 0; i < nbytes; i++) { + c = aa->ob_item[i] & bb->ob_item[i]; + res += bitcount_lookup[c]; + } + break; + case KERN_cor: + for (i = 0; i < nbytes; i++) { + c = aa->ob_item[i] | bb->ob_item[i]; + res += bitcount_lookup[c]; + } + break; + case KERN_cxor: + for (i = 0; i < nbytes; i++) { + c = aa->ob_item[i] ^ bb->ob_item[i]; + res += bitcount_lookup[c]; + } + break; + case KERN_subset: + for (i = 0; i < nbytes; i++) { + if ((aa->ob_item[i] & bb->ob_item[i]) != aa->ob_item[i]) + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; + default: /* cannot happen */ + return NULL; + } +#undef aa +#undef bb + return PyLong_FromSsize_t(res); +} + +#define COUNT_FUNC(oper, ochar) \ +static PyObject * \ +count_ ## oper (bitarrayobject *module, PyObject *args) \ +{ \ + return two_bitarray_func(args, KERN_c ## oper, "OO:count_" 
#oper); \ +} \ +PyDoc_STRVAR(count_ ## oper ## _doc, \ +"count_" #oper "(a, b, /) -> int\n\ +\n\ +Returns `(a " ochar " b).count()`, but is more memory efficient,\n\ +as no intermediate bitarray object gets created.") + +COUNT_FUNC(and, "&"); +COUNT_FUNC(or, "|"); +COUNT_FUNC(xor, "^"); + + +static PyObject * +subset(PyObject *module, PyObject *args) +{ + return two_bitarray_func(args, KERN_subset, "OO:subset"); +} + +PyDoc_STRVAR(subset_doc, +"subset(a, b, /) -> bool\n\ +\n\ +Return True if bitarray `a` is a subset of bitarray `b` (False otherwise).\n\ +`subset(a, b)` is equivalent to `(a & b).count() == a.count()` but is more\n\ +efficient since we can stop as soon as one mismatch is found, and no\n\ +intermediate bitarray object gets created."); + + +/* set bitarray_type_obj (bato) */ +static PyObject * +set_bato(PyObject *module, PyObject *obj) +{ + bitarray_type_obj = obj; + Py_RETURN_NONE; +} + +static PyMethodDef module_functions[] = { + {"count_n", (PyCFunction) count_n, METH_VARARGS, count_n_doc}, + {"rindex", (PyCFunction) r_index, METH_VARARGS, rindex_doc}, + {"count_and", (PyCFunction) count_and, METH_VARARGS, count_and_doc}, + {"count_or", (PyCFunction) count_or, METH_VARARGS, count_or_doc}, + {"count_xor", (PyCFunction) count_xor, METH_VARARGS, count_xor_doc}, + {"subset", (PyCFunction) subset, METH_VARARGS, subset_doc}, + {"_set_bato", (PyCFunction) set_bato, METH_O, }, + {NULL, NULL} /* sentinel */ +}; + +/******************************* Install Module ***************************/ + +#ifdef IS_PY3K +static PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, "_util", 0, -1, module_functions, +}; +#endif + +PyMODINIT_FUNC +#ifdef IS_PY3K +PyInit__util(void) +#else +init_util(void) +#endif +{ + PyObject *m; + +#ifdef IS_PY3K + m = PyModule_Create(&moduledef); + if (m == NULL) + return NULL; +#else + m = Py_InitModule3("_util", module_functions, 0); + if (m == NULL) + return; +#endif + + PyModule_AddObject(m, "_swap_hilo_bytes", make_swap_hilo_bytes()); 
+#ifdef IS_PY3K + return m; +#endif +} diff -Nru python-bitarray-0.8.1/bitarray/util.py python-bitarray-1.6.3/bitarray/util.py --- python-bitarray-0.8.1/bitarray/util.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/bitarray/util.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,302 @@ +# Copyright (c) 2019 - 2020, Ilan Schnell +# bitarray is published under the PSF license. +# +# Author: Ilan Schnell +""" +Useful utilities for working with bitarrays. +""" +from __future__ import absolute_import + +import sys +import binascii + +from bitarray import bitarray, bits2bytes, get_default_endian + +from bitarray._util import (count_n, rindex, + count_and, count_or, count_xor, subset, + _swap_hilo_bytes, _set_bato) + + +__all__ = ['zeros', 'make_endian', 'rindex', 'strip', 'count_n', + 'count_and', 'count_or', 'count_xor', 'subset', + 'ba2hex', 'hex2ba', 'ba2int', 'int2ba', 'huffman_code'] + + +# tell the _util extension what the bitarray type object is, such that it +# can check for instances thereof +_set_bato(bitarray) + +_is_py2 = bool(sys.version_info[0] == 2) + + +def zeros(length, endian=None): + """zeros(length, /, endian=None) -> bitarray + +Create a bitarray of length, with all values 0, and optional +endianness, which may be 'big', 'little'. +""" + if not isinstance(length, (int, long) if _is_py2 else int): + raise TypeError("integer expected") + + a = bitarray(length, endian or get_default_endian()) + a.setall(0) + return a + + +def make_endian(a, endian): + """make_endian(bitarray, endian, /) -> bitarray + +When the endianness of the given bitarray is different from `endian`, +return a new bitarray, with endianness `endian` and the same elements +as the original bitarray, i.e. even though the binary representation of the +new bitarray will be different, the returned bitarray will equal the original +one. +Otherwise (endianness is already `endian`) the original bitarray is returned +unchanged. 
+""" + if not isinstance(a, bitarray): + raise TypeError("bitarray expected") + + if a.endian() == endian: + return a + + b = bitarray(a, endian) + b.bytereverse() + if len(a) % 8: + # copy last few bits directly + p = 8 * (bits2bytes(len(a)) - 1) + b[p:] = a[p:] + return b + + +def strip(a, mode='right'): + """strip(bitarray, mode='right', /) -> bitarray + +Strip zeros from left, right or both ends. +Allowed values for mode are the strings: `left`, `right`, `both` +""" + if not isinstance(a, bitarray): + raise TypeError("bitarray expected") + if not isinstance(mode, str): + raise TypeError("string expected for mode") + if mode not in ('left', 'right', 'both'): + raise ValueError("allowed values 'left', 'right', 'both', got: %r" % + mode) + first = 0 + if mode in ('left', 'both'): + try: + first = a.index(1) + except ValueError: + return bitarray(0, a.endian()) + + last = len(a) - 1 + if mode in ('right', 'both'): + try: + last = rindex(a) + except ValueError: + return bitarray(0, a.endian()) + + return a[first:last + 1] + + +def ba2hex(a): + """ba2hex(bitarray, /) -> hexstr + +Return a string containing the hexadecimal representation of +the bitarray (which has to be multiple of 4 in length). +""" + if not isinstance(a, bitarray): + raise TypeError("bitarray expected") + + if len(a) % 4: + raise ValueError("bitarray length not multiple of 4") + + b = a.tobytes() + if a.endian() == 'little': + b = b.translate(_swap_hilo_bytes) + + s = binascii.hexlify(b) + if len(a) % 8: + s = s[:-1] + return s if _is_py2 else s.decode() + + +def hex2ba(s, endian=None): + """hex2ba(hexstr, /, endian=None) -> bitarray + +Bitarray of hexadecimal representation. +hexstr may contain any number of hex digits (upper or lower case).
+""" + if not isinstance(s, (str, unicode if _is_py2 else bytes)): + raise TypeError("string expected, got: %r" % s) + + strlen = len(s) + if strlen % 2: + s = s + ('0' if isinstance(s, str) else b'0') + + a = bitarray(0, endian or get_default_endian()) + b = binascii.unhexlify(s) + if a.endian() == 'little': + b = b.translate(_swap_hilo_bytes) + a.frombytes(b) + + if strlen % 2: + del a[-4:] + return a + + +def ba2int(a, signed=False): + """ba2int(bitarray, /, signed=False) -> int + +Convert the given bitarray into an integer. +The bit-endianness of the bitarray is respected. +`signed` indicates whether two's complement is used to represent the integer. +""" + if not isinstance(a, bitarray): + raise TypeError("bitarray expected") + length = len(a) + if length == 0: + raise ValueError("non-empty bitarray expected") + + big_endian = bool(a.endian() == 'big') + # for big endian pad leading zeros - for little endian we don't need to + # pad trailing zeros, as .tobytes() will treat them as zero + if big_endian and length % 8: + a = zeros(8 - length % 8, 'big') + a + b = a.tobytes() + + if _is_py2: + c = bytearray(b) + res = 0 + j = len(c) - 1 if big_endian else 0 + for x in c: + res |= x << 8 * j + j += -1 if big_endian else 1 + else: # py3 + res = int.from_bytes(b, byteorder=a.endian()) + + if signed and res >= 1 << (length - 1): + res -= 1 << length + return res + + +def int2ba(i, length=None, endian=None, signed=False): + """int2ba(int, /, length=None, endian=None, signed=False) -> bitarray + +Convert the given integer to a bitarray (with given endianness, +and no leading (big-endian) / trailing (little-endian) zeros), unless +the `length` of the bitarray is provided. An `OverflowError` is raised +if the integer is not representable with the given number of bits. +`signed` determines whether two's complement is used to represent the integer, +and requires `length` to be provided. +If signed is False and a negative integer is given, an OverflowError +is raised. 
+""" + if not isinstance(i, (int, long) if _is_py2 else int): + raise TypeError("integer expected") + if length is not None: + if not isinstance(length, int): + raise TypeError("integer expected for length") + if length <= 0: + raise ValueError("integer larger than 0 expected for length") + if signed and length is None: + raise TypeError("signed requires length") + + if i == 0: + # there are special cases for 0 which we'd rather not deal with below + return zeros(length or 1, endian) + + if signed: + if i >= 1 << (length - 1) or i < -(1 << (length - 1)): + raise OverflowError("signed integer out of range") + if i < 0: + i += 1 << length + elif i < 0 or (length and i >= 1 << length): + raise OverflowError("unsigned integer out of range") + + a = bitarray(0, endian or get_default_endian()) + big_endian = bool(a.endian() == 'big') + if _is_py2: + c = bytearray() + while i: + i, r = divmod(i, 256) + c.append(r) + if big_endian: + c.reverse() + b = bytes(c) + else: # py3 + b = i.to_bytes(bits2bytes(i.bit_length()), byteorder=a.endian()) + + a.frombytes(b) + if length is None: + return strip(a, 'left' if big_endian else 'right') + + la = len(a) + if la > length: + a = a[-length:] if big_endian else a[:length] + if la < length: + pad = zeros(length - la, endian) + a = pad + a if big_endian else a + pad + assert len(a) == length + return a + + +def huffman_code(freq_map, endian=None): + """huffman_code(dict, /, endian=None) -> dict + +Given a frequency map, a dictionary mapping symbols to their frequency, +calculate the Huffman code, i.e. a dict mapping those symbols to +bitarrays (with given endianness). Note that the symbols may be any +hashable object (including `None`). 
+""" + import heapq + + if not isinstance(freq_map, dict): + raise TypeError("dict expected") + if len(freq_map) == 0: + raise ValueError("non-empty dict expected") + + class Node(object): + # a Node object will have either .symbol or .child set below, + # .freq will always be set + def __lt__(self, other): + # heapq needs to be able to compare the nodes + return self.freq < other.freq + + def huff_tree(freq_map): + # given a dictionary mapping symbols to their frequency, + # construct a Huffman tree and return its root node + + minheap = [] + # create all the leaf nodes and push them onto the queue + for sym, f in freq_map.items(): + nd = Node() + nd.symbol = sym + nd.freq = f + heapq.heappush(minheap, nd) + + # repeat the process until only one node remains + while len(minheap) > 1: + # take the nodes with smallest frequencies from the queue + child_0 = heapq.heappop(minheap) + child_1 = heapq.heappop(minheap) + # construct the new internal node and push it onto the queue + parent = Node() + parent.child = [child_0, child_1] + parent.freq = child_0.freq + child_1.freq + heapq.heappush(minheap, parent) + + # the single remaining node is the root of the Huffman tree + return minheap[0] + + result = {} + + def traverse(nd, prefix=bitarray(0, endian or get_default_endian())): + if hasattr(nd, 'symbol'): # leaf + result[nd.symbol] = prefix + else: # parent, so traverse each of the children + traverse(nd.child[0], prefix + bitarray([0])) + traverse(nd.child[1], prefix + bitarray([1])) + + traverse(huff_tree(freq_map)) + return result diff -Nru python-bitarray-0.8.1/CHANGE_LOG python-bitarray-1.6.3/CHANGE_LOG --- python-bitarray-0.8.1/CHANGE_LOG 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/CHANGE_LOG 2021-01-21 00:52:57.000000000 +0000 @@ -1,3 +1,233 @@ +2021-01-20 1.6.3: +------------------- + * add missing .h files to sdist tarball, #113 + + +2021-01-20 1.6.2: +------------------- + * use `Py_SET_TYPE()` and `Py_SET_SIZE()` for Python 3.10, #109 + *
add official Python 3.10 support + * fix slice assignment to same object, e.g. a[2::] = a or a[::-1] = a, #112 + * add bitarray.h, #110 + + +2020-11-05 1.6.1: +------------------- + * use PyType_Ready for all types: bitarray, bitarrayiterator, + decodeiterator, decodetree, searchiterator + + +2020-10-17 1.6.0: +------------------- + * add `decodetree` object, for speeding up consecutive calls + to `.decode()` and `.iterdecode()`, in particular when dealing + with large prefix codes, see #103 + * add optional parameter to `.tolist()` which changes the items in the + returned list to integers (0 or 1), as opposed to Booleans + * remove deprecated `bitdiff()`, which has been deprecated since version + 1.2.0, use `bitarray.util.count_xor()` instead + * drop Python 2.6 support + * update license file, #104 + + +2020-08-24 1.5.3: +------------------- + * add optional index parameter to `.invert()` to invert single bit + * fix `sys.getsizeof(bitarray)` by adding `.__sizeof__()`, see issue #100 + + +2020-08-16 1.5.2: +------------------- + * add PyType_Ready usage, issue #66 + * speedup search() for bitarrays with length 1 in sparse bitarrays, + see issue #67 + * add tests + + +2020-08-10 1.5.1: +------------------- + * support signed integers in `util.ba2int()` and `util.int2ba()`, + see issue #85 + * deprecate `.length()` in favor of `len()` + + +2020-08-05 1.5.0: +------------------- + * Use `Py_ssize_t` for bitarray index. This means that on 32bit + systems, the maximum number of elements in a bitarray is 2 GBits. + We used to have a special 64bit index type for all architectures, but + this prevented us from using Python's sequence, mapping and number + methods, and made those method lookups slow. + * speedup slice operations when step size = 1 (if alignment allows + copying whole bytes) + * Require equal endianness for operations: `&`, `|`, `^`, `&=`, `|=`, `^=`. + This should have always been the case but was overlooked in the past.
+ * raise TypeError when trying to create bitarray from boolean + * This will be the last release to still support Python 2.6 (which was retired + in 2013). We do NOT plan to stop support for Python 2.7 anytime soon. + + +2020-07-15 1.4.2: +------------------- + * add more tests + * C-level: + - simplify pack/unpack code + - fix memory leak in `~` operation (bitarray_cpinvert) + + +2020-07-14 1.4.1: +------------------- + * add official Python 3.9 support + * improve many docstrings + * add DeprecationWarning for `bitdiff()` + * add DeprecationWarning when trying to extend bitarrays + from bytes on Python 3 (`bitarray(b'011')` and `.extend(b'110')`) + * C-level: + - Rewrote `.fromfile()` and `.tofile()` implementation, + such that now the same code is used for Python 2 and 3. + The new implementation is more memory efficient on + Python 3. + - use memcmp() in richcompare to shortcut EQ/NE, when + comparing two very large bitarrays for equality the + speedup can easily be 100x + - simplify how unpacking is handled + * add more tests + + +2020-07-11 1.4.0: +------------------- + * add `.clear()` method (Python 3.3 added this method to lists) + * avoid overallocation when bitarray objects are initially created + * raise BufferError when resizing bitarrays which are exporting buffers + * add example to study the resize() function + * improve some error messages + * add more tests + * raise `NotImplementedError` (with useful message) when trying to call + the `.fromstring()` or `.tostring()` methods, which have been removed + in the last release + + +2020-07-06 1.3.0: +------------------- + * add `bitarray.util.make_endian()` + * `util.ba2hex()` and `util.hex2ba()` now also support little-endian + * add `bitarray.get_default_endian()` + * made first argument of initializer a positional-only parameter + * remove `.fromstring()` and `.tostring()` methods, these have been + deprecated 8 years ago, since version 0.4.0 + * add `__all__` in `bitarray/__init__.py` + * drop Python
3.3 and 3.4 support + + +2020-05-18 1.2.2: +------------------- + * `util.ba2hex` now always returns a string object (instead of bytes object + for Python 3), see issue #94 + * `util.hex2ba` allows a unicode object as input on Python 2 + * Determine 64-bitness of interpreter in a cross-platform fashion #91, + in order to better support PyPy + + +2020-01-06 1.2.1: +------------------- + * simplify markdown of readme so PyPI renders better + * make tests for bitarray.util required (instead of warning when + they cannot be imported) + + +2019-12-06 1.2.0: +------------------- + * add bitarray.util module which provides useful utility functions + * deprecate `bitarray.bitdiff` in favor of `bitarray.util.count_xor` + * use markdown for documentation + * fix bug in .count() on 32bit systems in special cases when array size + is 2^29 bits or larger + * simplified tests by using bytes syntax + * update smallints and sieve example to use new utility module + * simplified mandel example to use numba + * use file context managers in tests + + +2019-11-07 1.1.0: +------------------- + * add frozenbitarray object + * add optional start and stop parameters to .count() method + * add official Python 3.8 support + * optimize setrange() C-function by using memset + * fix issue #74, bitarray is hashable on Python 2 + * fix issue #68, `unittest.TestCase.assert_` deprecated + * improved test suite - tests should run in about 1 second + * update documentation to use positional-only syntax in docstrings + * update readme to pass Python 3 doctest + * add utils module to examples + + +2019-07-19 1.0.1: +------------------- + * fix readme to pass `twine check` + + +2019-07-15 1.0.0: +------------------- + * fix bitarrays being created from unicode in Python 2 + * use `PyBytes_*` in C code, treating the Py3k function names as default, + which also removes all redefinitions of `PyString_*` + * handle negative arguments of .index() method consistently with how + they are treated for lists + *
add a few more comments to the C code + * move imports outside tests: pickle, io, etc. + * drop Python 2.5 support + + +2019-05-20 0.9.3: +------------------- + * refactor resize() - only shrink allocated memory if new size falls + lower than half the allocated size + * improve error message when trying to initialize from float or complex + + +2019-04-29 0.9.2: +------------------- + * fix to compile on Windows with VS 2015, issue #72 + + +2019-04-28 0.9.1: +------------------- + * fix types to actually be types, #29 + * check for ambiguous prefix codes when building binary tree for decoding + * remove Python level methods: encode, decode, iterdecode (in favor of + having these implemented on the C-level along with check_codedict) + * fix self tests for Python 2.5 and 2.6 + * move all Huffman code related example code into examples/huffman + * add code to generate graphviz .dot file of Huffman tree to examples + + +2019-04-22 0.9.0: +------------------- + * more efficient decode and iterdecode by using C-level binary tree + instead of a python one, #54 + * added buffer protocol support for Python 3, #55 + * fixed invalid pointer exceptions in pypy, #47 + * made all examples Py3k compatible + * add gene sequence example + * add official Python 3.7 support + * drop Python 2.4, 3.1 and 3.2 support + + +2018-07-06 0.8.3: +------------------- + * add exception to setup.py when README.rst cannot be opened + + +2018-05-30 0.8.2: +------------------- + * add official Python 3.6 support (although it was already working) + * fix description of fill(), #52 + * handle extending self correctly, #28 + * copy_n: fast copy with memmove fixed, #43 + * minor clarity/wording changes to README, #23 + + 2013-03-30 0.8.1: ------------------- * fix issue #10, i.e. int(bitarray()) segfault @@ -71,7 +301,7 @@ 2009-01-15 0.3.4: ------------------- * Made C code less ambiguous, such that the package compiles on - Visual Studio, will all tests passing. + Visual Studio, with all tests passing. 
2008-12-14 0.3.3: @@ -95,7 +325,7 @@ 2008-09-30 0.3.0: ------------------- - * Fixed a severe bug for 64bit machines. Implemented all methods in C, + * Fixed a severe bug for 64-bit machines. Implemented all methods in C, improved tests. * Removed deprecated methods from01 and fromlist. diff -Nru python-bitarray-0.8.1/contributing.md python-bitarray-1.6.3/contributing.md --- python-bitarray-0.8.1/contributing.md 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/contributing.md 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,31 @@ +Contributing to bitarray +======================== + +The bitarray type is very stable and feature complete at this point, +which means that pull requests to `bitarray/_bitarray.c` will most likely +be rejected. + +There may be room for improvements/additions in the `bitarray.util` module, +added in the 1.2.0 release. However, due to the slow release cycle of this +package, it may be more practical to create your own library which depends +on bitarray. This is completely possible, even on the C-level. Please +study the implementation of `bitarray/_util.c` for details. In particular for +C extensions to work with the bitarray type, it is important that +the `bitarrayobject` struct is defined in the same way: + + typedef struct { + PyObject_VAR_HEAD + char *ob_item; /* buffer */ + Py_ssize_t allocated; /* how many bytes allocated */ + Py_ssize_t nbits; /* length of bitarray, i.e. elements */ + int endian; /* bit endianness of bitarray */ + int ob_exports; /* how many buffer exports */ + PyObject *weakreflist; /* list of weak references */ + } bitarrayobject; + + /* member endian may have these values */ + #define ENDIAN_LITTLE 0 + #define ENDIAN_BIG 1 + +These essential (as well as other useful) declarations can be found +in `bitarray/bitarray.h`. 
diff -Nru python-bitarray-0.8.1/debian/changelog python-bitarray-1.6.3/debian/changelog --- python-bitarray-0.8.1/debian/changelog 2020-12-07 17:42:02.000000000 +0000 +++ python-bitarray-1.6.3/debian/changelog 2021-02-03 16:38:04.000000000 +0000 @@ -1,20 +1,46 @@ -python-bitarray (0.8.1-2build3) hirsute; urgency=medium +python-bitarray (1.6.3-2) unstable; urgency=medium - * No-change rebuild to drop python3.8 extensions. + * Reupload to unstable - -- Matthias Klose Mon, 07 Dec 2020 18:42:02 +0100 + -- Jan Dittberner Wed, 03 Feb 2021 17:38:04 +0100 -python-bitarray (0.8.1-2build2) hirsute; urgency=medium +python-bitarray (1.6.3-1) experimental; urgency=medium - * No-change rebuild to build with python3.9 as supported. + [ Debian Janitor ] + * Use secure URI in debian/watch. + * Bump debhelper from old 9 to 12. + + [ Ondřej Nový ] + * d/control: Update Vcs-* fields with new Debian Python Team Salsa + layout. + + [ Sandro Tosi ] + * Use the new Debian Python Team contact name and address + + [ Antonio Valentino ] + * New upstream release. + * d/control: + - bump Standards-Version to 4.5.1, no change + - drop dpkg-dev versioned dependency, no longer necessary + - add Testsuite: autopkgtest-pkg-python + - add Rules-Requires-Root: no + - update Homepage fields (point to github) + - docutils replaced by pandoc in the list of build dependencies + * d/watch: + - update watch file format version to 4 + - use upstream tarball form github + * d/copyright: + - update Source fields: point to github + - update copyright date + - update license text + * d/patches: drop 2to3_update_readme.patch, no longer necessary. + * Set upstream metadata fields: Bug-Database, Bug-Submit, Repository, + Repository-Browse. + * d/rules: + - fix doc build + - remove vcs control files from examples - -- Matthias Klose Sat, 24 Oct 2020 10:54:13 +0200 - -python-bitarray (0.8.1-2build1) focal; urgency=medium - - * No-change rebuild to drop python3.7. 
- - -- Matthias Klose Tue, 18 Feb 2020 10:22:36 +0100 + -- Jan Dittberner Sat, 30 Jan 2021 12:04:25 +0100 python-bitarray (0.8.1-2) unstable; urgency=medium diff -Nru python-bitarray-0.8.1/debian/control python-bitarray-1.6.3/debian/control --- python-bitarray-0.8.1/debian/control 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/control 2021-02-03 16:38:04.000000000 +0000 @@ -2,18 +2,17 @@ Section: python Priority: optional Maintainer: Jan Dittberner -Uploaders: Debian Python Modules Team -Build-Depends: debhelper-compat (= 9), +Uploaders: Debian Python Team +Build-Depends: debhelper-compat (= 12), dh-python, - dpkg-dev (>= 1.16.1~), + pandoc, python3-all, python3-all-dev, - python3-docutils, python3-setuptools -Standards-Version: 3.9.5 -Homepage: https://pypi.python.org/pypi/bitarray -Vcs-Git: https://salsa.debian.org/python-team/modules/python-bitarray.git -Vcs-Browser: https://salsa.debian.org/python-team/modules/python-bitarray +Standards-Version: 4.5.1 +Homepage: https://github.com/ilanschnell/bitarray +Vcs-Git: https://salsa.debian.org/python-team/packages/python-bitarray.git +Vcs-Browser: https://salsa.debian.org/python-team/packages/python-bitarray Package: python3-bitarray Architecture: any diff -Nru python-bitarray-0.8.1/debian/copyright python-bitarray-1.6.3/debian/copyright --- python-bitarray-0.8.1/debian/copyright 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/copyright 2021-02-03 16:38:04.000000000 +0000 @@ -1,57 +1,55 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: python-bitarray Upstream-Contact: Ilan Schnell -Source: http://pypi.python.org/pypi/bitarray/ +Source: https://github.com/ilanschnell/bitarray Files: * -Copyright: 2008-2012, Ilan Schnell ilanschnell@gmail.com +Copyright: 2008-2020, Ilan Schnell ilanschnell@gmail.com License: Python PYTHON SOFTWARE FOUNDATION LICENSE ---------------------------------- . - 1. 
This LICENSE AGREEMENT is between the Python Software Foundation - ("PSF"), and the Individual or Organization ("Licensee") accessing and - otherwise using this software ("Python") in source or binary form and - its associated documentation. + 1. This LICENSE AGREEMENT is between Ilan Schnell, and the Individual or + Organization ("Licensee") accessing and otherwise using this software + ("bitarray") in source or binary form and its associated documentation. . - 2. Subject to the terms and conditions of this License Agreement, PSF + 2. Subject to the terms and conditions of this License Agreement, Ilan Schnell hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, - prepare derivative works, distribute, and otherwise use Python - alone or in any derivative version, provided, however, that PSF's - License Agreement and PSF's notice of copyright, i.e., "Copyright (c) - 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Python Software Foundation; - All Rights Reserved" are retained in Python alone or in any derivative - version prepared by Licensee. + prepare derivative works, distribute, and otherwise use bitarray + alone or in any derivative version, provided, however, that Ilan Schnell's + License Agreement and Ilan Schnell's notice of copyright, i.e., "Copyright (c) + 2008 - 2020 Ilan Schnell; All Rights Reserved" are retained in bitarray + alone or in any derivative version prepared by Licensee. . 3. In the event Licensee prepares a derivative work that is based on - or incorporates Python or any part thereof, and wants to make + or incorporates bitarray or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of - the changes made to Python. + the changes made to bitarray. . - 4. PSF is making Python available to Licensee on an "AS IS" - basis. 
PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR - IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND + 4. Ilan Schnell is making bitarray available to Licensee on an "AS IS" + basis. ILAN SCHNELL MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR + IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ILAN SCHNELL MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS - FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT + FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF BITARRAY WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. . - 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON + 5. ILAN SCHNELL SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF BITARRAY FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS - A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, + A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING BITARRAY, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. . 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. . 7. Nothing in this License Agreement shall be deemed to create any - relationship of agency, partnership, or joint venture between PSF and - Licensee. This License Agreement does not grant permission to use PSF - trademarks or trade name in a trademark sense to endorse or promote + relationship of agency, partnership, or joint venture between Ilan Schnell + and Licensee. This License Agreement does not grant permission to use Ilan + Schnell trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. . - 8. By copying, installing or otherwise using Python, Licensee + 8. By copying, installing or otherwise using bitarray, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
diff -Nru python-bitarray-0.8.1/debian/docs python-bitarray-1.6.3/debian/docs --- python-bitarray-0.8.1/debian/docs 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/docs 2021-02-03 16:38:04.000000000 +0000 @@ -1 +1 @@ -AUTHORS README.rst README.html TODO +AUTHORS README.md README.html TODO diff -Nru python-bitarray-0.8.1/debian/patches/2to3_update_readme.patch python-bitarray-1.6.3/debian/patches/2to3_update_readme.patch --- python-bitarray-0.8.1/debian/patches/2to3_update_readme.patch 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/patches/2to3_update_readme.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ ---- a/update_readme.py -+++ b/update_readme.py -@@ -1,7 +1,7 @@ - import os - import re - import doctest --from cStringIO import StringIO -+from io import StringIO - - import bitarray - -@@ -85,13 +85,13 @@ def main(): - fo.close() - - if new_data == data: -- print "already up-to-date" -+ print("already up-to-date") - else: - with open('README.rst', 'w') as f: - f.write(new_data) - - doctest.testfile('README.rst') -- os.system('rst2html.py README.rst >README.html') -+ os.system('rst2html README.rst >README.html') - - - if __name__ == '__main__': diff -Nru python-bitarray-0.8.1/debian/patches/series python-bitarray-1.6.3/debian/patches/series --- python-bitarray-0.8.1/debian/patches/series 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -2to3_update_readme.patch diff -Nru python-bitarray-0.8.1/debian/rules python-bitarray-1.6.3/debian/rules --- python-bitarray-0.8.1/debian/rules 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/rules 2021-02-03 16:38:04.000000000 +0000 @@ -12,7 +12,13 @@ # Move README.html creation here: # we need to have the build-directory, so that we can import `bitarray`, # so let's have pybuild do the heavy lifting of locating it -export PYBUILD_AFTER_BUILD=mkdir -p $(DOCTEMPDIR) && cp 
$(CURDIR)/README.rst $(CURDIR)/CHANGE_LOG $(CURDIR)/update_readme.py $(DOCTEMPDIR); cd $(DOCTEMPDIR); PYTHONPATH={build_dir} {interpreter} update_readme.py; mv $(DOCTEMPDIR)/README.html $(CURDIR) +export PYBUILD_AFTER_BUILD=\ +mkdir -p $(DOCTEMPDIR) && \ +cp $(CURDIR)/README.md $(CURDIR)/CHANGE_LOG $(CURDIR)/update_readme.py $(DOCTEMPDIR); \ +cd $(DOCTEMPDIR); \ +PYTHONPATH={build_dir} {interpreter} update_readme.py; \ +pandoc -r gfm -w html5 -o README.html -s --toc --metadata title="BitArray Documentation" README.md; \ +mv $(DOCTEMPDIR)/README.html $(CURDIR) override_dh_auto_clean: rm -rf $(DOCTEMPDIR) README.html build @@ -25,5 +31,9 @@ PYBUILD_SYSTEM=custom \ PYBUILD_TEST_ARGS="cd {build_dir}; {interpreter} -c 'import bitarray; bitarray.test()'" dh_auto_test +override_dh_installexamples: + dh_installexamples + find $(CURDIR)/debian/python*-bitarray -name '.gitignore' -delete + %: dh $@ --with python3 --buildsystem=pybuild diff -Nru python-bitarray-0.8.1/debian/upstream/metadata python-bitarray-1.6.3/debian/upstream/metadata --- python-bitarray-0.8.1/debian/upstream/metadata 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/debian/upstream/metadata 2021-02-03 16:38:04.000000000 +0000 @@ -0,0 +1,5 @@ +--- +Bug-Database: https://github.com/ilanschnell/bitarray/issues +Bug-Submit: https://github.com/ilanschnell/bitarray/issues/new +Repository: https://github.com/ilanschnell/bitarray.git +Repository-Browse: https://github.com/ilanschnell/bitarray diff -Nru python-bitarray-0.8.1/debian/watch python-bitarray-1.6.3/debian/watch --- python-bitarray-0.8.1/debian/watch 2019-12-15 22:23:35.000000000 +0000 +++ python-bitarray-1.6.3/debian/watch 2021-02-03 16:38:04.000000000 +0000 @@ -1,2 +1,3 @@ -version=3 -http://pypi.python.org/packages/source/b/bitarray/ bitarray-(.*)\.tar\.gz +version=4 +opts=filenamemangle=s/.+\/v?(\d\S+)\.tar\.gz/bitarray-$1\.tar\.gz/ \ + https://github.com/ilanschnell/bitarray/tags .*/v?(\d\S+)\.tar\.gz diff -Nru 
python-bitarray-0.8.1/examples/bloom.py python-bitarray-1.6.3/examples/bloom.py --- python-bitarray-0.8.1/examples/bloom.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/bloom.py 2021-01-21 00:52:57.000000000 +0000 @@ -1,26 +1,57 @@ -""" -Demonstrates the implementation of a Bloom filter, see: -http://en.wikipedia.org/wiki/Bloom_filter -""" +import sys import hashlib -from math import exp, log +from math import ceil, exp, log from bitarray import bitarray +if sys.version_info[0] == 2: + int = long + range = xrange + log2 = lambda x: log(x) / log(2) +else: + from math import log2 -class BloomFilter(object): - def __init__(self, m, k): - self.m = m - self.k = k - self.array = bitarray(m) +class BloomFilter(object): + """ + Implementation of a Bloom filter. An instance is initialized by + it's capacity `n` and error rate `p`. The capacity tells how many + elements can be stored while maintaining no more than `p` false + positives. + """ + def __init__(self, n, p=0.01): + assert 0 < p < 1 + self.n = n + # number of hash functions + self.k = int(ceil(-log2(p))) + # size of array + self.m = int(ceil(-n * log2(p) / log(2))) + self.array = bitarray(self.m) self.array.setall(0) + def calculate_p(self): + """ + Calculate the actual false positive error rate `p` from the number + of hashes `k` and the size if the bitarray `m`. This is slightly + different from the given `p`, because the integer value of `k` + is being used. + """ + return pow(1.0 - exp(-float(self.k) * self.n / self.m), self.k) + + def approx_items(self): + """ + Return the approximate number of items in the Bloom filter. 
+ """ + count = self.array.count() + if count == 0: + return 0.0 + return -float(self.m) / self.k * log(1.0 - float(count) / self.m) + def add(self, key): for i in self._hashes(key): self.array[i] = 1 - def contains(self, key): + def __contains__(self, key): return all(self.array[i] for i in self._hashes(key)) def _hashes(self, key): @@ -29,29 +60,36 @@ the m array positions with a uniform random distribution """ h = hashlib.new('md5') - h.update(str(key)) - x = long(h.hexdigest(), 16) - for _ in xrange(self.k): - if x < self.m: - h.update('.') - x = long(h.hexdigest(), 16) + h.update(str(key).encode()) + x = int(h.hexdigest(), 16) + for _unused in range(self.k): + if x < 1024 * self.m: + h.update(b'x') + x = int(h.hexdigest(), 16) x, y = divmod(x, self.m) yield y -def test_bloom(m, k, n): - b = BloomFilter(m, k) - for i in xrange(n): +def test_bloom(n, p): + print("Testing Bloom filter:") + print("capacity n = %d" % n) + print("given p = %.3f%%" % (100.0 * p)) + b = BloomFilter(n, p) + print("hashes k = %d = ceil(%.3f)" % (b.k, -log2(p))) + print("array size m = %d" % b.m) + for i in range(n): b.add(i) - assert b.contains(i) - - p = (1.0 - exp(-k * (n + 0.5) / (m - 1))) ** k - print 100.0 * p, '%' + assert i in b + print("approx_items(): %.2f" % b.approx_items()) + print("calculate_p(): %.3f%%" % (100.0 * b.calculate_p())) N = 100000 - false_pos = sum(b.contains(i) for i in xrange(n, n + N)) - print 100.0 * false_pos / N, '%' + false_pos = sum(i in b for i in range(n, n + N)) + print("experimental : %.3f%%\n" % (100.0 * false_pos / N)) if __name__ == '__main__': - test_bloom(50000, 6, 5000) + test_bloom(5000, 0.05) + test_bloom(10000, 0.01) + test_bloom(50000, 0.005) + test_bloom(100000, 0.002) diff -Nru python-bitarray-0.8.1/examples/compress.py python-bitarray-1.6.3/examples/compress.py --- python-bitarray-0.8.1/examples/compress.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/compress.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 
+0,0 @@ -""" -Demonstrates how the bz2 module may be used to create a compressed object -which represents a bitarray. -""" -import bz2 - -from bitarray import bitarray - - -def compress(ba): - """ - Given a bitarray, return an object which represents all information - within the bitarray in a compresed form. - The function `decompress` can be used to restore the bitarray from the - compresed object. - """ - assert isinstance(ba, bitarray) - return ba.length(), bz2.compress(ba.tobytes()), ba.endian() - - -def decompress(obj): - """ - Given an object (created by `compress`), return the a copy of the - original bitarray. - """ - n, data, endian = obj - res = bitarray(endian=endian) - res.frombytes(bz2.decompress(data)) - del res[n:] - return res - - -if __name__ == '__main__': - a = bitarray(12345) - a.setall(0) - a[::10] = True - c = compress(a) - print c - b = decompress(c) - assert a == b, a.endian() == b.endian() diff -Nru python-bitarray-0.8.1/examples/decoding.py python-bitarray-1.6.3/examples/decoding.py --- python-bitarray-0.8.1/examples/decoding.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/decoding.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,87 +0,0 @@ -import time -from bitarray import bitarray -from huffman import freq_string, huffCode - - -def traverse(it, tree): - """ - return False, when it has no more elements, or the leave node - resulting from traversing the tree - """ - try: - subtree = tree[next(it)] - except StopIteration: - return False - - if isinstance(subtree, list) and len(subtree)==2: - return traverse(it, subtree) - else: # leave node - return subtree - - -def insert(tree, sym, ba): - """ - insert symbol which is mapped to bitarray into tree - """ - v = ba[0] - if len(ba) > 1: - if tree[v] == []: - tree[v] = [[], []] - insert(tree[v], sym, ba[1:]) - else: - if tree[v] != []: - raise ValueError("prefix code ambiguous") - tree[v] = sym - - -def decode(codedict, bitsequence): - """ - this function does the same 
thing as the bitarray decode method - """ - # generate tree from codedict - tree = [[], []] - for sym, ba in codedict.items(): - insert(tree, sym, ba) - - # actual decoding by traversing until StopIteration - res = [] - it = iter(bitsequence) - while True: - r = traverse(it, tree) - if r is False: - break - else: - if r == []: - raise ValueError("prefix code does not match data") - res.append(r) - return res - - -def main(): - txt = open('README').read() - code = huffCode(freq_string(txt)) - - sample = 2000 * txt - - a = bitarray() - a.encode(code, sample) - - # Time the decode function above - start_time = time.time() - res = decode(code, a) - Py_time = time.time() - start_time - assert ''.join(res) == sample - print('Py_time: %.6f sec' % Py_time) - - # Time the decode method which is implemented in C - start_time = time.time() - res = a.decode(code) - C_time = time.time() - start_time - assert ''.join(res) == sample - print('C_time: %.6f sec' % C_time) - - print('Ratio: %f' % (Py_time / C_time)) - - -if __name__ == '__main__': - main() diff -Nru python-bitarray-0.8.1/examples/gene.py python-bitarray-1.6.3/examples/gene.py --- python-bitarray-0.8.1/examples/gene.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/gene.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,27 @@ +# gene sequence example from @yoch, see +# https://github.com/ilanschnell/bitarray/pull/54 + +from random import choice +from timeit import timeit + +from bitarray import bitarray + + +trans = { + "A": bitarray("00"), + "T": bitarray("01"), + "G": bitarray("10"), + "C": bitarray("11") +} + +N = 10000 +seq = [choice("ATGC") for _ in range(N)] + +arr = bitarray() +arr.encode(trans, seq) + +assert arr.decode(trans) == seq + +# decodage +t = timeit(lambda: arr.decode(trans), number=1000) +print(t) diff -Nru python-bitarray-0.8.1/examples/growth/.gitignore python-bitarray-1.6.3/examples/growth/.gitignore --- python-bitarray-0.8.1/examples/growth/.gitignore 1970-01-01 
00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/growth/.gitignore 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,2 @@ +resize +pattern-* diff -Nru python-bitarray-0.8.1/examples/growth/growth.py python-bitarray-1.6.3/examples/growth/growth.py --- python-bitarray-0.8.1/examples/growth/growth.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/growth/growth.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,23 @@ +from bitarray import bitarray + + +def show(a): + _ptr, size, _endian, _unused, alloc = a.buffer_info() + print('%d %d' % (size, alloc)) + +a = bitarray() +prev = -1 +while len(a) < 2000: + alloc = a.buffer_info()[4] + if prev != alloc: + show(a) + prev = alloc + a.append(1) + +for i in 800_000, 400_000, 399_992, 0, 0, 80_000: + if len(a) < i: + a.extend(bitarray(i - len(a))) + else: + del a[i:] + assert len(a) == i + show(a) diff -Nru python-bitarray-0.8.1/examples/growth/Makefile python-bitarray-1.6.3/examples/growth/Makefile --- python-bitarray-0.8.1/examples/growth/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/growth/Makefile 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,12 @@ +resize: resize.c + gcc -Wall resize.c -o resize + +test: resize + ./resize >pattern-c.txt + python growth.py >pattern-py.txt + diff pattern-c.txt pattern-py.txt + python test.py + +clean: + rm -f resize + rm -f pattern-* diff -Nru python-bitarray-0.8.1/examples/growth/README.md python-bitarray-1.6.3/examples/growth/README.md --- python-bitarray-0.8.1/examples/growth/README.md 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/growth/README.md 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,10 @@ +The bitarray growth pattern +=========================== + +Running `python growth.py` will display the bitarray growth pattern. +This is done by appending one bit to a bitarray in a loop, and displaying +the allocated size of the bitarray object each time it changes. 
+ +The program `resize.c` contains a distilled version of the `resize()` +function which contains the implementation of this growth pattern. +Running this C program gives exactly the same output. diff -Nru python-bitarray-0.8.1/examples/growth/resize.c python-bitarray-1.6.3/examples/growth/resize.c --- python-bitarray-0.8.1/examples/growth/resize.c 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/growth/resize.c 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,81 @@ +#include +#include + + +typedef struct { + int size; + int nbits; + int allocated; +} bitarrayobject; + + +/* number of bytes necessary to store given bits */ +#define BYTES(bits) (((bits) == 0) ? 0 : (((bits) - 1) / 8 + 1)) + + +int resize(bitarrayobject *self, int nbits) +{ + int new_allocated, allocated = self->allocated, size = self->size; + int newsize; + + newsize = BYTES(nbits); + if (newsize == size) { + /* the memory size hasn't changed - bypass almost everything */ + self->nbits = nbits; + return 0; + } + + /* Bypass realloc() ... */ + if (allocated >= newsize && newsize >= (allocated >> 1)) { + self->size = newsize; + self->nbits = nbits; + return 0; + } + + if (newsize == 0) { + /* free(self->ob_item) */ + self->size = 0; + self->allocated = 0; + self->nbits = 0; + return 0; + } + + new_allocated = newsize; + if (size == 0 && newsize <= 4) + /* When resizing an empty bitarray, we want at least 4 bytes. */ + new_allocated = 4; + + else if (size != 0 && newsize > size) + new_allocated += (newsize >> 4) + (newsize < 8 ? 
3 : 7); + + /* realloc(self->ob_item) */ + self->size = newsize; + self->allocated = new_allocated; + self->nbits = nbits; + return 1; +} + + +int main() +{ + int size; + bitarrayobject x; + + x.size = 0; + x.allocated = 0; + +#define SHOW printf("%d %d\n", x.size, x.allocated) + + resize(&x, 0); SHOW; + for (size = 0; size < 2000; size++) + if (resize(&x, size)) + SHOW; + + resize(&x, 800000); SHOW; + resize(&x, 400000); SHOW; + resize(&x, 399992); SHOW; + resize(&x, 0); SHOW; + resize(&x, 0); SHOW; + resize(&x, 80000); SHOW; + return 0; +} diff -Nru python-bitarray-0.8.1/examples/growth/test.py python-bitarray-1.6.3/examples/growth/test.py --- python-bitarray-0.8.1/examples/growth/test.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/growth/test.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,49 @@ +from bitarray import bitarray + + +def show(a): + ptr, size, _endian, _unused, alloc = a.buffer_info() + print('%18d %10d %10d' % (ptr, size, alloc)) + + +# make sure sequence of appends will always increase allocated size +a = bitarray() +prev = -1 +while len(a) < 1_000_000: + alloc = a.buffer_info()[4] + assert prev <= alloc + prev = alloc + a.append(1) + + +# ensure that when we start from a large array and delete part, we always +# get a decreasing allocation +a = bitarray(10_000_000) +prev = a.buffer_info()[4] +for _ in range(100): + del a[-100_000:] + alloc = a.buffer_info()[4] + assert alloc <= prev + prev = alloc + + +# initializing a bitarray from a string or list should not overallocate +for n in 0, 4, 10, 100, 1000, 10_000: + a = bitarray(8 * n * '1') + assert n == a.buffer_info()[4] + a = bitarray(8 * n * [1]) + assert n == a.buffer_info()[4] + + +# starting from a large bitarray, make sure we don't realloc each time +# we extend +a = bitarray(1_000_000) # no overallocation +assert a.buffer_info()[4] == 125_000 +a.extend(bitarray(8)) # overallocation happens here +alloc = a.buffer_info()[4] +for _ in range(1000): + 
a.extend(bitarray(8)) + assert a.buffer_info()[4] == alloc + + +print("OK") diff -Nru python-bitarray-0.8.1/examples/huffman/compress.py python-bitarray-1.6.3/examples/huffman/compress.py --- python-bitarray-0.8.1/examples/huffman/compress.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/huffman/compress.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,93 @@ +""" +This program demonstrates how Huffman codes can be used to efficiently +compress and uncompress files (text or binary). +""" +import os +from optparse import OptionParser +from collections import Counter + +from bitarray import bitarray +from bitarray.util import huffman_code + + +def encode(filename): + with open(filename, 'rb') as fi: + plain = bytearray(fi.read()) + + code = huffman_code(Counter(plain)) + with open(filename + '.huff', 'wb') as fo: + for sym in sorted(code): + fo.write(('%02x %s\n' % (sym, code[sym].to01())).encode()) + a = bitarray(endian='little') + a.encode(code, plain) + # write unused bits + fo.write(b'unused %s\n' % str(a.buffer_info()[3]).encode()) + a.tofile(fo) + print('Bits: %d / %d' % (len(a), 8 * len(plain))) + print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(plain))) + + +def decode(filename): + assert filename.endswith('.huff') + code = {} + + with open(filename, 'rb') as fi: + while 1: + line = fi.readline() + sym, b = line.split() + if sym == b'unused': + u = int(b) + break + i = int(sym, 16) + code[i] = bitarray(b.decode()) + a = bitarray(endian='little') + a.fromfile(fi) + + if u: + del a[-u:] + + with open(filename[:-5] + '.out', 'wb') as fo: + fo.write(bytearray(a.iterdecode(code))) + + +def main(): + p = OptionParser("usage: %prog [options] FILE") + p.add_option( + '-e', '--encode', + action="store_true", + help="encode (compress) FILE using the Huffman code calculated for " + "the frequency of characters in FILE itself. 
" + "The output is FILE.huff which contains both the Huffman " + "code and the bitarray resulting from the encoding.") + p.add_option( + '-d', '--decode', + action="store_true", + help="decode (decompress) FILE.huff and write the output to FILE.out") + p.add_option( + '-t', '--test', + action="store_true", + help="encode FILE, decode FILE.huff, compare FILE with FILE.out, " + "and unlink created files.") + opts, args = p.parse_args() + if len(args) != 1: + p.error('exactly one argument required') + filename = args[0] + + if opts.encode: + encode(filename) + + if opts.decode: + decode(filename + '.huff') + + if opts.test: + huff = filename + '.huff' + out = filename + '.out' + encode(filename) + decode(huff) + assert open(filename, 'rb').read() == open(out, 'rb').read() + os.unlink(huff) + os.unlink(out) + + +if __name__ == '__main__': + main() diff -Nru python-bitarray-0.8.1/examples/huffman/decodetree.py python-bitarray-1.6.3/examples/huffman/decodetree.py --- python-bitarray-0.8.1/examples/huffman/decodetree.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/huffman/decodetree.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,36 @@ +from random import random, randint +from time import time + +from bitarray import bitarray, decodetree +from bitarray.util import huffman_code + + +N = 1_000_000 + +# create Huffman code for N symbols +code = huffman_code({i: random() for i in range(N)}) +print(len(code)) + +# create the decodetree object +t0 = time() +tree = decodetree(code) +print('decodetree(code): %9.6f sec' % (time() - t0)) + +print(tree.nodes()) +plain = [randint(0, N - 1) for _ in range(100)] + +a = bitarray() +a.encode(code, plain) + +# decode using the code dictionary +t0 = time() +res = a.decode(code) +print('decode(code): %9.6f sec' % (time() - t0)) +assert res == plain + +# decode using the decodetree +t0 = time() +res = a.decode(tree) +print('decode(tree): %9.6f sec' % (time() - t0)) +assert res == plain +assert tree.todict() == 
code diff -Nru python-bitarray-0.8.1/examples/huffman/decoding.py python-bitarray-1.6.3/examples/huffman/decoding.py --- python-bitarray-0.8.1/examples/huffman/decoding.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/huffman/decoding.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,76 @@ +from __future__ import print_function +from optparse import OptionParser +from time import time +from collections import Counter +from bitarray import bitarray + +from huffman import (huff_tree, huff_code, write_dot, print_code, + make_tree, iterdecode) + + +def main(): + p = OptionParser("usage: %prog [options] [FILE]") + p.add_option( + '-p', '--print', + action="store_true", + help="print Huffman code") + p.add_option( + '-t', '--tree', + action="store_true", + help="store the tree as a .dot file") + opts, args = p.parse_args() + + if len(args) == 0: + filename = 'README' + elif len(args) == 1: + filename = args[0] + else: + p.error('only one argument expected') + + with open(filename, 'rb') as fi: + plain = bytearray(fi.read()) + if len(args) == 0: + plain *= 1000 + + t0 = time() + freq = Counter(plain) + print('count: %9.6f sec' % (time() - t0)) + + t0 = time() + tree = huff_tree(freq) + print('tree: %9.6f sec' % (time() - t0)) + + if opts.tree: + write_dot(tree, 'tree.dot', 0 in plain) + code = huff_code(tree) + if opts.print: + print_code(freq, code) + if opts.tree: + # create tree from code (no frequencies) + write_dot(make_tree(code), 'tree_raw.dot', 0 in plain) + + a = bitarray() + + t0 = time() + a.encode(code, plain) + print('C encode: %9.6f sec' % (time() - t0)) + + # Time the decode function above + t0 = time() + res = bytearray(iterdecode(tree, a)) + Py_time = time() - t0 + print('Py decode: %9.6f sec' % Py_time) + assert res == plain + + # Time the decode method which is implemented in C + t0 = time() + res = bytearray(a.iterdecode(code)) + C_time = time() - t0 + print('C decode: %9.6f sec' % C_time) + assert res == plain + + 
print('Ratio: %f' % (Py_time / C_time)) + + +if __name__ == '__main__': + main() diff -Nru python-bitarray-0.8.1/examples/huffman/huffman.py python-bitarray-1.6.3/examples/huffman/huffman.py --- python-bitarray-0.8.1/examples/huffman/huffman.py 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/huffman/huffman.py 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,216 @@ +""" +This library contains useful functionality for working with Huffman trees +and codes. + +Note: +There is a function for directly creating a Huffman code from a frequency +map in the bitarray library itself: bitarray.util.huffman_code() +""" +from __future__ import print_function +from heapq import heappush, heappop +from bitarray import bitarray + + +class Node(object): + def __init__(self): + self.child = [None, None] + self.symbol = None + self.freq = None + + def __lt__(self, other): + # heapq needs to be able to compare the nodes + return self.freq < other.freq + + +def huff_tree(freq): + """ + Given a dictionary mapping symbols to thier frequency, construct a Huffman + tree and return its root node. + """ + minheap = [] + # create all the leaf nodes and push them onto the queue + for sym in sorted(freq): + nd = Node() + nd.symbol = sym + nd.freq = freq[sym] + heappush(minheap, nd) + + # repeat the process until only one node remains + while len(minheap) > 1: + # take the nodes with smallest frequencies from the queue + child_0 = heappop(minheap) + child_1 = heappop(minheap) + # construct the new internal node and push it onto the queue + parent = Node() + parent.child = [child_0, child_1] + parent.freq = child_0.freq + child_1.freq + heappush(minheap, parent) + + # return the one remaining node, which is the root of the Huffman tree + return minheap[0] + + +def huff_code(tree): + """ + Given a Huffman tree, traverse the tree and return the Huffman code, i.e. + a dictionary mapping symbols to bitarrays. 
+ """ + result = {} + + def traverse(nd, prefix=bitarray()): + if nd.symbol is None: # parent, so traverse each of the children + traverse(nd.child[0], prefix + bitarray([0])) + traverse(nd.child[1], prefix + bitarray([1])) + else: # leaf + result[nd.symbol] = prefix + + traverse(tree) + return result + + +def insert_symbol(tree, ba, sym): + """ + Insert symbol into a tree at the position described by the bitarray, + creating nodes as necessary. + """ + if sym is None: + raise ValueError("symbol cannot be None") + nd = tree + for k in ba: + prev = nd + nd = nd.child[k] + if nd and nd.symbol is not None: + raise ValueError("ambiguity") + if not nd: + nd = Node() + prev.child[k] = nd + if nd.symbol is not None or nd.child[0] or nd.child[1]: + raise ValueError("ambiguity") + nd.symbol = sym + + +def make_tree(codedict): + """ + Create a tree from the given code dictionary, and return its root node. + Unlike trees created by huff_tree, all nodes will have .freq set to None. + """ + tree = Node() + for sym, ba in codedict.items(): + insert_symbol(tree, ba, sym) + return tree + + +def traverse(tree, it): + """ + Traverse tree until a leaf node is reached, and return its symbol. + This function consumes an iterator on which next() is called during each + step of traversing. + """ + nd = tree + while 1: + nd = nd.child[next(it)] + if not nd: + raise ValueError("prefix code does not match data in bitarray") + if nd.symbol is not None: + return nd.symbol + if nd != tree: + raise ValueError("decoding not terminated") + + +def iterdecode(tree, bitsequence): + """ + Given a tree and a bitsequence, decode the bitsequence and generate + the symbols. + """ + it = iter(bitsequence) + while True: + try: + yield traverse(tree, it) + except StopIteration: + return + + +def write_dot(tree, fn, binary=False): + """ + Given a tree (which may or may not contain frequencies), write + a graphviz '.dot' file with a visual representation of the tree. 
+ """ + special_ascii = {' ': 'SPACE', '\n': 'LF', '\r': 'CR', '\t': 'TAB', + '\\': r'\\', '"': r'\"'} + def disp_sym(i): + if binary: + return '0x%02x' % i + else: + c = chr(i) + res = special_ascii.get(c, c) + assert res.strip(), repr(c) + return res + + def disp_freq(f): + if f is None: + return '' + return '%d' % f + + with open(fn, 'w') as fo: # dot -Tpng tree.dot -O + def write_nd(fo, nd): + if nd.symbol is not None: # leaf node + a, b = disp_freq(nd.freq), disp_sym(nd.symbol) + fo.write(' %d [label="%s%s%s"];\n' % + (id(nd), a, ': ' if a and b else '', b)) + else: # parent node + fo.write(' %d [shape=circle, style=filled, ' + 'fillcolor=grey, label="%s"];\n' % + (id(nd), disp_freq(nd.freq))) + + for k in range(2): + if nd.child[k]: + fo.write(' %d->%d;\n' % (id(nd), id(nd.child[k]))) + + for k in range(2): + if nd.child[k]: + write_nd(fo, nd.child[k]) + + fo.write('digraph BT {\n') + fo.write(' node [shape=box, fontsize=20, fontname="Arial"];\n') + write_nd(fo, tree) + fo.write('}\n') + + +def print_code(freq, codedict): + """ + Given a frequency map (dictionary mapping symbols to thier frequency) + and a codedict, print them in a readable form. 
+ """ + special_ascii = {0: 'NUL', 9: 'TAB', 10: 'LF', 13: 'CR', 127: 'DEL'} + def disp_char(i): + if 32 <= i < 127: + return repr(chr(i)) + return special_ascii.get(i, '') + + print(' symbol char hex frequency Huffman code') + print(70 * '-') + for i in sorted(codedict, key=lambda c: (freq[c], c), reverse=True): + print('%7r %-4s 0x%02x %10i %s' % ( + i, disp_char(i), i, freq[i], codedict[i].to01())) + + +def test(): + freq = {'a': 10, 'b': 2, 'c': 1} + tree = huff_tree(freq) + code = huff_code(tree) + assert len(code['a']) == 1 + assert len(code['b']) == len(code['c']) == 2 + + code = {'a': bitarray('0'), + 'b': bitarray('10'), + 'c': bitarray('11')} + tree = make_tree(code) + txt = 'abca' + a = bitarray() + a.encode(code, txt) + assert a == bitarray('010110') + assert list(iterdecode(tree, a)) == ['a', 'b', 'c', 'a'] + + +if __name__ == '__main__': + test() diff -Nru python-bitarray-0.8.1/examples/huffman/README python-bitarray-1.6.3/examples/huffman/README --- python-bitarray-0.8.1/examples/huffman/README 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/examples/huffman/README 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,16 @@ +compress.py: + Demonstrates how Huffman codes can be used to efficiently + compress and uncompress files (text or binary). + Given an input file, calculates the number of occurrences for each + character; from those frequencies, a Huffman tree is build. + Also allows encoding and decoding of a file, see -h option. + + +decoding.py: + This example demonstrates how much faster bitarray's decoing is as + opposed to traversing the Huffman tree using Python. + + +huffman.py: + Library containing useful functionality for working with Huffman trees + and codes. 
diff -Nru python-bitarray-0.8.1/examples/huffman.py python-bitarray-1.6.3/examples/huffman.py --- python-bitarray-0.8.1/examples/huffman.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/huffman.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,141 +0,0 @@ -""" -The non-trivial part of the code is derived from: -http://en.literateprograms.org/Huffman_coding_(Python) - -The link also contains a good description of the algorithm. -""" -import os, sys -from collections import defaultdict -from bitarray import bitarray -from heapq import heappush, heappop - - -def huffCode(freq): - """ - Given a dictionary mapping symbols to thier frequency, - return the Huffman code in the form of - a dictionary mapping the symbols to bitarrays. - """ - minheap = [] - for s in freq: - heappush(minheap, (freq[s], s)) - - while len(minheap) > 1: - childR, childL = heappop(minheap), heappop(minheap) - parent = (childL[0] + childR[0], childL, childR) - heappush(minheap, parent) - - # Now minheap[0] is the root node of the Huffman tree - - def traverse(tree, prefix=bitarray()): - if len(tree) == 2: - result[tree[1]] = prefix - else: - for i in range(2): - traverse(tree[i+1], prefix + bitarray([i])) - - result = {} - traverse(minheap[0]) - return result - - -def freq_string(s): - """ - Given a string, return a dictionary - mapping characters to thier frequency. 
- """ - res = defaultdict(int) - for c in s: - res[c] += 1 - return res - - -def print_code(filename): - freq = freq_string(open(filename).read()) - code = huffCode(freq) - print(' char frequency Huffman code') - print(70*'-') - for c in sorted(code, key=lambda c: freq[c], reverse=True): - print('%7r %8i %s' % (c, freq[c], code[c].to01())) - - -def encode(filename): - s = open(filename, 'rb').read() - code = huffCode(freq_string(s)) - fo = open(filename + '.huff', 'wb') - fo.write(repr(code) + '\n') - a = bitarray(endian='little') - a.encode(code, s) - fo.write(str(a.buffer_info()[3])) # write unused bits as one char string - a.tofile(fo) - fo.close() - print('Ratio =%6.2f%%' % (100.0 * a.buffer_info()[1] / len(s))) - - -def decode(filename): - fi = open(filename, 'rb') - code = eval(fi.readline()) - u = int(fi.read(1)) # number of unused bits in last byte stored in file - a = bitarray(endian='little') - a.fromfile(fi) - fi.close() - if u: del a[-u:] - - assert filename.endswith('.huff') - fo = open(filename[:-5] + '.out', 'wb') - fo.write(''.join(a.decode(code))) - fo.close() - - -def usage(): - print("""Usage: %s command FILE - - print -- calculate and display the Huffman code for the frequency - of characters in FILE. - - encode -- encode FILE using the Huffman code calculated for the - frequency of characters in FILE itself. - The output is FILE.huff which contains both the Huffman - code and the bitarray resulting from the encoding. - - decode -- decode FILE, which has .huff extension generated with the - encode command. The output is written in a filename - where .huff is replaced by .out - - test -- encode FILE, decode FILE.huff, compare FILE with FILE.out, - and unlink created files. 
-""" % sys.argv[0]) - sys.exit(0) - - -if __name__ == '__main__': - if len(sys.argv) != 3: - usage() - - cmd, filename = sys.argv[1:3] - - if cmd == 'print': - print_code(filename) - - elif cmd == 'encode': - encode(filename) - - elif cmd == 'decode': - if filename.endswith('.huff'): - decode(filename) - else: - print('Filename has no .huff extension') - - elif cmd == 'test': - huff = filename + '.huff' - out = filename + '.out' - - encode(filename) - decode(huff) - assert open(filename, 'rb').read() == open(out, 'rb').read() - os.unlink(huff) - os.unlink(out) - - else: - print('Unknown command %r' % cmd) - usage() diff -Nru python-bitarray-0.8.1/examples/mandel.py python-bitarray-1.6.3/examples/mandel.py --- python-bitarray-0.8.1/examples/mandel.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/mandel.py 2021-01-21 00:52:57.000000000 +0000 @@ -1,94 +1,40 @@ -# does not work with Python 3, because weave is not yet supported - -import hashlib - +import sys from bitarray import bitarray +from numba import jit + +width, height = 4000, 3000 +maxdepth = 500 -import numpy -from scipy import weave +@jit(nopython=True) +def mandel(c): + d = 0 + z = c + while abs(z) < 4.0 and d <= maxdepth: + d += 1 + z = z * z + c + return d + + +def main(): + data = bitarray(endian='big') + + for j in range(height): + sys.stdout.write('.') + sys.stdout.flush() + y = +1.5 - 3.0 * j / height + for i in range(width): + x = -2.75 + 4.0 * i / width + c = mandel(complex(x, y)) % 2 + data.append(c) + print("done") + + with open('out.ppm', 'wb') as fo: + fo.write(b'P4\n') + fo.write(b'# partable bitmap image of the Mandelbrot set\n') + fo.write(b'%i %i\n' % (width, height)) + data.tofile(fo) -support_code = ''' -#define D 501 -int color(double cr, double ci) -{ - int d = 1; - double zr=cr, zi=ci, zr2, zi2; - for(;;) { - zr2 = zr * zr; - zi2 = zi * zi; - if( zr2+zi2 > 16.0 ) goto finish; - if( ++d == D ) goto finish; - zi = 2.0 * zr * zi + ci; - zr = zr2 - zi2 + cr; - } - 
finish: - return d % 2; -} - -static void -PyUFunc_0(char **args, npy_intp *dimensions, npy_intp *steps, void *func) -{ - npy_intp i, n; - npy_intp is0 = steps[0]; - npy_intp is1 = steps[1]; - npy_intp os = steps[2]; - char *ip0 = args[0]; - char *ip1 = args[1]; - char *op = args[2]; - n = dimensions[0]; - - for(i = 0; i < n; i++) { - *(long *)op = color(*(double *)ip0, *(double *)ip1); - ip0 += is0; - ip1 += is1; - op += os; - } -} - -static PyUFuncGenericFunction f_functions[] = { - PyUFunc_0, -}; -static char f_types[] = { - NPY_DOUBLE, NPY_DOUBLE, NPY_BOOL, -}; -''' -ufunc_info = weave.base_info.custom_info() -ufunc_info.add_header('"numpy/ufuncobject.h"') - -mandel = weave.inline('/* ' + hashlib.md5(support_code).hexdigest() + ''' */ -import_ufunc(); - -return_val = PyUFunc_FromFuncAndData(f_functions, - NULL, - f_types, - 1, /* ntypes */ - 2, /* nin */ - 1, /* nout */ - PyUFunc_None, /* identity */ - "mandel", /* name */ - "doc", /* doc */ - 0); -''', - support_code=support_code, - verbose=0, - customize=ufunc_info) - -# ---------------------------------------------------------------------------- - -w, h = 8000, 6000 - -y, x = numpy.ogrid[-1.5:+1.5:h*1j, -2.75:+1.25:w*1j] - -data = mandel(x, y) - -bitdata = bitarray(endian='big') -bitdata.pack(data.tostring()) - -fo = open('mandel.ppm', 'wb') -fo.write('P4\n') -fo.write('# This is a partable bitmap image of the Mandelbrot set.\n') -fo.write('%i %i\n' % (w, h)) -bitdata.tofile(fo) -fo.close() +if __name__ == '__main__': + main() diff -Nru python-bitarray-0.8.1/examples/ndarray.py python-bitarray-1.6.3/examples/ndarray.py --- python-bitarray-0.8.1/examples/ndarray.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/ndarray.py 2021-01-21 00:52:57.000000000 +0000 @@ -2,19 +2,20 @@ # This example illusatrates how binary data can be efficiently be passed # between a bitarray object and an ndarray with dtype bool # +from __future__ import print_function + import bitarray import numpy a = 
bitarray.bitarray('100011001001') -print a +print(a) # bitarray -> ndarray -b = numpy.fromstring(a.unpack(), dtype=bool) -print repr(b) +b = numpy.frombuffer(a.unpack(), dtype=bool) +print(repr(b)) # ndarray -> bitarray c = bitarray.bitarray() c.pack(b.tostring()) assert a == c - diff -Nru python-bitarray-0.8.1/examples/README python-bitarray-1.6.3/examples/README --- python-bitarray-0.8.1/examples/README 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/README 2021-01-21 00:52:57.000000000 +0000 @@ -1,42 +1,35 @@ bloom.py: - Demonstrates the implementation of a Bloom filter, see: + Demonstrates the implementation of a "Bloom filter", see: http://en.wikipedia.org/wiki/Bloom_filter -compress.py: - Demonstrates how the bz2 module may be used to create a compressed - object which represents a bitarray - - -decoding.py - Bitarray's decode method is implemented in C. Since the C code - might be hard to read, we have implemented exactly the same - algorithm in Python. It is about 20 times slower than it's - C counterpart, since (recursive) function calls are more expensive - in Python than in C. - - -huffman.py - Demonstrates building a Huffman tree. Given an input file, - calculates the number of occurrences for each character; - from those frequencies, a Huffman tree is build; and by traversing - the tree, the Huffman code is evaluated. - Also allows encoding and decoding of a file, see -h option. +gene.py: + shows how gene sequences (ATGC) can be very easily and efficiently + represented by bitarrays. + + +growth/ + Things to study the bitarray growth pattern, including tests for the + current implementation. + + +huffman/ + Directory containing a library and examples for working with Huffman + trees and codes. mandel.py - Generates a .ppm image file of size 8000x6000 of the Mandelbrot set. 
- Despite it's size, the output image file has only a size of slightly - over 6 Million bytes (uncompressed) because each pixel is stored in + Generates a .ppm image file of size 4000 x 3000 of the Mandelbrot set. + Despite its size, the output image file has only a size of slightly + over 1.5 Million bytes (uncompressed) because each pixel is stored in one bit. - Requires numpy and scipy (see http://scipy.org/). - Not supported by Python 3.x. + Requires: numba ndarray.py Demonstrates how to efficiently convert boolean data from a bitarray to a numpy.ndarray of dtype bool. - Requires numpy. + Requires: numpy pbm.py @@ -46,10 +39,8 @@ sieve.py - Sieve of Eratosthenes is a simple, ancient algorithm for finding all - prime numbers up to a specified integer. In this exmaple, the algorithm - is implemented using the numpy ndarray as well as the bitarray object. - Thanks Steve for emailing this example. + Demonstrates the "Sieve of Eratosthenes" algorithm for finding all prime + numbers up to a specified integer. smallints.py @@ -58,4 +49,3 @@ For example, an array with 1000 5 bit integers can be created, allowing each element in the array to take values form 0 to 31, while the size of the object is 625 (5000/8) bytes. - Thanks to David Kammeyer for the idea to apply a bitarray in this way. 
diff -Nru python-bitarray-0.8.1/examples/runall python-bitarray-1.6.3/examples/runall --- python-bitarray-0.8.1/examples/runall 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/runall 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -#!/bin/bash -x - -python huffman.py test README || exit 1 - -for x in *.py -do - echo Running: $x - python $x || exit 1 -done - -rm *.ppm *.pyc diff -Nru python-bitarray-0.8.1/examples/sieve.py python-bitarray-1.6.3/examples/sieve.py --- python-bitarray-0.8.1/examples/sieve.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/sieve.py 2021-01-21 00:52:57.000000000 +0000 @@ -1,45 +1,35 @@ -import time +""" +Demonstrates the implementation of "Sieve of Eratosthenes" algorithm for +finding all prime numbers up to any given limit. +""" +from __future__ import print_function +import sys +if sys.version_info[0] == 2: + range = xrange -import numpy -import bitarray - -def primesToN1(n): - # use numpy: 8-bit array of boolean flags - if n < 2: - return [] - print 'init numpy' - A = numpy.ones(n+1, numpy.bool) # set to 1 == True - A[:2] = A[2*2::2] = 0 - print 'sieve' - for i in xrange(3, int(n**.5)+1, 2): # odd numbers - if A[i]: # i is prime - A[i*i::i*2] = 0 - print 'counting' - print numpy.sum(A) - - -def primesToN2(n): - # use bitarray: 1-bit boolean flags - if n < 2: - return [] - print 'init bitarray' - A = bitarray.bitarray(n+1) - A.setall(1) - A[:2] = A[2*2::2] = 0 - print 'sieve' - for i in xrange(3, int(n**.5)+1, 2): # odd numbers - if A[i]: # i is prime - A[i*i::i*2] = 0 - print 'counting' - print A.count() +from bitarray import bitarray +from bitarray.util import count_n N = 100 * 1000 * 1000 -def run(func): - start_time = time.time() - func(N) - print 'time: %.6f sec\n' % (time.time() - start_time) - -run(primesToN1) -run(primesToN2) +# Each bit corresponds to whether or not a[i] is a prime +a = bitarray(N + 1) +a.setall(True) +# Zero and one are not prime +a[:2] = False +# Perform 
sieve +for i in range(2, int(N ** 0.5) + 1): + if a[i]: # i is prime + a[i*i::i] = False + +print('the first few primes are:') +for i in range(30): + if a[i]: + print(i) + +# There are 5,761,455 primes up to 100 million +print('there are %d primes up to %d' % (a.count(), N)) +m = 1000 * 1000 +# The 1 millionth prime number is 15,485,863 +print('the %dth prime is %d' % (m, count_n(a, m) - 1)) diff -Nru python-bitarray-0.8.1/examples/smallints.py python-bitarray-1.6.3/examples/smallints.py --- python-bitarray-0.8.1/examples/smallints.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/examples/smallints.py 2021-01-21 00:52:57.000000000 +0000 @@ -1,34 +1,29 @@ -# -# Thanks to David Kammeyer for the idea to apply a bitarray in this way. -# from bitarray import bitarray +from bitarray.util import int2ba, ba2int + class SmallIntArray(object): """ A class which allows efficiently storeing an array of integers - represented by a specified number of bits (1..8). + represented by a specified number of bits. For example, an array with 1000 5 bit integers can be created, allowing each element in the array to take values form 0 to 31, while the size of the object is 625 (5000/8) bytes. 
""" def __init__(self, N, k): - assert 0 < k <= 8 self.N = N # number of integers self.k = k # bits for each integer - self.data = bitarray(N*k, endian='little') + self.array = bitarray(N * k) def slice_i(self, i): assert 0 <= i < self.N return slice(self.k * i, self.k * (i + 1)) def __getitem__(self, i): - return ord(self.data[self.slice_i(i)].tostring()) + return ba2int(self.array[self.slice_i(i)]) def __setitem__(self, i, v): - assert 0 <= v < 2 ** self.k - a = bitarray(endian='little') - a.fromstring(chr(v)) - self.data[self.slice_i(i)] = a[:self.k] + self.array[self.slice_i(i)] = int2ba(v, self.k) if __name__ == '__main__': @@ -44,8 +39,8 @@ a[i] = v print(b[:5]) - print(a.data.buffer_info()) - print(a.data[:25]) + print(a.array.buffer_info()) + print(a.array[:25]) for i in range(1000): assert a[i] == b[i] diff -Nru python-bitarray-0.8.1/.gitignore python-bitarray-1.6.3/.gitignore --- python-bitarray-0.8.1/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/.gitignore 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,9 @@ +*~ +*.pyc +*.egg-info +*.so +*.o +MANIFEST +README.html +build/ +dist/ diff -Nru python-bitarray-0.8.1/LICENSE python-bitarray-1.6.3/LICENSE --- python-bitarray-0.8.1/LICENSE 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/LICENSE 2021-01-21 00:52:57.000000000 +0000 @@ -1,48 +1,46 @@ PYTHON SOFTWARE FOUNDATION LICENSE ---------------------------------- -1. This LICENSE AGREEMENT is between the Python Software Foundation -("PSF"), and the Individual or Organization ("Licensee") accessing and -otherwise using this software ("Python") in source or binary form and -its associated documentation. +1. This LICENSE AGREEMENT is between Ilan Schnell, and the Individual or +Organization ("Licensee") accessing and otherwise using this software +("bitarray") in source or binary form and its associated documentation. -2. Subject to the terms and conditions of this License Agreement, PSF +2. 
Subject to the terms and conditions of this License Agreement, Ilan Schnell hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, -prepare derivative works, distribute, and otherwise use Python -alone or in any derivative version, provided, however, that PSF's -License Agreement and PSF's notice of copyright, i.e., "Copyright (c) -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Python Software Foundation; -All Rights Reserved" are retained in Python alone or in any derivative -version prepared by Licensee. +prepare derivative works, distribute, and otherwise use bitarray +alone or in any derivative version, provided, however, that Ilan Schnell's +License Agreement and Ilan Schnell's notice of copyright, i.e., "Copyright (c) +2008 - 2020 Ilan Schnell; All Rights Reserved" are retained in bitarray +alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on -or incorporates Python or any part thereof, and wants to make +or incorporates bitarray or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python. +the changes made to bitarray. -4. PSF is making Python available to Licensee on an "AS IS" -basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +4. Ilan Schnell is making bitarray available to Licensee on an "AS IS" +basis. ILAN SCHNELL MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ILAN SCHNELL MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF BITARRAY WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. -5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +5. ILAN SCHNELL SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF BITARRAY FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING BITARRAY, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between PSF and -Licensee. This License Agreement does not grant permission to use PSF -trademarks or trade name in a trademark sense to endorse or promote +relationship of agency, partnership, or joint venture between Ilan Schnell +and Licensee. This License Agreement does not grant permission to use Ilan +Schnell trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. -8. By copying, installing or otherwise using Python, Licensee +8. By copying, installing or otherwise using bitarray, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
diff -Nru python-bitarray-0.8.1/Makefile python-bitarray-1.6.3/Makefile --- python-bitarray-0.8.1/Makefile 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/Makefile 2021-01-21 00:52:57.000000000 +0000 @@ -1,3 +1,6 @@ +PYTHON=python + + bitarray/_bitarray.so: bitarray/_bitarray.c $(PYTHON) setup.py build_ext --inplace @@ -6,13 +9,20 @@ $(PYTHON) -c "import bitarray; bitarray.test()" +install: + $(PYTHON) setup.py install + + doc: bitarray/_bitarray.so - $(python) update_readme.py + $(PYTHON) update_readme.py + $(PYTHON) setup.py sdist + twine check dist/* clean: rm -rf build dist rm -f bitarray/*.o bitarray/*.so rm -f bitarray/*.pyc + rm -f examples/*.pyc rm -rf bitarray/__pycache__ *.egg-info - rm -f README.html + rm -rf examples/__pycache__ diff -Nru python-bitarray-0.8.1/README.md python-bitarray-1.6.3/README.md --- python-bitarray-0.8.1/README.md 1970-01-01 00:00:00.000000000 +0000 +++ python-bitarray-1.6.3/README.md 2021-01-21 00:52:57.000000000 +0000 @@ -0,0 +1,787 @@ +bitarray: efficient arrays of booleans +====================================== + +This module provides an object type which efficiently represents an array +of booleans. Bitarrays are sequence types and behave very much like usual +lists. Eight bits are represented by one byte in a contiguous block of +memory. The user can select between two representations: little-endian +and big-endian. All of the functionality is implemented in C. +Methods for accessing the machine representation are provided. +This can be useful when bit level access to binary files is required, +such as portable bitmap image files (.pbm). Also, when dealing with +compressed data which uses variable bit length encoding, you may find +this module useful. + + +Key features +------------ + + * All functionality implemented in C. + * Bitarray objects behave very much like a list object, in particular + slicing (including slice assignment and deletion) is supported. 
+ * The bit endianness can be specified for each bitarray object, see below. + * Packing and unpacking to other binary data formats, e.g. numpy.ndarray + is possible. + * Fast methods for encoding and decoding variable bit length prefix codes + * Bitwise operations: `&`, `|`, `^`, `&=`, `|=`, `^=`, `~` + * Sequential search + * Pickling and unpickling of bitarray objects. + * Bitarray objects support the buffer protocol + * On 32-bit systems, a bitarray object can contain up to 2 Gbits. + + +Installation +------------ + +Bitarray can be installed from source: + + $ tar xzf bitarray-1.6.3.tar.gz + $ cd bitarray-1.6.3 + $ python setup.py install + +On Unix systems, the latter command may have to be executed with root +privileges. You can also pip install bitarray. Please note that you need +a working C compiler to run the `python setup.py install` command. +If you rather want to use precompiled binaries, you can: + +* `pip install bitarray-hardbyte` (this PyPI package contains Python + wheels for Linux, MaxOSX and Windows and all common Python versions) +* `conda install bitarray` (both the default Anaconda repository as well + as conda-forge support bitarray) +* download Windows wheels from + [Chris Gohlke](https://www.lfd.uci.edu/~gohlke/pythonlibs/#bitarray) + +Once you have installed the package, you may want to test it: + + $ python -c 'import bitarray; bitarray.test()' + bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray + bitarray version: 1.6.3 + 3.7.4 (r271:86832, Dec 29 2018) [GCC 4.2.1 (SUSE Linux)] + ......................................................................... + ......................................................................... + .............................. + ---------------------------------------------------------------------- + Ran 257 tests in 0.921s + + OK + +You can always import the function test, +and `test().wasSuccessful()` will return `True` when the test went well. 
+ + +Using the module +---------------- + +As mentioned above, bitarray objects behave very much like lists, so +there is not too much to learn. The biggest difference from list +objects (except that bitarray are obviously homogeneous) is the ability +to access the machine representation of the object. +When doing so, the bit endianness is of importance; this issue is +explained in detail in the section below. Here, we demonstrate the +basic usage of bitarray objects: + + >>> from bitarray import bitarray + >>> a = bitarray() # create empty bitarray + >>> a.append(True) + >>> a.extend([False, True, True]) + >>> a + bitarray('1011') + +Bitarray objects can be instantiated in different ways: + + >>> a = bitarray(2**20) # bitarray of length 1048576 (uninitialized) + >>> bitarray('1001011') # from a string + bitarray('1001011') + >>> lst = [True, False, False, True, False, True, True] + >>> bitarray(lst) # from list, tuple, iterable + bitarray('1001011') + +Bits can be assigned from any Python object, if the value can be interpreted +as a truth value. 
You can think of this as Python's built-in function bool() +being applied, whenever casting an object: + + >>> a = bitarray([42, '', True, {}, 'foo', None]) + >>> a + bitarray('101010') + >>> a.append(a) # note that bool(a) is True + >>> a.count(42) # counts occurrences of True (not 42) + 4 + >>> a.remove('') # removes first occurrence of False + >>> a + bitarray('110101') + +Like lists, bitarray objects support slice assignment and deletion: + + >>> a = bitarray(50) + >>> a.setall(False) + >>> a[11:37:3] = 9 * bitarray([True]) + >>> a + bitarray('00000000000100100100100100100100100100000000000000') + >>> del a[12::3] + >>> a + bitarray('0000000000010101010101010101000000000') + >>> a[-6:] = bitarray('10011') + >>> a + bitarray('000000000001010101010101010100010011') + >>> a += bitarray('000111') + >>> a[9:] + bitarray('001010101010101010100010011000111') + +In addition, slices can be assigned to booleans, which is easier (and +faster) than assigning to a bitarray in which all values are the same: + + >>> a = 20 * bitarray('0') + >>> a[1:15:3] = True + >>> a + bitarray('01001001001001000000') + +This is easier and faster than: + + >>> a = 20 * bitarray('0') + >>> a[1:15:3] = 5 * bitarray('1') + >>> a + bitarray('01001001001001000000') + +Note that in the latter we have to create a temporary bitarray whose length +must be known or calculated. + + +Bit endianness +-------------- + +Since a bitarray allows addressing of individual bits, where the machine +represents 8 bits in one byte, there are two obvious choices for this +mapping: little- and big-endian. +When creating a new bitarray object, the endianness can always be +specified explicitly: + + >>> a = bitarray(endian='little') + >>> a.frombytes(b'A') + >>> a + bitarray('10000010') + >>> b = bitarray('11000010', endian='little') + >>> b.tobytes() + b'C' + +Here, the low-bit comes first because little-endian means that increasing +numeric significance corresponds to an increasing address (index). 
+So `a[0]` is the lowest and least significant bit, and `a[7]` is the +highest and most significant bit. + + >>> a = bitarray(endian='big') + >>> a.frombytes(b'A') + >>> a + bitarray('01000001') + >>> a[6] = 1 + >>> a.tobytes() + b'C' + +Here, the high-bit comes first because big-endian +means "most-significant first". +So `a[0]` is now the lowest and most significant bit, and `a[7]` is the +highest and least significant bit. + +The bit endianness is a property attached to each bitarray object. +When comparing bitarray objects, the endianness (and hence the machine +representation) is irrelevant; what matters is the mapping from indices +to bits: + + >>> bitarray('11001', endian='big') == bitarray('11001', endian='little') + True + +Bitwise operations (`&`, `|`, `^`, `&=`, `|=`, `^=`, `~`) are implemented +efficiently using the corresponding byte operations in C, i.e. the operators +act on the machine representation of the bitarray objects. +Therefore, one has to be cautious when applying the operation to bitarrays +with different endianness. + +When converting to and from machine representation, using +the `tobytes`, `frombytes`, `tofile` and `fromfile` methods, +the endianness matters: + + >>> a = bitarray(endian='little') + >>> a.frombytes(b'\x01') + >>> a + bitarray('10000000') + >>> b = bitarray(endian='big') + >>> b.frombytes(b'\x80') + >>> b + bitarray('10000000') + >>> a == b + True + >>> a.tobytes() == b.tobytes() + False + +The endianness can not be changed once an object is created. 
+However, since creating a bitarray from another bitarray just copies the +memory representing the data, you can create a new bitarray with different +endianness: + + >>> a = bitarray('11100000', endian='little') + >>> a + bitarray('11100000') + >>> b = bitarray(a, endian='big') + >>> b + bitarray('00000111') + >>> a == b + False + >>> a.tobytes() == b.tobytes() + True + +The default bit endianness is currently big-endian, however this may change +in the future, and when dealing with the machine representation of bitarray +objects, it is recommended to always explicitly specify the endianness. + +Unless explicitly converting to machine representation, using +the `tobytes`, `frombytes`, `tofile` and `fromfile` methods, +the bit endianness will have no effect on any computation, and one +can safely ignore setting the endianness, and other details of this section. + + +Buffer protocol +--------------- + +Python 2.7 provides memoryview objects, which allow Python code to access +the internal data of an object that supports the buffer protocol without +copying. Bitarray objects support this protocol, with the memory being +interpreted as simple bytes. + + >>> a = bitarray('01000001' '01000010' '01000011', endian='big') + >>> v = memoryview(a) + >>> len(v) + 3 + >>> v[-1] + 67 + >>> v[:2].tobytes() + b'AB' + >>> v.readonly # changing a bitarray's memory is also possible + False + >>> v[1] = 111 + >>> a + bitarray('010000010110111101000011') + + +Variable bit length prefix codes +-------------------------------- + +The method `encode` takes a dictionary mapping symbols to bitarrays +and an iterable, and extends the bitarray object with the encoded symbols +found while iterating. For example: + + >>> d = {'H':bitarray('111'), 'e':bitarray('0'), + ... 'l':bitarray('110'), 'o':bitarray('10')} + ... 
+ >>> a = bitarray() + >>> a.encode(d, 'Hello') + >>> a + bitarray('111011011010') + +Note that the string `'Hello'` is an iterable, but the symbols are not +limited to characters, in fact any immutable Python object can be a symbol. +Taking the same dictionary, we can apply the `decode` method which will +return a list of the symbols: + + >>> a.decode(d) + ['H', 'e', 'l', 'l', 'o'] + >>> ''.join(a.decode(d)) + 'Hello' + +Since symbols are not limited to being characters, it is necessary to return +them as elements of a list, rather than simply returning the joined string. + +When the codes are large, and you have many decode calls, most time will +be spent creating the (same) internal decode tree objects. In this case, +it will be much faster to create a `decodetree` object (which is initialized +with a prefix code dictionary), and can be passed to bitarray's `.decode()` +and `.iterdecode()` methods, instead of passing the prefix code dictionary +to those methods itself. + +The above dictionary `d` can be efficiently constructed using the function +`bitarray.util.huffman_code()`. I also wrote [Huffman coding in Python using +bitarray](http://ilan.schnell-web.net/prog/huffman/) for more background +information. + + +Reference +========= + +The bitarray object: +-------------------- + +`bitarray(initializer=0, /, endian='big')` -> bitarray + +Return a new bitarray object whose items are bits initialized from +the optional initial object, and endianness. +The initializer may be of the following types: + +`int`: Create a bitarray of given integer length. The initial values are +arbitrary. If you want all values to be set, use the .setall() method. + +`str`: Create bitarray from a string of `0` and `1`. + +`list`, `tuple`, `iterable`: Create bitarray from a sequence, each +element in the sequence is converted to a bit using its truth value. + +`bitarray`: Create bitarray from another bitarray. 
This is done by +copying the buffer holding the bitarray data, and is hence very fast. + +The optional keyword argument `endian` specifies the bit endianness of the +created bitarray object. +Allowed values are the strings `big` and `little` (default is `big`). + +Note that setting the bit endianness only has an effect when accessing the +machine representation of the bitarray, i.e. when using the methods: tofile, +fromfile, tobytes, frombytes. + + +**A bitarray object supports the following methods:** + +`all()` -> bool + +Returns True when all bits in the array are True. + + +`any()` -> bool + +Returns True when any bit in the array is True. + + +`append(item, /)` + +Append the truth value `bool(item)` to the end of the bitarray. + + +`buffer_info()` -> tuple + +Return a tuple (address, size, endianness, unused, allocated) giving the +memory address of the bitarray's buffer, the buffer size (in bytes), +the bit endianness as a string, the number of unused bits within the last +byte, and the allocated memory for the buffer (in bytes). + + +`bytereverse()` + +For all bytes representing the bitarray, reverse the bit order (in-place). +Note: This method changes the actual machine values representing the +bitarray; it does not change the endianness of the bitarray object. + + +`clear()` + +Remove all items from the bitarray. + + +`copy()` -> bitarray + +Return a copy of the bitarray. + + +`count(value=True, start=0, stop=<end of array>, /)` -> int + +Count the number of occurrences of bool(value) in the bitarray. + + +`decode(code, /)` -> list + +Given a prefix code (a dict mapping symbols to bitarrays, or `decodetree` +object), decode the content of the bitarray and return it as a list of +symbols. + + +`encode(code, iterable, /)` + +Given a prefix code (a dict mapping symbols to bitarrays), +iterate over the iterable object with symbols, and extend the bitarray +with the corresponding bitarray for each symbol. 
+ + +`endian()` -> str + +Return the bit endianness of the bitarray as a string (`little` or `big`). + + +`extend(iterable or string, /)` + +Extend bitarray by appending the truth value of each element given +by iterable. If a string is provided, each `0` and `1` are appended +as bits. + + +`fill()` -> int + +Adds zeros to the end of the bitarray, such that the length of the bitarray +will be a multiple of 8. Returns the number of bits added (0..7). + + +`frombytes(bytes, /)` + +Extend bitarray with raw bytes. That is, each appended byte will add eight +bits to the bitarray. + + +`fromfile(f, n=-1, /)` + +Extend bitarray with up to n bytes read from the file object f. +When n is omitted or negative, reads all data until EOF. +When n is provided and positive but exceeds the data available, +EOFError is raised (but the available data is still read and appended). + + +`index(value, start=0, stop=<end of array>, /)` -> int + +Return index of the first occurrence of `bool(value)` in the bitarray. +Raises `ValueError` if the value is not present. + + +`insert(index, value, /)` + +Insert `bool(value)` into the bitarray before index. + + +`invert(index=<all bits>)` + +Invert all bits in the array (in-place). +When the optional `index` is given, only invert the single bit at index. + + +`iterdecode(code, /)` -> iterator + +Given a prefix code (a dict mapping symbols to bitarrays, or `decodetree` +object), decode the content of the bitarray and return an iterator over +the symbols. + + +`itersearch(bitarray, /)` -> iterator + +Searches for the given bitarray in self, and return an iterator over +the start positions where bitarray matches self. + + +`length()` -> int + +Return the length - a.length() is the same as len(a). +Deprecated since 1.5.1, use len(). + + +`pack(bytes, /)` + +Extend the bitarray from bytes, where each byte corresponds to a single +bit. The byte `b'\x00'` maps to bit 0 and all other characters map to +bit 1. 
+This method, as well as the unpack method, is meant for efficient +transfer of data between bitarray objects and other Python objects +(for example NumPy's ndarray object) which have a different memory view. + + +`pop(index=-1, /)` -> item + +Return the i-th (default last) element and delete it from the bitarray. +Raises `IndexError` if bitarray is empty or index is out of range. + + +`remove(value, /)` + +Remove the first occurrence of `bool(value)` in the bitarray. +Raises `ValueError` if item is not present. + + +`reverse()` + +Reverse the order of bits in the array (in-place). + + +`search(bitarray, limit=<none>, /)` -> list + +Searches for the given bitarray in self, and returns the list of start +positions. +The optional argument limits the number of search results to the integer +specified. By default, all search results are returned. + + +`setall(value, /)` + +Set all bits in the bitarray to `bool(value)`. + + +`sort(reverse=False)` + +Sort the bits in the array (in-place). + + +`to01()` -> str + +Return a string containing '0's and '1's, representing the bits in the +bitarray object. + + +`tobytes()` -> bytes + +Return the byte representation of the bitarray. +When the length of the bitarray is not a multiple of 8, the few remaining +bits (1..7) are considered to be 0. + + +`tofile(f, /)` + +Write the byte representation of the bitarray to the file object f. +When the length of the bitarray is not a multiple of 8, +the remaining bits (1..7) are set to 0. + + +`tolist(as_ints=False, /)` -> list + +Return a list with the items (False or True) in the bitarray. +The optional parameter changes the items in the list to integers (0 or 1). +Note that the list object being created will require 32 or 64 times more +memory (depending on the machine architecture) than the bitarray object, +which may cause a memory error if the bitarray is very large. 
+ + +`unpack(zero=b'\x00', one=b'\xff')` -> bytes + +Return bytes containing one character for each bit in the bitarray, +using the specified mapping. + + +The frozenbitarray object: +-------------------------- + +This object is very similar to the bitarray object. The difference is that +a frozenbitarray is immutable and hashable: + + >>> from bitarray import frozenbitarray + >>> a = frozenbitarray('1100011') + >>> a[3] = 1 + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "bitarray/__init__.py", line 40, in __delitem__ + raise TypeError("'frozenbitarray' is immutable") + TypeError: 'frozenbitarray' is immutable + >>> {a: 'some value'} + {frozenbitarray('1100011'): 'some value'} + +`frozenbitarray(initializer=0, /, endian='big')` -> frozenbitarray + +Return a frozenbitarray object, which is initialized the same way a bitarray +object is initialized. A frozenbitarray is immutable and hashable. +Its contents cannot be altered after it is created; however, it can be used +as a dictionary key. + + +The decodetree object: +---------------------- + +This (immutable and unhashable) object stores a binary tree initialized +from a prefix code dictionary. Its sole purpose is to be passed to +bitarray's `.decode()` and `.iterdecode()` methods, instead of passing +the prefix code dictionary to those methods directly: + + >>> from bitarray import bitarray, decodetree + >>> t = decodetree({'a': bitarray('0'), 'b': bitarray('1')}) + >>> a = bitarray('0110') + >>> a.decode(t) + ['a', 'b', 'b', 'a'] + >>> ''.join(a.iterdecode(t)) + 'abba' + +`decodetree(code, /)` -> decodetree + +Given a prefix code (a dict mapping symbols to bitarrays), +create a binary tree object to be passed to `.decode()` or `.iterdecode()`. + + +Functions defined in the `bitarray` module: +-------------------------------------------- + +`test(verbosity=1, repeat=1)` -> TextTestResult + +Run self-test, and return unittest.runner.TextTestResult object. 
+ + +`bits2bytes(n, /)` -> int + +Return the number of bytes necessary to store n bits. + + +`get_default_endian()` -> string + +Return the default endianness for new bitarray objects being created. +Under normal circumstances, the return value is `big`. + + +Functions defined in `bitarray.util` module: +-------------------------------------------- + +`zeros(length, /, endian=None)` -> bitarray + +Create a bitarray of length, with all values 0, and optional +endianness, which may be 'big', 'little'. + + +`make_endian(bitarray, endian, /)` -> bitarray + +When the endianness of the given bitarray is different from `endian`, +return a new bitarray, with endianness `endian` and the same elements +as the original bitarray, i.e. even though the binary representation of the +new bitarray will be different, the returned bitarray will equal the original +one. +Otherwise (endianness is already `endian`) the original bitarray is returned +unchanged. + + +`rindex(bitarray, value=True, /)` -> int + +Return the rightmost index of `bool(value)` in bitarray. +Raises `ValueError` if the value is not present. + + +`strip(bitarray, mode='right', /)` -> bitarray + +Strip zeros from left, right or both ends. +Allowed values for mode are the strings: `left`, `right`, `both` + + +`count_n(a, n, /)` -> int + +Find the smallest index `i` for which `a[:i].count() == n`. +Raises `ValueError`, when n exceeds total count (`a.count()`). + + +`count_and(a, b, /)` -> int + +Returns `(a & b).count()`, but is more memory efficient, +as no intermediate bitarray object gets created. + + +`count_or(a, b, /)` -> int + +Returns `(a | b).count()`, but is more memory efficient, +as no intermediate bitarray object gets created. + + +`count_xor(a, b, /)` -> int + +Returns `(a ^ b).count()`, but is more memory efficient, +as no intermediate bitarray object gets created. + + +`subset(a, b, /)` -> bool + +Return True if bitarray `a` is a subset of bitarray `b` (False otherwise). 
+`subset(a, b)` is equivalent to `(a & b).count() == a.count()` but is more +efficient since we can stop as soon as one mismatch is found, and no +intermediate bitarray object gets created. + + +`ba2hex(bitarray, /)` -> hexstr + +Return a string containing the hexadecimal representation of +the bitarray (which has to be a multiple of 4 in length). + + +`hex2ba(hexstr, /, endian=None)` -> bitarray + +Bitarray of hexadecimal representation. +hexstr may contain any number of hex digits (upper or lower case). + + +`ba2int(bitarray, /, signed=False)` -> int + +Convert the given bitarray into an integer. +The bit-endianness of the bitarray is respected. +`signed` indicates whether two's complement is used to represent the integer. + + +`int2ba(int, /, length=None, endian=None, signed=False)` -> bitarray + +Convert the given integer to a bitarray (with given endianness, +and no leading (big-endian) / trailing (little-endian) zeros), unless +the `length` of the bitarray is provided. An `OverflowError` is raised +if the integer is not representable with the given number of bits. +`signed` determines whether two's complement is used to represent the integer, +and requires `length` to be provided. +If signed is False and a negative integer is given, an OverflowError +is raised. + + +`huffman_code(dict, /, endian=None)` -> dict + +Given a frequency map, a dictionary mapping symbols to their frequency, +calculate the Huffman code, i.e. a dict mapping those symbols to +bitarrays (with given endianness). Note that the symbols may be any +hashable object (including `None`). + + +Change log +---------- + +*1.6.3* (2021-01-20): + + * add missing .h files to sdist tarball, #113 + + +*1.6.2* (2021-01-20): + + * use `Py_SET_TYPE()` and `Py_SET_SIZE()` for Python 3.10, #109 + * add official Python 3.10 support + * fix slice assignment to same object, e.g. 
a[2::] = a or a[::-1] = a, #112 + * add bitarray.h, #110 + + +*1.6.1* (2020-11-05): + + * use PyType_Ready for all types: bitarray, bitarrayiterator, + decodeiterator, decodetree, searchiterator + + +*1.6.0* (2020-10-17): + + * add `decodetree` object, for speeding up consecutive calls + to `.decode()` and `.iterdecode()`, in particular when dealing + with large prefix codes, see #103 + * add optional parameter to `.tolist()` which changes the items in the + returned list to integers (0 or 1), as opposed to Booleans + * remove deprecated `bitdiff()`, which has been deprecated since version + 1.2.0, use `bitarray.util.count_xor()` instead + * drop Python 2.6 support + * update license file, #104 + + +*1.5.3* (2020-08-24): + + * add optional index parameter to `.invert()` to invert single bit + * fix `sys.getsizeof(bitarray)` by adding `.__sizeof__()`, see issue #100 + + +*1.5.2* (2020-08-16): + + * add PyType_Ready usage, issue #66 + * speedup search() for bitarrays with length 1 in sparse bitarrays, + see issue #67 + * add tests + + +*1.5.1* (2020-08-10): + + * support signed integers in `util.ba2int()` and `util.int2ba()`, + see issue #85 + * deprecate `.length()` in favor of `len()` + + +*1.5.0* (2020-08-05): + + * Use `Py_ssize_t` for bitarray index. This means that on 32bit + systems, the maximum number of elements in a bitarray is 2 GBits. + We used to have a special 64bit index type for all architectures, but + this prevented us from using Python's sequence, mapping and number + methods, and made those method lookups slow. + * speedup slice operations when step size = 1 (if alignment allows + copying whole bytes) + * Require equal endianness for operations: `&`, `|`, `^`, `&=`, `|=`, `^=`. + This should have always been the case but was overlooked in the past. + * raise TypeError when trying to create bitarray from boolean + * This will be the last release to still support Python 2.6 (which was retired + in 2013). 
We do NOT plan to stop support for Python 2.7 anytime soon. + + +Please find the complete change log [here](https://github.com/ilanschnell/bitarray/blob/master/CHANGE_LOG). diff -Nru python-bitarray-0.8.1/README.rst python-bitarray-1.6.3/README.rst --- python-bitarray-0.8.1/README.rst 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/README.rst 1970-01-01 00:00:00.000000000 +0000 @@ -1,562 +0,0 @@ -====================================== -bitarray: efficient arrays of booleans -====================================== - -This module provides an object type which efficiently represents an array -of booleans. Bitarrays are sequence types and behave very much like usual -lists. Eight bits are represented by one byte in a contiguous block of -memory. The user can select between two representations; little-endian -and big-endian. All of the functionality is implemented in C. -Methods for accessing the machine representation are provided. -This can be useful when bit level access to binary files is required, -such as portable bitmap image files (.pbm). Also, when dealing with -compressed data which uses variable bit length encoding, you may find -this module useful. - - -Key features ------------- - - * All functionality implemented in C. - - * Bitarray objects behave very much like a list object, in particular - slicing (including slice assignment and deletion) is supported. - - * The bit endianness can be specified for each bitarray object, see below. - - * On 32bit systems, a bitarray object can contain up to 2^34 elements, - that is 16 Gbits (on 64bit machines up to 2^63 elements in theory -- - on Python 2.4 only 2^31 elements, - see `PEP 353 `_ - (added in Python 2.5)). - - * Packing and unpacking to other binary data formats, - e.g. `numpy.ndarray `_, - is possible. 
- - * Fast methods for encoding and decoding variable bit length prefix codes - - * Sequential search (as list or iterator) - - * Bitwise operations: ``&, |, ^, &=, |=, ^=, ~`` - - * Pickling and unpickling of bitarray objects possible. - - * Bitarray objects support the buffer protocol (Python 2.7 only) - - -Installation ------------- - -bitarray can be installed from source:: - - $ tar xzf bitarray-0.8.1.tar.gz - $ cd bitarray-0.8.1 - $ python setup.py install - -On Unix systems, the latter command may have to be executed with root -privileges. -If you have `distribute `_ -installed, you can easy_install bitarray. -Once you have installed the package, you may want to test it:: - - $ python -c 'import bitarray; bitarray.test()' - bitarray is installed in: /usr/local/lib/python2.7/site-packages/bitarray - bitarray version: 0.8.1 - 2.7.2 (r271:86832, Nov 29 2010) [GCC 4.2.1 (SUSE Linux)] - ......................................................................... - ........................................... - ---------------------------------------------------------------------- - Ran 134 tests in 1.396s - - OK - -You can always import the function test, -and ``test().wasSuccessful()`` will return True when the test went well. - - - -Using the module ----------------- - -As mentioned above, bitarray objects behave very much like lists, so -there is not too new to learn. The biggest difference to list objects -is the ability to access the machine representation of the object. -When doing so, the bit endianness is of importance, this issue is -explained in detail in the section below. 
Here, we demonstrate the -basic usage of bitarray objects: - - >>> from bitarray import bitarray - >>> a = bitarray() # create empty bitarray - >>> a.append(True) - >>> a.extend([False, True, True]) - >>> a - bitarray('1011') - -Bitarray objects can be instantiated in different ways: - - >>> a = bitarray(2**20) # bitarray of length 1048576 (uninitialized) - >>> bitarray('1001011') # from a string - bitarray('1001011') - >>> lst = [True, False, False, True, False, True, True] - >>> bitarray(lst) # from list, tuple, iterable - bitarray('1001011') - -Bits can be assigned from any Python object, if the value can be interpreted -as a truth value. You can think of this as Python's built-in function bool() -being applied, whenever casting an object: - - >>> a = bitarray([42, '', True, {}, 'foo', None]) - >>> a - bitarray('101010') - >>> a.append(a) # note that bool(a) is True - >>> a.count(42) # counts occurrences of True (not 42) - 4L - >>> a.remove('') # removes first occurrence of False - >>> a - bitarray('110101') - -Like lists, bitarray objects support slice assignment and deletion: - - >>> a = bitarray(50) - >>> a.setall(False) - >>> a[11:37:3] = 9 * bitarray([True]) - >>> a - bitarray('00000000000100100100100100100100100100000000000000') - >>> del a[12::3] - >>> a - bitarray('0000000000010101010101010101000000000') - >>> a[-6:] = bitarray('10011') - >>> a - bitarray('000000000001010101010101010100010011') - >>> a += bitarray('000111') - >>> a[9:] - bitarray('001010101010101010100010011000111') - -In addition, slices can be assigned to booleans, which is easier (and -faster) than assigning to a bitarray in which all values are the same: - - >>> a = 20 * bitarray('0') - >>> a[1:15:3] = True - >>> a - bitarray('01001001001001000000') - -This is easier and faster than: - - >>> a = 20 * bitarray('0') - >>> a[1:15:3] = 5 * bitarray('1') - >>> a - bitarray('01001001001001000000') - -Note that in the latter we have to create a temporary bitarray whose length -must be known 
or calculated. - - -Bit endianness --------------- - -Since a bitarray allows addressing of individual bits, where the machine -represents 8 bits in one byte, there two obvious choices for this mapping; -little- and big-endian. -When creating a new bitarray object, the endianness can always be -specified explicitly: - - >>> a = bitarray(endian='little') - >>> a.frombytes(b'A') - >>> a - bitarray('10000010') - >>> b = bitarray('11000010', endian='little') - >>> b.tobytes() - 'C' - -Here, the low-bit comes first because little-endian means that increasing -numeric significance corresponds to an increasing address (index). -So a[0] is the lowest and least significant bit, and a[7] is the highest -and most significant bit. - - >>> a = bitarray(endian='big') - >>> a.frombytes(b'A') - >>> a - bitarray('01000001') - >>> a[6] = 1 - >>> a.tobytes() - 'C' - -Here, the high-bit comes first because big-endian -means "most-significant first". -So a[0] is now the lowest and most significant bit, and a[7] is the highest -and least significant bit. - -The bit endianness is a property attached to each bitarray object. -When comparing bitarray objects, the endianness (and hence the machine -representation) is irrelevant; what matters is the mapping from indices -to bits: - - >>> bitarray('11001', endian='big') == bitarray('11001', endian='little') - True - -Bitwise operations (``&, |, ^, &=, |=, ^=, ~``) are implemented efficiently -using the corresponding byte operations in C, i.e. the operators act on the -machine representation of the bitarray objects. Therefore, one has to be -cautious when applying the operation to bitarrays with different endianness. 
- -When converting to and from machine representation, using -the ``tobytes``, ``frombytes``, ``tofile`` and ``fromfile`` methods, -the endianness matters: - - >>> a = bitarray(endian='little') - >>> a.frombytes(b'\x01') - >>> a - bitarray('10000000') - >>> b = bitarray(endian='big') - >>> b.frombytes(b'\x80') - >>> b - bitarray('10000000') - >>> a == b - True - >>> a.tobytes() == b.tobytes() - False - -The endianness can not be changed once an object is created. -However, since creating a bitarray from another bitarray just copies the -memory representing the data, you can create a new bitarray with different -endianness: - - >>> a = bitarray('11100000', endian='little') - >>> a - bitarray('11100000') - >>> b = bitarray(a, endian='big') - >>> b - bitarray('00000111') - >>> a == b - False - >>> a.tobytes() == b.tobytes() - True - -The default bit endianness is currently big-endian, however this may change -in the future, and when dealing with the machine representation of bitarray -objects, it is recommended to always explicitly specify the endianness. - -Unless, explicitly converting to machine representation, using -the ``tobytes``, ``frombytes``, ``tofile`` and ``fromfile`` methods, -the bit endianness will have no effect on any computation, and one -can safely ignore setting the endianness, and other details of this section. - - -Buffer protocol ---------------- - -Python 2.7 provides memoryview objects, which allow Python code to access -the internal data of an object that supports the buffer protocol without -copying. Bitarray objects support this protocol, with the memory being -interpreted as simple bytes. 
- - >>> a = bitarray('01000001' '01000010' '01000011', endian='big') - >>> v = memoryview(a) - >>> len(v) - 3 - >>> v[-1] - 'C' - >>> v[:2].tobytes() - 'AB' - >>> v.readonly # changing a bitarray's memory is also possible - False - >>> v[1] = 'o' - >>> a - bitarray('010000010110111101000011') - - -Variable bit length prefix codes --------------------------------- - -The method ``encode`` takes a dictionary mapping symbols to bitarrays -and an iterable, and extends the bitarray object with the encoded symbols -found while iterating. For example: - - >>> d = {'H':bitarray('111'), 'e':bitarray('0'), - ... 'l':bitarray('110'), 'o':bitarray('10')} - ... - >>> a = bitarray() - >>> a.encode(d, 'Hello') - >>> a - bitarray('111011011010') - -Note that the string ``'Hello'`` is an iterable, but the symbols are not -limited to characters, in fact any immutable Python object can be a symbol. -Taking the same dictionary, we can apply the ``decode`` method which will -return a list of the symbols: - - >>> a.decode(d) - ['H', 'e', 'l', 'l', 'o'] - >>> ''.join(a.decode(d)) - 'Hello' - -Since symbols are not limited to being characters, it is necessary to return -them as elements of a list, rather than simply returning the joined string. - - -Reference ---------- - -**The bitarray class:** - -``bitarray([initial], [endian=string])`` - Return a new bitarray object whose items are bits initialized from - the optional initial, and endianness. - If no object is provided, the bitarray is initialized to have length zero. - The initial object may be of the following types: - - int, long - Create bitarray of length given by the integer. The initial values - in the array are random, because only the memory allocated. - - string - Create bitarray from a string of '0's and '1's. - - list, tuple, iterable - Create bitarray from a sequence, each element in the sequence is - converted to a bit using truth value value. - - bitarray - Create bitarray from another bitarray. 
This is done by copying the - memory holding the bitarray data, and is hence very fast. - - The optional keyword arguments 'endian' specifies the bit endianness of the - created bitarray object. - Allowed values are 'big' and 'little' (default is 'big'). - - Note that setting the bit endianness only has an effect when accessing the - machine representation of the bitarray, i.e. when using the methods: tofile, - fromfile, tobytes, frombytes. - - -**A bitarray object supports the following methods:** - -``all()`` -> bool - Returns True when all bits in the array are True. - - -``any()`` -> bool - Returns True when any bit in the array is True. - - -``append(item)`` - Append the value bool(item) to the end of the bitarray. - - -``buffer_info()`` -> tuple - Return a tuple (address, size, endianness, unused, allocated) giving the - current memory address, the size (in bytes) used to hold the bitarray's - contents, the bit endianness as a string, the number of unused bits - (e.g. a bitarray of length 11 will have a buffer size of 2 bytes and - 5 unused bits), and the size (in bytes) of the allocated memory. - - -``bytereverse()`` - For all bytes representing the bitarray, reverse the bit order (in-place). - Note: This method changes the actual machine values representing the - bitarray; it does not change the endianness of the bitarray object. - - -``copy()`` -> bitarray - Return a copy of the bitarray. - - -``count([value])`` -> int - Return number of occurrences of value (defaults to True) in the bitarray. - - -``decode(code)`` -> list - Given a prefix code (a dict mapping symbols to bitarrays), - decode the content of the bitarray and return the list of symbols. - - -``encode(code, iterable)`` - Given a prefix code (a dict mapping symbols to bitarrays), - iterates over iterable object with symbols, and extends the bitarray - with the corresponding bitarray for each symbols. - - -``endian()`` -> string - Return the bit endianness as a string (either 'little' or 'big'). 
- - -``extend(object)`` - Append bits to the end of the bitarray. The objects which can be passed - to this method are the same iterable objects which can given to a bitarray - object upon initialization. - - -``fill()`` -> int - Adds zeros to the end of the bitarray, such that the length of the bitarray - is not a multiple of 8. Returns the number of bits added (0..7). - - -``frombytes(bytes)`` - Append from a byte string, interpreted as machine values. - - -``fromfile(f, [n])`` - Read n bytes from the file object f and append them to the bitarray - interpreted as machine values. When n is omitted, as many bytes are - read until EOF is reached. - - -``fromstring(string)`` - Append from a string, interpreting the string as machine values. - Deprecated since version 0.4.0, use ``frombytes()`` instead. - - -``index(value, [start, [stop]])`` -> int - Return index of the first occurrence of bool(value) in the bitarray. - Raises ValueError if the value is not present. - - -``insert(i, item)`` - Insert bool(item) into the bitarray before position i. - - -``invert()`` - Invert all bits in the array (in-place), - i.e. convert each 1-bit into a 0-bit and vice versa. - - -``iterdecode(code)`` -> iterator - Given a prefix code (a dict mapping symbols to bitarrays), - decode the content of the bitarray and iterate over the symbols. - - -``itersearch(bitarray)`` -> iterator - Searches for the given a bitarray in self, and return an iterator over - the start positions where bitarray matches self. - - -``length()`` -> int - Return the length, i.e. number of bits stored in the bitarray. - This method is preferred over __len__ (used when typing ``len(a)``), - since __len__ will fail for a bitarray object with 2^31 or more elements - on a 32bit machine, whereas this method will return the correct value, - on 32bit and 64bit machines. - - -``pack(bytes)`` - Extend the bitarray from a byte string, where each characters corresponds to - a single bit. 
The character b'\x00' maps to bit 0 and all other characters - map to bit 1. - This method, as well as the unpack method, are meant for efficient - transfer of data between bitarray objects to other python objects - (for example NumPy's ndarray object) which have a different view of memory. - - -``pop([i])`` -> item - Return the i-th (default last) element and delete it from the bitarray. - Raises IndexError if bitarray is empty or index is out of range. - - -``remove(item)`` - Remove the first occurrence of bool(item) in the bitarray. - Raises ValueError if item is not present. - - -``reverse()`` - Reverse the order of bits in the array (in-place). - - -``search(bitarray, [limit])`` -> list - Searches for the given a bitarray in self, and returns the start positions - where bitarray matches self as a list. - The optional argument limits the number of search results to the integer - specified. By default, all search results are returned. - - -``setall(value)`` - Set all bits in the bitarray to bool(value). - - -``sort(reverse=False)`` - Sort the bits in the array (in-place). - - -``to01()`` -> string - Return a string containing '0's and '1's, representing the bits in the - bitarray object. - Note: To extend a bitarray from a string containing '0's and '1's, - use the extend method. - - -``tobytes()`` -> bytes - Return the byte representation of the bitarray. - When the length of the bitarray is not a multiple of 8, the few remaining - bits (1..7) are set to 0. - - -``tofile(f)`` - Write all bits (as machine values) to the file object f. - When the length of the bitarray is not a multiple of 8, - the remaining bits (1..7) are set to 0. - - -``tolist()`` -> list - Return an ordinary list with the items in the bitarray. - Note that the list object being created will require 32 or 64 times more - memory than the bitarray object, which may cause a memory error if the - bitarray is very large. 
- Also note that to extend a bitarray with elements from a list, - use the extend method. - - -``tostring()`` -> string - Return the string representing (machine values) of the bitarray. - When the length of the bitarray is not a multiple of 8, the few remaining - bits (1..7) are set to 0. - Deprecated since version 0.4.0, use ``tobytes()`` instead. - - -``unpack(zero=b'\x00', one=b'\xff')`` -> bytes - Return a byte string containing one character for each bit in the bitarray, - using the specified mapping. - See also the pack method. - - -**Functions defined in the module:** - -``test(verbosity=1, repeat=1)`` -> TextTestResult - Run self-test, and return unittest.runner.TextTestResult object. - - -``bitdiff(a, b)`` -> int - Return the difference between two bitarrays a and b. - This is function does the same as (a ^ b).count(), but is more memory - efficient, as no intermediate bitarray object gets created - - -``bits2bytes(n)`` -> int - Return the number of bytes necessary to store n bits. - - -Change log ----------- - -**0.8.1** (2013-03-30): - - * fix issue #10, i.e. int(bitarray()) segfault - * added tests for using a bitarray object as an argument to functions - like int, long (on Python 2), float, list, tuple, dict - - -**0.8.0** (2012-04-04): - - * add Python 2.4 support - * add (module level) function bitdiff for calculating the difference - between two bitarrays - - -**0.7.0** (2012-02-15): - - * add iterdecode method (C level), which returns an iterator but is - otherwise like the decode method - * improve memory efficiency and speed of pickling large bitarray objects - - -Please find the complete change log -`here <https://github.com/ilanschnell/bitarray/blob/master/CHANGE_LOG>`_.
diff -Nru python-bitarray-0.8.1/setup.py python-bitarray-1.6.3/setup.py --- python-bitarray-0.8.1/setup.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/setup.py 2021-01-21 00:52:57.000000000 +0000 @@ -1,18 +1,17 @@ import re -from os.path import join -try: - from setuptools import setup, Extension -except ImportError: - from distutils.core import setup, Extension +from distutils.core import setup, Extension kwds = {} -kwds['long_description'] = open('README.rst').read() +try: + kwds['long_description'] = open('README.md').read() +except IOError: + pass -# Read version from bitarray/__init__.py -pat = re.compile(r'__version__\s*=\s*(\S+)', re.M) -data = open(join('bitarray', '__init__.py')).read() -kwds['version'] = eval(pat.search(data).group(1)) +# Read version from bitarray/bitarray.h +pat = re.compile(r'#define\s+BITARRAY_VERSION\s+"(\S+)"', re.M) +data = open('bitarray/bitarray.h').read() +kwds['version'] = pat.search(data).group(1) setup( @@ -28,19 +27,22 @@ "Operating System :: OS Independent", "Programming Language :: C", "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.4", - "Programming Language :: Python :: 2.5", - "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.1", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", packages = ["bitarray"], + package_data = {"bitarray": ["*.h"]}, ext_modules = [Extension(name = "bitarray._bitarray", - sources = ["bitarray/_bitarray.c"])], + sources = ["bitarray/_bitarray.c"]), + Extension(name 
= "bitarray._util", + sources = ["bitarray/_util.c"])], **kwds ) diff -Nru python-bitarray-0.8.1/TODO python-bitarray-1.6.3/TODO --- python-bitarray-0.8.1/TODO 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/TODO 2021-01-21 00:52:57.000000000 +0000 @@ -7,6 +7,16 @@ --------------------------- RANDOM NOTES --------------------------------- +#define setbit(self, i, bit) \ + ((self)->ob_item[(i) / 8] = (bit) ? \ + ((self)->ob_item[(i) / 8] | BITMASK((self)->endian, (i))) : \ + ((self)->ob_item[(i) / 8] & ~BITMASK((self)->endian, (i)))) + + +/* print('x=', repr(x)) but in C */ +printf("x = %s\n", PyUnicode_AsUTF8(PyObject_Repr(x))); + + #!/bin/bash for PY in py26 py26d py27 py32 py33 do @@ -40,9 +50,25 @@ print sorted(sizes) +/* walk the binary tree, and display information for each node */ +static void +display_binode_tree(binode *tree) +{ + int k; + + printf("id: %p child0: %14p child1: %14p symbol: %s\n", + tree, + tree->child[0], + tree->child[1], + tree->symbol ? PyString_AsString(tree->symbol) : "NULL"); + + for (k = 0; k < 2; k++) + if (tree->child[k]) + display_binode_tree(tree->child[k]); +} -If I should ever decide to use pre-calculated tables: +# If I should ever decide to use pre-calculated tables: static char bytereverse_trans[256] = { 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, diff -Nru python-bitarray-0.8.1/update_readme.py python-bitarray-1.6.3/update_readme.py --- python-bitarray-0.8.1/update_readme.py 2013-03-31 02:07:08.000000000 +0000 +++ python-bitarray-1.6.3/update_readme.py 2021-01-21 00:52:57.000000000 +0000 @@ -1,12 +1,16 @@ -import os +import sys +if sys.version_info[0] != 3: + sys.exit("This program requires Python 3") + import re import doctest -from cStringIO import StringIO +from io import StringIO import bitarray +import bitarray.util -fo = StringIO() +fo = None def write_changelog(): @@ -17,41 +21,42 @@ for line in open('CHANGE_LOG'): m = ver_pat.match(line) if m: - if 
count == 3: + if count == 8: break count += 1 - fo.write(m.expand(r'**\2** (\1):\n')) + fo.write(m.expand(r'*\2* (\1):\n')) elif line.startswith('---'): fo.write('\n') else: fo.write(line) url = "https://github.com/ilanschnell/bitarray/blob/master/CHANGE_LOG" - fo.write("Please find the complete change log\n" - "`here <%s>`_.\n" % url) + fo.write('Please find the complete change log [here](%s).\n' % url) sig_pat = re.compile(r'(\w+\([^()]*\))( -> (.+))?') def write_doc(name): doc = eval('bitarray.%s.__doc__' % name) + assert doc, name lines = doc.splitlines() m = sig_pat.match(lines[0]) if m is None: raise Exception("signature line invalid: %r" % lines[0]) - s = '``%s``' % m.group(1) + s = '`%s`' % m.group(1) if m.group(3): s += ' -> %s' % m.group(3) - fo.write(s + '\n') + fo.write(s + '\n\n') assert lines[1] == '' for line in lines[2:]: - fo.write(' %s\n' % line) + fo.write(line.rstrip() + '\n') fo.write('\n\n') def write_reference(): fo.write("Reference\n" - "---------\n\n" - "**The bitarray class:**\n\n") + "=========\n\n" + "The bitarray object:\n" + "--------------------\n\n") write_doc('bitarray') fo.write("**A bitarray object supports the following methods:**\n\n") @@ -60,10 +65,57 @@ continue write_doc('bitarray.%s' % method) - fo.write("**Functions defined in the module:**\n\n") + fo.write("""\ +The frozenbitarray object: +-------------------------- + +This object is very similar to the bitarray object. 
The difference is that +this a frozenbitarray is immutable, and hashable: + + >>> from bitarray import frozenbitarray + >>> a = frozenbitarray('1100011') + >>> a[3] = 1 + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "bitarray/__init__.py", line 40, in __delitem__ + raise TypeError("'frozenbitarray' is immutable") + TypeError: 'frozenbitarray' is immutable + >>> {a: 'some value'} + {frozenbitarray('1100011'): 'some value'} + +""") + write_doc('frozenbitarray') + + fo.write("""\ +The decodetree object: +---------------------- + +This (immutable and unhashable) object stores a binary tree initialized +from a prefix code dictionary. It's sole purpose is to be passed to +bitarray's `.decode()` and `.iterdecode()` methods, instead of passing +the prefix code dictionary to those methods directly: + + >>> from bitarray import bitarray, decodetree + >>> t = decodetree({'a': bitarray('0'), 'b': bitarray('1')}) + >>> a = bitarray('0110') + >>> a.decode(t) + ['a', 'b', 'b', 'a'] + >>> ''.join(a.iterdecode(t)) + 'abba' + +""") + write_doc('decodetree') + + fo.write("Functions defined in the `bitarray` module:\n" + "--------------------------------------------\n\n") write_doc('test') - write_doc('bitdiff') write_doc('bits2bytes') + write_doc('get_default_endian') + + fo.write("Functions defined in `bitarray.util` module:\n" + "--------------------------------------------\n\n") + for func in bitarray.util.__all__: + write_doc('util.%s' % func) def write_all(data): @@ -79,19 +131,25 @@ def main(): - data = open('README.rst').read() - write_all(data) - new_data = fo.getvalue() - fo.close() + if len(sys.argv) > 1: + sys.exit("no arguments expected") + + with open('README.md', 'r') as fi: + data = fi.read() + + global fo + with StringIO() as fo: + write_all(data) + new_data = fo.getvalue() + fo.close() if new_data == data: - print "already up-to-date" + print("already up-to-date") else: - with open('README.rst', 'w') as f: + with open('README.md', 'w') as f:
f.write(new_data) - doctest.testfile('README.rst') - os.system('rst2html.py README.rst >README.html') + doctest.testfile('README.md') if __name__ == '__main__':