diff -Nru zope.index-3.6.1/CHANGES.txt zope.index-3.6.3/CHANGES.txt --- zope.index-3.6.1/CHANGES.txt 2010-07-08 17:55:06.000000000 +0000 +++ zope.index-3.6.3/CHANGES.txt 2011-12-03 23:56:15.000000000 +0000 @@ -1,6 +1,21 @@ Changes ======= +3.6.3 (2011-12-03) +------------------ + +- KeywordIndex: Minor optimization; use __nonzero__ instead of __len__ + to avoid loading the full TreeSet. + +3.6.2 (2011-12-03) +------------------ + +- KeywordIndex: Store docids in TreeSet rather than a Set when the + number of documents matching a word reaches a configurable + threshold (default 64). The rule is applied to individual words at + indexing time, but you can call the new optimize method to optimize + all the words in an index at once. Designed to fix LP #881950. + 3.6.1 (2010-07-08) ------------------ diff -Nru zope.index-3.6.1/debian/changelog zope.index-3.6.3/debian/changelog --- zope.index-3.6.1/debian/changelog 2011-12-31 08:20:06.000000000 +0000 +++ zope.index-3.6.3/debian/changelog 2012-01-03 15:32:45.000000000 +0000 @@ -1,3 +1,14 @@ +zope.index (3.6.3-0ubuntu1) precise; urgency=low + + * New upstream release + - Optimizes a case when a keyword occurs in many documents (LP: #881950) + * debian/control: + - depend on python-all-dev to fix building optimizations. + - fix formatting of long description. + * debian/copyright: update to latest dep5 format. + + -- Gediminas Paulauskas Tue, 03 Jan 2012 17:32:33 +0200 + zope.index (3.6.1-0ubuntu4) precise; urgency=low * Rebuild to drop python2.6 dependencies. diff -Nru zope.index-3.6.1/debian/control zope.index-3.6.3/debian/control --- zope.index-3.6.1/debian/control 2011-06-28 10:11:18.000000000 +0000 +++ zope.index-3.6.3/debian/control 2012-01-03 15:25:18.000000000 +0000 @@ -3,20 +3,23 @@ Priority: extra Maintainer: Ubuntu Developers XSBC-Original-Maintainer: Gediminas Paulauskas -Build-Depends: debhelper (>= 7), python-all (>= 2.6.6-3~), - python-setuptools (>= 0.6b3), python-van.pydeb (>= 1.3.0-4) +Build-Depends: debhelper (>= 7), + python-all-dev (>= 2.6.6-3~), + python-setuptools, + python-van.pydeb (>= 1.3.0-4) Standards-Version: 3.9.2 X-Python-Version: >= 2.5 Package: python-zope.index Architecture: any Depends: ${pydeb:Depends}, ${python:Depends}, ${misc:Depends}, ${shlibs:Depends} -Provides: ${pydeb:Provides}, ${python:Provides} +Provides: ${pydeb:Provides} Suggests: ${pydeb:Suggests} Description: Indices for using with catalog like text, field, etc. The ``zope.index`` package provides several indices for the Zope catalog. These include: - * a field index (for indexing orderable values), - * a keyword index, - * a topic index, - * a text index (with support for lexicon, splitter, normalizer, etc.) + . + * a field index (for indexing orderable values), + * a keyword index, + * a topic index, + * a text index (with support for lexicon, splitter, normalizer, etc.) diff -Nru zope.index-3.6.1/debian/copyright zope.index-3.6.3/debian/copyright --- zope.index-3.6.1/debian/copyright 2011-06-28 10:11:19.000000000 +0000 +++ zope.index-3.6.3/debian/copyright 2012-01-03 15:03:55.000000000 +0000 @@ -1,11 +1,11 @@ -Format-Specification: http://dep.debian.net/deps/dep5/ -Name: zope.index -Maintainer: Zope Foundation and Contributors +Format: http://dep.debian.net/deps/dep5/ +Upstream-Name: zope.index +Upstream-Contact: Zope Foundation and Contributors Source: http://pypi.python.org/pypi/zope.index Files: * Copyright: (c) 2002, 2003, 2009 Zope Foundation and Contributors. -License: ZPL-2.1 +License: Zope-2.1 Zope Public License (ZPL) Version 2.1 . A copyright notice accompanies this license document that identifies the diff -Nru zope.index-3.6.1/debian/tests/control zope.index-3.6.3/debian/tests/control --- zope.index-3.6.1/debian/tests/control 2011-06-28 10:11:19.000000000 +0000 +++ zope.index-3.6.3/debian/tests/control 2012-01-03 14:33:41.000000000 +0000 @@ -1,3 +1,3 @@ Tests: all Features: no-build-needed -Depends: @, python-zope.testing +Depends: @, python-zope.testrunner, python-zope.testing diff -Nru zope.index-3.6.1/PKG-INFO zope.index-3.6.3/PKG-INFO --- zope.index-3.6.1/PKG-INFO 2010-07-08 17:56:04.000000000 +0000 +++ zope.index-3.6.3/PKG-INFO 2011-12-03 23:56:38.000000000 +0000 @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: zope.index -Version: 3.6.1 +Version: 3.6.3 Summary: Indices for using with catalog like text, field, etc. Home-page: http://pypi.python.org/pypi/zope.index Author: Zope Foundation and Contributors @@ -24,6 +24,21 @@ Changes ======= + 3.6.3 (2011-12-03) + ------------------ + + - KeywordIndex: Minor optimization; use __nonzero__ instead of __len__ + to avoid loading the full TreeSet. + + 3.6.2 (2011-12-03) + ------------------ + + - KeywordIndex: Store docids in TreeSet rather than a Set when the + number of documents matching a word reaches a configurable + threshold (default 64). The rule is applied to individual words at + indexing time, but you can call the new optimize method to optimize + all the words in an index at once. Designed to fix LP #881950. + 3.6.1 (2010-07-08) ------------------ diff -Nru zope.index-3.6.1/setup.cfg zope.index-3.6.3/setup.cfg --- zope.index-3.6.1/setup.cfg 2010-07-08 17:56:04.000000000 +0000 +++ zope.index-3.6.3/setup.cfg 2011-12-03 23:56:38.000000000 +0000 @@ -1,11 +1,11 @@ +[nosetests] +nocapture = 1 +with-coverage = 1 +cover-erase = 1 +cover-package = zope.index + [egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 -[nosetests] -cover-package = zope.index -cover-erase = 1 -nocapture = 1 -with-coverage = 1 - diff -Nru zope.index-3.6.1/setup.py zope.index-3.6.3/setup.py --- zope.index-3.6.1/setup.py 2010-07-08 17:55:06.000000000 +0000 +++ zope.index-3.6.3/setup.py 2011-12-03 23:56:15.000000000 +0000 @@ -61,7 +61,7 @@ print >> sys.stderr, '*' * 80 setup(name='zope.index', - version='3.6.1', + version='3.6.3', url='http://pypi.python.org/pypi/zope.index', license='ZPL 2.1', author='Zope Foundation and Contributors', diff -Nru zope.index-3.6.1/src/zope/index/keyword/index.py zope.index-3.6.3/src/zope/index/keyword/index.py --- zope.index-3.6.1/src/zope/index/keyword/index.py 2010-07-08 17:55:05.000000000 +0000 +++ zope.index-3.6.3/src/zope/index/keyword/index.py 2011-12-03 23:56:15.000000000 +0000 @@ -30,6 +30,10 @@ implements(IInjection, IStatistics, IIndexSearch, IKeywordQuerying) family = BTrees.family32 + # If a word is referenced by at least tree_threshold docids, + # use a TreeSet for that word instead of a Set. + tree_threshold = 64 + def __init__(self, family=None): if family is not None: self.family = family @@ -93,7 +97,7 @@ for word in kw_removed: fwd = self._fwd_index[word] fwd.remove(docid) - if len(fwd) == 0: + if not fwd: del self._fwd_index[word] # now update reverse and forward indexes @@ -123,11 +127,19 @@ """insert a sequence of words into the forward index """ idx = self._fwd_index - has_key = idx.has_key + get_word_idx = idx.get + IF = self.family.IF + Set = IF.Set + TreeSet = IF.TreeSet for word in words: - if not has_key(word): - idx[word] = self.family.IF.Set() - idx[word].insert(docid) + word_idx = get_word_idx(word) + if word_idx is None: + idx[word] = word_idx = Set() + word_idx.insert(docid) + if (not isinstance(word_idx, TreeSet) and + len(word_idx) >= self.tree_threshold): + # Convert to a TreeSet. + idx[word] = TreeSet(word_idx) def _insert_reverse(self, docid, words): """ add words to forward index """ @@ -175,6 +187,28 @@ query = query['query'] return self.search(query, operator=operator) + def optimize(self): + """Optimize the index. Call this after changing tree_threshold. + + This converts internal data structures between + Sets and TreeSets based on tree_threshold. + """ + idx = self._fwd_index + IF = self.family.IF + Set = IF.Set + TreeSet = IF.TreeSet + items = list(self._fwd_index.items()) + for word, word_idx in items: + if len(word_idx) >= self.tree_threshold: + if not isinstance(word_idx, TreeSet): + # Convert to a TreeSet. + idx[word] = TreeSet(word_idx) + else: + if isinstance(word_idx, TreeSet): + # Convert to a Set. + idx[word] = Set(word_idx) + + class CaseInsensitiveKeywordIndex(KeywordIndex): """A case-normalizing keyword index (for strings as keywords)""" diff -Nru zope.index-3.6.1/src/zope/index/keyword/tests.py zope.index-3.6.3/src/zope/index/keyword/tests.py --- zope.index-3.6.1/src/zope/index/keyword/tests.py 2010-07-08 17:55:05.000000000 +0000 +++ zope.index-3.6.3/src/zope/index/keyword/tests.py 2011-12-03 23:33:11.000000000 +0000 @@ -85,6 +85,60 @@ self._apply_or(index, ('cmf', 'zope4'), self.IFSet([5])) self._apply_or(index, ('zope', 'Zope'), self.IFSet([1,3])) + def test_apply_with_only_tree_set(self): + index = self._makeOne() + index.tree_threshold = 0 + self._populate(index) + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFTreeSet())) + self._apply_and(index, ('CMF', 'Zope3'), self.IFSet([1])) + self._apply_and(index, ('CMF', 'zope'), self.IFSet([1])) + self._apply_and(index, ('cmf', 'zope4'), self.IFSet()) + self._apply_and(index, ('quick', 'FOX'), self.IFSet([2])) + + def test_apply_with_mix_of_tree_set_and_simple_set(self): + index = self._makeOne() + index.tree_threshold = 2 + self._populate(index) + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFSet())) + self._apply_and(index, ('CMF', 'Zope3'), self.IFSet([1])) + self._apply_and(index, ('CMF', 'zope'), self.IFSet([1])) + self._apply_and(index, ('cmf', 'zope4'), self.IFSet()) + self._apply_and(index, ('quick', 'FOX'), self.IFSet([2])) + + def test_optimize_converts_to_tree_set(self): + index = self._makeOne() + self._populate(index) + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFSet())) + index.tree_threshold = 0 + index.optimize() + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFTreeSet())) + + def test_optimize_converts_to_simple_set(self): + index = self._makeOne() + index.tree_threshold = 0 + self._populate(index) + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFTreeSet())) + index.tree_threshold = 99 + index.optimize() + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFSet())) + + def test_optimize_leaves_words_alone(self): + index = self._makeOne() + self._populate(index) + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFSet())) + index.tree_threshold = 99 + index.optimize() + self.assertEqual(type(index._fwd_index['zope']), + type(self.IFSet())) + + class CaseInsensitiveKeywordIndexTestsBase: def _getTargetClass(self): @@ -166,6 +220,10 @@ from BTrees.IFBTree import IFSet return IFSet(*args, **kw) + def IFTreeSet(self, *args, **kw): + from BTrees.IFBTree import IFTreeSet + return IFTreeSet(*args, **kw) + class _SixtyFourBitBase: def _get_family(self): @@ -176,6 +234,10 @@ from BTrees.LFBTree import LFSet return LFSet(*args, **kw) + def IFTreeSet(self, *args, **kw): + from BTrees.LFBTree import LFTreeSet + return LFTreeSet(*args, **kw) + _marker = object() class _TestCaseBase: diff -Nru zope.index-3.6.1/src/zope.index.egg-info/PKG-INFO zope.index-3.6.3/src/zope.index.egg-info/PKG-INFO --- zope.index-3.6.1/src/zope.index.egg-info/PKG-INFO 2010-07-08 17:56:04.000000000 +0000 +++ zope.index-3.6.3/src/zope.index.egg-info/PKG-INFO 2011-12-03 23:56:38.000000000 +0000 @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: zope.index -Version: 3.6.1 +Version: 3.6.3 Summary: Indices for using with catalog like text, field, etc. Home-page: http://pypi.python.org/pypi/zope.index Author: Zope Foundation and Contributors @@ -24,6 +24,21 @@ Changes ======= + 3.6.3 (2011-12-03) + ------------------ + + - KeywordIndex: Minor optimization; use __nonzero__ instead of __len__ + to avoid loading the full TreeSet. + + 3.6.2 (2011-12-03) + ------------------ + + - KeywordIndex: Store docids in TreeSet rather than a Set when the + number of documents matching a word reaches a configurable + threshold (default 64). The rule is applied to individual words at + indexing time, but you can call the new optimize method to optimize + all the words in an index at once. Designed to fix LP #881950. + 3.6.1 (2010-07-08) ------------------