diff -Nru libjavaewah-java-0.6.12/CHANGELOG libjavaewah-java-0.7.9/CHANGELOG --- libjavaewah-java-0.6.12/CHANGELOG 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/CHANGELOG 2013-11-12 14:31:20.000000000 +0000 @@ -1,3 +1,43 @@ +version 0.7.9 (November 12th 2013) + - Spelled out the license in the source code to avoid any confusion + +version 0.7.8 (October 1st 2013) + - Resolved an issue with addStream... methods. + +version 0.7.7 (October 1st 2013) + - Resolved an issue with 32-bit EWAH caused by a partial update in version 0.7.6. + +version 0.7.6 (September 30th 2013) + - Fixed a memory leak + +version 0.7.5 (September 7th 2013) + - Fixed recently introduced "Get" method. Better unit testing. + +version 0.7.4 (September 7th 2013) + - Added a convenience method that allows you to query for the value of a bit (based on code by @zhenjl) + +version 0.7.3 (August 8th 2013) + - Fixed bug that could sometimes arise with the new iterator-based processing (O. Kaser) + + +version 0.7.2 (July 25th 2013) + - Clarifying the difference between setSizeInBits(int) and + setSizeInBits(int,boolean). Some code that worked previously + (but was potentially unsafe) might throw an exception when + using setSizeInBits(int). The fix is to call setSizeInBits(int,false) + which will be equivalent, but safer. + +version 0.7.1 (July 16th 2013) + - From now on, we require java 6 or better: in practice this means + that we are starting to use annotations such as @Override. + - We revert to the pre-0.7 behavior by setting the "sizeInBits" + of the bitmaps according to the max of the input bitmaps (issue 27). 
+ + +version 0.7.0 (July 16th 2013) + - Performance improvements when aggregating several bitmaps + - Introduction of an iterator-based approach + version 0.6.12 (May 21st 2013) - Fixed bug with addStreamOfNegatedDirtyWords (Vicent Marti) - Removed some unnecessary recursion diff -Nru libjavaewah-java-0.6.12/debian/changelog libjavaewah-java-0.7.9/debian/changelog --- libjavaewah-java-0.6.12/debian/changelog 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/changelog 2019-02-28 22:09:54.000000000 +0000 @@ -1,3 +1,22 @@ +libjavaewah-java (0.7.9-1) unstable; urgency=medium + + * Team upload. + + [ Markus Koschany ] + * New upstream version 0.7.9. + * Use https for Format field. + * Use compat level 12. + * Declare compliance with Debian Policy 4.3.0. + * Use canonical VCS URI. + * Remove maven.cleanIgnoreRules and maven.publishedRules. + * Remove dh_install override. + * Remove debian/gbp.conf. xz compression is the default now. + + [ Emmanuel Bourg ] + * Removed the unused build dependency on libmaven-install-plugin-java + + -- Markus Koschany Thu, 28 Feb 2019 23:09:54 +0100 + libjavaewah-java (0.6.12-2) unstable; urgency=medium * Add missing build-dep on junit4 (Closes: #866546) diff -Nru libjavaewah-java-0.6.12/debian/compat libjavaewah-java-0.7.9/debian/compat --- libjavaewah-java-0.6.12/debian/compat 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/compat 2019-02-28 22:09:54.000000000 +0000 @@ -1 +1 @@ -10 +12 diff -Nru libjavaewah-java-0.6.12/debian/control libjavaewah-java-0.7.9/debian/control --- libjavaewah-java-0.6.12/debian/control 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/control 2019-02-28 22:09:54.000000000 +0000 @@ -2,19 +2,24 @@ Section: java Priority: optional Maintainer: Debian Java Maintainers -Uploaders: Jakub Adam , +Uploaders: + Jakub Adam , tony mancill -Build-Depends: debhelper (>= 10), default-jdk, maven-debian-helper (>= 1.6.3) -Build-Depends-Indep: 
libmaven-bundle-plugin-java, libmaven-install-plugin-java, - libmaven-javadoc-plugin-java, junit4 -Standards-Version: 4.0.0 +Build-Depends: + debhelper (>= 12), + default-jdk, + junit4, + libmaven-bundle-plugin-java, + libmaven-javadoc-plugin-java, + maven-debian-helper (>= 1.6.3) +Standards-Version: 4.3.0 +Vcs-Git: https://salsa.debian.org/java-team/libjavaewah-java.git +Vcs-Browser: https://salsa.debian.org/java-team/libjavaewah-java Homepage: https://github.com/lemire/javaewah -Vcs-Git: git://anonscm.debian.org/pkg-java/libjavaewah-java.git -Vcs-Browser: http://anonscm.debian.org/gitweb/?p=pkg-java/libjavaewah-java.git Package: libjavaewah-java Architecture: all -Depends: ${misc:Depends}, ${maven:Depends} +Depends: ${maven:Depends}, ${misc:Depends} Recommends: ${maven:OptionalDepends} Suggests: libjavaewah-java-doc Description: Compressed variant of the Java bitset class @@ -36,7 +41,7 @@ Package: libjavaewah-java-doc Architecture: all Section: doc -Depends: ${misc:Depends}, ${maven:DocDepends} +Depends: ${maven:DocDepends}, ${misc:Depends} Recommends: ${maven:DocOptionalDepends} Suggests: libjavaewah-java Description: Compressed variant of the Java bitset class (documentation) @@ -46,4 +51,3 @@ compresses better, but is not as fast. . This package contains the API documentation of libjavaewah-java. 
- diff -Nru libjavaewah-java-0.6.12/debian/copyright libjavaewah-java-0.7.9/debian/copyright --- libjavaewah-java-0.6.12/debian/copyright 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/copyright 2019-02-28 22:09:54.000000000 +0000 @@ -1,4 +1,4 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: JavaEWAH Upstream-Contact: JavaEWAH team Source: https://github.com/lemire/javaewah diff -Nru libjavaewah-java-0.6.12/debian/gbp.conf libjavaewah-java-0.7.9/debian/gbp.conf --- libjavaewah-java-0.6.12/debian/gbp.conf 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/gbp.conf 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -[DEFAULT] -compression=xz diff -Nru libjavaewah-java-0.6.12/debian/maven.cleanIgnoreRules libjavaewah-java-0.7.9/debian/maven.cleanIgnoreRules --- libjavaewah-java-0.6.12/debian/maven.cleanIgnoreRules 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/maven.cleanIgnoreRules 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ - diff -Nru libjavaewah-java-0.6.12/debian/maven.publishedRules libjavaewah-java-0.7.9/debian/maven.publishedRules --- libjavaewah-java-0.6.12/debian/maven.publishedRules 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/maven.publishedRules 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ - diff -Nru libjavaewah-java-0.6.12/debian/rules libjavaewah-java-0.7.9/debian/rules --- libjavaewah-java-0.6.12/debian/rules 2017-06-30 04:38:56.000000000 +0000 +++ libjavaewah-java-0.7.9/debian/rules 2019-02-28 22:09:54.000000000 +0000 @@ -3,8 +3,3 @@ %: dh $@ --buildsystem=maven -# Can be removed once #711852 is fixed. 
-override_dh_install: - dh_install - # remove JARed javadoc, we already have unpacked version in /usr/share/doc - rm -rf debian/libjavaewah-java-doc/usr/share/maven-repo diff -Nru libjavaewah-java-0.6.12/example.java libjavaewah-java-0.7.9/example.java --- libjavaewah-java-0.6.12/example.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/example.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,9 +1,17 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; import java.io.*; +/** + * @author lemire + * + */ public class example { - public static void main(String[] args) throws java.io.IOException { + /** + * @param args arguments from the command line + * @throws IOException if an IO error occurs + */ +public static void main(final String[] args) throws java.io.IOException { EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0,2,64,1<<30); EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1,3,64,1<<30); System.out.println("bitmap 1: "+ewahBitmap1); @@ -40,4 +48,5 @@ ewahBitmap1.readExternal(new ObjectInputStream(bis)); System.out.println("bitmap 1 (recovered) : "+ewahBitmap1); } + } diff -Nru libjavaewah-java-0.6.12/LICENSE libjavaewah-java-0.7.9/LICENSE --- libjavaewah-java-0.6.12/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/LICENSE 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. 
+For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -Nru libjavaewah-java-0.6.12/LICENSE-2.0.txt libjavaewah-java-0.7.9/LICENSE-2.0.txt --- libjavaewah-java-0.6.12/LICENSE-2.0.txt 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/LICENSE-2.0.txt 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -Nru libjavaewah-java-0.6.12/pom.xml libjavaewah-java-0.7.9/pom.xml --- libjavaewah-java-0.6.12/pom.xml 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/pom.xml 2013-11-12 14:31:20.000000000 +0000 @@ -3,12 +3,12 @@ com.googlecode.javaewah JavaEWAH - 0.6.12 + 0.7.9 bundle - 1.5 - 1.5 + 1.6 + 1.6 UTF-8 diff -Nru libjavaewah-java-0.6.12/README libjavaewah-java-0.7.9/README --- libjavaewah-java-0.6.12/README 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,92 +0,0 @@ -JavaEWAH -(c) 2009-2013 -Daniel Lemire (http://lemire.me/en/), -Cliff Moon (https://github.com/cliffmoon), -David McIntosh (https://github.com/mctofu), -Robert Becho (https://github.com/RBecho), -Colby Ranger (https://github.com/crangeratgoogle) -and Veronika Zenz (https://github.com/veronikazenz) - - - -This code is licensed under Apache License, Version 2.0 (ASL2.0). -(GPL 2.0 derivatives are allowed.) - -This is a word-aligned compressed variant of -the Java Bitset class. We provide both a 64-bit -and a 32-bit RLE-like compression scheme. It can -be used to implement bitmap indexes. - -The goal of word-aligned compression is not to -achieve the best compression, but rather to -improve query processing time. Hence, we try -to save CPU cycles, maybe at the expense of -storage. 
However, the EWAH scheme we implemented -is always more efficient storage-wise than an -uncompressed bitmap (as implemented in the java -BitSet class by Sun). - - -For better performance, use a 64-bit JVM over -64-bit CPUs when using the 64-bit scheme (javaewah.EWAHCompressedBitmap). - -The 32-bit version (javaewah32.EWAHCompressedBitmap32) should -compress better but be comparatively slower. - - -For more details regarding the compression format, please -see Section 3 of the following paper: - -Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. - http://arxiv.org/abs/0901.3751 - - (The PDF file is freely available on the arXiv site.) - -== Benchmark == - -For a simple comparison between this library and other libraries such as -WAH, ConciseSet, BitSet and other options, please see - -https://github.com/lemire/simplebitmapbenchmark - -== Unit testing == - -As of October 2011, this packages relies on Maven. To -test it: - -mvn test - -See -http://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html -for details. - - -=== Usage == - -See example.java. - -== Maven central repository == - -You can download JavaEWAH from the Maven central repository: -http://repo1.maven.org/maven2/com/googlecode/javaewah/JavaEWAH/ - -You can also specify the dependency in the Maven "pom.xml" file: - <dependencies> - <dependency> - <groupId>com.googlecode.javaewah</groupId> - <artifactId>JavaEWAH</artifactId> - <version>0.6.11</version> - </dependency> - </dependencies> - -Naturally, you should replace "version" by the version -you desire. 
- - - -== Clojure == - -Joel Boehland wrote Clojure wrappers: - -https://github.com/jolby/clojure-ewah-bitmap diff -Nru libjavaewah-java-0.6.12/README.md libjavaewah-java-0.7.9/README.md --- libjavaewah-java-0.6.12/README.md 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/README.md 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,123 @@ +JavaEWAH +========================================================== + +(c) 2009-2013 +Daniel Lemire (http://lemire.me/en/), +Cliff Moon (https://github.com/cliffmoon), +David McIntosh (https://github.com/mctofu), +Robert Becho (https://github.com/RBecho), +Colby Ranger (https://github.com/crangeratgoogle) +Veronika Zenz (https://github.com/veronikazenz) +and Owen Kaser (https://github.com/owenkaser) + + +This code is licensed under Apache License, Version 2.0 (ASL2.0). +(GPL 2.0 derivatives are allowed.) + +This is a word-aligned compressed variant of +the Java Bitset class. We provide both a 64-bit +and a 32-bit RLE-like compression scheme. It can +be used to implement bitmap indexes. + +The goal of word-aligned compression is not to +achieve the best compression, but rather to +improve query processing time. Hence, we try +to save CPU cycles, maybe at the expense of +storage. However, the EWAH scheme we implemented +is always more efficient storage-wise than an +uncompressed bitmap (as implemented in the java +BitSet class by Sun). + + +For better performance, use a 64-bit JVM over +64-bit CPUs when using the 64-bit scheme (javaewah.EWAHCompressedBitmap). + +The 32-bit version (javaewah32.EWAHCompressedBitmap32) should +compress better but be comparatively slower. + +Java 6 or better is required. + +For more details regarding the compression format, please +see Section 3 of the following paper: + +Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. 
+ http://arxiv.org/abs/0901.3751 + + (The PDF file is freely available on the arXiv site.) + +Benchmark +--------- + +For a simple comparison between this library and other libraries such as +WAH, ConciseSet, BitSet and other options, please see + +https://github.com/lemire/simplebitmapbenchmark + +Unit testing +------------ + +As of October 2011, this packages relies on Maven. To +test it: + +mvn test + +See +http://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html +for details. + + +Usage +----- + +See example.java. + +Maven central repository +------------------------ + +You can download JavaEWAH from the Maven central repository: +http://repo1.maven.org/maven2/com/googlecode/javaewah/JavaEWAH/ + +You can also specify the dependency in the Maven "pom.xml" file: + + + + com.googlecode.javaewah + JavaEWAH + 0.7.9 + + + +Naturally, you should replace "version" by the version +you desire. + +Travis (Continuous integration) +------------------------------- + +You can check whether the latest version builds on your favorite version +of Java using Travis: https://travis-ci.org/lemire/javaewah/builds/11059867 + +Clojure +------- + +Joel Boehland wrote Clojure wrappers: + +https://github.com/jolby/clojure-ewah-bitmap + +Frequent questions +------------------ + +Question: How do I check the value of a bit? + +Answer: If you need to routinely check the value of a given bit quickly, then +EWAH might not be the right format. 
However, if you must do it, you can proceed as +follows: + + /** + * Suppose you have the following bitmap: + */ + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(0,2,64,1<<30); + /** + * We want to know if bit 64 is set: + */ + boolean is64set = (b.and(EWAHCompressedBitmap.bitmapOf(64)).cardinality() == 1); + diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,212 @@ +package com.googlecode.javaewah.benchmark; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +import java.text.DecimalFormat; +import java.util.List; +import com.googlecode.javaewah32.EWAHCompressedBitmap32; +import com.googlecode.javaewah.FastAggregation; +import com.googlecode.javaewah.IntIterator; +import com.googlecode.javaewah32.IteratingRLW32; +import com.googlecode.javaewah32.IteratorAggregation32; +import com.googlecode.javaewah32.IteratorUtil32; + +/** + * This class is used to benchmark the performance EWAH. 
+ * + * @author Daniel Lemire + */ +public class Benchmark32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(100, 16, 1); +// test(2, 24, 1); + } + + @SuppressWarnings("javadoc") + public static void test(int N, int nbr, int repeat) { + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { + long bogus = 0; + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + System.out.println("# generating random data..."); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + System.out.println("# generating random data... ok."); + // building + bef = System.currentTimeMillis(); + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + int size = 0; + for (int r = 0; r < repeat; ++r) { + size = 0; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + size += ewah[k].sizeInBytes(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + size; + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = ewah[k].toArray(); + bogus += array.length; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = new int[ewah[k].cardinality()]; + int c = 0; + for (int x : ewah[k]) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 
0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + List L = ewah[k].getPositions(); + int[] array = new int[L.size()]; + int c = 0; + for (int x : L) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IntIterator iter = ewah[k].intIterator(); + while (iter.hasNext()) { + bogus += iter.next(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new 
IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp); + bogus += IteratorUtil32.materialize(ewahor).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahand = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahand = ewahand.and(ewah[j]); + } + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32 + .and(ewahcp); + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp); + bogus += IteratorUtil32.materialize(ewahand).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); + System.out.println(line); + System.out.println("# bogus =" + bogus); + } + } +} diff -Nru 
libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,130 @@ +package com.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.googlecode.javaewah32.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical and (intersection) aggregate. + */ +public class BenchmarkIntersection32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + // building + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]); + for (int k = 2; k < 
ewah.length; ++k) + answer = answer.and(ewah[k]); + + EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah); + if (!answer.equals(ewahand)) + throw new RuntimeException( + "bug EWAHCompressedBitmap.and"); + EWAHCompressedBitmap32 ewahand2 = FastAggregation32 + .bufferedand(65536,ewah); + if (!ewahand.equals(ewahand2)) + throw new RuntimeException( + "bug FastAggregation.bufferedand "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.and(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .and(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation32 + .bufferedand(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord32( + ewah[j]); + } + IteratingRLW32 ewahor = 
IteratorAggregation32.bufferedand(ewahcp); + int wordcounter = IteratorUtil32.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,130 @@ +package com.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.googlecode.javaewah.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical and (intersection) aggregate. 
+ */ +public class BenchmarkIntersection { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc"}) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + // building + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap answer = ewah[0].and(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + + EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah); + if (!answer.equals(ewahand)) + throw new RuntimeException( + "bug EWAHCompressedBitmap.and"); + EWAHCompressedBitmap ewahand2 = FastAggregation + .bufferedand(65536,ewah); + if (!ewahand.equals(ewahand2)) + throw new RuntimeException( + "bug FastAggregation.bufferedand "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.and(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new 
EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap + .and(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .bufferedand(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord( + ewah[j]); + } + IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp); + int wordcounter = IteratorUtil.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,182 +1,284 @@ package com.googlecode.javaewah.benchmark; /* - * Copyright 2009-2013, Daniel Lemire, Cliff 
Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ - import java.text.DecimalFormat; +import java.util.Arrays; import java.util.List; import com.googlecode.javaewah.EWAHCompressedBitmap; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah.IntIterator; +import com.googlecode.javaewah.IteratingRLW; +import com.googlecode.javaewah.IteratorAggregation; +import com.googlecode.javaewah.IteratorUtil; /** - * This class is used to benchmark the performance - * EWAH. + * This class is used to benchmark the performance EWAH. * * @author Daniel Lemire */ public class Benchmark { + + /** + * Compute the union between two sorted arrays + * @param set1 first sorted array + * @param set2 second sorted array + * @return merged array + */ + static public int[] unite2by2(final int[] set1, final int[] set2) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == set1.length) + return Arrays.copyOf(set2, set2.length); + if (0 == set2.length) + return Arrays.copyOf(set1, set1.length); + int[] buffer = new int[set1.length + set2.length]; + while (true) { + if (set1[k1] < set2[k2]) { + buffer[pos++] = set1[k1]; + ++k1; + if (k1 >= set1.length) { + for (; k2 < set2.length; ++k2) + buffer[pos++] = set2[k2]; + break; + } + } else if (set1[k1] == set2[k2]) { + buffer[pos++] = set1[k1]; + ++k1; + ++k2; + if (k1 >= set1.length) { + for (; k2 < set2.length; ++k2) + buffer[pos++] = set2[k2]; + break; + } + if (k2 >= set2.length) { + for (; k1 < set1.length; ++k1) + buffer[pos++] = set1[k1]; + break; + } + } else {// if (set1[k1]>set2[k2]) { + buffer[pos++] = set2[k2]; + ++k2; + if (k2 >= set2.length) { + for (; k1 < set1.length; ++k1) + buffer[pos++] = set1[k1]; + break; + } + } + } + return Arrays.copyOf(buffer, pos); + } + + + @SuppressWarnings("javadoc") 
+ public static void main(String args[]) { + //test(2, 24, 1); + test(100, 16, 1); + } + + @SuppressWarnings("javadoc") + public static void test(int N, int nbr, int repeat) { + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { + long bogus = 0; + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + System.out.println("# generating random data..."); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + System.out.println("# generating random data... ok."); + // building + bef = System.currentTimeMillis(); + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + int size = 0; + for (int r = 0; r < repeat; ++r) { + size = 0; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + size += ewah[k].sizeInBytes(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + size; + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = ewah[k].toArray(); + bogus += array.length; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = new int[ewah[k].cardinality()]; + int c = 0; + for (int x : ewah[k]) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + List L = ewah[k].getPositions(); + int[] array = new 
int[L.size()]; + int c = 0; + for (int x : L) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IntIterator iter = ewah[k].intIterator(); + while (iter.hasNext()) { + bogus += iter.next(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + // run sanity check + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); + 
EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1)); + EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor); + if(!ewahorp.equals(mewahor)) throw new RuntimeException("bug"); + } + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); + bogus += IteratorUtil.materialize(ewahor).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahand = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahand = ewahand.and(ewah[j]); + } + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahand = EWAHCompressedBitmap + .and(ewahcp); + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); + EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1)); + EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand); + if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug"); + } + // fast logical and + bef = 
System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); + bogus += IteratorUtil.materialize(ewahand).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); - public static void main(String args[]) { - test(100, 16, 1); - } - - public static void test(int N, int nbr, int repeat) { - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 4) { - long bogus = 0; - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - System.out.println("# generating random data..."); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - System.out.println("# generating random data... 
ok."); - // building - bef = System.currentTimeMillis(); - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - int size = 0; - for (int r = 0; r < repeat; ++r) { - size = 0; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - size += ewah[k].sizeInBytes(); - } - } - aft = System.currentTimeMillis(); - line += "\t" + size; - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - int[] array = ewah[k].toArray(); - bogus += array.length; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - int[] array = new int[ewah[k].cardinality()]; - int c = 0; - for (int x : ewah[k]) - array[c++] = x; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - List L = ewah[k].getPositions(); - int[] array = new int[L.size()]; - int c = 0; - for (int x : L) - array[c++] = x; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IntIterator iter = ewah[k].intIterator(); - while (iter.hasNext()) { - bogus += iter.next(); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahor = ewah[0]; - for (int j = 1; j < k; ++j) { - ewahor = ewahor.or(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + 
df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahand = ewah[0]; - for (int j = 1; j < k; ++j) { - ewahand = ewahand.and(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewahcp); - bogus += ewahand.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahand = FastAggregation.and(ewahcp); - 
bogus += ewahand.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - System.out.println("time for building, toArray(), Java iterator, intIterator, logical or (2-by-2), logical or (grouped), FastAggregation.or, and (2-by-2), logical and (grouped),FastAggregation.and"); - System.out.println(line); - System.out.println("# bogus =" + bogus); - } - } + + System.out + .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); + System.out.println(line); + System.out.println("# bogus =" + bogus); + } + } } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,165 @@ +package com.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; + +import com.googlecode.javaewah.FastAggregation; +import com.googlecode.javaewah32.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical or (union) aggregate. 
+ */ +public class BenchmarkUnion32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc", "deprecation" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if(true){ + EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]); + for(int k = 2; k < ewah.length; ++k) + answer = answer.or(ewah[k]); + + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .or(ewah); + if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or"); + EWAHCompressedBitmap32 ewahor3 = FastAggregation + .or(ewah); + if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); + EWAHCompressedBitmap32 ewahor2 = FastAggregation32 + .bufferedor(65536,ewah); + if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + 
for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation32 + .bufferedor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); + FastAggregation32.legacy_orWithContainer(x, ewahcp); + bogus += x.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new 
IteratingBufferedRunningLengthWord32(ewah[j]); + } + IteratingRLW32 ewahor = IteratorAggregation32 + .bufferedor(ewahcp); + int wordcounter = IteratorUtil32.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,164 @@ +package com.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.googlecode.javaewah.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical or (union) aggregate. 
+ */ +public class BenchmarkUnion { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc", "deprecation" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap answer = ewah[0].or(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.or(ewah[k]); + + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah); + if (!answer.equals(ewahor)) + throw new RuntimeException( + "bug EWAHCompressedBitmap.or"); + EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah); + if (!ewahor.equals(ewahor3)) + throw new RuntimeException("bug FastAggregation.or"); + EWAHCompressedBitmap ewahor2 = FastAggregation + .bufferedor(65536,ewah); + if (!ewahor.equals(ewahor2)) + throw new RuntimeException( + "bug FastAggregation.bufferedor "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k 
= 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .bufferedor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap x = new EWAHCompressedBitmap(); + FastAggregation.legacy_orWithContainer(x, ewahcp); + bogus += x.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord( + ewah[j]); + 
} + IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); + int wordcounter = IteratorUtil.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,137 @@ +package com.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; + +import com.googlecode.javaewah.FastAggregation; +import com.googlecode.javaewah32.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical xor aggregate. 
+ */ +public class BenchmarkXOR32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + //test(2, 22, 1); + } + + @SuppressWarnings({ "javadoc" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.xor(ewah[k]); + EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah); + if (!answer.equals(ewahor3)) + throw new RuntimeException("bug FastAggregation.xor"); + EWAHCompressedBitmap32 ewahor2 = FastAggregation32 + .bufferedxor(65536,ewah); + if (!answer.equals(ewahor2)) + throw new RuntimeException( + "bug FastAggregation.bufferedxor "); + EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah))); + if (!answer.equals(iwah)) + throw new RuntimeException( + "bug xor it "); + + } + + // logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.xor(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical xor + bef = 
System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation + .xor(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation32 + .bufferedxor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord32( + ewah[j]); + } + IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp); + int wordcounter = IteratorUtil32.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java 1970-01-01 00:00:00.000000000 +0000 +++ 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,134 @@ +package com.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.googlecode.javaewah.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical xor aggregate. + */ +public class BenchmarkXOR { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + //test(10, 18, 1); + test(2, 22, 1); + } + + @SuppressWarnings({ "javadoc" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.xor(ewah[k]); + EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah); + if (!answer.equals(ewahor3)) + throw new RuntimeException("bug FastAggregation.xor"); + EWAHCompressedBitmap ewahor2 = FastAggregation + .bufferedxor(65536,ewah); + if (!answer.equals(ewahor2)) + throw new RuntimeException( + "bug FastAggregation.bufferedxor "); + EWAHCompressedBitmap iwah = 
IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah))); + if (!answer.equals(iwah)) + throw new RuntimeException( + "bug xor it "); + + + } + + // logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.xor(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .xor(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .bufferedxor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord( + ewah[j]); + } + IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp); + int wordcounter = IteratorUtil.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 FastAggregation.xor bufferedxor 
iterator-based"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java 2013-11-12 14:31:20.000000000 +0000 @@ -2,23 +2,39 @@ /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** - * This class will generate uniformly distributed lists of random integers. + * This class will generate lists of random integers with a "clustered" distribution. + * Reference: + * Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147. * * @author Daniel Lemire */ -class ClusteredDataGenerator { +public class ClusteredDataGenerator { - public ClusteredDataGenerator() { + /** + * + */ +public ClusteredDataGenerator() { + this.unidg = new UniformDataGenerator(); } /** + * @param seed random seed + */ +public ClusteredDataGenerator(final int seed) { + this.unidg = new UniformDataGenerator(seed); +} + +/** * generates randomly N distinct integers from 0 to Max. 
+ * @param N number of integers + * @param Max maximum integer value + * @return a randomly generated array */ public int[] generateClustered(int N, int Max) { int[] array = new int[N]; @@ -56,13 +72,7 @@ array[k + offset] = Min + v[k]; } - public static void main(String[] args) { - int[] example = (new ClusteredDataGenerator()).generateClustered(20, 1000); - for (int k = 0; k < example.length; ++k) - System.out.println(example[k]); - } - - UniformDataGenerator unidg = new UniformDataGenerator(); + UniformDataGenerator unidg; } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,50 +1,114 @@ package com.googlecode.javaewah.benchmark; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ - import java.util.Arrays; +import java.util.BitSet; import java.util.HashSet; import java.util.Iterator; import java.util.Random; - /** - * This class will generate "clustered" lists of random integers. That is, the - * integers tend not to be randomly distributed. + * This class will generate "uniform" lists of random integers. * * @author Daniel Lemire */ public class UniformDataGenerator { - public UniformDataGenerator(){ + /** + * construct generator of random arrays. 
+ */ + public UniformDataGenerator() { + this.rand = new Random(); + } + + /** + * @param seed random seed + */ + public UniformDataGenerator(final int seed) { + this.rand = new Random(seed); + } + + /** + * generates randomly N distinct integers from 0 to Max. + */ + int[] generateUniformHash(int N, int Max) { + if (N > Max) + throw new RuntimeException("not possible"); + int[] ans = new int[N]; + HashSet<Integer> s = new HashSet<Integer>(); + while (s.size() < N) + s.add(new Integer(this.rand.nextInt(Max))); + Iterator<Integer> i = s.iterator(); + for (int k = 0; k < N; ++k) + ans[k] = i.next().intValue(); + Arrays.sort(ans); + return ans; + } + + /** + * output all integers from the range [0,Max) that are not + * in the array + */ + static int[] negate(int[] x, int Max) { + int[] ans = new int[Max - x.length]; + int i = 0; + int c = 0; + for (int j = 0; j < x.length; ++j) { + int v = x[j]; + for (; i < v; ++i) + ans[c++] = i; + ++i; + } + while (c < ans.length) + ans[c++] = i++; + return ans; + } + + + /** + * generates randomly N distinct integers from 0 to Max. + * @param N Number of integers to generate + * @param Max Maximum value of the integers + * @return array containing random integers + */ + public int[] generateUniform(int N, int Max) { + if(N * 2 > Max) { + return negate( generateUniform(Max - N, Max), Max ); + } + if (2048 * N > Max) + return generateUniformBitmap(N, Max); + return generateUniformHash(N, Max); + } + + /** + * generates randomly N distinct integers from 0 to Max using a bitmap. 
+ * @param N Number of integers to generate + * @param Max Maximum value of the integers + * @return array containing random integers + */ + int[] generateUniformBitmap(int N, int Max) { + if (N > Max) + throw new RuntimeException("not possible"); + int[] ans = new int[N]; + BitSet bs = new BitSet(Max); + int cardinality = 0; + while (cardinality < N) { + int v = this.rand.nextInt(Max); + if (!bs.get(v)) { + bs.set(v); + cardinality++; + } + } + int pos = 0; + for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { + ans[pos++] = i; + } + return ans; } - /** - * generates randomly N distinct integers from 0 to Max. - */ - int[] generateUniform(int N, int Max) { - if (N > Max) - throw new RuntimeException("not possible"); - int[] ans = new int[N]; - if (N == Max) { - for (int k = 0; k < N; ++k) - ans[k]=k; - return ans; - } - HashSet<Integer> s = new HashSet<Integer>(); - while (s.size() < N) - s.add(new Integer(this.rand.nextInt(Max))); - Iterator<Integer> i = s.iterator(); - for (int k = 0; k < N; ++k) - ans[k]=i.next().intValue(); - Arrays.sort(ans); - return ans; - } - Random rand = new Random(); + Random rand = new Random(); - } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BitCounter.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitCounter.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BitCounter.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitCounter.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * BitCounter is a fake bitset data structure. 
Instead of storing the actual @@ -20,7 +20,7 @@ * @param newdata * the word */ - // @Override : causes problems with Java 1.5 + @Override public void add(final long newdata) { this.oneBits += Long.bitCount(newdata); return; @@ -36,7 +36,7 @@ * @param number * the number of literal words to add */ - // @Override : causes problems with Java 1.5 + @Override public void addStreamOfLiteralWords(long[] data, int start, int number) { for (int i = start; i < start + number; i++) { add(data[i]); @@ -51,10 +51,9 @@ * zeros or ones * @param number * how many to words add - * @return the number of words added to the buffer */ - // @Override : causes problems with Java 1.5 - public void addStreamOfEmptyWords(boolean v, long number) { + @Override +public void addStreamOfEmptyWords(boolean v, long number) { if (v) { this.oneBits += number * EWAHCompressedBitmap.wordinbits; } @@ -72,7 +71,8 @@ * the number of literal words to add */ // @Override : causes problems with Java 1.5 - public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { + @Override +public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { for (int i = start; i < start + number; i++) { add(~data[i]); } @@ -96,7 +96,8 @@ * number of bits */ // @Override : causes problems with Java 1.5 - public void setSizeInBits(int bits) { + @Override +public void setSizeInBits(int bits) { // no action } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BitmapStorage.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitmapStorage.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BitmapStorage.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitmapStorage.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. 
and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BufferedIterator.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedIterator.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BufferedIterator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedIterator.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,151 @@ +package com.googlecode.javaewah; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * This class can be used to iterate over blocks of bitmap data. + * + * @author Daniel Lemire + * + */ +public class BufferedIterator implements IteratingRLW { + /** + * Instantiates a new iterating buffered running length word. + * + * @param iterator iterator + */ + public BufferedIterator(final CloneableIterator<EWAHIterator> iterator) { + this.masteriterator = iterator; + if(this.masteriterator.hasNext()) { + this.iterator = this.masteriterator.next(); + this.brlw = new BufferedRunningLengthWord(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + } + + + /** + * Discard first words, iterating to the next running length word if needed. + * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(long x) { + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + long toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.next()) { + break; + } + } + } + } + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override + public boolean next() { + if (!this.iterator.hasNext()) { + if(!reload()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } + private boolean reload() { + if(!this.masteriterator.hasNext()) { + return false; + } + this.iterator = this.masteriterator.next(); + this.buffer = this.iterator.buffer(); + return true; + } + + + /** + * Get the nth literal word for the current running length word + * @param index zero based index + * @return the literal word + */ + @Override + public long getLiteralWordAt(int index) { + return this.buffer[this.literalWordStartPosition + index]; + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.brlw.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public long getRunningLength() { + return this.brlw.RunningLength; + } + + /** + * Size in uncompressed words of the current running length word. 
+ * + * @return the size + */ + @Override + public long size() { + return this.brlw.size(); + } + + + @Override + public BufferedIterator clone() throws CloneNotSupportedException { + BufferedIterator answer = (BufferedIterator) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + answer.masteriterator = this.masteriterator.clone(); + return answer; + } + + private BufferedRunningLengthWord brlw; + private long[] buffer; + private int literalWordStartPosition; + private EWAHIterator iterator; + private CloneableIterator<EWAHIterator> masteriterator; + } \ No newline at end of file diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ @@ -15,7 +15,7 @@ * @since 0.1.0 * */ -public final class BufferedRunningLengthWord { +public final class BufferedRunningLengthWord implements Cloneable { /** * Instantiates a new buffered running length word. 
@@ -34,7 +34,7 @@ * @param rlw the rlw */ public BufferedRunningLengthWord(final RunningLengthWord rlw) { - this(rlw.array[rlw.position]); + this(rlw.parent.buffer[rlw.position]); } /** @@ -99,7 +99,7 @@ * @param rlw the other running length word */ public void reset(final RunningLengthWord rlw) { - reset(rlw.array[rlw.position]); + reset(rlw.parent.buffer[rlw.position]); } /** @@ -147,6 +147,17 @@ + getRunningLength() + " number of lit. words " + getNumberOfLiteralWords(); } + + @Override +public BufferedRunningLengthWord clone() throws CloneNotSupportedException { + BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); + answer.literalwordoffset = this.literalwordoffset; + answer.NumberOfLiteralWords = this.NumberOfLiteralWords; + answer.RunningBit = this.RunningBit; + answer.RunningLength = this.RunningLength; + return answer; + } + /** how many literal words have we read so far? */ public int literalwordoffset = 0; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/CloneableIterator.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/CloneableIterator.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/CloneableIterator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/CloneableIterator.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,24 @@ +package com.googlecode.javaewah; + +/** + * Like a standard Java iterator, except that you can clone it. 
+ * + * @param the data type of the iterator + */ +public interface CloneableIterator extends Cloneable { + + /** + * @return whether there is more + */ + public boolean hasNext(); + /** + * @return the next element + */ + public E next(); + /** + * @return a copy + * @throws CloneNotSupportedException this should never happen in practice + */ + public CloneableIterator clone() throws CloneNotSupportedException; + +} \ No newline at end of file diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,14 +1,15 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ import java.util.*; import java.io.*; + /** *

* This implements the patent-free(1) EWAH scheme. Roughly speaking, it is a @@ -41,7 +42,7 @@ * *

    *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves - * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages + * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • *
* @@ -81,7 +82,7 @@ */ public EWAHCompressedBitmap() { this.buffer = new long[defaultbuffersize]; - this.rlw = new RunningLengthWord(this.buffer, 0); + this.rlw = new RunningLengthWord(this, 0); } /** @@ -94,7 +95,7 @@ */ public EWAHCompressedBitmap(final int buffersize) { this.buffer = new long[buffersize]; - this.rlw = new RunningLengthWord(this.buffer, 0); + this.rlw = new RunningLengthWord(this, 0); } /** @@ -103,10 +104,15 @@ * This is normally how you add data to the array. So you add bits in streams * of 8*8 bits. * + * Example: if you add 321, you are have added (in binary notation) + * 0b101000001, so you have effectively called set(0), set(6), set(8) + * in sequence. + * * @param newdata * the word */ - public void add(final long newdata) { + @Override +public void add(final long newdata) { add(newdata, wordinbits); } @@ -182,13 +188,14 @@ * @param number * the number of literal words to add */ - public void addStreamOfLiteralWords(final long[] data, final int start, + @Override +public void addStreamOfLiteralWords(final long[] data, final int start, final int number) { int leftovernumber = number; while(leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = number < RunningLengthWord.largestliteralcount - - NumberOfLiteralWords ? number : RunningLengthWord.largestliteralcount + final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount + - NumberOfLiteralWords ? 
leftovernumber : RunningLengthWord.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); leftovernumber -= whatwecanadd; @@ -210,7 +217,8 @@ * @param number * the number */ - public void addStreamOfEmptyWords(final boolean v, long number) { + @Override +public void addStreamOfEmptyWords(final boolean v, long number) { if (number == 0) return; this.sizeinbits += number * wordinbits; @@ -255,13 +263,14 @@ * @param number * the number of literal words to add */ + @Override public void addStreamOfNegatedLiteralWords(final long[] data, final int start, final int number) { int leftovernumber = number; while (leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = number < RunningLengthWord.largestliteralcount - - NumberOfLiteralWords ? number + final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount + - NumberOfLiteralWords ? leftovernumber : RunningLengthWord.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords @@ -283,12 +292,16 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @since 0.4.3 * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) { + @Override +public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords @@ -341,10 +354,12 @@ rlwj.discardFirstWords(nbre_literal); } } - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord remaining = i_remains ? 
rlwi : rlwj; - remaining.dischargeAsEmpty(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + if(adjustContainerSizeWhenAggregating) { + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; + remaining.dischargeAsEmpty(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } } @@ -371,11 +386,15 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) { + @Override +public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords @@ -387,15 +406,14 @@ /** * Returns a new compressed bitmap containing the bitwise AND NOT values of * the current bitmap with some other bitmap. This method is expected to - * be faster than doing A.and(((EWAHCompressedBitmap) B.clone()).not()). + * be faster than doing A.and(B.clone().not()). * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * @since 0.4.0 - * @param a - * the other bitmap - * @return the EWAH compressed bitmap + * @param a the other bitmap + * @param container where to store the result */ public void andNotToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { @@ -440,9 +458,10 @@ final IteratingBufferedRunningLengthWord remaining = i_remains ? 
rlwi : rlwj; if(i_remains) remaining.discharge(container); - else + else if(adjustContainerSizeWhenAggregating) remaining.dischargeAsEmpty(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + if(adjustContainerSizeWhenAggregating) + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); } /** @@ -469,7 +488,7 @@ */ public int cardinality() { int counter = 0; - final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords); + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { @@ -498,10 +517,10 @@ * @see java.lang.Object#clone() */ @Override - public Object clone() throws java.lang.CloneNotSupportedException { + public EWAHCompressedBitmap clone() throws java.lang.CloneNotSupportedException { final EWAHCompressedBitmap clone = (EWAHCompressedBitmap) super.clone(); clone.buffer = this.buffer.clone(); - clone.rlw = new RunningLengthWord(clone.buffer, this.rlw.position); + clone.rlw = new RunningLengthWord(clone, this.rlw.position); clone.actualsizeinwords = this.actualsizeinwords; clone.sizeinbits = this.sizeinbits; return clone; @@ -523,13 +542,12 @@ } for (int k = 0; k < this.actualsizeinwords; ++k) this.buffer[k] = in.readLong(); - this.rlw = new RunningLengthWord(this.buffer, in.readInt()); + this.rlw = new RunningLengthWord(this, in.readInt()); } /** * Check to see whether the two compressed bitmaps contain the same set bits. 
* - * @author Colby Ranger * @see java.lang.Object#equals(java.lang.Object) */ @Override @@ -554,7 +572,6 @@ * the boolean value * @param number * the number (must be greater than 0) - * @return nothing */ private void fastaddStreamOfEmptyWords(final boolean v, long number) { if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { @@ -597,10 +614,16 @@ * @return the EWAHIterator */ public EWAHIterator getEWAHIterator() { - return new EWAHIterator(this.buffer, this.actualsizeinwords); + return new EWAHIterator(this, this.actualsizeinwords); } /** + * @return the IteratingRLW iterator corresponding to this bitmap + */ + public IteratingRLW getIteratingRLW() { + return new IteratingBufferedRunningLengthWord(this); + } + /** * get the locations of the true values as one vector. (may use more memory * than iterator()) * @@ -608,7 +631,7 @@ */ public List getPositions() { final ArrayList v = new ArrayList(); - final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords); + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); int pos = 0; while (i.hasNext()) { RunningLengthWord localrlw = i.next(); @@ -645,7 +668,7 @@ public int hashCode() { int karprabin = 0; final int B = 31; - final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords); + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while( i.hasNext() ) { i.next(); if (i.rlw.getRunningBit() == true) { @@ -691,7 +714,7 @@ */ public IntIterator intIterator() { return new IntIteratorImpl( - new EWAHIterator(this.buffer, this.actualsizeinwords)); + new EWAHIterator(this, this.actualsizeinwords)); } /** @@ -700,17 +723,21 @@ * * @return the iterator */ - public Iterator iterator() { + @Override +public Iterator iterator() { return new Iterator() { - public boolean hasNext() { + @Override + public boolean hasNext() { return this.under.hasNext(); } - public Integer next() { + @Override + public Integer next() { return new 
Integer(this.under.next()); } - public void remove() { + @Override + public void remove() { throw new UnsupportedOperationException("bitsets do not support remove"); } @@ -732,9 +759,14 @@ final int number) { while (this.actualsizeinwords + number >= this.buffer.length) { final long oldbuffer[] = this.buffer; - this.buffer = new long[oldbuffer.length * 2]; + if((this.actualsizeinwords + number) < 32768) + this.buffer = new long[ (this.actualsizeinwords + number) * 2]; + else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow + this.buffer = new long[Integer.MAX_VALUE]; + else + this.buffer = new long[(this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; } for (int k = 0; k < number; ++k) this.buffer[this.actualsizeinwords + k] = ~data[start + k]; @@ -749,8 +781,9 @@ * sizeInBytes()). * */ - public void not() { - final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords); + @Override +public void not() { + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); if (!i.hasNext()) return; @@ -786,17 +819,23 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) { + @Override +public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); orToContainer(a, container); return container; } + + /** * Computes the bitwise or between the current bitmap and the bitmap "a". * Stores the result in the container. 
@@ -872,9 +911,14 @@ private void push_back(final long data) { if (this.actualsizeinwords == this.buffer.length) { final long oldbuffer[] = this.buffer; - this.buffer = new long[oldbuffer.length * 2]; + if(oldbuffer.length < 32768) + this.buffer = new long[ oldbuffer.length * 2]; + else if(oldbuffer.length * 3 / 2 < oldbuffer.length) // overflow + this.buffer = new long[Integer.MAX_VALUE]; + else + this.buffer = new long[oldbuffer.length * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; } this.buffer[this.actualsizeinwords++] = data; } @@ -890,11 +934,16 @@ * the number of words to add */ private void push_back(final long[] data, final int start, final int number) { - while (this.actualsizeinwords + number >= this.buffer.length) { + if (this.actualsizeinwords + number >= this.buffer.length) { final long oldbuffer[] = this.buffer; - this.buffer = new long[oldbuffer.length * 2]; + if(this.actualsizeinwords + number < 32768) + this.buffer = new long[(this.actualsizeinwords + number) * 2]; + else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow + this.buffer = new long[Integer.MAX_VALUE]; + else + this.buffer = new long[( this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; } System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); this.actualsizeinwords += number; @@ -903,7 +952,8 @@ /* * @see java.io.Externalizable#readExternal(java.io.ObjectInput) */ - public void readExternal(ObjectInput in) throws IOException { + @Override +public void readExternal(ObjectInput in) throws IOException { deserialize(in); } @@ -919,7 +969,7 @@ final long oldbuffer[] = this.buffer; this.buffer = new long[size]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = 
this.buffer; + this.rlw.parent.buffer = this.buffer; return true; } return false; @@ -949,15 +999,48 @@ public int serializedSizeInBytes() { return this.sizeInBytes() + 3 * 4; } + + + /** + * Query the value of a single bit. Relying on this method when speed is + * needed is discouraged. The complexity is linear with the size of the + * bitmap. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) + * + * @param i + * the bit we are interested in + * @return whether the bit is set to true + */ + public boolean get(final int i) { + if ((i < 0) || (i >= this.sizeinbits)) + return false; + int WordChecked = 0; + final IteratingRLW j = getIteratingRLW(); + final int wordi = i/wordinbits; + while (WordChecked <= wordi ) { + WordChecked += j.getRunningLength(); + if (wordi < WordChecked) { + return j.getRunningBit(); + } + if (wordi < WordChecked + j.getNumberOfLiteralWords()) { + final long w = j.getLiteralWordAt(wordi - WordChecked); + return (w & (1l << i)) != 0; + } + WordChecked += j.getNumberOfLiteralWords(); + j.next(); + } + return false; + } /** - * set the bit at position i to true, the bits must be set in increasing + * Set the bit at position i to true, the bits must be set in (strictly) increasing * order. For example, set(15) and then set(7) will fail. You must do set(7) * and then set(15). * * @param i * the index - * @return true if the value was set (always true when i>= sizeInBits()). + * @return true if the value was set (always true when i greater or equal to sizeInBits()). * @throws IndexOutOfBoundsException * if i is negative or greater than Integer.MAX_VALUE - 64 */ @@ -994,17 +1077,21 @@ } /** - * set the size in bits + * Set the size in bits. This does not change the compressed bitmap. 
* * @since 0.4.0 */ - public void setSizeInBits(final int size) { + @Override +public void setSizeInBits(final int size) { + if((size+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits) + throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean)."); this.sizeinbits = size; } /** * Change the reported size in bits of the *uncompressed* bitmap represented - * by this compressed bitmap. It is not possible to reduce the sizeInBits, but + * by this compressed bitmap. It may change the underlying compressed bitmap. + * It is not possible to reduce the sizeInBits, but * it can be extended. The new bits are set to false or true depending on the * value of defaultvalue. * @@ -1042,7 +1129,8 @@ * * @return the size in bits */ - public int sizeInBits() { + @Override +public int sizeInBits() { return this.sizeinbits; } @@ -1052,7 +1140,8 @@ * * @return the size in bytes */ - public int sizeInBytes() { + @Override +public int sizeInBytes() { return this.actualsizeinwords * (wordinbits / 8); } @@ -1066,7 +1155,7 @@ int[] ans = new int[this.cardinality()]; int inanspos = 0; int pos = 0; - final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords); + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { @@ -1108,7 +1197,7 @@ public String toDebugString() { String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits + " size in words = " + this.actualsizeinwords + "\n"; - final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords); + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { @@ -1144,11 +1233,43 @@ 
answer.append("}"); return answer.toString(); } + + /** + * swap the content of the bitmap with another. + * @param other bitmap to swap with + */ +public void swap(final EWAHCompressedBitmap other) { + long[] tmp = this.buffer; + this.buffer = other.buffer; + other.buffer = tmp; + + + int tmp2 = this.rlw.position; + this.rlw.position = other.rlw.position; + other.rlw.position = tmp2; + + int tmp3 = this.actualsizeinwords; + this.actualsizeinwords = other.actualsizeinwords; + other.actualsizeinwords = tmp3; + + int tmp4 = this.sizeinbits; + this.sizeinbits = other.sizeinbits; + other.sizeinbits = tmp4; + } + + /** + * Reduce the internal buffer to its minimal allowable size (given + * by this.actualsizeinwords). This can free memory. + */ + public void trim() { + this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords); + } /* * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) */ - public void writeExternal(ObjectOutput out) throws IOException { + @Override +public void writeExternal(ObjectOutput out) throws IOException { serialize(out); } @@ -1159,11 +1280,15 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) { + @Override +public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); xorToContainer(a, container); @@ -1250,121 +1375,20 @@ */ public static void andWithContainer(final BitmapStorage container, final EWAHCompressedBitmap... 
bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].andToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in ascending order by sizeinbits. When we exhaust the - // first bitmap the rest - // of the result is zeros. - final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, new Comparator() { - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return a.sizeinbits < b.sizeinbits ? -1 - : a.sizeinbits == b.sizeinbits ? 0 : 1; - } - }); - - int maxSize = sortedBitmaps[sortedBitmaps.length - 1].sizeinbits; - - final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; - for (int i = 0; i < sortedBitmaps.length; i++) { - EWAHIterator iterator = sortedBitmaps[i].getEWAHIterator(); - if (iterator.hasNext()) { - rlws[i] = new IteratingBufferedRunningLengthWord(iterator); - } else { - // this never happens... - if (maxSize > 0) { - extendEmptyBits(container, 0, maxSize); - } - container.setSizeInBits(maxSize); - return; - } - } - - while (true) { - long maxZeroRl = 0; - long minOneRl = Long.MAX_VALUE; - long minSize = Long.MAX_VALUE; - int numEmptyRl = 0; - - - for (IteratingBufferedRunningLengthWord rlw : rlws) { - - long size = rlw.size(); - minSize = Math.min(minSize, size); - - if (!rlw.getRunningBit()) { - long rl = rlw.getRunningLength(); - maxZeroRl = Math.max(maxZeroRl, rl); - minOneRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - long rl = rlw.getRunningLength(); - minOneRl = Math.min(minOneRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (minSize == 0) { - extendEmptyBits(container, sortedBitmaps[0].sizeinbits, maxSize); - break; - } - if (maxZeroRl > 0) { - container.addStreamOfEmptyWords(false, maxZeroRl); - for (IteratingBufferedRunningLengthWord rlw : rlws) { - rlw.discardFirstWords(maxZeroRl); - } - } else if (minOneRl > 0) { - 
container.addStreamOfEmptyWords(true, minOneRl); - for (IteratingBufferedRunningLengthWord rlw : rlws) { - rlw.discardFirstWords(minOneRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to process and the rest have a run of - // 1's we can write them out here - IteratingBufferedRunningLengthWord emptyRl = null; - long minNonEmptyRl = Long.MAX_VALUE; - for (IteratingBufferedRunningLengthWord rlw : rlws) { - long rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math.min(minNonEmptyRl, rl); - } - } - long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords((int) wordsToWrite, container); - index += wordsToWrite; - } - - while (index < minSize) { - long word = ~0l; - for (IteratingBufferedRunningLengthWord rlw : rlws) { - if (rlw.getRunningLength() <= index) { - word &= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); - } - } - container.add(word); - index++; - } - for (IteratingBufferedRunningLengthWord rlw : rlws) { - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBits(maxSize); + if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); + if(bitmaps.length == 2) { + bitmaps[0].andToContainer(bitmaps[1],container); + return; + } + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length - 1; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + answer.andToContainer(bitmaps[bitmaps.length - 1], container); } /** @@ -1373,20 +1397,28 @@ * * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). * + * If only one bitmap is provided, it is returned as is. 
+ * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @since 0.4.3 * @param bitmaps * bitmaps to AND together * @return result of the AND */ public static EWAHCompressedBitmap and(final EWAHCompressedBitmap... bitmaps) { - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - int largestSize = 0; - for (EWAHCompressedBitmap bitmap : bitmaps) { - largestSize = Math.max(bitmap.actualsizeinwords, largestSize); - } - container.reserve((int) (largestSize * 1.5)); - andWithContainer(container, bitmaps); - return container; + if(bitmaps.length == 1) return bitmaps[0]; + if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + return answer; } /** @@ -1400,6 +1432,7 @@ * @return the cardinality */ public static int andCardinality(final EWAHCompressedBitmap... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter counter = new BitCounter(); andWithContainer(counter, bitmaps); return counter.getCount(); @@ -1446,152 +1479,75 @@ + (currentLeftover != 0 ? -1 : 0)); } - /** - * For internal use. Computes the bitwise or of the provided bitmaps and - * stores the result in the container. - * - * @since 0.4.0 - */ - public static void orWithContainer(final BitmapStorage container, - final EWAHCompressedBitmap... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].orToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in descending order by sizeinbits. We will exhaust the - // sorted bitmaps from right to left. 
- final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, new Comparator() { - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return a.sizeinbits < b.sizeinbits ? 1 - : a.sizeinbits == b.sizeinbits ? 0 : -1; - } - }); - - final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; - int maxAvailablePos = 0; - for (EWAHCompressedBitmap bitmap : sortedBitmaps) { - EWAHIterator iterator = bitmap.getEWAHIterator(); - if (iterator.hasNext()) { - rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord( - iterator); - } - } - - if (maxAvailablePos == 0) { // this never happens... - container.setSizeInBits(0); - return; - } - - int maxSize = sortedBitmaps[0].sizeinbits; - - while (true) { - long maxOneRl = 0; - long minZeroRl = Long.MAX_VALUE; - long minSize = Long.MAX_VALUE; - int numEmptyRl = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - long size = rlw.size(); - if (size == 0) { - maxAvailablePos = i; - break; - } - minSize = Math.min(minSize, size); - - if (rlw.getRunningBit()) { - long rl = rlw.getRunningLength(); - maxOneRl = Math.max(maxOneRl, rl); - minZeroRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - long rl = rlw.getRunningLength(); - minZeroRl = Math.min(minZeroRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (maxAvailablePos == 0) { - break; - } else if (maxAvailablePos == 1) { - // only one bitmap is left so just write the rest of it out - rlws[0].discharge(container); - break; - } - - if (maxOneRl > 0) { - container.addStreamOfEmptyWords(true, maxOneRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(maxOneRl); - } - } else if (minZeroRl > 0) { - container.addStreamOfEmptyWords(false, minZeroRl); - for (int i = 0; i < maxAvailablePos; i++) { - 
IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(minZeroRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to process and the rest have a run of - // 0's we can write them out here - IteratingBufferedRunningLengthWord emptyRl = null; - long minNonEmptyRl = Long.MAX_VALUE; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - long rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math.min(minNonEmptyRl, rl); - } - } - long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords((int) wordsToWrite, container); - index += wordsToWrite; - } - - while (index < minSize) { - long word = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - if (rlw.getRunningLength() <= index) { - word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); - } - } - container.add(word); - index++; - } - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBits(maxSize); - } + + /** + * Uses an adaptive technique to compute the logical OR. + * Mostly for internal use. + * + * @param container where the aggregate is written. + * @param bitmaps to be aggregated + */ + public static void orWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + long size = 0L; + long sinbits = 0L; + for (EWAHCompressedBitmap b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation.bufferedorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation.orToContainer(container, bitmaps); + } + } + + /** + * Uses an adaptive technique to compute the logical XOR. + * Mostly for internal use. + * + * @param container where the aggregate is written. + * @param bitmaps to be aggregated + */ + public static void xorWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + long size = 0L; + long sinbits = 0L; + for (EWAHCompressedBitmap b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation.bufferedxorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation.xorToContainer(container, bitmaps); + } + } /** * Returns a new compressed bitmap containing the bitwise OR values of the * provided bitmaps. This is typically faster than doing the aggregation * two-by-two (A.or(B).or(C).or(D)). * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @since 0.4.0 * @param bitmaps * bitmaps to OR together * @return result of the OR */ public static EWAHCompressedBitmap or(final EWAHCompressedBitmap... 
bitmaps) { + if(bitmaps.length == 1) + return bitmaps[0]; final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); int largestSize = 0; for (EWAHCompressedBitmap bitmap : bitmaps) { @@ -1601,6 +1557,32 @@ orWithContainer(container, bitmaps); return container; } + /** + * Returns a new compressed bitmap containing the bitwise XOR values of the + * provided bitmaps. This is typically faster than doing the aggregation + * two-by-two (A.xor(B).xor(C).xor(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param bitmaps + * bitmaps to XOR together + * @return result of the XOR + */ + public static EWAHCompressedBitmap xor(final EWAHCompressedBitmap... bitmaps) { + if(bitmaps.length == 1) + return bitmaps[0]; + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + int largestSize = 0; + for (EWAHCompressedBitmap bitmap : bitmaps) { + largestSize = Math.max(bitmap.actualsizeinwords, largestSize); + } + container.reserve((int) (largestSize * 1.5)); + xorWithContainer(container, bitmaps); + return container; + } /** * Returns the cardinality of the result of a bitwise OR of the values of the @@ -1613,6 +1595,7 @@ * @return the cardinality */ public static int orCardinality(final EWAHCompressedBitmap... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter counter = new BitCounter(); orWithContainer(counter, bitmaps); return counter.getCount(); @@ -1638,6 +1621,9 @@ /** optimization option **/ public static final boolean usetrailingzeros = true; + + /** whether we adjust after some aggregation by adding in zeroes **/ + public static final boolean adjustContainerSizeWhenAggregating = true; /** The Constant wordinbits represents the number of bits in a long. 
*/ public static final int wordinbits = 64; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/EWAHIterator.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHIterator.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/EWAHIterator.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHIterator.java 2013-11-12 14:31:20.000000000 +0000 @@ -2,27 +2,27 @@ /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * The class EWAHIterator represents a special type of * efficient iterator iterating over (uncompressed) words of bits. - * + * It is not meant for end users. * @author Daniel Lemire * @since 0.1.0 * */ -public final class EWAHIterator { +public final class EWAHIterator implements Cloneable { /** - * Instantiates a new eWAH iterator. + * Instantiates a new EWAH iterator. * * @param a the array of words * @param sizeinwords the number of words that are significant in the array of words */ - public EWAHIterator(final long[] a, final int sizeinwords) { + public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) { this.rlw = new RunningLengthWord(a, 0); this.size = sizeinwords; this.pointer = 0; @@ -31,7 +31,7 @@ /** * Allow expert developers to instantiate an EWAHIterator. 
* - * @param bitmap + * @param bitmap we want to iterate over * @return an iterator */ public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) { @@ -45,7 +45,7 @@ * @return the long[] */ public long[] buffer() { - return this.rlw.array; + return this.rlw.parent.buffer; } /** @@ -77,6 +77,14 @@ return this.rlw; } + @Override + public EWAHIterator clone() throws CloneNotSupportedException { + EWAHIterator ans = (EWAHIterator) super.clone(); + ans.rlw = this.rlw.clone(); + ans.size = this.size; + ans.pointer = this.pointer; + return ans; + } /** The pointer represent the location of the current running length * word in the array of words (embedded in the rlw attribute). */ int pointer; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/FastAggregation.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/FastAggregation.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/FastAggregation.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/FastAggregation.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,74 +1,436 @@ package com.googlecode.javaewah; +import java.util.Arrays; import java.util.Comparator; import java.util.PriorityQueue; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + /** - * Fast algorithms to aggregate many bitmaps. - * These algorithms are just given as reference. - * They may not be faster than the corresponding - * methods in the EWAHCompressedBitmap class. + * Fast algorithms to aggregate many bitmaps. These algorithms are just given as + * reference. They may not be faster than the corresponding methods in the + * EWAHCompressedBitmap class. * * @author Daniel Lemire - * + * */ public class FastAggregation { + /** + * Compute the and aggregate using a temporary uncompressed bitmap. 
+ * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) + * @return the or aggregate. + */ + public static EWAHCompressedBitmap bufferedand(final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedandWithContainer(answer,bufsize, bitmaps); + return answer; + } + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) + * @param bitmaps the source bitmaps + */ + public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + + java.util.LinkedList al = new java.util.LinkedList(); + for (EWAHCompressedBitmap bitmap : bitmaps) { + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + + long[] hardbitmap = new long[bufsize*bitmaps.length]; + + for(IteratingRLW i : al) + if (i.size() == 0) { + al.clear(); + break; + } + + while (!al.isEmpty()) { + Arrays.fill(hardbitmap, ~0l); + long effective = Integer.MAX_VALUE; + for(IteratingRLW i : al) { + int eff = IteratorAggregation.inplaceand(hardbitmap, i); + if (eff < effective) + effective = eff; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + for(IteratingRLW i : al) + if (i.size() == 0) { + al.clear(); + break; + } + } + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words + * @return the or aggregate. + */ + public static EWAHCompressedBitmap bufferedor(final int bufsize, + final EWAHCompressedBitmap... 
bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedorWithContainer(answer, bufsize, bitmaps); + return answer; + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + int range = 0; + EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + long[] hardbitmap = new long[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + long effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + + } + container.setSizeInBits(range); + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words + * @return the xor aggregate. + */ + public static EWAHCompressedBitmap bufferedxor(final int bufsize, + final EWAHCompressedBitmap... 
bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedxorWithContainer(answer, bufsize,bitmaps); + return answer; + } + + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + int range = 0; + EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + long[] hardbitmap = new long[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + long effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + } + container.setSizeInBits(range); + } + + /** + * Uses a priority queue to compute the or aggregate. + * @param a class extending LogicalElement (like a compressed bitmap) + * @param bitmaps + * bitmaps to be aggregated + * @return the or aggregate + */ + @SuppressWarnings({ "rawtypes", "unchecked" }) + public static T or(T... 
bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(T a, T b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (T x : bitmaps) { + pq.add(x); + } + while (pq.size() > 1) { + T x1 = pq.poll(); + T x2 = pq.poll(); + pq.add((T) x1.or(x2)); + } + return pq.poll(); + } + /** + * Uses a priority queue to compute the or aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void orToContainer(final BitmapStorage container, + final EWAHCompressedBitmap ... bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.or(x2)); + } + pq.poll().orToContainer(pq.poll(), container); + } + + + /** + * Uses a priority queue to compute the xor aggregate. + * + * @param a class extending LogicalElement (like a compressed bitmap) + * @param bitmaps + * bitmaps to be aggregated + * @return the xor aggregate + */ + @SuppressWarnings({ "rawtypes", "unchecked" }) + public static T xor(T... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + + @Override + public int compare(T a, T b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (T x : bitmaps) + pq.add(x); + while (pq.size() > 1) { + T x1 = pq.poll(); + T x2 = pq.poll(); + pq.add((T) x1.xor(x2)); + } + return pq.poll(); + } + + /** + * Uses a priority queue to compute the xor aggregate. 
+ * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void xorToContainer(final BitmapStorage container, + final EWAHCompressedBitmap ... bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + pq.poll().xorToContainer(pq.poll(), container); + } + + /** + * For internal use. Computes the bitwise or of the provided bitmaps and + * stores the result in the container. (This used to be the default.) + * + * @deprecated use EWAHCompressedBitmap.or instead + * @since 0.4.0 + * @param container where store the result + * @param bitmaps to be aggregated + */ + @Deprecated + public static void legacy_orWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length == 2) { + // should be more efficient + bitmaps[0].orToContainer(bitmaps[1], container); + return; + } + + // Sort the bitmaps in descending order by sizeinbits. We will exhaust the + // sorted bitmaps from right to left. + final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); + Arrays.sort(sortedBitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeinbits < b.sizeinbits ? 1 + : a.sizeinbits == b.sizeinbits ? 
0 : -1; + } + }); + + final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; + int maxAvailablePos = 0; + for (EWAHCompressedBitmap bitmap : sortedBitmaps) { + EWAHIterator iterator = bitmap.getEWAHIterator(); + if (iterator.hasNext()) { + rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord( + iterator); + } + } + + if (maxAvailablePos == 0) { // this never happens... + container.setSizeInBits(0); + return; + } + + int maxSize = sortedBitmaps[0].sizeinbits; + + while (true) { + long maxOneRl = 0; + long minZeroRl = Long.MAX_VALUE; + long minSize = Long.MAX_VALUE; + int numEmptyRl = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + long size = rlw.size(); + if (size == 0) { + maxAvailablePos = i; + break; + } + minSize = Math.min(minSize, size); + + if (rlw.getRunningBit()) { + long rl = rlw.getRunningLength(); + maxOneRl = Math.max(maxOneRl, rl); + minZeroRl = 0; + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } else { + long rl = rlw.getRunningLength(); + minZeroRl = Math.min(minZeroRl, rl); + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } + } + + if (maxAvailablePos == 0) { + break; + } else if (maxAvailablePos == 1) { + // only one bitmap is left so just write the rest of it out + rlws[0].discharge(container); + break; + } + + if (maxOneRl > 0) { + container.addStreamOfEmptyWords(true, maxOneRl); + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + rlw.discardFirstWords(maxOneRl); + } + } else if (minZeroRl > 0) { + container.addStreamOfEmptyWords(false, minZeroRl); + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + rlw.discardFirstWords(minZeroRl); + } + } else { + int index = 0; + + if (numEmptyRl == 1) { + // if one rlw has literal words to process and the rest have a run of + // 0's we can write them out here + 
IteratingBufferedRunningLengthWord emptyRl = null; + long minNonEmptyRl = Long.MAX_VALUE; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + long rl = rlw.getRunningLength(); + if (rl == 0) { + assert emptyRl == null; + emptyRl = rlw; + } else { + minNonEmptyRl = Math.min(minNonEmptyRl, rl); + } + } + long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; + if (emptyRl != null) + emptyRl.writeLiteralWords((int) wordsToWrite, container); + index += wordsToWrite; + } - @SuppressWarnings({ "rawtypes", "unchecked" }) - public static T and(T...bitmaps) { - // for "and" a priority queue is not needed, but - // overhead ought to be small - PriorityQueue pq = new PriorityQueue( - bitmaps.length, new Comparator() { - public int compare(T a, T b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (T x : bitmaps) - pq.add(x); - while (pq.size() > 1) { - T x1 = pq.poll(); - T x2 = pq.poll(); - pq.add( (T) x1.and(x2)); - } - return pq.poll(); - } - - @SuppressWarnings({ "rawtypes", "unchecked" }) - public static T or(T...bitmaps) { - PriorityQueue pq = new PriorityQueue( - bitmaps.length, new Comparator() { - public int compare(T a, T b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (T x : bitmaps) { - pq.add(x); - } - while (pq.size() > 1) { - T x1 = pq.poll(); - T x2 = pq.poll(); - pq.add( (T) x1.or(x2)); - } - return pq.poll(); - } - - @SuppressWarnings({ "rawtypes", "unchecked" }) - public static T xor(T...bitmaps) { - PriorityQueue pq = new PriorityQueue( - bitmaps.length, new Comparator() { - - public int compare(T a, T b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (T x : bitmaps) - pq.add(x); - while (pq.size() > 1) { - T x1 = pq.poll(); - T x2 = pq.poll(); - pq.add( (T) x1.xor(x2)); - } - return pq.poll(); - } + while (index < minSize) { + long word = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + if 
(rlw.getRunningLength() <= index) { + word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); + } + } + container.add(word); + index++; + } + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + rlw.discardFirstWords(minSize); + } + } + } + container.setSizeInBits(maxSize); + } + } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java 2013-11-12 14:31:20.000000000 +0000 @@ -2,7 +2,7 @@ /* * Copyright 2012, Google Inc. - * Licensed under APL 2.0. + * Licensed under the Apache License, Version 2.0. */ import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; @@ -43,11 +43,13 @@ return true; } - public boolean hasNext() { + @Override +public boolean hasNext() { return this.hasnext; } - public final int next() { + @Override +public final int next() { final int answer; if (runningHasNext()) { answer = this.position++; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IntIterator.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIterator.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IntIterator.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIterator.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. 
*/ /** diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,89 @@ +package com.googlecode.javaewah; + +import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Implementation of an IntIterator over an IteratingRLW. + * + * + */ +public class IntIteratorOverIteratingRLW implements IntIterator { + IteratingRLW parent; + private int position; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + /** + * @param p iterator we wish to iterate over + */ + public IntIteratorOverIteratingRLW(final IteratingRLW p) { + this.parent = p; + this.position = 0; + setupForCurrentRunningLengthWord(); + this.hasnext = moveToNext(); + } + + /** + * @return whether we could find another set bit; don't move if there is an unprocessed value + */ + private final boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (this.parent.next()) + setupForCurrentRunningLengthWord(); + else return false; + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasnext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int bit = Long.numberOfTrailingZeros(this.word); + this.word ^= (1l << bit); + answer = this.literalPosition + 
bit; + } + this.hasnext = this.moveToNext(); + return answer; + } + + private final void setupForCurrentRunningLengthWord() { + this.runningLength = wordinbits * (int) this.parent.getRunningLength() + + this.position; + + if (!this.parent.getRunningBit()) { + this.position = this.runningLength; + } + this.wordPosition = 0; + this.wordLength = this.parent.getNumberOfLiteralWords(); + } + + private final boolean runningHasNext() { + return this.position < this.runningLength; + } + + private final boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.parent.getLiteralWordAt(this.wordPosition++); + this.literalPosition = this.position; + this.position += wordinbits; + } + return this.word != 0; + } +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically @@ -11,7 +11,7 @@ * @since 0.4.0 * @author David McIntosh */ -public final class IteratingBufferedRunningLengthWord { +public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{ /** * Instantiates a new iterating buffered running length word. 
* @@ -24,21 +24,30 @@ this.buffer = this.iterator.buffer(); } - /** - * Instantiates a new iterating buffered running length word. - * - * @param iterator iterator - */ - public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { - this(EWAHIterator.getEWAHIterator(bitmap)); - } + + + /** + * Instantiates a new iterating buffered running length word. + * @param bitmap over which we want to iterate + * + */ +public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { + this.iterator = EWAHIterator.getEWAHIterator(bitmap); + this.brlw = new BufferedRunningLengthWord(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + + + /** * Discard first words, iterating to the next running length word if needed. * - * @param x the x + * @param x the number of words to be discarded */ - public void discardFirstWords(long x) { + @Override +public void discardFirstWords(long x) { while (x > 0) { if (this.brlw.RunningLength > x) { this.brlw.RunningLength -= x; @@ -60,6 +69,21 @@ } } } + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override +public boolean next() { + if (!this.iterator.hasNext()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } /** * Write out up to max words, returns how many were written @@ -103,14 +127,13 @@ pl = max - index; } container.addStreamOfEmptyWords(!getRunningBit(), pl); - discardFirstWords(pl); index += pl; int pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = (int) (max - index); } writeNegatedLiteralWords(pd, container); - discardFirstWords(pd); + discardFirstWords(pl+pd); index += pd; } return index; @@ -144,7 +167,8 @@ * @param index zero based 
index * @return the literal word */ - public long getLiteralWordAt(int index) { + @Override +public long getLiteralWordAt(int index) { return this.buffer[this.literalWordStartPosition + index]; } @@ -153,7 +177,8 @@ * * @return the number of literal words */ - public int getNumberOfLiteralWords() { + @Override +public int getNumberOfLiteralWords() { return this.brlw.NumberOfLiteralWords; } @@ -162,7 +187,8 @@ * * @return the running bit */ - public boolean getRunningBit() { + @Override +public boolean getRunningBit() { return this.brlw.RunningBit; } @@ -171,7 +197,8 @@ * * @return the running length */ - public long getRunningLength() { + @Override +public long getRunningLength() { return this.brlw.RunningLength; } @@ -180,14 +207,15 @@ * * @return the long */ - public long size() { + @Override +public long size() { return this.brlw.size(); } /** * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. - * @param numWords - * @param container + * @param numWords number of words to be written + * @param container where we write */ public void writeLiteralWords(int numWords, BitmapStorage container) { container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); @@ -195,15 +223,15 @@ /** * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. - * @param numWords - * @param container + * @param numWords number of words to be written + * @param container where we write */ public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); } /** - * For internal use. (One could use the non-static dischard method instead, + * For internal use. (One could use the non-static discharge method instead, * but we expect them to be slower.) 
* * @param initialWord @@ -228,6 +256,18 @@ runningLengthWord = new BufferedRunningLengthWord(iterator.next()); } } + + + @Override + public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { + IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + return answer; + } + private BufferedRunningLengthWord brlw; private long[] buffer; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratingRLW.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingRLW.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratingRLW.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingRLW.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,49 @@ +package com.googlecode.javaewah; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * High-level iterator over a compressed bitmap. + * + */ +public interface IteratingRLW { + /** + * @return whether there is more + */ + public boolean next() ; + /** + * @param index where the literal word is + * @return the literal word at the given index. 
+ */ + public long getLiteralWordAt(int index); + /** + * @return the number of literal (non-fill) words + */ + public int getNumberOfLiteralWords() ; + /** + * @return the bit used for the fill bits + */ + public boolean getRunningBit() ; + /** + * @return sum of getRunningLength() and getNumberOfLiteralWords() + */ + public long size() ; + /** + * @return length of the run of fill words + */ + public long getRunningLength() ; + /** + * @param x the number of words to discard + */ + public void discardFirstWords(long x); + + /** + * @return a copy of the iterator + * @throws CloneNotSupportedException this should not be thrown in theory + */ + public IteratingRLW clone() throws CloneNotSupportedException; +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratorAggregation.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorAggregation.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratorAggregation.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorAggregation.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,616 @@ +package com.googlecode.javaewah; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Set of helper functions to aggregate bitmaps. 
+ * + */ +public class IteratorAggregation { + + /** + * @param x iterator to negate + * @return negated version of the iterator + */ + public static IteratingRLW not(final IteratingRLW x) { + return new IteratingRLW() { + + @Override + public boolean next() { + return x.next(); + } + + @Override + public long getLiteralWordAt(int index) { + return ~x.getLiteralWordAt(index); + } + + @Override + public int getNumberOfLiteralWords() { + return x.getNumberOfLiteralWords(); + } + + @Override + public boolean getRunningBit() { + return ! x.getRunningBit(); + } + + @Override + public long size() { + return x.size(); + } + + @Override + public long getRunningLength() { + return x.getRunningLength(); + } + + @Override + public void discardFirstWords(long y) { + x.discardFirstWords(y); + } + + @Override + public IteratingRLW clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + + }; + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return and aggregate + */ + public static IteratingRLW bufferedand(final IteratingRLW... al) { + return bufferedand(DEFAULTMAXBUFSIZE,al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator) + * @return and aggregate + */ + public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + final LinkedList basell = new LinkedList(); + for (IteratingRLW i : al) + basell.add(i); + return new BufferedIterator(new BufferedAndIterator(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. 
+ * + * @param al set of iterators to aggregate + * @return or aggregate + */ + public static IteratingRLW bufferedor(final IteratingRLW... al) { + return bufferedor(DEFAULTMAXBUFSIZE,al); + } + + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return or aggregate + */ + public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + for (IteratingRLW i : al) + basell.add(i); + return new BufferedIterator(new BufferedORIterator(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return xor aggregate + */ + public static IteratingRLW bufferedxor(final IteratingRLW... al) { + return bufferedxor(DEFAULTMAXBUFSIZE,al); + } + + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return xor aggregate + */ + public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + + final LinkedList basell = new LinkedList(); + for (IteratingRLW i : al) + basell.add(i); + + return new BufferedIterator(new BufferedXORIterator(basell, bufsize)); + } + + + /** + * Write out the content of the iterator, but as if it were all zeros. 
+ * + * @param container + * where we write + * @param i + * the iterator + */ + protected static void dischargeAsEmpty(final BitmapStorage container, + final IteratingRLW i) { + while (i.size() > 0) { + container.addStreamOfEmptyWords(false, i.size()); + i.next(); + + } + } + + /** + * Write out up to max words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + + protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { + long counter = 0; + while (i.size() > 0 && counter < max) { + long L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), L1); + counter += L1; + } + long L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + + /** + * Write out up to max negated words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { + long counter = 0; + while (i.size() > 0 && counter < max) { + long L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(!i.getRunningBit(), L1); + counter += L1; + } + long L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(~i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + static void andToContainer(final BitmapStorage container, + int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { + while ((rlwi.size()>0) && 
(rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + static void andToContainer(final BitmapStorage container, + final IteratingRLW rlwi, IteratingRLW rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + + /** + * Compute the first few words of the XOR aggregate between two iterators. + * + * @param container where to write + * @param desiredrlwcount number of words to be written (max) + * @param rlwi first iterator to aggregate + * @param rlwj second iterator to aggregate + */ + public static void xorToContainer(final BitmapStorage container, + int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + long index = dischargeNegated(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + protected static int inplaceor(long[] bitmap, + IteratingRLW i) { + + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) i.getRunningLength(); + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); + pos += L; + final int LR = i.getNumberOfLiteralWords(); + + for (int k = 0; k < LR; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = (int) i.getRunningLength(); + + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0l); + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + protected static int inplacexor(long[] bitmap, + IteratingRLW i) { + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < 
bitmap.length) { + final int L = (int) i.getRunningLength(); + if (i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = (int) i.getRunningLength(); + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = ~bitmap[k]; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + protected static int inplaceand(long[] bitmap, + IteratingRLW i) { + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) i.getRunningLength(); + if (!i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = (int) i.getRunningLength(); + if (pos + L > bitmap.length) { + if (!i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = 0; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (!i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + /** + * An optimization option. Larger values may improve speed, but at + * the expense of memory. 
+ */ + public final static int DEFAULTMAXBUFSIZE = 65536; +} +class BufferedORIterator implements CloneableIterator { + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + long[] hardbitmap; + LinkedList ll; + int buffersize; + + BufferedORIterator(LinkedList basell, int bufsize) { + this.ll = basell; + this.hardbitmap = new long[bufsize]; + } + + @Override + public BufferedXORIterator clone() throws CloneNotSupportedException { + BufferedXORIterator answer = (BufferedXORIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + long effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) { + this.buffer.add(this.hardbitmap[k]); + } + + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + +class BufferedXORIterator implements CloneableIterator { + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + long[] hardbitmap; + LinkedList ll; + int buffersize; + + BufferedXORIterator(LinkedList basell, int bufsize) { + this.ll = basell; + this.hardbitmap = new long[bufsize]; + } + + @Override + public BufferedXORIterator clone() throws CloneNotSupportedException { + BufferedXORIterator answer = (BufferedXORIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + long 
effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.add(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + + +class BufferedAndIterator implements CloneableIterator { + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + LinkedList ll; + int buffersize; + + public BufferedAndIterator(LinkedList basell, int bufsize) { + this.ll = basell; + this.buffersize = bufsize; + + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public BufferedAndIterator clone() throws CloneNotSupportedException { + BufferedAndIterator answer = (BufferedAndIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(), + this.ll.get(0), this.ll.get(1)); + if (this.ll.size() > 2) { + Iterator i = this.ll.iterator(); + i.next(); + i.next(); + EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); + while (i.hasNext() && this.buffer.sizeInBytes() > 0) { + IteratorAggregation.andToContainer(tmpbuffer, + this.buffer.getIteratingRLW(), i.next()); + this.buffer.swap(tmpbuffer); + tmpbuffer.clear(); + } + } + Iterator i = this.ll.iterator(); + while(i.hasNext()) { + if(i.next().size() == 0) { + this.ll.clear(); + break; + } + } + return this.buffer.getEWAHIterator(); + } + +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratorUtil.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorUtil.java --- 
libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/IteratorUtil.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorUtil.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,132 @@ +package com.googlecode.javaewah; + +import java.util.Iterator; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Convenience functions for working over iterators + * + */ +public class IteratorUtil { + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { + return new IntIteratorOverIteratingRLW(i); + } + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static Iterator toSetBitsIterator(final IteratingRLW i) { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return new Integer(this.under.next()); + } + + @Override + public void remove() { + } + + final private IntIterator under = toSetBitsIntIterator(i); + }; + + } + + /** + * Generate a bitmap from an iterator + * + * @param i iterator we wish to materialize + * @param c where we write + */ + public static void materialize(final IteratingRLW i, final BitmapStorage c) { + while (true) { + if (i.getRunningLength() > 0) { + c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); + } + for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) + c.add(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + } + + /** + * @param i iterator we wish to iterate over + * @return the cardinality (number of set bits) corresponding to the iterator + */ + public static int cardinality(final IteratingRLW 
i) { + int answer = 0; + while (true) { + if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits; + for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) + answer += Long.bitCount(i.getLiteralWordAt(k)); + if(!i.next()) break; + } + return answer; + } + + /** + * @param x set of bitmaps + * @return an array of iterators corresponding to the array of bitmaps + */ + public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... x) { + IteratingRLW[] X = new IteratingRLW[x.length]; + for (int k = 0; k < X.length; ++k) { + X[k] = new IteratingBufferedRunningLengthWord(x[k]); + } + return X; + } + /** + * Turn an iterator into a bitmap. + * + * @param i iterator we wish to materialize + * @param c where we write + * @param Max maximum number of words we wish to materialize + * @return how many words were actually materialized + */ + public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) { + final long origMax = Max; + while (true) { + if (i.getRunningLength() > 0) { + long L = i.getRunningLength(); + if(L > Max) L = Max; + c.addStreamOfEmptyWords(i.getRunningBit(), L); + Max -= L; + } + long L = i.getNumberOfLiteralWords(); + for (int k = 0; k < L; ++k) + c.add(i.getLiteralWordAt(k)); + if(Max>0) { + if (!i.next()) + break; + } + else break; + } + return origMax - Max; + } + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @return materialized version of the iterator + */ + public static EWAHCompressedBitmap materialize(final IteratingRLW i) { + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + materialize(i, ewah); + return ewah; + } + +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/LogicalElement.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LogicalElement.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/LogicalElement.java 2013-06-11 17:48:37.000000000 +0000 +++ 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LogicalElement.java 2013-11-12 14:31:20.000000000 +0000 @@ -6,19 +6,20 @@ * be concerned by this class. * * @author Daniel Lemire + * @param the type of element (e.g., a bitmap class) * */ public interface LogicalElement { /** * Compute the bitwise logical and - * @param another element + * @param le element * @return the result of the operation */ public T and(T le); /** * Compute the bitwise logical and not - * @param another element + * @param le element * @return the result of the operation */ public T andNot(T le); @@ -27,10 +28,12 @@ * Compute the bitwise logical not (in place) */ public void not(); - @SuppressWarnings("rawtypes") + + + @SuppressWarnings({ "rawtypes", "javadoc" }) /** * Compute the bitwise logical or - * @param another element + * @param le another element * @return the result of the operation */ public LogicalElement or(T le); @@ -38,7 +41,6 @@ /** * How many logical bits does this element represent? * - * @param another element * @return the number of bits represented by this element */ public int sizeInBits(); @@ -52,7 +54,7 @@ /** * Compute the bitwise logical Xor - * @param another element + * @param le element * @return the results of the operation */ public T xor(T le); diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. 
+ * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * This is a BitmapStorage that can be used to determine quickly if the result @@ -38,42 +38,43 @@ * * @see com.googlecode.javaewah.BitmapStorage#add(long) */ - public void add(long newdata) { + @Override +public void add(long newdata) { if (newdata != 0) throw nonEmptyException; return; } /** - * throws a NonEmptyException exception when number > 0 + * throws a NonEmptyException exception when number is greater than 0 * - * @see com.googlecode.javaewah.BitmapStorage#addStreamOfLiteralWords(long[], long, long) */ - public void addStreamOfLiteralWords(long[] data, int start, int number) { + @Override +public void addStreamOfLiteralWords(long[] data, int start, int number) { if(number>0){ throw nonEmptyException; } } /** - * If the boolean value is true and number>0, then it throws a NonEmptyException exception, + * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, * otherwise, nothing happens. 
* * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long) */ - public void addStreamOfEmptyWords(boolean v, long number) { + @Override +public void addStreamOfEmptyWords(boolean v, long number) { if (v && (number>0)) throw nonEmptyException; return; } /** - * throws a NonEmptyException exception when number > 0 + * throws a NonEmptyException exception when number is greater than 0 * - * @see com.googlecode.javaewah.BitmapStorage#addStreamOfNegatedLiteralWords(long[], long, - * long) */ - public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { + @Override +public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { if(number>0){ throw nonEmptyException; } @@ -84,7 +85,8 @@ * * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) */ - public void setSizeInBits(int bits) { + @Override +public void setSizeInBits(int bits) { } } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/RunningLengthWord.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/RunningLengthWord.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah/RunningLengthWord.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/RunningLengthWord.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,139 +1,152 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. - * + * * @since 0.1.0 * @author Daniel Lemire */ -public final class RunningLengthWord { +public final class RunningLengthWord implements Cloneable { + + /** + * Instantiates a new running length word. 
+ * + * @param a + * an array of 64-bit words + * @param p + * position in the array where the running length word is + * located. + */ + RunningLengthWord(final EWAHCompressedBitmap a, final int p) { + this.parent = a; + this.position = p; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return (int) (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return (this.parent.buffer[this.position] & 1) != 0; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public long getRunningLength() { + return (this.parent.buffer[this.position] >>> 1) + & largestrunninglengthcount; + } + + /** + * Sets the number of literal words. + * + * @param number + * the new number of literal words + */ + public void setNumberOfLiteralWords(final long number) { + this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; + this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) + | runninglengthplusrunningbit; + } + + /** + * Sets the running bit. + * + * @param b + * the new running bit + */ + public void setRunningBit(final boolean b) { + if (b) + this.parent.buffer[this.position] |= 1l; + else + this.parent.buffer[this.position] &= ~1l; + } + + /** + * Sets the running length. + * + * @param number + * the new running length + */ + public void setRunningLength(final long number) { + this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; + this.parent.buffer[this.position] &= (number << 1) + | notshiftedlargestrunninglengthcount; + } + + /** + * Return the size in uncompressed words represented by this running + * length word. 
+ * + * @return the size + */ + public long size() { + return getRunningLength() + getNumberOfLiteralWords(); + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public RunningLengthWord clone() throws CloneNotSupportedException { + RunningLengthWord answer; + answer = (RunningLengthWord) super.clone(); + answer.parent = this.parent; + answer.position = this.position; + return answer; + } + + /** The array of words. */ + public EWAHCompressedBitmap parent; + + /** The position in array. */ + public int position; + + /** + * number of bits dedicated to marking of the running length of clean + * words + */ + public static final int runninglengthbits = 32; + + private static final int literalbits = 64 - 1 - runninglengthbits; + + /** largest number of literal words in a run. */ + public static final int largestliteralcount = (1 << literalbits) - 1; + + /** largest number of clean words in a run */ + public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; + + private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; + + private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; + + private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; + + private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; - /** - * Instantiates a new running length word. - * - * @param a an array of 64-bit words - * @param p position in the array where the running length word is located. - */ - RunningLengthWord(final long[] a, final int p) { - this.array = a; - this.position = p; - } - - /** - * Gets the number of literal words. 
- * - * @return the number of literal words - */ - public int getNumberOfLiteralWords() { - return (int) (this.array[this.position] >>> (1 + runninglengthbits)); - } - - /** - * Gets the running bit. - * - * @return the running bit - */ - public boolean getRunningBit() { - return (this.array[this.position] & 1) != 0; - } - - /** - * Gets the running length. - * - * @return the running length - */ - public long getRunningLength() { - return (this.array[this.position] >>> 1) & largestrunninglengthcount; - } - - /** - * Sets the number of literal words. - * - * @param number the new number of literal words - */ - public void setNumberOfLiteralWords(final long number) { - this.array[this.position] |= notrunninglengthplusrunningbit; - this.array[this.position] &= (number << (runninglengthbits + 1)) - | runninglengthplusrunningbit; - } - - /** - * Sets the running bit. - * - * @param b the new running bit - */ - public void setRunningBit(final boolean b) { - if (b) - this.array[this.position] |= 1l; - else - this.array[this.position] &= ~1l; - } - - /** - * Sets the running length. - * - * @param number the new running length - */ - public void setRunningLength(final long number) { - this.array[this.position] |= shiftedlargestrunninglengthcount; - this.array[this.position] &= (number << 1) - | notshiftedlargestrunninglengthcount; - } - - /** - * Return the size in uncompressed words represented by - * this running length word. - * - * @return the long - */ - public long size() { - return getRunningLength() + getNumberOfLiteralWords(); - } - - /* - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "running bit = " + getRunningBit() + " running length = " - + getRunningLength() + " number of lit. words " - + getNumberOfLiteralWords(); - } - - - /** The array of words. */ - public long[] array; - - /** The position in array. 
*/ - public int position; - - - - - /** number of bits dedicated to marking of the running length of clean words */ - public static final int runninglengthbits = 32; - - private static final int literalbits = 64 - 1 - runninglengthbits; - - - /** largest number of literal words in a run. */ - public static final int largestliteralcount = (1 << literalbits) - 1; - - /** largest number of clean words in a run */ - public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; - - private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; - - private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; - - - private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; - - private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; - } \ No newline at end of file diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/BitCounter32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BitCounter32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/BitCounter32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BitCounter32.java 2013-11-12 14:31:20.000000000 +0000 @@ -4,7 +4,7 @@ /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Licensed under the Apache License, Version 2.0. */ /** * BitCounter is a fake bitset data structure. 
Instead of storing the actual data, @@ -22,7 +22,8 @@ * @param newdata the word */ // @Override : causes problems with Java 1.5 - public void add(final int newdata) { + @Override +public void add(final int newdata) { this.oneBits += Integer.bitCount(newdata); } @@ -35,7 +36,8 @@ * @param number the number of literal words to add */ // @Override : causes problems with Java 1.5 - public void addStreamOfLiteralWords(int[] data, int start, int number) { + @Override +public void addStreamOfLiteralWords(int[] data, int start, int number) { for(int i=start;i iterator) { + this.masteriterator = iterator; + if(this.masteriterator.hasNext()) { + this.iterator = this.masteriterator.next(); + this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + } + + + /** + * Discard first words, iterating to the next running length word if needed. + * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(int x) { + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + int toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.next()) { + break; + } + } + } + } + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override + public boolean next() { + if (!this.iterator.hasNext()) { + if(!reload()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } + private boolean reload() { + if(!this.masteriterator.hasNext()) { + return false; + } + this.iterator = this.masteriterator.next(); + this.buffer = this.iterator.buffer(); + return true; + } + + + /** + * Get the nth literal word for the current running length word + * @param index zero based index + * @return the literal word + */ + @Override + public int getLiteralWordAt(int index) { + return this.buffer[this.literalWordStartPosition + index]; + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.brlw.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public int getRunningLength() { + return this.brlw.RunningLength; + } + + /** + * Size in uncompressed words of the current running length word. 
+ * + * @return the size + */ + @Override + public int size() { + return this.brlw.size(); + } + + @Override + public BufferedIterator32 clone() throws CloneNotSupportedException { + BufferedIterator32 answer = (BufferedIterator32) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + answer.masteriterator = this.masteriterator.clone(); + return answer; + } + + private BufferedRunningLengthWord32 brlw; + private int[] buffer; + private int literalWordStartPosition; + private EWAHIterator32 iterator; + private CloneableIterator masteriterator; + } \ No newline at end of file diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ @@ -15,7 +15,7 @@ * @since 0.5.0 * */ -public final class BufferedRunningLengthWord32 { +public final class BufferedRunningLengthWord32 implements Cloneable { /** * Instantiates a new buffered running length word. @@ -34,13 +34,13 @@ * @param rlw the rlw */ public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { - this(rlw.array[rlw.position]); + this(rlw.parent.buffer[rlw.position]); } /** * Discard first words. 
* - * @param x the x + * @param x the number of words to be discarded */ public void discardFirstWords(int x) { if (this.RunningLength >= x) { @@ -99,7 +99,7 @@ * @param rlw the other running length word */ public void reset(final RunningLengthWord32 rlw) { - reset(rlw.array[rlw.position]); + reset(rlw.parent.buffer[rlw.position]); } /** @@ -147,6 +147,16 @@ + getRunningLength() + " number of lit. words " + getNumberOfLiteralWords(); } + + @Override +public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException { + BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone(); + answer.literalwordoffset = this.literalwordoffset; + answer.NumberOfLiteralWords = this.NumberOfLiteralWords; + answer.RunningBit = this.RunningBit; + answer.RunningLength = this.RunningLength; + return answer; + } /** how many literal words have we read so far? */ public int literalwordoffset = 0; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,8 +1,8 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ import java.util.*; @@ -19,12 +19,6 @@ *

* *

- * In contrast witht the 64-bit EWAH scheme (javaewah.EWAHCompressedBitmap), you - * can expect this class to compress better, but to be slower at processing the - * data. In effect, there is a trade-off between memory usage and performances. - *

- * - *

* In contrast with the 64-bit EWAH scheme (javaewah.EWAHCompressedBitmap), you * can expect this class to compress better, but to be slower at processing the * data. In effect, there is a trade-off between memory usage and performances. @@ -44,7 +38,7 @@ * *

    *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves - * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages + * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • *
* @@ -58,7 +52,7 @@ */ public EWAHCompressedBitmap32() { this.buffer = new int[defaultbuffersize]; - this.rlw = new RunningLengthWord32(this.buffer, 0); + this.rlw = new RunningLengthWord32(this, 0); } /** @@ -71,19 +65,24 @@ */ public EWAHCompressedBitmap32(final int buffersize) { this.buffer = new int[buffersize]; - this.rlw = new RunningLengthWord32(this.buffer, 0); + this.rlw = new RunningLengthWord32(this, 0); } /** * Adding words directly to the bitmap (for expert use). * * This is normally how you add data to the array. So you add bits in streams - * of 8*8 bits. + * of 4*8 bits. + * + * Example: if you add 321, you are have added (in binary notation) + * 0b101000001, so you have effectively called set(0), set(6), set(8) + * in sequence. * * @param newdata * the word */ - public void add(final int newdata) { + @Override +public void add(final int newdata) { add(newdata, wordinbits); } @@ -163,13 +162,14 @@ * @param number * the number of literal words to add */ - public void addStreamOfLiteralWords(final int[] data, final int start, + @Override +public void addStreamOfLiteralWords(final int[] data, final int start, final int number) { int leftovernumber = number; while (leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = number < RunningLengthWord32.largestliteralcount - - NumberOfLiteralWords ? number + final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount + - NumberOfLiteralWords ? 
leftovernumber : RunningLengthWord32.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords @@ -193,7 +193,8 @@ * @param number * the number */ - public void addStreamOfEmptyWords(final boolean v, int number) { + @Override +public void addStreamOfEmptyWords(final boolean v, int number) { if (number == 0) return; this.sizeinbits += number * wordinbits; @@ -239,13 +240,14 @@ * @param number * the number of literal words to add */ - public void addStreamOfNegatedLiteralWords(final int[] data, final int start, + @Override +public void addStreamOfNegatedLiteralWords(final int[] data, final int start, final int number) { int leftovernumber = number; while (leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = number < RunningLengthWord32.largestliteralcount - - NumberOfLiteralWords ? number + final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount + - NumberOfLiteralWords ? leftovernumber : RunningLengthWord32.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords @@ -267,11 +269,15 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap32 and(final EWAHCompressedBitmap32 a) { + @Override +public EWAHCompressedBitmap32 and(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords @@ -338,10 +344,13 @@ rlwj.discardFirstWords(nbre_literal); } } - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord32 remaining = i_remains ? 
rlwi : rlwj; - remaining.dischargeAsEmpty(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + if (adjustContainerSizeWhenAggregating) { + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi + : rlwj; + remaining.dischargeAsEmpty(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } } @@ -367,11 +376,15 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap32 andNot(final EWAHCompressedBitmap32 a) { + @Override +public EWAHCompressedBitmap32 andNot(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords @@ -387,9 +400,8 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap + * @param a the other bitmap + * @param container where we store the result */ public void andNotToContainer(final EWAHCompressedBitmap32 a, final BitmapStorage32 container) { @@ -434,9 +446,10 @@ final IteratingBufferedRunningLengthWord32 remaining = i_remains ? 
rlwi : rlwj; if(i_remains) remaining.discharge(container); - else + else if (adjustContainerSizeWhenAggregating) remaining.dischargeAsEmpty(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + if (adjustContainerSizeWhenAggregating) + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); } @@ -463,7 +476,7 @@ */ public int cardinality() { int counter = 0; - final EWAHIterator32 i = new EWAHIterator32(this.buffer, + final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); @@ -493,7 +506,7 @@ * @see java.lang.Object#clone() */ @Override - public Object clone() throws java.lang.CloneNotSupportedException { + public EWAHCompressedBitmap32 clone() throws java.lang.CloneNotSupportedException { final EWAHCompressedBitmap32 clone = (EWAHCompressedBitmap32) super.clone(); clone.buffer = this.buffer.clone(); clone.actualsizeinwords = this.actualsizeinwords; @@ -517,13 +530,12 @@ } for (int k = 0; k < this.actualsizeinwords; ++k) this.buffer[k] = in.readInt(); - this.rlw = new RunningLengthWord32(this.buffer, in.readInt()); + this.rlw = new RunningLengthWord32(this, in.readInt()); } /** * Check to see whether the two compressed bitmaps contain the same set bits. * - * @author Colby Ranger * @see java.lang.Object#equals(java.lang.Object) */ @Override @@ -589,8 +601,15 @@ * @return the EWAHIterator */ public EWAHIterator32 getEWAHIterator() { - return new EWAHIterator32(this.buffer, this.actualsizeinwords); + return new EWAHIterator32(this, this.actualsizeinwords); } + + /** + * @return the IteratingRLW iterator corresponding to this bitmap + */ + public IteratingRLW32 getIteratingRLW() { + return new IteratingBufferedRunningLengthWord32(this); + } /** * get the locations of the true values as one vector. 
(may use more memory @@ -600,7 +619,7 @@ */ public List getPositions() { final ArrayList v = new ArrayList(); - final EWAHIterator32 i = new EWAHIterator32(this.buffer, + final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); int pos = 0; while (i.hasNext()) { @@ -638,7 +657,7 @@ public int hashCode() { int karprabin = 0; final int B = 31; - final EWAHIterator32 i = new EWAHIterator32(this.buffer, + final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while( i.hasNext() ) { i.next(); @@ -681,7 +700,7 @@ */ public IntIterator intIterator() { return new IntIteratorImpl32( - new EWAHIterator32(this.buffer, this.actualsizeinwords)); + new EWAHIterator32(this, this.actualsizeinwords)); } /** @@ -690,17 +709,21 @@ * * @return the iterator */ - public Iterator iterator() { + @Override +public Iterator iterator() { return new Iterator() { - public boolean hasNext() { + @Override + public boolean hasNext() { return this.under.hasNext(); } - public Integer next() { + @Override + public Integer next() { return new Integer(this.under.next()); } - public void remove() { + @Override + public void remove() { throw new UnsupportedOperationException("bitsets do not support remove"); } @@ -722,9 +745,14 @@ final int number) { while (this.actualsizeinwords + number >= this.buffer.length) { final int oldbuffer[] = this.buffer; - this.buffer = new int[oldbuffer.length * 2]; + if(this.actualsizeinwords + number < 32768) + this.buffer = new int[(this.actualsizeinwords + number) * 2]; + else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) + this.buffer = new int[Integer.MAX_VALUE]; + else + this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; } for (int k = 0; k < number; ++k) this.buffer[this.actualsizeinwords + k] = ~data[start + k]; @@ -739,8 +767,9 @@ * 
sizeInBytes()). * */ - public void not() { - final EWAHIterator32 i = new EWAHIterator32(this.buffer, + @Override +public void not() { + final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); if (!i.hasNext()) return; @@ -769,51 +798,23 @@ } } - public int oldaddStreamOfEmptyWords(final boolean v, final int number) { - if (number == 0) - return 0; - final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); - final int runlen = this.rlw.getRunningLength(); - if ((noliteralword) && (runlen == 0)) { - this.rlw.setRunningBit(v); - } - int wordsadded = 0; - if ((noliteralword) && (this.rlw.getRunningBit() == v) - && (runlen < RunningLengthWord32.largestrunninglengthcount)) { - int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount - - runlen ? number : RunningLengthWord32.largestrunninglengthcount - - runlen; - this.rlw.setRunningLength(runlen + whatwecanadd); - this.sizeinbits += whatwecanadd * wordinbits; - if (number - whatwecanadd > 0) - addStreamOfEmptyWords(v, number - whatwecanadd); - } else { - push_back(0); - ++wordsadded; - this.rlw.position = this.actualsizeinwords - 1; - final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount ? number - : RunningLengthWord32.largestrunninglengthcount; - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(whatwecanadd); - this.sizeinbits += whatwecanadd * wordinbits; - if (number - whatwecanadd > 0) - addStreamOfEmptyWords(v, number - whatwecanadd); - } - return wordsadded; - } - + /** * Returns a new compressed bitmap containing the bitwise OR values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). - * + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. 
+ * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32 a) { + @Override +public EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); orToContainer(a, container); @@ -892,9 +893,14 @@ private void push_back(final int data) { if (this.actualsizeinwords == this.buffer.length) { final int oldbuffer[] = this.buffer; - this.buffer = new int[oldbuffer.length * 2]; + if(oldbuffer.length < 32768) + this.buffer = new int[oldbuffer.length * 2]; + else if (oldbuffer.length * 3 / 2 < oldbuffer.length) + this.buffer = new int[Integer.MAX_VALUE]; + else + this.buffer = new int[oldbuffer.length * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; } this.buffer[this.actualsizeinwords++] = data; } @@ -910,11 +916,16 @@ * the number of words to add */ private void push_back(final int[] data, final int start, final int number) { - while (this.actualsizeinwords + number >= this.buffer.length) { + if (this.actualsizeinwords + number >= this.buffer.length) { final int oldbuffer[] = this.buffer; - this.buffer = new int[oldbuffer.length * 2]; + if(this.actualsizeinwords + number < 32768) + this.buffer = new int[(this.actualsizeinwords + number) * 2]; + else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) //overflow + this.buffer = new int[Integer.MAX_VALUE]; + else + this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; } System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); this.actualsizeinwords += number; @@ -923,7 +934,8 @@ /* * @see 
java.io.Externalizable#readExternal(java.io.ObjectInput) */ - public void readExternal(ObjectInput in) throws IOException { + @Override +public void readExternal(ObjectInput in) throws IOException { deserialize(in); } @@ -939,7 +951,7 @@ final int oldbuffer[] = this.buffer; this.buffer = new int[size]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.array = this.buffer; + this.rlw.parent.buffer = this.buffer; return true; } return false; @@ -969,15 +981,48 @@ public int serializedSizeInBytes() { return this.sizeInBytes() + 3 * 4; } + + /** + * Query the value of a single bit. Relying on this method when speed is + * needed is discouraged. The complexity is linear with the size of the + * bitmap. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) + * + * @param i + * the bit we are interested in + * @return whether the bit is set to true + */ + public boolean get(final int i) { + if ((i < 0) || (i >= this.sizeinbits)) + return false; + int WordChecked = 0; + final IteratingRLW32 j = getIteratingRLW(); + final int wordi = i / wordinbits; + while (WordChecked <= wordi) { + WordChecked += j.getRunningLength(); + if (wordi < WordChecked) { + return j.getRunningBit(); + } + if (wordi < WordChecked + j.getNumberOfLiteralWords()) { + final int w = j.getLiteralWordAt(wordi + - WordChecked); + return (w & (1 << i)) != 0; + } + WordChecked += j.getNumberOfLiteralWords(); + j.next(); + } + return false; + } /** - * set the bit at position i to true, the bits must be set in increasing + * Set the bit at position i to true, the bits must be set in (strictly) increasing * order. For example, set(15) and then set(7) will fail. You must do set(7) * and then set(15). * * @param i * the index - * @return true if the value was set (always true when i>= sizeInBits()). + * @return true if the value was set (always true when i is greater or equal to sizeInBits()). 
* @throws IndexOutOfBoundsException * if i is negative or greater than Integer.MAX_VALUE - 32 */ @@ -1015,16 +1060,20 @@ } /** - * set the size in bits + * Set the size in bits. This does not change the compressed bitmap. * */ - public void setSizeInBits(final int size) { - this.sizeinbits = size; + @Override +public void setSizeInBits(final int size) { + if((size+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits) + throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean): "+size+" "+this.sizeinbits); + this.sizeinbits = size; } /** * Change the reported size in bits of the *uncompressed* bitmap represented - * by this compressed bitmap. It is not possible to reduce the sizeInBits, but + * by this compressed bitmap. It may change the underlying compressed bitmap. + * It is not possible to reduce the sizeInBits, but * it can be extended. The new bits are set to false or true depending on the * value of defaultvalue. 
* @@ -1062,7 +1111,8 @@ * * @return the size in bits */ - public int sizeInBits() { + @Override +public int sizeInBits() { return this.sizeinbits; } @@ -1072,7 +1122,8 @@ * * @return the size in bytes */ - public int sizeInBytes() { + @Override +public int sizeInBytes() { return this.actualsizeinwords * (wordinbits / 8); } @@ -1086,7 +1137,7 @@ int[] ans = new int[this.cardinality()]; int inanspos = 0; int pos = 0; - final EWAHIterator32 i = new EWAHIterator32(this.buffer, + final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); @@ -1129,7 +1180,7 @@ public String toDebugString() { String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits + " size in words = " + this.actualsizeinwords + "\n"; - final EWAHIterator32 i = new EWAHIterator32(this.buffer, + final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); @@ -1166,11 +1217,42 @@ answer.append("}"); return answer.toString(); } + /** + * swap the content of the bitmap with another. + * + * @param other + * bitmap to swap with + */ + public void swap(final EWAHCompressedBitmap32 other) { + int[] tmp = this.buffer; + this.buffer = other.buffer; + other.buffer = tmp; + + int tmp2 = this.rlw.position; + this.rlw.position = other.rlw.position; + other.rlw.position = tmp2; + + int tmp3 = this.actualsizeinwords; + this.actualsizeinwords = other.actualsizeinwords; + other.actualsizeinwords = tmp3; + + int tmp4 = this.sizeinbits; + this.sizeinbits = other.sizeinbits; + other.sizeinbits = tmp4; + } + /** + * Reduce the internal buffer to its minimal allowable size (given + * by this.actualsizeinwords). This can free memory. 
+ */ + public void trim() { + this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords); + } /* * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) */ - public void writeExternal(ObjectOutput out) throws IOException { + @Override +public void writeExternal(ObjectOutput out) throws IOException { serialize(out); } @@ -1181,11 +1263,15 @@ * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param a * the other bitmap * @return the EWAH compressed bitmap */ - public EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32 a) { + @Override +public EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); xorToContainer(a, container); @@ -1270,118 +1356,20 @@ */ public static void andWithContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].andToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in ascending order by sizeinbits. When we exhaust the - // first bitmap the rest - // of the result is zeros. - final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, new Comparator() { - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return a.sizeinbits < b.sizeinbits ? -1 - : a.sizeinbits == b.sizeinbits ? 
0 : 1; - } - }); - - int maxSize = sortedBitmaps[sortedBitmaps.length - 1].sizeinbits; - - final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; - for (int i = 0; i < sortedBitmaps.length; i++) { - EWAHIterator32 iterator = sortedBitmaps[i].getEWAHIterator(); - if (iterator.hasNext()) { - rlws[i] = new IteratingBufferedRunningLengthWord32(iterator); - } else { - // this never happens... - if (maxSize > 0) { - extendEmptyBits(container, 0, maxSize); - } - container.setSizeInBits(maxSize); - return; - } - } - - while (true) { - int maxZeroRl = 0; - int minOneRl = Integer.MAX_VALUE; - int minSize = Integer.MAX_VALUE; - int numEmptyRl = 0; - - for (IteratingBufferedRunningLengthWord32 rlw : rlws) { - int size = rlw.size(); - minSize = Math.min(minSize, size); - - if (!rlw.getRunningBit()) { - int rl = rlw.getRunningLength(); - maxZeroRl = Math.max(maxZeroRl, rl); - minOneRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - int rl = rlw.getRunningLength(); - minOneRl = Math.min(minOneRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - if (minSize == 0) { - extendEmptyBits(container, sortedBitmaps[0].sizeinbits, maxSize); - break; - } - if (maxZeroRl > 0) { - container.addStreamOfEmptyWords(false, maxZeroRl); - for (IteratingBufferedRunningLengthWord32 rlw : rlws) { - rlw.discardFirstWords(maxZeroRl); - } - } else if (minOneRl > 0) { - container.addStreamOfEmptyWords(true, minOneRl); - for (IteratingBufferedRunningLengthWord32 rlw : rlws) { - rlw.discardFirstWords(minOneRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to process and the rest have a run of - // 1's we can write them out here - IteratingBufferedRunningLengthWord32 emptyRl = null; - int minNonEmptyRl = Integer.MAX_VALUE; - for (IteratingBufferedRunningLengthWord32 rlw : rlws) { - int rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; 
- } else { - minNonEmptyRl = Math.min(minNonEmptyRl, rl); - } - } - int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords(wordsToWrite, container); - index += wordsToWrite; - } - - while (index < minSize) { - int word = ~0; - for (IteratingBufferedRunningLengthWord32 rlw : rlws) { - if (rlw.getRunningLength() <= index) { - word &= rlw.getLiteralWordAt(index - rlw.getRunningLength()); - } - } - container.add(word); - index++; - } - for (IteratingBufferedRunningLengthWord32 rlw : rlws) { - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBits(maxSize); + if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); + if(bitmaps.length == 2) { + bitmaps[0].andToContainer(bitmaps[1],container); + return; + } + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length - 1; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + answer.andToContainer(bitmaps[bitmaps.length - 1], container); } /** @@ -1390,20 +1378,28 @@ * * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param bitmaps * bitmaps to AND together * @return result of the AND */ public static EWAHCompressedBitmap32 and( final EWAHCompressedBitmap32... 
bitmaps) { - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - int largestSize = 0; - for (EWAHCompressedBitmap32 bitmap : bitmaps) { - largestSize = Math.max(bitmap.actualsizeinwords, largestSize); - } - container.reserve((int) (largestSize * 1.5)); - andWithContainer(container, bitmaps); - return container; + if(bitmaps.length == 1) return bitmaps[0]; + if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + return answer; } /** @@ -1416,6 +1412,7 @@ * @return the cardinality */ public static int andCardinality(final EWAHCompressedBitmap32... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter32 counter = new BitCounter32(); andWithContainer(counter, bitmaps); return counter.getCount(); @@ -1465,147 +1462,68 @@ /** * For internal use. Computes the bitwise or of the provided bitmaps and * stores the result in the container. + * @param container where store the result + * @param bitmaps to be aggregated */ - public static void orWithContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].orToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in descending order by sizeinbits. We will exhaust the - // sorted bitmaps from right to left. - final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, new Comparator() { - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return a.sizeinbits < b.sizeinbits ? 1 - : a.sizeinbits == b.sizeinbits ? 
0 : -1; - } - }); - - final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; - int maxAvailablePos = 0; - for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { - EWAHIterator32 iterator = bitmap.getEWAHIterator(); - if (iterator.hasNext()) { - rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( - iterator); - } - } - - if (maxAvailablePos == 0) { // this never happens... - container.setSizeInBits(0); - return; - } - - int maxSize = sortedBitmaps[0].sizeinbits; - - while (true) { - int maxOneRl = 0; - int minZeroRl = Integer.MAX_VALUE; - int minSize = Integer.MAX_VALUE; - int numEmptyRl = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - int size = rlw.size(); - if (size == 0) { - maxAvailablePos = i; - break; - } - minSize = Math.min(minSize, size); - - if (rlw.getRunningBit()) { - int rl = rlw.getRunningLength(); - maxOneRl = Math.max(maxOneRl, rl); - minZeroRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - int rl = rlw.getRunningLength(); - minZeroRl = Math.min(minZeroRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (maxAvailablePos == 0) { - break; - } else if (maxAvailablePos == 1) { - // only one bitmap is left so just write the rest of it out - rlws[0].discharge(container); - break; - } - - if (maxOneRl > 0) { - container.addStreamOfEmptyWords(true, maxOneRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(maxOneRl); - } - } else if (minZeroRl > 0) { - container.addStreamOfEmptyWords(false, minZeroRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(minZeroRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to process and the rest have a run of - // 0's we can write them out here - 
IteratingBufferedRunningLengthWord32 emptyRl = null; - int minNonEmptyRl = Integer.MAX_VALUE; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - int rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math.min(minNonEmptyRl, rl); - } - } - int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords(wordsToWrite, container); - index += wordsToWrite; - } + public static void orWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + int size = 0; + int sinbits = 0; + for (EWAHCompressedBitmap32 b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation32.bufferedorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation32.orToContainer(container, bitmaps); + } + } - while (index < minSize) { - int word = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - if (rlw.getRunningLength() <= index) { - word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); - } - } - container.add(word); - index++; - } - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBits(maxSize); - } + /** + * For internal use. Computes the bitwise xor of the provided bitmaps and + * stores the result in the container. + * @param container where store the result + * @param bitmaps to be aggregated + */ + public static void xorWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + int size = 0; + int sinbits = 0; + for (EWAHCompressedBitmap32 b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation32.bufferedxorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation32.xorToContainer(container, bitmaps); + } + } /** * Returns a new compressed bitmap containing the bitwise OR values of the * provided bitmaps. This is typically faster than doing the aggregation * two-by-two (A.or(B).or(C).or(D)). - * + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * * @param bitmaps * bitmaps to OR together * @return result of the OR */ public static EWAHCompressedBitmap32 or( final EWAHCompressedBitmap32... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0]; final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); int largestSize = 0; for (EWAHCompressedBitmap32 bitmap : bitmaps) { @@ -1616,6 +1534,34 @@ return container; } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of the + * provided bitmaps. This is typically faster than doing the aggregation + * two-by-two (A.xor(B).xor(C).xor(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param bitmaps + * bitmaps to XOR together + * @return result of the XOR + */ + public static EWAHCompressedBitmap32 xor( + final EWAHCompressedBitmap32... 
bitmaps) { + if(bitmaps.length == 1) return bitmaps[0]; + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + int largestSize = 0; + for (EWAHCompressedBitmap32 bitmap : bitmaps) { + largestSize = Math.max(bitmap.actualsizeinwords, largestSize); + } + container.reserve((int) (largestSize * 1.5)); + xorWithContainer(container, bitmaps); + return container; + } + /** * Returns the cardinality of the result of a bitwise OR of the values of the * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold @@ -1626,6 +1572,7 @@ * @return the cardinality */ public static int orCardinality(final EWAHCompressedBitmap32... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter32 counter = new BitCounter32(); orWithContainer(counter, bitmaps); return counter.getCount(); @@ -1652,6 +1599,9 @@ /** optimization option **/ public static final boolean usetrailingzeros = true; + /** whether we adjust after some aggregation by adding in zeroes **/ + public static final boolean adjustContainerSizeWhenAggregating = true; + /** The Constant wordinbits represents the number of bits in a int. */ public static final int wordinbits = 32; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java 2013-11-12 14:31:20.000000000 +0000 @@ -2,8 +2,8 @@ /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. 
*/ /** @@ -14,7 +14,7 @@ * @since 0.5.0 * */ -public final class EWAHIterator32 { +public final class EWAHIterator32 implements Cloneable { /** * Instantiates a new eWAH iterator. @@ -22,7 +22,7 @@ * @param a the array of words * @param sizeinwords the number of words that are significant in the array of words */ - public EWAHIterator32(final int[] a, final int sizeinwords) { + public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) { this.rlw = new RunningLengthWord32(a, 0); this.size = sizeinwords; this.pointer = 0; @@ -31,7 +31,7 @@ /** * Allow expert developers to instantiate an EWAHIterator. * - * @param bitmap + * @param bitmap we want to iterate over * @return an iterator */ public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) { @@ -44,7 +44,7 @@ * @return the int[] */ public int[] buffer() { - return this.rlw.array; + return this.rlw.parent.buffer; } /** @@ -76,6 +76,15 @@ return this.rlw; } + @Override + public EWAHIterator32 clone() throws CloneNotSupportedException { + EWAHIterator32 ans = (EWAHIterator32) super.clone(); + ans.rlw = this.rlw.clone(); + ans.size = this.size; + ans.pointer = this.pointer; + return ans; + } + /** The pointer represent the location of the current running length * word in the array of words (embedded in the rlw attribute). */ int pointer; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/FastAggregation32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/FastAggregation32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/FastAggregation32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/FastAggregation32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,377 @@ +package com.googlecode.javaewah32; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.PriorityQueue; + + +/** + * Fast algorithms to aggregate many bitmaps. 
These algorithms are just given as + * reference. They may not be faster than the corresponding methods in the + * EWAHCompressedBitmap class. + * + * @author Daniel Lemire + * + */ +public class FastAggregation32 { + + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) + * @return the or aggregate. + */ + public static EWAHCompressedBitmap32 bufferedand(final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedandWithContainer(answer,bufsize, bitmaps); + return answer; + } + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) + * @param bitmaps the source bitmaps + */ + public static void bufferedandWithContainer(final BitmapStorage32 container,final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + + java.util.LinkedList al = new java.util.LinkedList(); + for (EWAHCompressedBitmap32 bitmap : bitmaps) { + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufsize*bitmaps.length]; + + for(IteratingRLW32 i : al) + if (i.size() == 0) { + al.clear(); + break; + } + + while (!al.isEmpty()) { + Arrays.fill(hardbitmap, ~0); + int effective = Integer.MAX_VALUE; + for(IteratingRLW32 i : al) { + int eff = IteratorAggregation32.inplaceand(hardbitmap, i); + if (eff < effective) + effective = eff; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + for(IteratingRLW32 i : al) + if (i.size() == 0) { + al.clear(); + break; + } + } + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. 
+ * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words + * @return the or aggregate. + */ + public static EWAHCompressedBitmap32 bufferedor(final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedorWithContainer(answer, bufsize, bitmaps); + return answer; + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedorWithContainer(final BitmapStorage32 container,final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + int range = 0; + EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap32 bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + int effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation32.inplaceor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + + } + container.setSizeInBits(range); + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words + * @return the xor aggregate. 
+ */ + public static EWAHCompressedBitmap32 bufferedxor(final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedxorWithContainer(answer, bufsize, bitmaps); + return answer; + } + + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedxorWithContainer(final BitmapStorage32 container,final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + int range = 0; + EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap32 bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + int effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation32.inplacexor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + } + container.setSizeInBits(range); + } + + /** + * Uses a priority queue to compute the or aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void orToContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32 ... 
bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap32 x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.or(x2)); + } + pq.poll().orToContainer(pq.poll(), container); + } + + + /** + * Uses a priority queue to compute the xor aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void xorToContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32 ... bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap32 x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + pq.poll().xorToContainer(pq.poll(), container); + } + + /** + * For internal use. Computes the bitwise or of the provided bitmaps and + * stores the result in the container. (This used to be the default.) + * + * @deprecated use EWAHCompressedBitmap32.or instead + * @since 0.4.0 + * @param container where store the result + * @param bitmaps to be aggregated + */ + @Deprecated + public static void legacy_orWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... 
bitmaps) { + if (bitmaps.length == 2) { + // should be more efficient + bitmaps[0].orToContainer(bitmaps[1], container); + return; + } + + // Sort the bitmaps in descending order by sizeinbits. We will exhaust the + // sorted bitmaps from right to left. + final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); + Arrays.sort(sortedBitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeinbits < b.sizeinbits ? 1 + : a.sizeinbits == b.sizeinbits ? 0 : -1; + } + }); + + final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; + int maxAvailablePos = 0; + for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { + EWAHIterator32 iterator = bitmap.getEWAHIterator(); + if (iterator.hasNext()) { + rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( + iterator); + } + } + + if (maxAvailablePos == 0) { // this never happens... + container.setSizeInBits(0); + return; + } + + int maxSize = sortedBitmaps[0].sizeinbits; + + while (true) { + int maxOneRl = 0; + int minZeroRl = Integer.MAX_VALUE; + int minSize = Integer.MAX_VALUE; + int numEmptyRl = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + int size = rlw.size(); + if (size == 0) { + maxAvailablePos = i; + break; + } + minSize = Math.min(minSize, size); + + if (rlw.getRunningBit()) { + int rl = rlw.getRunningLength(); + maxOneRl = Math.max(maxOneRl, rl); + minZeroRl = 0; + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } else { + int rl = rlw.getRunningLength(); + minZeroRl = Math.min(minZeroRl, rl); + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } + } + + if (maxAvailablePos == 0) { + break; + } else if (maxAvailablePos == 1) { + // only one bitmap is left so just write the rest of it out + rlws[0].discharge(container); + break; + } + + if (maxOneRl > 0) { + container.addStreamOfEmptyWords(true, maxOneRl); + for (int 
i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + rlw.discardFirstWords(maxOneRl); + } + } else if (minZeroRl > 0) { + container.addStreamOfEmptyWords(false, minZeroRl); + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + rlw.discardFirstWords(minZeroRl); + } + } else { + int index = 0; + + if (numEmptyRl == 1) { + // if one rlw has literal words to process and the rest have a run of + // 0's we can write them out here + IteratingBufferedRunningLengthWord32 emptyRl = null; + int minNonEmptyRl = Integer.MAX_VALUE; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + int rl = rlw.getRunningLength(); + if (rl == 0) { + assert emptyRl == null; + emptyRl = rlw; + } else { + minNonEmptyRl = Math.min(minNonEmptyRl, rl); + } + } + int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; + if (emptyRl != null) + emptyRl.writeLiteralWords(wordsToWrite, container); + index += wordsToWrite; + } + + while (index < minSize) { + int word = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + if (rlw.getRunningLength() <= index) { + word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); + } + } + container.add(word); + index++; + } + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + rlw.discardFirstWords(minSize); + } + } + } + container.setSizeInBits(maxSize); + } + +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java 2013-11-12 14:31:20.000000000 +0000 @@ -2,7 +2,7 @@ /* * 
Copyright 2012, Google Inc. - * Licensed under APL 2.0. + * Licensed under the Apache License, Version 2.0. */ import static com.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; @@ -45,10 +45,12 @@ return true; } + @Override public final boolean hasNext() { return this.hasnext; } + @Override public final int next() { final int answer; if (runningHasNext()) { diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,91 @@ +package com.googlecode.javaewah32; + +import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; + +import com.googlecode.javaewah.IntIterator; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Implementation of an IntIterator over an IteratingRLW. 
+ * + * + */ +public class IntIteratorOverIteratingRLW32 implements IntIterator { + IteratingRLW32 parent; + private int position; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + /** + * @param p iterator we wish to iterate over + */ + public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { + this.parent = p; + this.position = 0; + setupForCurrentRunningLengthWord(); + this.hasnext = moveToNext(); + } + + /** + * @return whether we could find another set bit; don't move if there is an unprocessed value + */ + private final boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (this.parent.next()) + setupForCurrentRunningLengthWord(); + else return false; + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasnext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int bit = Long.numberOfTrailingZeros(this.word); + this.word ^= (1l << bit); + answer = this.literalPosition + bit; + } + this.hasnext = this.moveToNext(); + return answer; + } + + private final void setupForCurrentRunningLengthWord() { + this.runningLength = wordinbits * this.parent.getRunningLength() + + this.position; + + if (!this.parent.getRunningBit()) { + this.position = this.runningLength; + } + this.wordPosition = 0; + this.wordLength = this.parent.getNumberOfLiteralWords(); + } + + private final boolean runningHasNext() { + return this.position < this.runningLength; + } + + private final boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.parent.getLiteralWordAt(this.wordPosition++); + this.literalPosition = this.position; + this.position += wordinbits; + } + return this.word != 0; + } +} + diff -Nru 
libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java 2013-11-12 14:31:20.000000000 +0000 @@ -3,8 +3,8 @@ /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically @@ -13,7 +13,7 @@ * @since 0.5.0 * @author Daniel Lemire and David McIntosh */ -public final class IteratingBufferedRunningLengthWord32 { +public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable { /** * Instantiates a new iterating buffered running length word. * @@ -29,8 +29,8 @@ /** * Instantiates a new iterating buffered running length word. 
+ * @param bitmap over which we want to iterate * - * @param iterator iterator */ public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) { this(EWAHIterator32.getEWAHIterator(bitmap)); @@ -42,7 +42,8 @@ * * @param x the x */ - public void discardFirstWords(int x) { + @Override +public void discardFirstWords(int x) { while (x > 0) { if (this.brlw.RunningLength > x) { @@ -107,19 +108,33 @@ pl = max - index; } container.addStreamOfEmptyWords(!getRunningBit(), pl); - discardFirstWords(pl); index += pl; int pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = max - index; } writeNegatedLiteralWords(pd, container); - discardFirstWords(pd); + discardFirstWords(pl+pd); index += pd; } return index; } + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override +public boolean next() { + if (!this.iterator.hasNext()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } /** * Write out the remain words, transforming them to zeroes. 
@@ -147,7 +162,8 @@ * @param index zero based index * @return the literal word */ - public int getLiteralWordAt(int index) { + @Override +public int getLiteralWordAt(int index) { return this.buffer[this.literalWordStartPosition + index]; } @@ -156,7 +172,8 @@ * * @return the number of literal words */ - public int getNumberOfLiteralWords() { + @Override +public int getNumberOfLiteralWords() { return this.brlw.NumberOfLiteralWords; } @@ -165,7 +182,8 @@ * * @return the running bit */ - public boolean getRunningBit() { + @Override +public boolean getRunningBit() { return this.brlw.RunningBit; } @@ -174,7 +192,8 @@ * * @return the running length */ - public int getRunningLength() { + @Override +public int getRunningLength() { return this.brlw.RunningLength; } @@ -183,14 +202,15 @@ * * @return the int */ - public int size() { + @Override +public int size() { return this.brlw.size(); } /** * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. - * @param numWords - * @param container + * @param numWords number of words to be written + * @param container where we write the data */ public void writeLiteralWords(int numWords, BitmapStorage32 container) { container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); @@ -199,8 +219,8 @@ /** * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. - * @param numWords - * @param container + * @param numWords number of words to be written + * @param container where we write the data */ public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) { container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); @@ -208,7 +228,7 @@ /** - * For internal use. (One could use the non-static dischard method instead, + * For internal use. (One could use the non-static discharge method instead, * but we expect them to be slower.) 
* * @param initialWord @@ -234,6 +254,19 @@ runningLengthWord = new BufferedRunningLengthWord32(iterator.next()); } } + + + + @Override +public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException { + IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + return answer; + } + private BufferedRunningLengthWord32 brlw; private int[] buffer; private int literalWordStartPosition; diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,42 @@ +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * High-level iterator over a compressed bitmap. + * + */ +public interface IteratingRLW32 { + /** + * @return whether there is more + */ + public boolean next() ; + /** + * @param index where the literal word is + * @return the literal word at the given index. 
+ */ + public int getLiteralWordAt(int index); + /** + * @return the number of literal (non-fill) words + */ + public int getNumberOfLiteralWords() ; + /** + * @return the bit used for the fill bits + */ + public boolean getRunningBit() ; + /** + * @return sum of getRunningLength() and getNumberOfLiteralWords() + */ + public int size() ; + /** + * @return length of the run of fill words + */ + public int getRunningLength() ; + /** + * @param x the number of words to discard + */ + public void discardFirstWords(int x); +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,601 @@ +package com.googlecode.javaewah32; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; + +import com.googlecode.javaewah.CloneableIterator; + + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Set of helper functions to aggregate bitmaps. + * + */ +public class IteratorAggregation32 { + /** + * @param x iterator to negate + * @return negated version of the iterator + */ + public static IteratingRLW32 not(final IteratingRLW32 x) { + return new IteratingRLW32() { + + @Override + public boolean next() { + return x.next(); + } + + @Override + public int getLiteralWordAt(int index) { + return ~x.getLiteralWordAt(index); + } + + @Override + public int getNumberOfLiteralWords() { + return x.getNumberOfLiteralWords(); + } + + @Override + public boolean getRunningBit() { + return ! 
x.getRunningBit(); + } + + @Override + public int size() { + return x.size(); + } + + @Override + public int getRunningLength() { + return x.getRunningLength(); + } + + @Override + public void discardFirstWords(int y) { + x.discardFirstWords(y); + } + + }; + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return and aggregate + */ + public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { + return bufferedand (DEFAULTMAXBUFSIZE,al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return and aggregate + */ + public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + final LinkedList basell = new LinkedList(); + for (IteratingRLW32 i : al) + basell.add(i); + return new BufferedIterator32(new AndIt(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return or aggregate + */ + public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { + return bufferedor(DEFAULTMAXBUFSIZE,al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return or aggregate + */ + public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... 
al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + for (IteratingRLW32 i : al) + basell.add(i); + return new BufferedIterator32(new ORIt(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return xor aggregate + */ + public static IteratingRLW32 bufferedxor(final IteratingRLW32... al) { + return bufferedxor (DEFAULTMAXBUFSIZE,al); + } + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return xor aggregate + */ + public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + for (IteratingRLW32 i : al) + basell.add(i); + return new BufferedIterator32(new XORIt(basell,bufsize)); + } + /** + * Write out the content of the iterator, but as if it were all zeros. 
+ * + * @param container + * where we write + * @param i + * the iterator + */ + protected static void dischargeAsEmpty(final BitmapStorage32 container, + final IteratingRLW32 i) { + while (i.size() > 0) { + container.addStreamOfEmptyWords(false, i.size()); + i.next(); + + } + } + + /** + * Write out up to max words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) { + int counter = 0; + while (i.size() > 0 && counter < max) { + int L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), L1); + counter += L1; + } + int L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + /** + * Write out up to max negated words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) { + int counter = 0; + while (i.size() > 0 && counter < max) { + int L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), L1); + counter += L1; + } + int L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + static void andToContainer(final BitmapStorage32 container, + int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size()>0) 
&& (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW32 predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final int index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + static void andToContainer(final BitmapStorage32 container, + final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW32 predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final int index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + + /** + * Compute the first few words of the XOR aggregate between two iterators. + * + * @param container where to write + * @param desiredrlwcount number of words to be written (max) + * @param rlwi first iterator to aggregate + * @param rlwj second iterator to aggregate + */ + public static void xorToContainer(final BitmapStorage32 container, + int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW32 predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + int index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + int index = dischargeNegated(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + protected static int inplaceor(int[] bitmap, + IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + L, ~0); + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = i.getRunningLength(); + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0); + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + L, ~0); + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + + protected static int inplacexor(int[] bitmap, + IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = 
i.getRunningLength(); + if (i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = i.getRunningLength(); + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = ~bitmap[k]; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + protected static int inplaceand(int[] bitmap, + IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if (!i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = i.getRunningLength(); + if (pos + L > bitmap.length) { + if (!i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = 0; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (!i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + /** + * An optimization option. Larger values may improve speed, but at + * the expense of memory. 
+ */ + public final static int DEFAULTMAXBUFSIZE = 65536; + + +} + + +class ORIt implements CloneableIterator { + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + int[] hardbitmap; + LinkedList ll; + + ORIt(LinkedList basell, final int bufsize) { + this.ll = basell; + this.hardbitmap = new int[bufsize]; + } + + @Override + public XORIt clone() throws CloneNotSupportedException { + XORIt answer = (XORIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + int effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW32 rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.add(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + +class XORIt implements CloneableIterator { + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + int[] hardbitmap; + LinkedList ll; + + XORIt(LinkedList basell, final int bufsize) { + this.ll = basell; + this.hardbitmap = new int[bufsize]; + + } + + @Override + public XORIt clone() throws CloneNotSupportedException { + XORIt answer = (XORIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + int effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW32 rlw = i.next(); + if (rlw.size() > 0) { + int eff = 
IteratorAggregation32.inplacexor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.add(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + +class AndIt implements CloneableIterator { + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + LinkedList ll; + int buffersize; + + public AndIt(LinkedList basell, final int bufsize) { + this.ll = basell; + this.buffersize = bufsize; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public AndIt clone() throws CloneNotSupportedException { + AndIt answer = (AndIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + IteratorAggregation32.andToContainer(this.buffer, this.buffersize * this.ll.size(), + this.ll.get(0), this.ll.get(1)); + if (this.ll.size() > 2) { + Iterator i = this.ll.iterator(); + i.next(); + i.next(); + EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); + while (i.hasNext() && this.buffer.sizeInBytes() > 0) { + IteratorAggregation32.andToContainer(tmpbuffer, + this.buffer.getIteratingRLW(), i.next()); + this.buffer.swap(tmpbuffer); + tmpbuffer.clear(); + } + } + Iterator i = this.ll.iterator(); + while(i.hasNext()) { + if(i.next().size() == 0) { + this.ll.clear(); + break; + } + } + return this.buffer.getEWAHIterator(); + } + +} \ No newline at end of file diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java 2013-11-12 
14:31:20.000000000 +0000 @@ -0,0 +1,135 @@ +package com.googlecode.javaewah32; + +import java.util.Iterator; + +import com.googlecode.javaewah.IntIterator; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Convenience functions for working over iterators + * + */ +public class IteratorUtil32 { + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { + return new IntIteratorOverIteratingRLW32(i); + } + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static Iterator toSetBitsIterator(final IteratingRLW32 i) { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return new Integer(this.under.next()); + } + + @Override + public void remove() { + } + + final private IntIterator under = toSetBitsIntIterator(i); + }; + + } + + /** + * Turn an iterator into a bitmap + * @param i iterator we wish to materialize + * @param c where we write + */ + public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) { + while (true) { + if (i.getRunningLength() > 0) { + c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); + } + for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) + c.add(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + } + + /** + * @param i iterator we wish to iterate over + * @return the cardinality (number of set bits) corresponding to the iterator + */ + public static int cardinality(final IteratingRLW32 i) { + int answer = 0; + while (true) { + if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits; + for (int k = 0; k < 
i.getNumberOfLiteralWords(); ++k) + answer += Long.bitCount(i.getLiteralWordAt(k)); + if(!i.next()) break; + } + return answer; + } + + /** + * + * @param x set of bitmaps we wish to iterate over + * @return an array of iterators corresponding to the array of bitmaps + */ + public static IteratingRLW32[] toIterators(final EWAHCompressedBitmap32... x) { + IteratingRLW32[] X = new IteratingRLW32[x.length]; + for (int k = 0; k < X.length; ++k) { + X[k] = new IteratingBufferedRunningLengthWord32(x[k]); + } + return X; + } + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @param c where we write + * @param Max maximum number of words to materialize + * @return how many words were actually materialized + */ + public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) { + final int origMax = Max; + while (true) { + if (i.getRunningLength() > 0) { + int L = i.getRunningLength(); + if(L > Max) L = Max; + c.addStreamOfEmptyWords(i.getRunningBit(), L); + Max -= L; + } + long L = i.getNumberOfLiteralWords(); + for (int k = 0; k < L; ++k) + c.add(i.getLiteralWordAt(k)); + if(Max>0) { + if (!i.next()) + break; + } + else break; + } + return origMax - Max; + } + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @return materialized version of the iterator + */ + public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + materialize(i, ewah); + return ewah; + } + +} diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java 
2013-11-12 14:31:20.000000000 +0000 @@ -3,8 +3,8 @@ /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * This is a BitmapStorage that can be used to determine quickly @@ -37,39 +37,39 @@ /** * If the word to be added is non-zero, a NonEmptyException exception is thrown. - * @see com.googlecode.javaewah.BitmapStorage#add(int) */ - public void add(int newdata) { + @Override +public void add(int newdata) { if(newdata!=0) throw nonEmptyException; } /** - * throws a NonEmptyException exception when number > 0 + * throws a NonEmptyException exception when number is greater than 0 * - * @see com.googlecode.javaewah.BitmapStorage#addStreamOfLiteralWords(int[], int, int) */ - public void addStreamOfLiteralWords(int[] data, int start, int number) { + @Override +public void addStreamOfLiteralWords(int[] data, int start, int number) { if (number > 0){ throw nonEmptyException; } } /** - * If the boolean value is true and number>0, then it throws a NonEmptyException exception, + * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, * otherwise, nothing happens. 
* - * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, int) */ - public void addStreamOfEmptyWords(boolean v, int number) { + @Override +public void addStreamOfEmptyWords(boolean v, int number) { if(v && (number>0)) throw nonEmptyException; } /** - * throws a NonEmptyException exception when number > 0 + * throws a NonEmptyException exception when number is greater than 0 * - * @see com.googlecode.javaewah.BitmapStorage#addStreamOfNegatedLiteralWords(int[], int, int) */ - public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { + @Override +public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { if (number > 0){ throw nonEmptyException; } @@ -80,7 +80,8 @@ * * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) */ - public void setSizeInBits(int bits) { + @Override +public void setSizeInBits(int bits) { } } diff -Nru libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java --- libjavaewah-java-0.6.12/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,135 +1,152 @@ package com.googlecode.javaewah32; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. - * + * * @since 0.5.0 * @author Daniel Lemire */ -public final class RunningLengthWord32 { +public final class RunningLengthWord32 implements Cloneable { + + /** + * Instantiates a new running length word. 
+ * + * @param a + * an array of 32-bit words + * @param p + * position in the array where the running length word is + * located. + */ + RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) { + this.parent = a; + this.position = p; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return (this.parent.buffer[this.position] & 1) != 0; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public int getRunningLength() { + return (this.parent.buffer[this.position] >>> 1) + & largestrunninglengthcount; + } + + /** + * Sets the number of literal words. + * + * @param number + * the new number of literal words + */ + public void setNumberOfLiteralWords(final int number) { + this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; + this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) + | runninglengthplusrunningbit; + } + + /** + * Sets the running bit. + * + * @param b + * the new running bit + */ + public void setRunningBit(final boolean b) { + if (b) + this.parent.buffer[this.position] |= 1; + else + this.parent.buffer[this.position] &= ~1; + } + + /** + * Sets the running length. + * + * @param number + * the new running length + */ + public void setRunningLength(final int number) { + this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; + this.parent.buffer[this.position] &= (number << 1) + | notshiftedlargestrunninglengthcount; + } + + /** + * Return the size in uncompressed words represented by this running + * length word. 
+ * + * @return the int + */ + public int size() { + return getRunningLength() + getNumberOfLiteralWords(); + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public RunningLengthWord32 clone() throws CloneNotSupportedException { + RunningLengthWord32 answer; + answer = (RunningLengthWord32) super.clone(); + answer.parent = this.parent; + answer.position = this.position; + return answer; + } + + /** The array of words. */ + public EWAHCompressedBitmap32 parent; + + /** The position in array. */ + public int position; + + /** + * number of bits dedicated to marking of the running length of clean + * words + */ + public static final int runninglengthbits = 16; + + private static final int literalbits = 32 - 1 - runninglengthbits; + + /** largest number of literal words in a run. */ + public static final int largestliteralcount = (1 << literalbits) - 1; + + /** largest number of clean words in a run */ + public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; + + private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; + + private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; + + private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; + + private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; - /** - * Instantiates a new running length word. - * - * @param a an array of 32-bit words - * @param p position in the array where the running length word is located. - */ - RunningLengthWord32(final int[] a, final int p) { - this.array = a; - this.position = p; - } - - /** - * Gets the number of literal words. 
- * - * @return the number of literal words - */ - public int getNumberOfLiteralWords() { - return (this.array[this.position] >>> (1 + runninglengthbits)); - } - - /** - * Gets the running bit. - * - * @return the running bit - */ - public boolean getRunningBit() { - return (this.array[this.position] & 1) != 0; - } - - /** - * Gets the running length. - * - * @return the running length - */ - public int getRunningLength() { - return (this.array[this.position] >>> 1) & largestrunninglengthcount; - } - - /** - * Sets the number of literal words. - * - * @param number the new number of literal words - */ - public void setNumberOfLiteralWords(final int number) { - this.array[this.position] |= notrunninglengthplusrunningbit; - this.array[this.position] &= (number << (runninglengthbits + 1)) - | runninglengthplusrunningbit; - } - - /** - * Sets the running bit. - * - * @param b the new running bit - */ - public void setRunningBit(final boolean b) { - if (b) - this.array[this.position] |= 1; - else - this.array[this.position] &= ~1; - } - - /** - * Sets the running length. - * - * @param number the new running length - */ - public void setRunningLength(final int number) { - this.array[this.position] |= shiftedlargestrunninglengthcount; - this.array[this.position] &= (number << 1) - | notshiftedlargestrunninglengthcount; - } - - /** - * Return the size in uncompressed words represented by - * this running length word. - * - * @return the int - */ - public int size() { - return getRunningLength() + getNumberOfLiteralWords(); - } - - /* - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "running bit = " + getRunningBit() + " running length = " - + getRunningLength() + " number of lit. words " - + getNumberOfLiteralWords(); - } - - - /** The array of words. */ - public int[] array; - - /** The position in array. 
*/ - public int position; - - /** number of bits dedicated to marking of the running length of clean words */ - public static final int runninglengthbits = 16; - - private static final int literalbits = 32 - 1 - runninglengthbits; - - /** largest number of literal words in a run. */ - public static final int largestliteralcount = (1 << literalbits) - 1; - - /** largest number of clean words in a run */ - public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; - - private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; - - private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; - - private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; - - private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; - } \ No newline at end of file diff -Nru libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java --- libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java 2013-11-12 14:31:20.000000000 +0000 @@ -1,902 +1,1016 @@ package com.googlecode.javaewah; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ import org.junit.Test; import java.util.*; import java.io.*; - - - import junit.framework.Assert; /** - * This class is used for unit testing. + * This class is used for basic unit testing. 
*/ +@SuppressWarnings("javadoc") public class EWAHCompressedBitmapTest { - - @Test - public void testDebugSetSizeInBitsTest() { - System.out.println("testing DebugSetSizeInBits"); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - - b.set(4); - - b.setSizeInBits(6, true); - - List positions = b.getPositions(); - - Assert.assertEquals(2, positions.size()); - Assert.assertEquals(Integer.valueOf(4), positions.get(0)); - Assert.assertEquals(Integer.valueOf(5), positions.get(1)); - - Iterator iterator = b.iterator(); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(4), iterator.next()); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(5), iterator.next()); - Assert.assertFalse(iterator.hasNext()); - - IntIterator intIterator = b.intIterator(); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(4, intIterator.next()); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(5, intIterator.next()); - Assert.assertFalse(intIterator.hasNext()); - - } - - /** - * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
- */ - @Test - public void EwahIteratorProblem() { - System.out.println("testing ArnonMoscona"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - for (int i = 9434560; i <= 9435159; i++) { - bitmap.set(i); - } - IntIterator iterator = bitmap.intIterator(); - List v = bitmap.getPositions(); - int[] array = bitmap.toArray(); - for (int k = 0; k < v.size(); ++k) { - Assert.assertTrue(array[k]== v.get(k).intValue()); - Assert.assertTrue(iterator.hasNext()); - final int ival = iterator.next(); - final int vval = v.get(k).intValue(); - Assert.assertTrue(ival == vval); - } - Assert.assertTrue(!iterator.hasNext()); - // - for (int k = 2; k <= 1024; k *= 2) { - int[] bitsToSet = createSortedIntArrayOfBitsToSet(k,434455 + 5*k); - EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); - for (int i : bitsToSet) { - ewah.set(i); - } - equal(ewah.iterator(), bitsToSet); - } - } - - /** - * Test submitted by Gregory Ssi-Yan-Kai - */ - @Test - public void SsiYanKaiTest() { - System.out.println("testing SsiYanKaiTest"); - EWAHCompressedBitmap a = EWAHCompressedBitmap.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044, 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054, 40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062, 40063, 40064, 
40065, 40066, 40067, 40068, 40069, 40070, 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, 40098, 40099, 40100); - EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 270000); - LinkedHashSet aPositions = new LinkedHashSet(a.getPositions()); - int intersection = 0; - EWAHCompressedBitmap inter = new EWAHCompressedBitmap(); - LinkedHashSet bPositions = new LinkedHashSet(b.getPositions()); - for (Integer integer : bPositions) { - if (aPositions.contains(integer)) { - inter.set(integer.intValue()); - ++intersection; - } - } - EWAHCompressedBitmap and2 = a.and(b); - if(!and2.equals(inter)) throw new RuntimeException("intersections don't match"); - if(intersection != and2.cardinality()) throw new RuntimeException("cardinalities don't match"); - } - - /** - * Test inspired by William Habermaas. 
- */ - @Test - public void habermaasTest() { - System.out.println("testing habermaasTest"); - BitSet bitsetaa = new BitSet(); - EWAHCompressedBitmap aa = new EWAHCompressedBitmap(); - int[] val = { 55400, 1000000, 1000128 }; - for (int k = 0; k < val.length; ++k) { - aa.set(val[k]); - bitsetaa.set(val[k]); - } - equal(aa, bitsetaa); - BitSet bitsetab = new BitSet(); - EWAHCompressedBitmap ab = new EWAHCompressedBitmap(); - for (int i = 4096; i < (4096 + 5); i++) { - ab.set(i); - bitsetab.set(i); - } - ab.set(99000); - bitsetab.set(99000); - ab.set(1000130); - bitsetab.set(1000130); - equal(ab, bitsetab); - EWAHCompressedBitmap bb = aa.or(ab); - EWAHCompressedBitmap bbAnd = aa.and(ab); - try { - EWAHCompressedBitmap abnot = (EWAHCompressedBitmap)ab.clone(); - abnot.not(); - EWAHCompressedBitmap bbAnd2 = aa.andNot(abnot); - assertEquals(bbAnd2,bbAnd); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - BitSet bitsetbb = (BitSet) bitsetaa.clone(); - bitsetbb.or(bitsetab); - BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); - bitsetbbAnd.and(bitsetab); - equal(bbAnd, bitsetbbAnd); - equal(bb, bitsetbb); - } - - @Test - public void testAndResultAppend() - { - System.out.println("testing AndResultAppend"); - EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); - bitmap1.set(35); - EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); - bitmap2.set(35); - bitmap2.set(130); - - EWAHCompressedBitmap resultBitmap = bitmap1.and(bitmap2); - resultBitmap.set(131); - - bitmap1.set(131); - assertEquals(bitmap1,resultBitmap); - } - - /** - * Test cardinality. 
- */ - @Test - public void testCardinality() { - System.out.println("testing EWAH cardinality"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(Integer.MAX_VALUE - 64); - // System.out.format("Total Items %d\n", bitmap.cardinality()); - Assert.assertTrue(bitmap.cardinality() == 1); - } - - /** - * Test clear function - */ - @Test - public void testClear() { - System.out.println("testing Clear"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(5); - bitmap.clear(); - bitmap.set(7); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); - Assert.assertTrue(7 == bitmap.toArray()[0]); - bitmap.clear(); - bitmap.set( 5000 ); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); - bitmap.set(5001); - bitmap.set(5005); - bitmap.set(5100); - bitmap.set(5500); - bitmap.clear(); - bitmap.set(5); - bitmap.set(7); - bitmap.set(1000); - bitmap.set(1001); - Assert.assertTrue(4 == bitmap.cardinality()); - List positions = bitmap.getPositions(); - Assert.assertTrue(4 == positions.size()); - Assert.assertTrue(5 == positions.get(0).intValue()); - Assert.assertTrue(7 == positions.get(1).intValue()); - Assert.assertTrue(1000 == positions.get(2).intValue()); - Assert.assertTrue(1001 == positions.get(3).intValue()); - } - - /** - * Test ewah compressed bitmap. 
- */ - @Test - public void testEWAHCompressedBitmap() { - System.out.println("testing EWAH"); - long zero = 0; - long specialval = 1l | (1l << 4) | (1l << 63); - long notzero = ~zero; - EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(specialval); - myarray1.add(specialval); - myarray1.add(notzero); - myarray1.add(zero); - Assert.assertEquals(myarray1.getPositions().size(), 6 + 64); - EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap(); - myarray2.add(zero); - myarray2.add(specialval); - myarray2.add(specialval); - myarray2.add(notzero); - myarray2.add(zero); - myarray2.add(zero); - myarray2.add(zero); - Assert.assertEquals(myarray2.getPositions().size(), 6 + 64); - List data1 = myarray1.getPositions(); - List data2 = myarray2.getPositions(); - Vector logicalor = new Vector(); - { - HashSet tmp = new HashSet(); - tmp.addAll(data1); - tmp.addAll(data2); - logicalor.addAll(tmp); - } - Collections.sort(logicalor); - Vector logicaland = new Vector(); - logicaland.addAll(data1); - logicaland.retainAll(data2); - Collections.sort(logicaland); - EWAHCompressedBitmap arrayand = myarray1.and(myarray2); - Assert.assertTrue(arrayand.getPositions().equals(logicaland)); - EWAHCompressedBitmap arrayor = myarray1.or(myarray2); - Assert.assertTrue(arrayor.getPositions().equals(logicalor)); - EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1); - Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); - EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1); - Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); - EWAHCompressedBitmap x = new EWAHCompressedBitmap(); - for (Integer i : myarray1.getPositions()) { - x.set(i.intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap(); - for (Integer i : myarray2.getPositions()) { - x.set(i.intValue()); - } - 
Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - x = new EWAHCompressedBitmap(); - for (Iterator k = myarray1.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap(); - for (Iterator k = myarray2.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - } - - /** - * Test externalization. - * - * @throws IOException Signals that an I/O exception has occurred. - */ - @Test - public void testExternalization() throws IOException { - System.out.println("testing EWAH externalization"); - EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutputStream oo = new ObjectOutputStream(bos); - ewcb.writeExternal(oo); - oo.close(); - ewcb = null; - ewcb = new EWAHCompressedBitmap(); - ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - ewcb.readExternal(new ObjectInputStream(bis)); - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertTrue(result.get(k).intValue() == val[k]); - } - } - - @Test - public void testExtremeRange() { - System.out.println("testing EWAH at its extreme range"); - int N = 1024; - EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); - for (int i = 0; i < N; ++i) { - myarray1.set(Integer.MAX_VALUE - 64 - N + i); - Assert.assertTrue(myarray1.cardinality() == i+1); - int[] val = myarray1.toArray(); - Assert.assertTrue(val[0] == Integer.MAX_VALUE - 64 - N); - } - } - - /** - * Test the intersects method - */ - @Test - public void testIntersectsMethod(){ - System.out.println("testing Intersets Bug"); - EWAHCompressedBitmap bitmap = new 
EWAHCompressedBitmap(); - bitmap.set(1); - EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); - bitmap2.set(1); - bitmap2.set(11); - bitmap2.set(111); - bitmap2.set(1111111); - bitmap2.set(11111111); - Assert.assertTrue(bitmap.intersects(bitmap2)); - Assert.assertTrue(bitmap2.intersects(bitmap)); - - EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); - bitmap3.set(101); - EWAHCompressedBitmap bitmap4 = new EWAHCompressedBitmap(); - for (int i = 0; i < 100; i++) { - bitmap4.set(i); - } - Assert.assertFalse(bitmap3.intersects(bitmap4)); - Assert.assertFalse(bitmap4.intersects(bitmap3)); - - EWAHCompressedBitmap bitmap5 = new EWAHCompressedBitmap(); - bitmap5.set(0); - bitmap5.set(10); - bitmap5.set(20); - EWAHCompressedBitmap bitmap6 = new EWAHCompressedBitmap(); - bitmap6.set(1); - bitmap6.set(11); - bitmap6.set(21); - bitmap6.set(1111111); - bitmap6.set(11111111); - Assert.assertFalse(bitmap5.intersects(bitmap6)); - Assert.assertFalse(bitmap6.intersects(bitmap5)); - - bitmap5.set(21); - Assert.assertTrue(bitmap5.intersects(bitmap6)); - Assert.assertTrue(bitmap6.intersects(bitmap5)); - - EWAHCompressedBitmap bitmap7 = new EWAHCompressedBitmap(); - bitmap7.set(1); - bitmap7.set(10); - bitmap7.set(20); - bitmap7.set(1111111); - bitmap7.set(11111111); - EWAHCompressedBitmap bitmap8 = new EWAHCompressedBitmap(); - for (int i = 0; i < 1000; i++) { - if (i != 1 && i!=10 && i!=20){ - bitmap8.set(i); - } - } - Assert.assertFalse(bitmap7.intersects(bitmap8)); - Assert.assertFalse(bitmap8.intersects(bitmap7)); - } - - /** - * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound - * exception. - */ - @Test - public void testLargeEWAHCompressedBitmap() { - System.out.println("testing EWAH over a large array"); - EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); - int N = 11000000; - for (int i = 0; i < N; ++i) { - myarray1.set(i); - } - Assert.assertTrue(myarray1.sizeInBits() == N); - } - - - /** - * Test massive and. 
- */ - @Test - public void testMassiveAnd() { - System.out.println("testing massive logical and"); - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[1024]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap answer = ewah[0]; - for (int k = 1; k < ewah.length; ++k) - answer = answer.and(ewah[k]); - // result should be empty - if (answer.getPositions().size() != 0) - System.out.println(answer.toDebugString()); - Assert.assertTrue(answer.getPositions().size() == 0); - Assert.assertTrue(EWAHCompressedBitmap.and(ewah).getPositions().size() == 0); - } - - /** - * Test massive and not. - */ - @Test - public void testMassiveAndNot() { - System.out.println("testing massive and not"); - final int N = 1024; - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap answer = ewah[0]; - EWAHCompressedBitmap answer2 = ewah[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.andNot(ewah[k]); - EWAHCompressedBitmap copy = null; - try { - copy = (EWAHCompressedBitmap) ewah[k].clone(); - copy.not(); - answer2.and(copy); - assertEqualsPositions(answer, answer2); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - } - } - - /** - * Test massive or. 
- */ - @Test - public void testMassiveOr() { - System.out.println("testing massive logical or (can take a couple of minutes)"); - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - BitSet[] bset = new BitSet[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < bset.length; ++k) - bset[k] = new BitSet(); - for (int k = 0; k < N; ++k) - assertEqualsPositions(bset[k], ewah[k]); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - bset[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 0; k < N; ++k) - assertEqualsPositions(bset[k], ewah[k]); - EWAHCompressedBitmap answer = ewah[0]; - BitSet bitsetanswer = bset[0]; - for (int k = 1; k < ewah.length; ++k) { - EWAHCompressedBitmap tmp = answer.or(ewah[k]); - bitsetanswer.or(bset[k]); - answer = tmp; - assertEqualsPositions(bitsetanswer, answer); - } - assertEqualsPositions(bitsetanswer, answer); - assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap.or(ewah)); - int k = 0; - for (int j : answer) { - if (k != j) - System.out.println(answer.toDebugString()); - Assert.assertEquals(k, j); - k += 1; - } - } - } - - @Test - public void testsetSizeInBits() { - System.out.println("testing setSizeInBits"); - for(int k = 0; k < 4096; ++k) { - EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); - ewah.setSizeInBits(k); - Assert.assertEquals(ewah.sizeinbits,k); - Assert.assertEquals(ewah.cardinality(),0); - EWAHCompressedBitmap ewah2 = new EWAHCompressedBitmap(); - ewah2.setSizeInBits(k, false); - Assert.assertEquals(ewah2.sizeinbits,k); - Assert.assertEquals(ewah2.cardinality(),0); - EWAHCompressedBitmap ewah3 = new EWAHCompressedBitmap(); - for(int i = 0; i < k ; ++i) { - ewah3.set(i); - } - Assert.assertEquals(ewah3.sizeinbits,k); - Assert.assertEquals(ewah3.cardinality(),k); - EWAHCompressedBitmap ewah4 = new EWAHCompressedBitmap(); - 
ewah4.setSizeInBits(k, true); - Assert.assertEquals(ewah4.sizeinbits,k); - Assert.assertEquals(ewah4.cardinality(),k); - } - } - - - /** - * Test massive xor. - */ - @Test - public void testMassiveXOR() { - System.out.println("testing massive xor (can take a couple of minutes)"); - final int N = 16; - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - BitSet[] bset = new BitSet[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < bset.length; ++k) - bset[k] = new BitSet(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - bset[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap answer = ewah[0]; - BitSet bitsetanswer = bset[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.xor(ewah[k]); - bitsetanswer.xor(bset[k]); - assertEqualsPositions(bitsetanswer, answer); - } - int k = 0; - for (int j : answer) { - if (k != j) - System.out.println(answer.toDebugString()); - Assert.assertEquals(k, j); - k += 1; - } - } - - - - @Test - public void testMultiAnd() - { - System.out.println("testing MultiAnd"); - // test bitmap3 has a literal word while bitmap1/2 have a run of 1 - EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); - bitmap1.addStreamOfEmptyWords(true, 1000); - EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); - bitmap2.addStreamOfEmptyWords(true, 2000); - EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); - bitmap3.set(500); - bitmap3.set(502); - bitmap3.set(504); - - assertAndEquals(bitmap1,bitmap2,bitmap3); - - //equal - bitmap1 = new EWAHCompressedBitmap(); - bitmap1.set(35); - bitmap2 = new EWAHCompressedBitmap(); - bitmap2.set(35); - bitmap3 = new EWAHCompressedBitmap(); - bitmap3.set(35); - - assertAndEquals(bitmap1,bitmap2,bitmap3); - - // same number of words for each - bitmap3.set(63); - assertAndEquals(bitmap1,bitmap2,bitmap3); - - // one word bigger - bitmap3.set(64); - 
assertAndEquals(bitmap1,bitmap2,bitmap3); - - // two words bigger - bitmap3.set(130); - assertAndEquals(bitmap1,bitmap2,bitmap3); - - // test that result can still be appended to - EWAHCompressedBitmap resultBitmap = EWAHCompressedBitmap.and(bitmap1,bitmap2,bitmap3); - resultBitmap.set(131); - - bitmap1.set(131); - assertEquals(bitmap1,resultBitmap); - - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - for(int k = 1; k<=ewah.length;++k) { - EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; - for(int i = 0; i= 0; rl -= 1024) { - rlw.setNumberOfLiteralWords(rl); - Assert.assertEquals(rl, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - rlw.setNumberOfLiteralWords(0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - for (long rl = 0; rl <= RunningLengthWord.largestrunninglengthcount; rl += 1024) { - rlw.setRunningLength(rl); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(rl, rlw.getRunningLength()); - rlw.setRunningLength(0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - rlw.setRunningBit(true); - for (long rl = 0; rl <= RunningLengthWord.largestrunninglengthcount; rl += 1024) { - rlw.setRunningLength(rl); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(rl, rlw.getRunningLength()); - rlw.setRunningLength(0); - 
Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - for (long rl = 0; rl <= RunningLengthWord.largestliteralcount; rl += 128) { - rlw.setNumberOfLiteralWords(rl); - Assert.assertEquals(rl, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - rlw.setNumberOfLiteralWords(0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - } - - /** - * Test sets and gets. - */ - @Test - public void testSetGet() { - System.out.println("testing EWAH set/get"); - EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertEquals(result.get(k).intValue() , val[k]); - } - } - - @Test - public void testHashCode() { - System.out.println("testing hashCode"); - EWAHCompressedBitmap ewcb = EWAHCompressedBitmap.bitmapOf(50, 70).and(EWAHCompressedBitmap.bitmapOf(50, 1000)); - Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50), ewcb); - Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50).hashCode(), ewcb.hashCode()); - } - - @Test - public void testSetSizeInBits() - { - System.out.println("testing SetSizeInBits"); - testSetSizeInBits(130,131); - testSetSizeInBits(63,64); - testSetSizeInBits(64,65); - testSetSizeInBits(64,128); - testSetSizeInBits(35,131); - testSetSizeInBits(130,400); - testSetSizeInBits(130,191); - testSetSizeInBits(130,192); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(31); - bitmap.setSizeInBits(130,false); - bitmap.set(131); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(31); - 
jdkBitmap.set(131); - assertEquals(jdkBitmap,bitmap); - } - /** - * Test with parameters. - * - * @throws IOException Signals that an I/O exception has occurred. - */ - @Test - public void testWithParameters() throws IOException { - System.out - .println("These tests can run for several minutes. Please be patient."); - for (int k = 2; k < 1 << 24; k *= 8) - shouldSetBits(k); - PolizziTest(64); - PolizziTest(128); - PolizziTest(256); - PolizziTest(2048); - System.out.println("Your code is probably ok."); - } - - /** - * Pseudo-non-deterministic test inspired by S.J.vanSchaik. - * (Yes, non-deterministic tests are bad, but the test is actually deterministic.) - */ - @Test - public void vanSchaikTest() { - System.out.println("testing vanSchaikTest (this takes some time)"); - final int totalNumBits = 32768; - final double odds = 0.9; - Random rand = new Random(323232323); - for (int t = 0; t < 100; t++) { - int numBitsSet = 0; - EWAHCompressedBitmap cBitMap = new EWAHCompressedBitmap(); - for (int i = 0; i < totalNumBits; i++) { - if (rand.nextDouble() < odds) { - cBitMap.set(i); - numBitsSet++; + + @Test + public void testGet() { + for (int gap = 29; gap < 10000; gap *= 10) { + EWAHCompressedBitmap x = new EWAHCompressedBitmap(); + for (int k = 0; k < 100; ++k) + x.set(k * gap); + for (int k = 0; k < 100 * gap; ++k) + if (x.get(k)) { + if (k % gap != 0) + throw new RuntimeException( + "spotted an extra set bit at " + + k + " gap = " + + gap); + } else if (k % gap == 0) + throw new RuntimeException( + "missed a set bit " + k + + " gap = " + gap); + } } - } - Assert.assertEquals(cBitMap.cardinality(),numBitsSet); - } - - } - - /** - * Function used in a test inspired by Federico Fissore. 
- * - * @param size the number of set bits - * @param seed the random seed - * @return the pseudo-random array int[] - */ - public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { - Random random = new Random(seed); - // build raw int array - int[] bits = new int[size]; - for (int i = 0; i < bits.length; i++) { - bits[i] = random.nextInt(TEST_BS_SIZE); - } - // might generate duplicates - Arrays.sort(bits); - // first count how many distinct values - int counter = 0; - int oldx = -1; - for (int x : bits) { - if (x != oldx) - ++counter; - oldx = x; - } - // then construct new array - int[] answer = new int[counter]; - counter = 0; - oldx = -1; - for (int x : bits) { - if (x != oldx) { - answer[counter] = x; - ++counter; - } - oldx = x; - } - return answer; - } - /** - * Test inspired by Bilal Tayara - */ - @Test - public void TayaraTest() { - System.out.println("Tayara test"); - for(int offset = 64; offset<(1<<30);offset*=2){ - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - for(int k = 0; k< 64; ++k) { - a.set(offset+k); - b.set(offset+k); - } - if(!a.and(b).equals(a)) throw new RuntimeException("bug"); - if(!a.or(b).equals(a)) throw new RuntimeException("bug"); - } - } - - @Test + + @SuppressWarnings({ "deprecation", "boxing" }) + @Test + public void OKaserBugReportJuly2013() { + System.out.println("testing OKaserBugReportJuly2013"); + int[][] data = { {}, { 5, 6, 7, 8, 9 }, { 1 }, { 2 }, { 2, 5, 7 }, + { 1 }, { 2 }, { 1, 6, 9 }, { 1, 3, 4, 6, 8, 9 }, + { 1, 3, 4, 6, 8, 9 }, { 1, 3, 6, 8, 9 }, { 2, 5, 7 }, + { 2, 5, 7 }, { 1, 3, 9 }, { 3, 8, 9 } }; + + EWAHCompressedBitmap[] toBeOred = new EWAHCompressedBitmap[data.length]; + Set bruteForceAnswer = new HashSet(); + for (int i = 0; i < toBeOred.length; ++i) { + toBeOred[i] = new EWAHCompressedBitmap(); + for (int j : data[i]) { + toBeOred[i].set(j); + bruteForceAnswer.add(j); + } + toBeOred[i].setSizeInBits(1000,false); + } + long 
rightcard = bruteForceAnswer.size(); + EWAHCompressedBitmap e1 = FastAggregation.or(toBeOred); + Assert.assertEquals(rightcard, e1.cardinality()); + EWAHCompressedBitmap e2 = FastAggregation.bufferedor(65536, toBeOred); + Assert.assertEquals(rightcard, e2.cardinality()); + EWAHCompressedBitmap foo = new EWAHCompressedBitmap(); + FastAggregation.legacy_orWithContainer(foo, toBeOred); + Assert.assertEquals(rightcard, foo.cardinality()); + } + + + @Test + public void testSizeInBitsWithAnd() { + System.out.println("testing SizeInBitsWithAnd"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10,false); + b.setSizeInBits(10,false); + + EWAHCompressedBitmap and = a.and(b); + Assert.assertEquals(10, and.sizeInBits()); + EWAHCompressedBitmap and2 = EWAHCompressedBitmap.and(a,b); + Assert.assertEquals(10, and2.sizeInBits()); + } + @Test + public void testSizeInBitsWithAndNot() { + System.out.println("testing SizeInBitsWithAndNot"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10,false); + b.setSizeInBits(10,false); + + EWAHCompressedBitmap and = a.andNot(b); + Assert.assertEquals(10, and.sizeInBits()); + } + + @Test + public void testSizeInBitsWithOr() { + System.out.println("testing SizeInBitsWithOr"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10,false); + b.setSizeInBits(10,false); + + EWAHCompressedBitmap or = a.or(b); + Assert.assertEquals(10, or.sizeInBits()); + EWAHCompressedBitmap or2 = EWAHCompressedBitmap.or(a,b); + Assert.assertEquals(10, or2.sizeInBits()); + } + + + @Test + public void 
testSizeInBitsWithXor() { + System.out.println("testing SizeInBitsWithXor"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10,false); + b.setSizeInBits(10,false); + + EWAHCompressedBitmap xor = a.xor(b); + Assert.assertEquals(10, xor.sizeInBits()); + EWAHCompressedBitmap xor2 = EWAHCompressedBitmap.xor(a,b); + Assert.assertEquals(10, xor2.sizeInBits()); + } + + + @Test + public void testDebugSetSizeInBitsTest() { + System.out.println("testing DebugSetSizeInBits"); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + b.set(4); + + b.setSizeInBits(6, true); + + List positions = b.getPositions(); + + Assert.assertEquals(2, positions.size()); + Assert.assertEquals(Integer.valueOf(4), positions.get(0)); + Assert.assertEquals(Integer.valueOf(5), positions.get(1)); + + Iterator iterator = b.iterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(4), iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(5), iterator.next()); + Assert.assertFalse(iterator.hasNext()); + + IntIterator intIterator = b.intIterator(); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(4, intIterator.next()); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(5, intIterator.next()); + Assert.assertFalse(intIterator.hasNext()); + + } + + /** + * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
+ */ + @Test + public void EwahIteratorProblem() { + System.out.println("testing ArnonMoscona"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + for (int i = 9434560; i <= 9435159; i++) { + bitmap.set(i); + } + IntIterator iterator = bitmap.intIterator(); + List v = bitmap.getPositions(); + int[] array = bitmap.toArray(); + for (int k = 0; k < v.size(); ++k) { + Assert.assertTrue(array[k] == v.get(k).intValue()); + Assert.assertTrue(iterator.hasNext()); + final int ival = iterator.next(); + final int vval = v.get(k).intValue(); + Assert.assertTrue(ival == vval); + } + Assert.assertTrue(!iterator.hasNext()); + // + for (int k = 2; k <= 1024; k *= 2) { + int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + for (int i : bitsToSet) { + ewah.set(i); + } + equal(ewah.iterator(), bitsToSet); + } + } + + /** + * Test submitted by Gregory Ssi-Yan-Kai + */ + @Test + public void SsiYanKaiTest() { + System.out.println("testing SsiYanKaiTest"); + EWAHCompressedBitmap a = EWAHCompressedBitmap.bitmapOf(39935, 39936, + 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, + 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, + 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, + 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, + 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, + 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, + 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, + 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, + 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017, + 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026, + 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035, + 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044, + 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, + 40054, 40055, 40056, 40057, 40058, 40059, 
40060, 40061, 40062, + 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, 40071, + 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, + 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, 40089, + 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, 40098, + 40099, 40100); + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(39935, 39936, + 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, + 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, + 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, + 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, + 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, + 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, + 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, + 270000); + LinkedHashSet aPositions = new LinkedHashSet( + a.getPositions()); + int intersection = 0; + EWAHCompressedBitmap inter = new EWAHCompressedBitmap(); + LinkedHashSet bPositions = new LinkedHashSet( + b.getPositions()); + for (Integer integer : bPositions) { + if (aPositions.contains(integer)) { + inter.set(integer.intValue()); + ++intersection; + } + } + EWAHCompressedBitmap and2 = a.and(b); + if (!and2.equals(inter)) + throw new RuntimeException("intersections don't match"); + if (intersection != and2.cardinality()) + throw new RuntimeException("cardinalities don't match"); + } + + /** + * Test inspired by William Habermaas. 
+ */ + @Test + public void habermaasTest() { + System.out.println("testing habermaasTest"); + BitSet bitsetaa = new BitSet(); + EWAHCompressedBitmap aa = new EWAHCompressedBitmap(); + int[] val = { 55400, 1000000, 1000128 }; + for (int k = 0; k < val.length; ++k) { + aa.set(val[k]); + bitsetaa.set(val[k]); + } + equal(aa, bitsetaa); + BitSet bitsetab = new BitSet(); + EWAHCompressedBitmap ab = new EWAHCompressedBitmap(); + for (int i = 4096; i < (4096 + 5); i++) { + ab.set(i); + bitsetab.set(i); + } + ab.set(99000); + bitsetab.set(99000); + ab.set(1000130); + bitsetab.set(1000130); + equal(ab, bitsetab); + EWAHCompressedBitmap bb = aa.or(ab); + EWAHCompressedBitmap bbAnd = aa.and(ab); + try { + EWAHCompressedBitmap abnot = ab.clone(); + abnot.not(); + EWAHCompressedBitmap bbAnd2 = aa.andNot(abnot); + assertEquals(bbAnd2, bbAnd); + } catch (CloneNotSupportedException e) { + e.printStackTrace(); + } + BitSet bitsetbb = (BitSet) bitsetaa.clone(); + bitsetbb.or(bitsetab); + BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); + bitsetbbAnd.and(bitsetab); + equal(bbAnd, bitsetbbAnd); + equal(bb, bitsetbb); + } + + @Test + public void testAndResultAppend() { + System.out.println("testing AndResultAppend"); + EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); + bitmap1.set(35); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(35); + bitmap2.set(130); + + EWAHCompressedBitmap resultBitmap = bitmap1.and(bitmap2); + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + } + + /** + * Test cardinality. 
+ */ + @Test + public void testCardinality() { + System.out.println("testing EWAH cardinality"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(Integer.MAX_VALUE - 64); + // System.out.format("Total Items %d\n", bitmap.cardinality()); + Assert.assertTrue(bitmap.cardinality() == 1); + } + + /** + * Test clear function + */ + @Test + public void testClear() { + System.out.println("testing Clear"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(5); + bitmap.clear(); + bitmap.set(7); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.getPositions().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); + Assert.assertTrue(7 == bitmap.toArray()[0]); + bitmap.clear(); + bitmap.set(5000); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.getPositions().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); + bitmap.set(5001); + bitmap.set(5005); + bitmap.set(5100); + bitmap.set(5500); + bitmap.clear(); + bitmap.set(5); + bitmap.set(7); + bitmap.set(1000); + bitmap.set(1001); + Assert.assertTrue(4 == bitmap.cardinality()); + List positions = bitmap.getPositions(); + Assert.assertTrue(4 == positions.size()); + Assert.assertTrue(5 == positions.get(0).intValue()); + Assert.assertTrue(7 == positions.get(1).intValue()); + Assert.assertTrue(1000 == positions.get(2).intValue()); + Assert.assertTrue(1001 == positions.get(3).intValue()); + } + + /** + * Test ewah compressed bitmap. 
+ */ + @Test + public void testEWAHCompressedBitmap() { + System.out.println("testing EWAH"); + long zero = 0; + long specialval = 1l | (1l << 4) | (1l << 63); + long notzero = ~zero; + EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); + myarray1.add(zero); + myarray1.add(zero); + myarray1.add(zero); + myarray1.add(specialval); + myarray1.add(specialval); + myarray1.add(notzero); + myarray1.add(zero); + Assert.assertEquals(myarray1.getPositions().size(), 6 + 64); + EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap(); + myarray2.add(zero); + myarray2.add(specialval); + myarray2.add(specialval); + myarray2.add(notzero); + myarray2.add(zero); + myarray2.add(zero); + myarray2.add(zero); + Assert.assertEquals(myarray2.getPositions().size(), 6 + 64); + List data1 = myarray1.getPositions(); + List data2 = myarray2.getPositions(); + Vector logicalor = new Vector(); + { + HashSet tmp = new HashSet(); + tmp.addAll(data1); + tmp.addAll(data2); + logicalor.addAll(tmp); + } + Collections.sort(logicalor); + Vector logicaland = new Vector(); + logicaland.addAll(data1); + logicaland.retainAll(data2); + Collections.sort(logicaland); + EWAHCompressedBitmap arrayand = myarray1.and(myarray2); + Assert.assertTrue(arrayand.getPositions().equals(logicaland)); + EWAHCompressedBitmap arrayor = myarray1.or(myarray2); + Assert.assertTrue(arrayor.getPositions().equals(logicalor)); + EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1); + Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); + EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1); + Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); + EWAHCompressedBitmap x = new EWAHCompressedBitmap(); + for (Integer i : myarray1.getPositions()) { + x.set(i.intValue()); + } + Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); + x = new EWAHCompressedBitmap(); + for (Integer i : myarray2.getPositions()) { + x.set(i.intValue()); + } + 
Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); + x = new EWAHCompressedBitmap(); + for (Iterator k = myarray1.iterator(); k.hasNext();) { + x.set(extracted(k).intValue()); + } + Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); + x = new EWAHCompressedBitmap(); + for (Iterator k = myarray2.iterator(); k.hasNext();) { + x.set(extracted(k).intValue()); + } + Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); + } + + /** + * Test externalization. + * + * @throws IOException + * Signals that an I/O exception has occurred. + */ + @Test + public void testExternalization() throws IOException { + System.out.println("testing EWAH externalization"); + EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); + int[] val = { 5, 4400, 44600, 55400, 1000000 }; + for (int k = 0; k < val.length; ++k) { + ewcb.set(val[k]); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oo = new ObjectOutputStream(bos); + ewcb.writeExternal(oo); + oo.close(); + ewcb = null; + ewcb = new EWAHCompressedBitmap(); + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + ewcb.readExternal(new ObjectInputStream(bis)); + List result = ewcb.getPositions(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertTrue(result.get(k).intValue() == val[k]); + } + } + + @Test + public void testExtremeRange() { + System.out.println("testing EWAH at its extreme range"); + int N = 1024; + EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); + for (int i = 0; i < N; ++i) { + myarray1.set(Integer.MAX_VALUE - 64 - N + i); + Assert.assertTrue(myarray1.cardinality() == i + 1); + int[] val = myarray1.toArray(); + Assert.assertTrue(val[0] == Integer.MAX_VALUE - 64 - N); + } + } + + /** + * Test the intersects method + */ + @Test + public void testIntersectsMethod() { + System.out.println("testing Intersets Bug"); + EWAHCompressedBitmap bitmap = new 
EWAHCompressedBitmap(); + bitmap.set(1); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(1); + bitmap2.set(11); + bitmap2.set(111); + bitmap2.set(1111111); + bitmap2.set(11111111); + Assert.assertTrue(bitmap.intersects(bitmap2)); + Assert.assertTrue(bitmap2.intersects(bitmap)); + + EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(101); + EWAHCompressedBitmap bitmap4 = new EWAHCompressedBitmap(); + for (int i = 0; i < 100; i++) { + bitmap4.set(i); + } + Assert.assertFalse(bitmap3.intersects(bitmap4)); + Assert.assertFalse(bitmap4.intersects(bitmap3)); + + EWAHCompressedBitmap bitmap5 = new EWAHCompressedBitmap(); + bitmap5.set(0); + bitmap5.set(10); + bitmap5.set(20); + EWAHCompressedBitmap bitmap6 = new EWAHCompressedBitmap(); + bitmap6.set(1); + bitmap6.set(11); + bitmap6.set(21); + bitmap6.set(1111111); + bitmap6.set(11111111); + Assert.assertFalse(bitmap5.intersects(bitmap6)); + Assert.assertFalse(bitmap6.intersects(bitmap5)); + + bitmap5.set(21); + Assert.assertTrue(bitmap5.intersects(bitmap6)); + Assert.assertTrue(bitmap6.intersects(bitmap5)); + + EWAHCompressedBitmap bitmap7 = new EWAHCompressedBitmap(); + bitmap7.set(1); + bitmap7.set(10); + bitmap7.set(20); + bitmap7.set(1111111); + bitmap7.set(11111111); + EWAHCompressedBitmap bitmap8 = new EWAHCompressedBitmap(); + for (int i = 0; i < 1000; i++) { + if (i != 1 && i != 10 && i != 20) { + bitmap8.set(i); + } + } + Assert.assertFalse(bitmap7.intersects(bitmap8)); + Assert.assertFalse(bitmap8.intersects(bitmap7)); + } + + /** + * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound + * exception. + */ + @Test + public void testLargeEWAHCompressedBitmap() { + System.out.println("testing EWAH over a large array"); + EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); + int N = 11000000; + for (int i = 0; i < N; ++i) { + myarray1.set(i); + } + Assert.assertTrue(myarray1.sizeInBits() == N); + } + + /** + * Test massive and. 
+ */ + @Test + public void testMassiveAnd() { + System.out.println("testing massive logical and"); + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[1024]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap answer = ewah[0]; + for (int k = 1; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + // result should be empty + if (answer.getPositions().size() != 0) + System.out.println(answer.toDebugString()); + Assert.assertTrue(answer.getPositions().size() == 0); + Assert.assertTrue(EWAHCompressedBitmap.and(ewah).getPositions().size() == 0); + } + + /** + * Test massive and not. + */ + @Test + public void testMassiveAndNot() { + System.out.println("testing massive and not"); + final int N = 1024; + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap answer = ewah[0]; + EWAHCompressedBitmap answer2 = ewah[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.andNot(ewah[k]); + EWAHCompressedBitmap copy = null; + try { + copy = ewah[k].clone(); + copy.not(); + answer2.and(copy); + assertEqualsPositions(answer, answer2); + } catch (CloneNotSupportedException e) { + e.printStackTrace(); + } + } + } + + /** + * Test massive or. 
+ */ + @Test + public void testMassiveOr() { + System.out + .println("testing massive logical or (can take a couple of minutes)"); + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + EWAHCompressedBitmap answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + EWAHCompressedBitmap tmp = answer.or(ewah[k]); + bitsetanswer.or(bset[k]); + answer = tmp; + assertEqualsPositions(bitsetanswer, answer); + } + assertEqualsPositions(bitsetanswer, answer); + assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap.or(ewah)); + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer.toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + } + + @Test + public void testsetSizeInBits() { + System.out.println("testing setSizeInBits"); + for (int k = 0; k < 4096; ++k) { + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + ewah.setSizeInBits(k,false); + Assert.assertEquals(ewah.sizeinbits, k); + Assert.assertEquals(ewah.cardinality(), 0); + EWAHCompressedBitmap ewah2 = new EWAHCompressedBitmap(); + ewah2.setSizeInBits(k, false); + Assert.assertEquals(ewah2.sizeinbits, k); + Assert.assertEquals(ewah2.cardinality(), 0); + EWAHCompressedBitmap ewah3 = new EWAHCompressedBitmap(); + for (int i = 0; i < k; ++i) { + ewah3.set(i); + } + Assert.assertEquals(ewah3.sizeinbits, k); + Assert.assertEquals(ewah3.cardinality(), k); + EWAHCompressedBitmap ewah4 = new 
EWAHCompressedBitmap(); + ewah4.setSizeInBits(k, true); + Assert.assertEquals(ewah4.sizeinbits, k); + Assert.assertEquals(ewah4.cardinality(), k); + } + } + + /** + * Test massive xor. + */ + @Test + public void testMassiveXOR() { + System.out + .println("testing massive xor (can take a couple of minutes)"); + final int N = 16; + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.xor(ewah[k]); + bitsetanswer.xor(bset[k]); + assertEqualsPositions(bitsetanswer, answer); + } + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer.toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + + @Test + public void testMultiAnd() { + System.out.println("testing MultiAnd"); + // test bitmap3 has a literal word while bitmap1/2 have a run of 1 + EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); + bitmap1.addStreamOfEmptyWords(true, 1000); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.addStreamOfEmptyWords(true, 2000); + EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // equal + bitmap1 = new EWAHCompressedBitmap(); + bitmap1.set(35); + bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(35); + bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(35); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // same number of words for each + bitmap3.set(63); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // one word bigger + bitmap3.set(64); + 
assertAndEquals(bitmap1, bitmap2, bitmap3); + + // two words bigger + bitmap3.set(130); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // test that result can still be appended to + EWAHCompressedBitmap resultBitmap = EWAHCompressedBitmap.and(bitmap1, + bitmap2, bitmap3); + + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; + for (int i = 0; i < k; ++i) + shortewah[i] = ewah[i]; + assertAndEquals(shortewah); + } + } + } + + @Test + public void testMultiOr() { + System.out.println("testing MultiOr"); + // test bitmap3 has a literal word while bitmap1/2 have a run of 0 + EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); + bitmap1.set(1000); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(2000); + EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + EWAHCompressedBitmap expected = bitmap1.or(bitmap2).or(bitmap3); + + assertEquals(expected, + EWAHCompressedBitmap.or(bitmap1, bitmap2, bitmap3)); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; + for (int i = 0; i < k; ++i) + shortewah[i] = ewah[i]; + assertOrEquals(shortewah); + } + } + + } + + /** + 
* Test not. (Based on an idea by Ciaran Jessup) + */ + @Test + public void testNot() { + System.out.println("testing not"); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + for (int i = 0; i <= 184; ++i) { + ewah.set(i); + } + Assert.assertEquals(ewah.cardinality(), 185); + ewah.not(); + Assert.assertEquals(ewah.cardinality(), 0); + } + + @Test + public void testOrCardinality() { + System.out.println("testing Or Cardinality"); + for (int N = 0; N < 1024; ++N) { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + for (int i = 0; i < N; i++) { + bitmap.set(i); + } + bitmap.set(1025); + bitmap.set(1026); + Assert.assertEquals(N + 2, bitmap.cardinality()); + EWAHCompressedBitmap orbitmap = bitmap.or(bitmap); + assertEquals(orbitmap, bitmap); + Assert.assertEquals(N + 2, orbitmap.cardinality()); + + Assert.assertEquals(N + 2, + bitmap.orCardinality(new EWAHCompressedBitmap())); + } + } + + /** + * Test sets and gets. + */ + @Test + public void testSetGet() { + System.out.println("testing EWAH set/get"); + EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); + int[] val = { 5, 4400, 44600, 55400, 1000000 }; + for (int k = 0; k < val.length; ++k) { + ewcb.set(val[k]); + } + List result = ewcb.getPositions(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertEquals(result.get(k).intValue(), val[k]); + } + } + + @Test + public void testHashCode() { + System.out.println("testing hashCode"); + EWAHCompressedBitmap ewcb = EWAHCompressedBitmap.bitmapOf(50, 70).and( + EWAHCompressedBitmap.bitmapOf(50, 1000)); + Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50), ewcb); + Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50).hashCode(), + ewcb.hashCode()); + } + + @Test + public void testSetSizeInBits() { + System.out.println("testing SetSizeInBits"); + testSetSizeInBits(130, 131); + testSetSizeInBits(63, 64); + testSetSizeInBits(64, 65); + testSetSizeInBits(64, 128); + testSetSizeInBits(35, 131); 
+ testSetSizeInBits(130, 400); + testSetSizeInBits(130, 191); + testSetSizeInBits(130, 192); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(31); + bitmap.setSizeInBits(130, false); + bitmap.set(131); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(31); + jdkBitmap.set(131); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Test with parameters. + * + * @throws IOException + * Signals that an I/O exception has occurred. + */ + @Test + public void testWithParameters() throws IOException { + System.out + .println("These tests can run for several minutes. Please be patient."); + for (int k = 2; k < 1 << 24; k *= 8) + shouldSetBits(k); + PolizziTest(64); + PolizziTest(128); + PolizziTest(256); + PolizziTest(2048); + System.out.println("Your code is probably ok."); + } + + /** + * Pseudo-non-deterministic test inspired by S.J.vanSchaik. (Yes, + * non-deterministic tests are bad, but the test is actually deterministic.) + */ + @Test + public void vanSchaikTest() { + System.out.println("testing vanSchaikTest (this takes some time)"); + final int totalNumBits = 32768; + final double odds = 0.9; + Random rand = new Random(323232323); + for (int t = 0; t < 100; t++) { + int numBitsSet = 0; + EWAHCompressedBitmap cBitMap = new EWAHCompressedBitmap(); + for (int i = 0; i < totalNumBits; i++) { + if (rand.nextDouble() < odds) { + cBitMap.set(i); + numBitsSet++; + } + } + Assert.assertEquals(cBitMap.cardinality(), numBitsSet); + } + + } + + /** + * Function used in a test inspired by Federico Fissore. 
+ * + * @param size + * the number of set bits + * @param seed + * the random seed + * @return the pseudo-random array int[] + */ + public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { + Random random = new Random(seed); + // build raw int array + int[] bits = new int[size]; + for (int i = 0; i < bits.length; i++) { + bits[i] = random.nextInt(TEST_BS_SIZE); + } + // might generate duplicates + Arrays.sort(bits); + // first count how many distinct values + int counter = 0; + int oldx = -1; + for (int x : bits) { + if (x != oldx) + ++counter; + oldx = x; + } + // then construct new array + int[] answer = new int[counter]; + counter = 0; + oldx = -1; + for (int x : bits) { + if (x != oldx) { + answer[counter] = x; + ++counter; + } + oldx = x; + } + return answer; + } + + /** + * Test inspired by Bilal Tayara + */ + @Test + public void TayaraTest() { + System.out.println("Tayara test"); + for (int offset = 64; offset < (1 << 30); offset *= 2) { + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + for (int k = 0; k < 64; ++k) { + a.set(offset + k); + b.set(offset + k); + } + if (!a.and(b).equals(a)) + throw new RuntimeException("bug"); + if (!a.or(b).equals(a)) + throw new RuntimeException("bug"); + } + } + + @Test public void TestCloneEwahCompressedBitArray() throws CloneNotSupportedException { System.out.println("testing EWAH clone"); @@ -910,7 +1024,7 @@ EWAHCompressedBitmap b; - b = (EWAHCompressedBitmap) a.clone(); + b = a.clone(); a.setSizeInBits(487123, false); b.setSizeInBits(487123, false); @@ -918,375 +1032,419 @@ Assert.assertTrue(a.equals(b)); } - /** - * a non-deterministic test proposed by Marc Polizzi. 
- * - * @param maxlength the maximum uncompressed size of the bitmap - */ - public static void PolizziTest(int maxlength) { - System.out.println("Polizzi test with max length = " + maxlength); - for (int k = 0; k < 10000; ++k) { - final Random rnd = new Random(); - final EWAHCompressedBitmap ewahBitmap1 = new EWAHCompressedBitmap(); - final BitSet jdkBitmap1 = new BitSet(); - final EWAHCompressedBitmap ewahBitmap2 = new EWAHCompressedBitmap(); - final BitSet jdkBitmap2 = new BitSet(); - final EWAHCompressedBitmap ewahBitmap3 = new EWAHCompressedBitmap(); - final BitSet jdkBitmap3 = new BitSet(); - final int len = rnd.nextInt(maxlength); - for (int pos = 0; pos < len; pos++) { // random *** number of bits set *** - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap1.set(pos); - jdkBitmap1.set(pos); - } - if (rnd.nextInt(11) == 0) { // random *** increasing *** values - ewahBitmap2.set(pos); - jdkBitmap2.set(pos); - } - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap3.set(pos); - jdkBitmap3.set(pos); - } - } - assertEquals(jdkBitmap1, ewahBitmap1); - assertEquals(jdkBitmap2, ewahBitmap2); - assertEquals(jdkBitmap3, ewahBitmap3); - // XOR - { - final EWAHCompressedBitmap xorEwahBitmap = ewahBitmap1.xor(ewahBitmap2); - final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); - xorJdkBitmap.xor(jdkBitmap2); - assertEquals(xorJdkBitmap, xorEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap andEwahBitmap = ewahBitmap1.and(ewahBitmap2); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap andEwahBitmap = ewahBitmap2.and(ewahBitmap1); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - assertEquals(andJdkBitmap, EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2)); - } - // MULTI AND - { - final BitSet 
andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - andJdkBitmap.and(jdkBitmap3); - assertEquals(andJdkBitmap, EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(andJdkBitmap, EWAHCompressedBitmap.and(ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(andJdkBitmap.cardinality(), EWAHCompressedBitmap.andCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - } - // AND NOT - { - final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap1 - .andNot(ewahBitmap2); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); - andNotJdkBitmap.andNot(jdkBitmap2); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // AND NOT - { - final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap2 - .andNot(ewahBitmap1); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); - andNotJdkBitmap.andNot(jdkBitmap1); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // OR - { - final EWAHCompressedBitmap orEwahBitmap = ewahBitmap1.or(ewahBitmap2); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap1, ewahBitmap2)); - Assert.assertEquals(orEwahBitmap.cardinality(), ewahBitmap1.orCardinality(ewahBitmap2)); - } - // OR - { - final EWAHCompressedBitmap orEwahBitmap = ewahBitmap2.or(ewahBitmap1); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - } - // MULTI OR - { - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - orJdkBitmap.or(jdkBitmap3); - assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(orJdkBitmap.cardinality(), EWAHCompressedBitmap.orCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); 
- } - } - } - - /** - * Pseudo-non-deterministic test inspired by Federico Fissore. - * - * @param length the number of set bits in a bitmap - */ - public static void shouldSetBits(int length) { - System.out.println("testing shouldSetBits " + length); - int[] bitsToSet = createSortedIntArrayOfBitsToSet(length,434222); - EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); - System.out.println(" ... setting " + bitsToSet.length + " values"); - for (int i : bitsToSet) { - ewah.set(i); - } - System.out.println(" ... verifying " + bitsToSet.length + " values"); - equal(ewah.iterator(), bitsToSet); - System.out.println(" ... checking cardinality"); - Assert.assertEquals(bitsToSet.length, ewah.cardinality()); - } - - @Test - public void testSizeInBits1() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.setSizeInBits(1, false); - bitmap.not(); - Assert.assertEquals(1, bitmap.cardinality()); - } - - @Test - public void testHasNextSafe() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertTrue(it.hasNext()); - Assert.assertEquals(0, it.next()); - } - - - @Test - public void testHasNextSafe2() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertEquals(0, it.next()); - } - - - @Test - public void testInfiniteLoop() { - System.out.println("Testing for an infinite loop"); + /** + * a non-deterministic test proposed by Marc Polizzi. 
+ * + * @param maxlength + * the maximum uncompressed size of the bitmap + */ + public static void PolizziTest(int maxlength) { + System.out.println("Polizzi test with max length = " + maxlength); + for (int k = 0; k < 10000; ++k) { + final Random rnd = new Random(); + final EWAHCompressedBitmap ewahBitmap1 = new EWAHCompressedBitmap(); + final BitSet jdkBitmap1 = new BitSet(); + final EWAHCompressedBitmap ewahBitmap2 = new EWAHCompressedBitmap(); + final BitSet jdkBitmap2 = new BitSet(); + final EWAHCompressedBitmap ewahBitmap3 = new EWAHCompressedBitmap(); + final BitSet jdkBitmap3 = new BitSet(); + final int len = rnd.nextInt(maxlength); + for (int pos = 0; pos < len; pos++) { // random *** number of bits + // set *** + if (rnd.nextInt(7) == 0) { // random *** increasing *** values + ewahBitmap1.set(pos); + jdkBitmap1.set(pos); + } + if (rnd.nextInt(11) == 0) { // random *** increasing *** values + ewahBitmap2.set(pos); + jdkBitmap2.set(pos); + } + if (rnd.nextInt(7) == 0) { // random *** increasing *** values + ewahBitmap3.set(pos); + jdkBitmap3.set(pos); + } + } + assertEquals(jdkBitmap1, ewahBitmap1); + assertEquals(jdkBitmap2, ewahBitmap2); + assertEquals(jdkBitmap3, ewahBitmap3); + // XOR + { + final EWAHCompressedBitmap xorEwahBitmap = ewahBitmap1 + .xor(ewahBitmap2); + final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); + xorJdkBitmap.xor(jdkBitmap2); + assertEquals(xorJdkBitmap, xorEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap andEwahBitmap = ewahBitmap1 + .and(ewahBitmap2); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap andEwahBitmap = ewahBitmap2 + .and(ewahBitmap1); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2)); + } + // MULTI AND 
+ { + final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); + andJdkBitmap.and(jdkBitmap2); + andJdkBitmap.and(jdkBitmap3); + assertEquals(andJdkBitmap, EWAHCompressedBitmap.and( + ewahBitmap1, ewahBitmap2, ewahBitmap3)); + assertEquals(andJdkBitmap, EWAHCompressedBitmap.and( + ewahBitmap3, ewahBitmap2, ewahBitmap1)); + Assert.assertEquals(andJdkBitmap.cardinality(), + EWAHCompressedBitmap.andCardinality(ewahBitmap1, + ewahBitmap2, ewahBitmap3)); + } + // AND NOT + { + final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap1 + .andNot(ewahBitmap2); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); + andNotJdkBitmap.andNot(jdkBitmap2); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // AND NOT + { + final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap2 + .andNot(ewahBitmap1); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); + andNotJdkBitmap.andNot(jdkBitmap1); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // OR + { + final EWAHCompressedBitmap orEwahBitmap = ewahBitmap1 + .or(ewahBitmap2); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap.or(ewahBitmap1, ewahBitmap2)); + Assert.assertEquals(orEwahBitmap.cardinality(), + ewahBitmap1.orCardinality(ewahBitmap2)); + } + // OR + { + final EWAHCompressedBitmap orEwahBitmap = ewahBitmap2 + .or(ewahBitmap1); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + } + // MULTI OR + { + final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); + orJdkBitmap.or(jdkBitmap2); + orJdkBitmap.or(jdkBitmap3); + assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap1, + ewahBitmap2, ewahBitmap3)); + assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap3, + ewahBitmap2, ewahBitmap1)); + Assert.assertEquals(orJdkBitmap.cardinality(), + 
EWAHCompressedBitmap.orCardinality(ewahBitmap1, + ewahBitmap2, ewahBitmap3)); + } + } + } + + /** + * Pseudo-non-deterministic test inspired by Federico Fissore. + * + * @param length + * the number of set bits in a bitmap + */ + public static void shouldSetBits(int length) { + System.out.println("testing shouldSetBits " + length); + int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + System.out.println(" ... setting " + bitsToSet.length + " values"); + for (int i : bitsToSet) { + ewah.set(i); + } + System.out.println(" ... verifying " + bitsToSet.length + " values"); + equal(ewah.iterator(), bitsToSet); + System.out.println(" ... checking cardinality"); + Assert.assertEquals(bitsToSet.length, ewah.cardinality()); + } + + @Test + public void testSizeInBits1() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.setSizeInBits(1, false); + bitmap.not(); + Assert.assertEquals(1, bitmap.cardinality()); + } + + @Test + public void testHasNextSafe() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertTrue(it.hasNext()); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testHasNextSafe2() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testInfiniteLoop() { + System.out.println("Testing for an infinite loop"); EWAHCompressedBitmap b1 = new EWAHCompressedBitmap(); EWAHCompressedBitmap b2 = new EWAHCompressedBitmap(); EWAHCompressedBitmap b3 = new EWAHCompressedBitmap(); - b3.setSizeInBits(5); + b3.setSizeInBits(5,false); b1.set(2); b2.set(4); EWAHCompressedBitmap.and(b1, b2, b3); EWAHCompressedBitmap.or(b1, b2, b3); - } + } + + @Test + public void testSizeInBits2() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.setSizeInBits(1, 
true); + bitmap.not(); + Assert.assertEquals(0, bitmap.cardinality()); + } + + private static void assertAndEquals(EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.and(bitmaps[i]); + } + Assert.assertTrue(expected.equals(EWAHCompressedBitmap.and(bitmaps))); + } + + private static void assertEquals(EWAHCompressedBitmap expected, + EWAHCompressedBitmap actual) { + Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); + assertEqualsPositions(expected, actual); + } + + private static void assertOrEquals(EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.or(bitmaps[i]); + } + assertEquals(expected, EWAHCompressedBitmap.or(bitmaps)); + } + + /** + * Extracted. + * + * @param bits + * the bits + * @return the integer + */ + private static Integer extracted(final Iterator bits) { + return bits.next(); + } + + private static void testSetSizeInBits(int size, int nextBit) { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.setSizeInBits(size, false); + bitmap.set(nextBit); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(nextBit); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, part + * of a test contributed by Marc Polizzi + * + * @param jdkBitmap + * the uncompressed bitmap + * @param ewahBitmap + * the compressed bitmap + */ + static void assertCardinality(BitSet jdkBitmap, + EWAHCompressedBitmap ewahBitmap) { + final int c1 = jdkBitmap.cardinality(); + final int c2 = ewahBitmap.cardinality(); + Assert.assertEquals(c1, c2); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, part + * of a test contributed by Marc Polizzi. 
+ * + * @param jdkBitmap + * the uncompressed bitmap + * @param ewahBitmap + * the compressed bitmap + */ + static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { + assertEqualsIterator(jdkBitmap, ewahBitmap); + assertEqualsPositions(jdkBitmap, ewahBitmap); + assertCardinality(jdkBitmap, ewahBitmap); + } + + static void assertEquals(int[] v, List p) { + assertEquals(p, v); + } + + static void assertEquals(List p, int[] v) { + if (v.length != p.size()) + throw new RuntimeException("Different lengths " + v.length + " " + + p.size()); + for (int k = 0; k < v.length; ++k) + if (v[k] != p.get(k).intValue()) + throw new RuntimeException("expected equal at " + k + " " + + v[k] + " " + p.get(k)); + } + + // + /** + * Assess equality between an uncompressed bitmap and a compressed one, part + * of a test contributed by Marc Polizzi + * + * @param jdkBitmap + * the jdk bitmap + * @param ewahBitmap + * the ewah bitmap + */ + static void assertEqualsIterator(BitSet jdkBitmap, + EWAHCompressedBitmap ewahBitmap) { + final Vector positions = new Vector(); + final Iterator bits = ewahBitmap.iterator(); + while (bits.hasNext()) { + final int bit = extracted(bits).intValue(); + Assert.assertTrue(jdkBitmap.get(bit)); + positions.add(new Integer(bit)); + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "iterator: bitset got different bits"); + } + } + } + + // part of a test contributed by Marc Polizzi + /** + * Assert equals positions. 
+ * + * @param jdkBitmap + * the jdk bitmap + * @param ewahBitmap + * the ewah bitmap + */ + static void assertEqualsPositions(BitSet jdkBitmap, + EWAHCompressedBitmap ewahBitmap) { + final List positions = ewahBitmap.getPositions(); + for (int position : positions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + // we check again + final int[] fastpositions = ewahBitmap.toArray(); + for (int position : fastpositions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + int index = Arrays.binarySearch(fastpositions, pos); + if (index < 0) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + if (fastpositions[index] != pos) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + + /** + * Assert equals positions. 
+ * + * @param ewahBitmap1 + * the ewah bitmap1 + * @param ewahBitmap2 + * the ewah bitmap2 + */ + static void assertEqualsPositions(EWAHCompressedBitmap ewahBitmap1, + EWAHCompressedBitmap ewahBitmap2) { + final List positions1 = ewahBitmap1.getPositions(); + final List positions2 = ewahBitmap2.getPositions(); + if (!positions1.equals(positions2)) + throw new RuntimeException( + "positions: alternative got different bits (two bitmaps)"); + // + final int[] fastpositions1 = ewahBitmap1.toArray(); + assertEquals(fastpositions1, positions1); + final int[] fastpositions2 = ewahBitmap2.toArray(); + assertEquals(fastpositions2, positions2); + if (!Arrays.equals(fastpositions1, fastpositions2)) + throw new RuntimeException( + "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); + } + + /** + * Convenience function to assess equality between a compressed bitset and + * an uncompressed bitset + * + * @param x + * the compressed bitset/bitmap + * @param y + * the uncompressed bitset/bitmap + */ + static void equal(EWAHCompressedBitmap x, BitSet y) { + Assert.assertEquals(x.cardinality(), y.cardinality()); + for (int i : x.getPositions()) + Assert.assertTrue(y.get(i)); + } + + /** + * Convenience function to assess equality between an array and an iterator + * over Integers + * + * @param i + * the iterator + * @param array + * the array + */ + static void equal(Iterator i, int[] array) { + int cursor = 0; + while (i.hasNext()) { + int x = extracted(i).intValue(); + int y = array[cursor++]; + Assert.assertEquals(x, y); + } + } - @Test - public void testSizeInBits2() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.setSizeInBits(1, true); - bitmap.not(); - Assert.assertEquals(0, bitmap.cardinality()); - } - private static void assertAndEquals(EWAHCompressedBitmap...bitmaps) - { - EWAHCompressedBitmap expected = bitmaps[0]; - for(int i = 1; i < bitmaps.length; i++) { - expected = expected.and(bitmaps[i]); - } 
- assertEquals(expected, EWAHCompressedBitmap.and(bitmaps)); - } - - private static void assertEquals(EWAHCompressedBitmap expected, EWAHCompressedBitmap actual) { - Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); - assertEqualsPositions(expected, actual); - } - - private static void assertOrEquals(EWAHCompressedBitmap...bitmaps) - { - EWAHCompressedBitmap expected = bitmaps[0]; - for(int i = 1; i < bitmaps.length; i++) { - expected = expected.or(bitmaps[i]); - } - assertEquals(expected, EWAHCompressedBitmap.or(bitmaps)); - } - - /** - * Extracted. - * - * @param bits the bits - * @return the integer - */ - private static Integer extracted(final Iterator bits) { - return bits.next(); - } - private static void testSetSizeInBits(int size, int nextBit) { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.setSizeInBits(size,false); - bitmap.set(nextBit); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(nextBit); - assertEquals(jdkBitmap,bitmap); - } - - /** - * Assess equality between an uncompressed bitmap and a compressed one, - * part of a test contributed by Marc Polizzi - * - * @param jdkBitmap the uncompressed bitmap - * @param ewahBitmap the compressed bitmap - */ - static void assertCardinality(BitSet jdkBitmap, - EWAHCompressedBitmap ewahBitmap) { - final int c1 = jdkBitmap.cardinality(); - final int c2 = ewahBitmap.cardinality(); - Assert.assertEquals(c1, c2); - } - - /** - * Assess equality between an uncompressed bitmap and a compressed one, - * part of a test contributed by Marc Polizzi. 
- * - * @param jdkBitmap the uncompressed bitmap - * @param ewahBitmap the compressed bitmap - */ - static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { - assertEqualsIterator(jdkBitmap, ewahBitmap); - assertEqualsPositions(jdkBitmap, ewahBitmap); - assertCardinality(jdkBitmap, ewahBitmap); - } - - static void assertEquals(int[] v, List p) { - assertEquals(p,v); - } - static void assertEquals(List p, int[] v) { - if(v.length!= p.size()) - throw new RuntimeException("Different lengths "+v.length+ " "+p.size()); - for(int k = 0; k positions = new Vector(); - final Iterator bits = ewahBitmap.iterator(); - while (bits.hasNext()) { - final int bit = extracted(bits).intValue(); - Assert.assertTrue(jdkBitmap.get(bit)); - positions.add(new Integer(bit)); - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException("iterator: bitset got different bits"); - } - } - } - - // part of a test contributed by Marc Polizzi - /** - * Assert equals positions. 
- * - * @param jdkBitmap the jdk bitmap - * @param ewahBitmap the ewah bitmap - */ - static void assertEqualsPositions(BitSet jdkBitmap, - EWAHCompressedBitmap ewahBitmap) { - final List positions = ewahBitmap.getPositions(); - for (int position : positions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException("positions: bitset got different bits"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException("positions: bitset got different bits"); - } - } - // we check again - final int[] fastpositions = ewahBitmap.toArray(); - for (int position : fastpositions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException("positions: bitset got different bits with toArray"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - int index = Arrays.binarySearch(fastpositions,pos); - if(index < 0) - throw new RuntimeException("positions: bitset got different bits with toArray"); - if(fastpositions[index]!= pos) - throw new RuntimeException("positions: bitset got different bits with toArray"); - } - } - - /** - * Assert equals positions. 
- * - * @param ewahBitmap1 the ewah bitmap1 - * @param ewahBitmap2 the ewah bitmap2 - */ - static void assertEqualsPositions(EWAHCompressedBitmap ewahBitmap1, - EWAHCompressedBitmap ewahBitmap2) { - final List positions1 = ewahBitmap1.getPositions(); - final List positions2 = ewahBitmap2.getPositions(); - if (!positions1.equals(positions2)) - throw new RuntimeException("positions: alternative got different bits (two bitmaps)"); - // - final int[] fastpositions1 = ewahBitmap1.toArray(); - assertEquals(fastpositions1, positions1); - final int[] fastpositions2 = ewahBitmap2.toArray(); - assertEquals(fastpositions2, positions2); - if (!Arrays.equals(fastpositions1, fastpositions2)) - throw new RuntimeException("positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); - } - - /** - * Convenience function to assess equality between a compressed bitset - * and an uncompressed bitset - * - * @param x the compressed bitset/bitmap - * @param y the uncompressed bitset/bitmap - */ - static void equal(EWAHCompressedBitmap x, BitSet y) { - Assert.assertEquals (x.cardinality() , y.cardinality()); - for (int i : x.getPositions()) - Assert.assertTrue(y.get(i)); - } - - /** - * Convenience function to assess equality between an array and an iterator over - * Integers - * - * @param i the iterator - * @param array the array - */ - static void equal(Iterator i, int[] array) { - int cursor = 0; - while (i.hasNext()) { - int x = extracted(i).intValue(); - int y = array[cursor++]; - Assert.assertEquals(x,y); - } - } - - /** The Constant MEGA: a large integer. */ - private static final int MEGA = 8 * 1024 * 1024; + /** The Constant MEGA: a large integer. */ + private static final int MEGA = 8 * 1024 * 1024; - /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. */ - private static final int TEST_BS_SIZE = 8 * MEGA; + /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. 
*/ + private static final int TEST_BS_SIZE = 8 * MEGA; } diff -Nru libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java --- libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,116 @@ +package com.googlecode.javaewah; + +import static org.junit.Assert.*; +import org.junit.Test; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Tests for utility class. Sketchy for now. + * + */ +@SuppressWarnings("javadoc") +public class IntIteratorOverIteratingRLWTest { + + @Test + // had problems with bitmaps beginning with two consecutive clean runs + public void testConsecClean() { + System.out + .println("testing int iteration, 2 consec clean runs starting with zeros"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 64; i < 128; ++i) + e.set(i); + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(64, ctr); + } + + @Test + public void testConsecCleanStartOnes() { + System.out + .println("testing int iteration, 2 consec clean runs starting with ones"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 0; i < 2 * 64; ++i) + e.set(i); + for (int i = 4 * 64; i < 5 * 64; ++i) + e.set(i); + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + 
assertEquals(3 * 64, ctr); + } + + @Test + public void testStartDirty() { + System.out.println("testing int iteration, no initial runs"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 1; i < 2 * 64; ++i) + e.set(i); + for (int i = 4 * 64; i < 5 * 64; ++i) + e.set(i); + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 64 - 1, ctr); + } + + @Test + public void testEmpty() { + System.out.println("testing int iteration over empty bitmap"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertFalse(ii.hasNext()); + } + + @Test + public void testRandomish() { + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + + int upperlimit = 100000; + for (int i = 0; i < upperlimit; ++i) { + double probabilityOfOne = i / (double) (upperlimit / 2); + if (probabilityOfOne > 1.0) + probabilityOfOne = 1.0; + if (Math.random() < probabilityOfOne) { + e.set(i); + } + } + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + + assertEquals(e.cardinality(), ctr); + System.out + .println("checking int iteration over a var density bitset of size " + + e.cardinality()); + + } + +} diff -Nru libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java --- libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,144 @@ +package com.googlecode.javaewah; + +import static 
org.junit.Assert.*; +import java.util.Iterator; +import org.junit.Test; +import com.googlecode.javaewah.benchmark.ClusteredDataGenerator; +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Tests specifically for iterators. + * + */ +public class IteratorAggregationTest { + + /** + * @param N Number of bitmaps to generate in each set + * @param nbr parameter determining the size of the arrays (in a log scale) + * @return an iterator over sets of bitmaps + */ + public static Iterator getCollections(final int N, final int nbr) { + final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); + return new Iterator() { + int sparsity = 1; + + @Override + public boolean hasNext() { + return this.sparsity < 5; + } + + @Override + public EWAHCompressedBitmap[] next() { + int[][] data = new int[N][]; + int Max = (1 << (nbr + this.sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + this.sparsity += 3; + return ewah; + } + + @Override + public void remove() { + // unimplemented + } + + }; + + } + + /** + * + */ + @Test + public void testAnd() { + for (int N = 1; N < 10; ++N) { + System.out.println("testAnd N = " + N); + Iterator i = getCollections(N,3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.and(x); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation.bufferedand(IteratorUtil + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + + } + + /** + * + */ + @Test + public void testOr() { + for (int N = 1; N < 10; ++N) { + 
System.out.println("testOr N = " + N); + Iterator i = getCollections(N,3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation.bufferedor(IteratorUtil + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + + /** + * + */ + @SuppressWarnings("deprecation") + @Test + public void testWideOr() { + for (int nbr = 3; nbr <= 24; nbr += 3) { + for (int N = 100; N < 1000; N += 100) { + System.out.println("testWideOr N = " + N); + Iterator i = getCollections(N, 3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); + EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + FastAggregation.legacy_orWithContainer(container, x); + assertTrue(container.equals(tanswer)); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation + .bufferedor(IteratorUtil.toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + } + + /** + * + */ + @Test + public void testXor() { + System.out.println("testXor "); + Iterator i = getCollections(2,3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = x[0].xor(x[1]); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation.bufferedxor( + x[0].getIteratingRLW(), x[1].getIteratingRLW())); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + +} diff -Nru libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java --- libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java 2013-06-11 17:48:37.000000000 +0000 +++ libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java 2013-11-12 
14:31:20.000000000 +0000 @@ -1,910 +1,1029 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz - * Licensed under APL 2.0. + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. */ import org.junit.Test; + +import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah.IntIterator; import java.util.*; import java.io.*; - import junit.framework.Assert; +/** + * This class is used for basic unit testing. + */ +@SuppressWarnings("javadoc") public class EWAHCompressedBitmap32Test { - @Test - public void testDebugSetSizeInBitsTest() { - System.out.println("testing DebugSetSizeInBits"); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - - b.set(4); - - b.setSizeInBits(6, true); - - List positions = b.getPositions(); - - Assert.assertEquals(2, positions.size()); - Assert.assertEquals(Integer.valueOf(4), positions.get(0)); - Assert.assertEquals(Integer.valueOf(5), positions.get(1)); - - Iterator iterator = b.iterator(); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(4), iterator.next()); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(5), iterator.next()); - Assert.assertFalse(iterator.hasNext()); - - IntIterator intIterator = b.intIterator(); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(4, intIterator.next()); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(5, intIterator.next()); - Assert.assertFalse(intIterator.hasNext()); - - } - /** - * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
- */ - @Test - public void EwahIteratorProblem() { - System.out.println("testing ArnonMoscona"); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - for (int i = 9434560; i <= 9435159; i++) { - bitmap.set(i); - } - IntIterator iterator = bitmap.intIterator(); - List v = bitmap.getPositions(); - int[] array = bitmap.toArray(); - for (int k = 0; k < v.size(); ++k) { - Assert.assertTrue(array[k] == v.get(k).intValue()); - Assert.assertTrue(iterator.hasNext()); - final int ival = iterator.next(); - final int vval = v.get(k).intValue(); - Assert.assertTrue(ival == vval); - } - Assert.assertTrue(!iterator.hasNext()); - // - for (int k = 2; k <= 1024; k *= 2) { - int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - for (int i : bitsToSet) { - ewah.set(i); - } - equal(ewah.iterator(), bitsToSet); - } - } - - - /** - * Test submitted by Gregory Ssi-Yan-Kai - */ - @Test - public void SsiYanKaiTest() { - System.out.println("testing SsiYanKaiTest"); - EWAHCompressedBitmap32 a = EWAHCompressedBitmap32.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044, 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054, 40055, 40056, 40057, 40058, 40059, 40060, 40061, 
40062, 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, 40098, 40099, 40100); - EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 270000); - LinkedHashSet aPositions = new LinkedHashSet(a.getPositions()); - int intersection = 0; - EWAHCompressedBitmap32 inter = new EWAHCompressedBitmap32(); - LinkedHashSet bPositions = new LinkedHashSet(b.getPositions()); - for (Integer integer : bPositions) { - if (aPositions.contains(integer)) { - inter.set(integer.intValue()); - ++intersection; - } - } - EWAHCompressedBitmap32 and2 = a.and(b); - if(!and2.equals(inter)) throw new RuntimeException("intersections don't match"); - if(intersection != and2.cardinality()) throw new RuntimeException("cardinalities don't match"); - } - - /** - * Test inspired by William Habermaas. 
- */ - @Test - public void habermaasTest() { - System.out.println("testing habermaasTest"); - BitSet bitsetaa = new BitSet(); - EWAHCompressedBitmap32 aa = new EWAHCompressedBitmap32(); - int[] val = { 55400, 1000000, 1000128 }; - for (int k = 0; k < val.length; ++k) { - aa.set(val[k]); - bitsetaa.set(val[k]); - } - equal(aa, bitsetaa); - BitSet bitsetab = new BitSet(); - EWAHCompressedBitmap32 ab = new EWAHCompressedBitmap32(); - for (int i = 4096; i < (4096 + 5); i++) { - ab.set(i); - bitsetab.set(i); - } - ab.set(99000); - bitsetab.set(99000); - ab.set(1000130); - bitsetab.set(1000130); - equal(ab, bitsetab); - EWAHCompressedBitmap32 bb = aa.or(ab); - EWAHCompressedBitmap32 bbAnd = aa.and(ab); - try { - EWAHCompressedBitmap32 abnot = (EWAHCompressedBitmap32)ab.clone(); - abnot.not(); - EWAHCompressedBitmap32 bbAnd2 = aa.andNot(abnot); - assertEquals(bbAnd2,bbAnd); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - BitSet bitsetbb = (BitSet) bitsetaa.clone(); - bitsetbb.or(bitsetab); - BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); - bitsetbbAnd.and(bitsetab); - equal(bbAnd, bitsetbbAnd); - equal(bb, bitsetbb); - } - - @Test - public void testAndResultAppend() { - System.out.println("testing AndResultAppend"); - EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.set(35); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(35); - bitmap2.set(130); - - EWAHCompressedBitmap32 resultBitmap = bitmap1.and(bitmap2); - resultBitmap.set(131); - - bitmap1.set(131); - assertEquals(bitmap1, resultBitmap); - } - - /** - * Test cardinality. 
- */ - @Test - public void testCardinality() { - System.out.println("testing EWAH cardinality"); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(Integer.MAX_VALUE - 32); - // System.out.format("Total Items %d\n", bitmap.cardinality()); - Assert.assertTrue(bitmap.cardinality() == 1); - } - - /** - * Test clear function - */ - @Test - public void testClear() { - System.out.println("testing Clear"); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(5); - bitmap.clear(); - bitmap.set(7); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); - Assert.assertTrue(7 == bitmap.toArray()[0]); - bitmap.clear(); - bitmap.set(5000); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); - bitmap.set(5001); - bitmap.set(5005); - bitmap.set(5100); - bitmap.set(5500); - bitmap.clear(); - bitmap.set(5); - bitmap.set(7); - bitmap.set(1000); - bitmap.set(1001); - Assert.assertTrue(4 == bitmap.cardinality()); - List positions = bitmap.getPositions(); - Assert.assertTrue(4 == positions.size()); - Assert.assertTrue(5 == positions.get(0).intValue()); - Assert.assertTrue(7 == positions.get(1).intValue()); - Assert.assertTrue(1000 == positions.get(2).intValue()); - Assert.assertTrue(1001 == positions.get(3).intValue()); - } - - /** - * Test ewah compressed bitmap. 
- */ - @Test - public void testEWAHCompressedBitmap() { - System.out.println("testing EWAH"); - int zero = 0; - int specialval = 1 | (1 << 4) | (1 << 31); - int notzero = ~zero; - EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(specialval); - myarray1.add(specialval); - myarray1.add(notzero); - myarray1.add(zero); - Assert.assertEquals(myarray1.getPositions().size(), 6 + 32); - EWAHCompressedBitmap32 myarray2 = new EWAHCompressedBitmap32(); - myarray2.add(zero); - myarray2.add(specialval); - myarray2.add(specialval); - myarray2.add(notzero); - myarray2.add(zero); - myarray2.add(zero); - myarray2.add(zero); - Assert.assertEquals(myarray2.getPositions().size(), 6 + 32); - List data1 = myarray1.getPositions(); - List data2 = myarray2.getPositions(); - Vector logicalor = new Vector(); - { - HashSet tmp = new HashSet(); - tmp.addAll(data1); - tmp.addAll(data2); - logicalor.addAll(tmp); - } - Collections.sort(logicalor); - Vector logicaland = new Vector(); - logicaland.addAll(data1); - logicaland.retainAll(data2); - Collections.sort(logicaland); - EWAHCompressedBitmap32 arrayand = myarray1.and(myarray2); - Assert.assertTrue(arrayand.getPositions().equals(logicaland)); - EWAHCompressedBitmap32 arrayor = myarray1.or(myarray2); - Assert.assertTrue(arrayor.getPositions().equals(logicalor)); - EWAHCompressedBitmap32 arrayandbis = myarray2.and(myarray1); - Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); - EWAHCompressedBitmap32 arrayorbis = myarray2.or(myarray1); - Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); - EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); - for (Integer i : myarray1.getPositions()) { - x.set(i.intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap32(); - for (Integer i : myarray2.getPositions()) { - x.set(i.intValue()); - } - 
Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - x = new EWAHCompressedBitmap32(); - for (Iterator k = myarray1.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap32(); - for (Iterator k = myarray2.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - } - - /** - * Test externalization. - * - * @throws IOException - * Signals that an I/O exception has occurred. - */ - @Test - public void testExternalization() throws IOException { - System.out.println("testing EWAH externalization"); - EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutputStream oo = new ObjectOutputStream(bos); - ewcb.writeExternal(oo); - oo.close(); - ewcb = null; - ewcb = new EWAHCompressedBitmap32(); - ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - ewcb.readExternal(new ObjectInputStream(bis)); - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertTrue(result.get(k).intValue() == val[k]); - } - } - - @Test - public void testExtremeRange() { - System.out.println("testing EWAH at its extreme range"); - EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); - int N = 1024; - for (int i = 0; i < N; ++i) { - myarray1.set(Integer.MAX_VALUE - 32 - N + i); - Assert.assertTrue(myarray1.cardinality() == i+1); - int[] val = myarray1.toArray(); - Assert.assertTrue(val[0] == Integer.MAX_VALUE - 32 - N); - } - } - - /** - * Test the intersects method - */ - @Test - public void testIntersectsMethod(){ - System.out.println("testing Intersets Bug"); - EWAHCompressedBitmap32 bitmap 
= new EWAHCompressedBitmap32(); - bitmap.set(1); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(1); - bitmap2.set(11); - bitmap2.set(111); - bitmap2.set(1111111); - bitmap2.set(11111111); - Assert.assertTrue(bitmap.intersects(bitmap2)); - Assert.assertTrue(bitmap2.intersects(bitmap)); - - EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(101); - EWAHCompressedBitmap32 bitmap4 = new EWAHCompressedBitmap32(); - for (int i = 0; i < 100; i++) { - bitmap4.set(i); - } - Assert.assertFalse(bitmap3.intersects(bitmap4)); - Assert.assertFalse(bitmap4.intersects(bitmap3)); - - EWAHCompressedBitmap32 bitmap5 = new EWAHCompressedBitmap32(); - bitmap5.set(0); - bitmap5.set(10); - bitmap5.set(20); - EWAHCompressedBitmap32 bitmap6 = new EWAHCompressedBitmap32(); - bitmap6.set(1); - bitmap6.set(11); - bitmap6.set(21); - bitmap6.set(1111111); - bitmap6.set(11111111); - Assert.assertFalse(bitmap5.intersects(bitmap6)); - Assert.assertFalse(bitmap6.intersects(bitmap5)); - - bitmap5.set(21); - Assert.assertTrue(bitmap5.intersects(bitmap6)); - Assert.assertTrue(bitmap6.intersects(bitmap5)); - - EWAHCompressedBitmap32 bitmap7 = new EWAHCompressedBitmap32(); - bitmap7.set(1); - bitmap7.set(10); - bitmap7.set(20); - bitmap7.set(1111111); - bitmap7.set(11111111); - EWAHCompressedBitmap32 bitmap8 = new EWAHCompressedBitmap32(); - for (int i = 0; i < 1000; i++) { - if (i != 1 && i!=10 && i!=20){ - bitmap8.set(i); - } - } - Assert.assertFalse(bitmap7.intersects(bitmap8)); - Assert.assertFalse(bitmap8.intersects(bitmap7)); - } - - /** - * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound - * exception. 
- */ - @Test - public void testLargeEWAHCompressedBitmap() { - System.out.println("testing EWAH over a large array"); - EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); - int N = 11000000; - for (int i = 0; i < N; ++i) { - myarray1.set(i); - } - Assert.assertTrue(myarray1.sizeInBits() == N); - } - - - /** - * Test massive and. - */ - @Test - public void testMassiveAnd() { - System.out.println("testing massive logical and"); - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[1024]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap32 answer = ewah[0]; - for (int k = 1; k < ewah.length; ++k) - answer = answer.and(ewah[k]); - // result should be empty - if (answer.getPositions().size() != 0) - System.out.println(answer.toDebugString()); - Assert.assertTrue(answer.getPositions().size() == 0); - Assert - .assertTrue(EWAHCompressedBitmap32.and(ewah).getPositions().size() == 0); - } - - /** - * Test massive and not. 
- */ - @Test - public void testMassiveAndNot() { - System.out.println("testing massive and not"); - final int N = 1024; - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap32 answer = ewah[0]; - EWAHCompressedBitmap32 answer2 = ewah[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.andNot(ewah[k]); - EWAHCompressedBitmap32 copy = null; - try { - copy = (EWAHCompressedBitmap32) ewah[k].clone(); - copy.not(); - answer2.and(copy); - assertEqualsPositions(answer, answer2); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - } - } - - - @Test - public void testsetSizeInBits() { - System.out.println("testing setSizeInBits"); - for(int k = 0; k < 4096; ++k) { - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - ewah.setSizeInBits(k); - Assert.assertEquals(ewah.sizeinbits,k); - Assert.assertEquals(ewah.cardinality(),0); - EWAHCompressedBitmap32 ewah2 = new EWAHCompressedBitmap32(); - ewah2.setSizeInBits(k, false); - Assert.assertEquals(ewah2.sizeinbits,k); - Assert.assertEquals(ewah2.cardinality(),0); - EWAHCompressedBitmap32 ewah3 = new EWAHCompressedBitmap32(); - for(int i = 0; i < k ; ++i) { - ewah3.set(i); - } - Assert.assertEquals(ewah3.sizeinbits,k); - Assert.assertEquals(ewah3.cardinality(),k); - EWAHCompressedBitmap32 ewah4 = new EWAHCompressedBitmap32(); - ewah4.setSizeInBits(k, true); - Assert.assertEquals(ewah4.sizeinbits,k); - Assert.assertEquals(ewah4.cardinality(),k); - } - } - - - /** - * Test massive or. 
- */ - @Test - public void testMassiveOr() { - System.out - .println("testing massive logical or (can take a couple of minutes)"); - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - BitSet[] bset = new BitSet[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < bset.length; ++k) - bset[k] = new BitSet(); - for (int k = 0; k < N; ++k) - assertEqualsPositions(bset[k], ewah[k]); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - bset[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 0; k < N; ++k) - assertEqualsPositions(bset[k], ewah[k]); - EWAHCompressedBitmap32 answer = ewah[0]; - BitSet bitsetanswer = bset[0]; - for (int k = 1; k < ewah.length; ++k) { - EWAHCompressedBitmap32 tmp = answer.or(ewah[k]); - bitsetanswer.or(bset[k]); - answer = tmp; - assertEqualsPositions(bitsetanswer, answer); - } - assertEqualsPositions(bitsetanswer, answer); - assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap32.or(ewah)); - int k = 0; - for (int j : answer) { - if (k != j) - System.out.println(answer.toDebugString()); - Assert.assertEquals(k, j); - k += 1; - } - } - } - - /** - * Test massive xor. 
- */ - @Test - public void testMassiveXOR() { - System.out.println("testing massive xor (can take a couple of minutes)"); - final int N = 16; - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - BitSet[] bset = new BitSet[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < bset.length; ++k) - bset[k] = new BitSet(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - bset[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap32 answer = ewah[0]; - BitSet bitsetanswer = bset[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.xor(ewah[k]); - bitsetanswer.xor(bset[k]); - assertEqualsPositions(bitsetanswer, answer); - } - int k = 0; - for (int j : answer) { - if (k != j) - System.out.println(answer.toDebugString()); - Assert.assertEquals(k, j); - k += 1; - } - } - - @Test - public void testMultiAnd() { - System.out.println("testing MultiAnd"); - // test bitmap3 has a literal word while bitmap1/2 have a run of 1 - EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.addStreamOfEmptyWords(true, 1000); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.addStreamOfEmptyWords(true, 2000); - EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(500); - bitmap3.set(502); - bitmap3.set(504); - - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // equal - bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.set(35); - bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(35); - bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(35); - - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // same number of words for each - bitmap3.set(63); - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // one word bigger - bitmap3.set(64); - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // two words bigger - bitmap3.set(130); - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // test that result can still 
be appended to - EWAHCompressedBitmap32 resultBitmap = EWAHCompressedBitmap32.and(bitmap1, - bitmap2, bitmap3); - resultBitmap.set(131); - - bitmap1.set(131); - assertEquals(bitmap1, resultBitmap); - - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 1; k <= ewah.length; ++k) { - EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; - for (int i = 0; i < k; ++i) - shortewah[i] = ewah[i]; - assertAndEquals(shortewah); - } - } - } - - @Test - public void testMultiOr() { - System.out.println("testing MultiOr"); - // test bitmap3 has a literal word while bitmap1/2 have a run of 0 - EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.set(1000); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(2000); - EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(500); - bitmap3.set(502); - bitmap3.set(504); - - EWAHCompressedBitmap32 expected = bitmap1.or(bitmap2).or(bitmap3); - - assertEquals(expected, EWAHCompressedBitmap32.or(bitmap1, bitmap2, bitmap3)); - - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 1; k <= ewah.length; ++k) { - EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; - for (int i = 0; i < k; ++i) - shortewah[i] = ewah[i]; - assertOrEquals(shortewah); - } - } - - } - - /** - * Test not. 
(Based on an idea by Ciaran Jessup) - */ - @Test - public void testNot() { - System.out.println("testing not"); - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - for (int i = 0; i <= 184; ++i) { - ewah.set(i); - } - Assert.assertEquals(ewah.cardinality(), 185); - ewah.not(); - Assert.assertEquals(ewah.cardinality(), 0); - } - - @Test - public void testOrCardinality() { - System.out.println("testing Or Cardinality"); - for (int N = 0; N < 1024; ++N) { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - for (int i = 0; i < N; i++) { - bitmap.set(i); - } - bitmap.set(1025); - bitmap.set(1026); - Assert.assertEquals(N+2, bitmap.cardinality()); - EWAHCompressedBitmap32 orbitmap = bitmap.or(bitmap); - assertEquals(orbitmap,bitmap); - Assert.assertEquals(N+2, orbitmap.cardinality()); - if(N+2 != bitmap.orCardinality(new EWAHCompressedBitmap32())) { - System.out.println("N = "+N); - System.out.println(bitmap.toDebugString()); - System.out.println("cardinality = "+bitmap.cardinality()); - System.out.println("orCardinality = "+bitmap.orCardinality(new EWAHCompressedBitmap32())); - } - - Assert.assertEquals(N+2, - bitmap.orCardinality(new EWAHCompressedBitmap32())); - } - } - - /** - * Test running length word. 
- */ - @Test - public void testRunningLengthWord() { - System.out.println("testing RunningLengthWord32"); - int x[] = new int[1]; - RunningLengthWord32 rlw = new RunningLengthWord32(x, 0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - rlw.setRunningBit(true); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - rlw.setRunningBit(false); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - for (int rl = RunningLengthWord32.largestliteralcount; rl >= 0; rl -= 1024) { - rlw.setNumberOfLiteralWords(rl); - Assert.assertEquals(rl, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - rlw.setNumberOfLiteralWords(0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - for (int rl = 0; rl <= RunningLengthWord32.largestrunninglengthcount; rl += 1024) { - rlw.setRunningLength(rl); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(rl, rlw.getRunningLength()); - rlw.setRunningLength(0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(false, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - rlw.setRunningBit(true); - for (int rl = 0; rl <= RunningLengthWord32.largestrunninglengthcount; rl += 1024) { - rlw.setRunningLength(rl); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(rl, rlw.getRunningLength()); - rlw.setRunningLength(0); - Assert.assertEquals(0, 
rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - for (int rl = 0; rl <= RunningLengthWord32.largestliteralcount; rl += 128) { - rlw.setNumberOfLiteralWords(rl); - Assert.assertEquals(rl, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - rlw.setNumberOfLiteralWords(0); - Assert.assertEquals(0, rlw.getNumberOfLiteralWords()); - Assert.assertEquals(true, rlw.getRunningBit()); - Assert.assertEquals(0, rlw.getRunningLength()); - } - } - - /** - * Test sets and gets. - */ - @Test - public void testSetGet() { - System.out.println("testing EWAH set/get"); - EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertEquals(result.get(k).intValue(), val[k]); - } - } - - @Test - public void testHashCode() { - System.out.println("testing hashCode"); - EWAHCompressedBitmap32 ewcb = EWAHCompressedBitmap32.bitmapOf(50, 70).and(EWAHCompressedBitmap32.bitmapOf(50, 1000)); - Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50), ewcb); - Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50).hashCode(), ewcb.hashCode()); - } - - @Test - public void testSetSizeInBits() { - System.out.println("testing SetSizeInBits"); - testSetSizeInBits(130, 131); - testSetSizeInBits(63, 64); - testSetSizeInBits(64, 65); - testSetSizeInBits(64, 128); - testSetSizeInBits(35, 131); - testSetSizeInBits(130, 400); - testSetSizeInBits(130, 191); - testSetSizeInBits(130, 192); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(31); - bitmap.setSizeInBits(130, false); - bitmap.set(131); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(31); - 
jdkBitmap.set(131); - assertEquals(jdkBitmap, bitmap); - } - - /** - * Test with parameters. - * - * @throws IOException - * Signals that an I/O exception has occurred. - */ - @Test - public void testWithParameters() throws IOException { - System.out - .println("These tests can run for several minutes. Please be patient."); - for (int k = 2; k < 1 << 24; k *= 8) - shouldSetBits(k); - PolizziTest(64); - PolizziTest(128); - PolizziTest(256); - PolizziTest(2048); - System.out.println("Your code is probably ok."); - } - - /** - * Pseudo-non-deterministic test inspired by S.J.vanSchaik. (Yes, - * non-deterministic tests are bad, but the test is actually deterministic.) - */ - @Test - public void vanSchaikTest() { - System.out.println("testing vanSchaikTest (this takes some time)"); - final int totalNumBits = 32768; - final double odds = 0.9; - Random rand = new Random(323232323); - for (int t = 0; t < 100; t++) { - int numBitsSet = 0; - EWAHCompressedBitmap32 cBitMap = new EWAHCompressedBitmap32(); - for (int i = 0; i < totalNumBits; i++) { - if (rand.nextDouble() < odds) { - cBitMap.set(i); - numBitsSet++; + + @Test + public void testGet() { + for (int gap = 29; gap < 10000; gap *= 10) { + EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); + for (int k = 0; k < 100; ++k) + x.set(k * gap); + for (int k = 0; k < 100 * gap; ++k) + if (x.get(k)) { + if (k % gap != 0) + throw new RuntimeException( + "spotted an extra set bit at " + + k + " gap = " + + gap); + } else if (k % gap == 0) + throw new RuntimeException( + "missed a set bit " + k + + " gap = " + gap); + } } - } - Assert.assertEquals(cBitMap.cardinality(), numBitsSet); - } - - } - - /** - * Function used in a test inspired by Federico Fissore. 
- * - * @param size - * the number of set bits - * @param seed - * the random seed - * @return the pseudo-random array int[] - */ - public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { - Random random = new Random(seed); - // build raw int array - int[] bits = new int[size]; - for (int i = 0; i < bits.length; i++) { - bits[i] = random.nextInt(TEST_BS_SIZE); - } - // might generate duplicates - Arrays.sort(bits); - // first count how many distinct values - int counter = 0; - int oldx = -1; - for (int x : bits) { - if (x != oldx) - ++counter; - oldx = x; - } - // then construct new array - int[] answer = new int[counter]; - counter = 0; - oldx = -1; - for (int x : bits) { - if (x != oldx) { - answer[counter] = x; - ++counter; - } - oldx = x; - } - return answer; - } - - - /** - * Test inspired by Bilal Tayara - */ - @Test - public void TayaraTest() { - System.out.println("Tayara test"); - for(int offset = 64; offset<(1<<30);offset*=2){ - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - for(int k = 0; k< 64; ++k) { - a.set(offset+k); - b.set(offset+k); - } - if(!a.and(b).equals(a)) throw new RuntimeException("bug"); - if(!a.or(b).equals(a)) throw new RuntimeException("bug"); - } - } + + @SuppressWarnings({ "deprecation", "boxing" }) + @Test + public void OKaserBugReportJuly2013() { + System.out.println("testing OKaserBugReportJuly2013"); + int[][] data = { {}, { 5, 6, 7, 8, 9 }, { 1 }, { 2 }, { 2, 5, 7 }, + { 1 }, { 2 }, { 1, 6, 9 }, { 1, 3, 4, 6, 8, 9 }, + { 1, 3, 4, 6, 8, 9 }, { 1, 3, 6, 8, 9 }, { 2, 5, 7 }, + { 2, 5, 7 }, { 1, 3, 9 }, { 3, 8, 9 } }; + + EWAHCompressedBitmap32[] toBeOred = new EWAHCompressedBitmap32[data.length]; + Set bruteForceAnswer = new HashSet(); + for (int i = 0; i < toBeOred.length; ++i) { + toBeOred[i] = new EWAHCompressedBitmap32(); + for (int j : data[i]) { + toBeOred[i].set(j); + bruteForceAnswer.add(j); + } + toBeOred[i].setSizeInBits(1000,false); 
+ } + + long rightcard = bruteForceAnswer.size(); + EWAHCompressedBitmap32 foo = new EWAHCompressedBitmap32(); + FastAggregation32.legacy_orWithContainer(foo, toBeOred); + Assert.assertEquals(rightcard, foo.cardinality()); + EWAHCompressedBitmap32 e1 = FastAggregation.or(toBeOred); + Assert.assertEquals(rightcard, e1.cardinality()); + EWAHCompressedBitmap32 e2 = FastAggregation32.bufferedor(65536, + toBeOred); + Assert.assertEquals(rightcard, e2.cardinality()); + } + + @Test + public void testSizeInBitsWithAnd() { + System.out.println("testing SizeInBitsWithAnd"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10); + b.setSizeInBits(10); + + EWAHCompressedBitmap32 and = a.and(b); + Assert.assertEquals(10, and.sizeInBits()); + EWAHCompressedBitmap32 and2 = EWAHCompressedBitmap32.and(a,b); + Assert.assertEquals(10, and2.sizeInBits()); + } + @Test + public void testSizeInBitsWithAndNot() { + System.out.println("testing SizeInBitsWithAndNot"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10); + b.setSizeInBits(10); + + EWAHCompressedBitmap32 and = a.andNot(b); + Assert.assertEquals(10, and.sizeInBits()); + } + + @Test + public void testSizeInBitsWithOr() { + System.out.println("testing SizeInBitsWithOr"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10); + b.setSizeInBits(10); + + EWAHCompressedBitmap32 or = a.or(b); + Assert.assertEquals(10, or.sizeInBits()); + EWAHCompressedBitmap32 or2 = EWAHCompressedBitmap32.or(a,b); + Assert.assertEquals(10, or2.sizeInBits()); + } + + 
+ @Test + public void testSizeInBitsWithXor() { + System.out.println("testing SizeInBitsWithXor"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10); + b.setSizeInBits(10); + + EWAHCompressedBitmap32 xor = a.xor(b); + Assert.assertEquals(10, xor.sizeInBits()); + EWAHCompressedBitmap32 xor2 = EWAHCompressedBitmap32.xor(a,b); + Assert.assertEquals(10, xor2.sizeInBits()); + } + + + @Test + public void testDebugSetSizeInBitsTest() { + System.out.println("testing DebugSetSizeInBits"); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + b.set(4); + + b.setSizeInBits(6, true); + + List positions = b.getPositions(); + + Assert.assertEquals(2, positions.size()); + Assert.assertEquals(Integer.valueOf(4), positions.get(0)); + Assert.assertEquals(Integer.valueOf(5), positions.get(1)); + + Iterator iterator = b.iterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(4), iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(5), iterator.next()); + Assert.assertFalse(iterator.hasNext()); + + IntIterator intIterator = b.intIterator(); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(4, intIterator.next()); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(5, intIterator.next()); + Assert.assertFalse(intIterator.hasNext()); + + } + + /** + * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
+ */ + @Test + public void EwahIteratorProblem() { + System.out.println("testing ArnonMoscona"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + for (int i = 9434560; i <= 9435159; i++) { + bitmap.set(i); + } + IntIterator iterator = bitmap.intIterator(); + List v = bitmap.getPositions(); + int[] array = bitmap.toArray(); + for (int k = 0; k < v.size(); ++k) { + Assert.assertTrue(array[k] == v.get(k).intValue()); + Assert.assertTrue(iterator.hasNext()); + final int ival = iterator.next(); + final int vval = v.get(k).intValue(); + Assert.assertTrue(ival == vval); + } + Assert.assertTrue(!iterator.hasNext()); + // + for (int k = 2; k <= 1024; k *= 2) { + int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + for (int i : bitsToSet) { + ewah.set(i); + } + equal(ewah.iterator(), bitsToSet); + } + } + + /** + * Test submitted by Gregory Ssi-Yan-Kai + */ + @Test + public void SsiYanKaiTest() { + System.out.println("testing SsiYanKaiTest"); + EWAHCompressedBitmap32 a = EWAHCompressedBitmap32.bitmapOf(39935, + 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, + 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, + 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, + 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, + 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, + 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, + 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, + 39999, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, + 40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, + 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, + 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, + 40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, + 40044, 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, + 40053, 40054, 40055, 40056, 40057, 
40058, 40059, 40060, 40061, + 40062, 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, + 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, + 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, + 40089, 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, + 40098, 40099, 40100); + EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(39935, + 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, + 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, + 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, + 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, + 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, + 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, + 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, + 39999, 270000); + LinkedHashSet aPositions = new LinkedHashSet( + a.getPositions()); + int intersection = 0; + EWAHCompressedBitmap32 inter = new EWAHCompressedBitmap32(); + LinkedHashSet bPositions = new LinkedHashSet( + b.getPositions()); + for (Integer integer : bPositions) { + if (aPositions.contains(integer)) { + inter.set(integer.intValue()); + ++intersection; + } + } + EWAHCompressedBitmap32 and2 = a.and(b); + if (!and2.equals(inter)) + throw new RuntimeException("intersections don't match"); + if (intersection != and2.cardinality()) + throw new RuntimeException("cardinalities don't match"); + } + + /** + * Test inspired by William Habermaas. 
+ */ + @Test + public void habermaasTest() { + System.out.println("testing habermaasTest"); + BitSet bitsetaa = new BitSet(); + EWAHCompressedBitmap32 aa = new EWAHCompressedBitmap32(); + int[] val = { 55400, 1000000, 1000128 }; + for (int k = 0; k < val.length; ++k) { + aa.set(val[k]); + bitsetaa.set(val[k]); + } + equal(aa, bitsetaa); + BitSet bitsetab = new BitSet(); + EWAHCompressedBitmap32 ab = new EWAHCompressedBitmap32(); + for (int i = 4096; i < (4096 + 5); i++) { + ab.set(i); + bitsetab.set(i); + } + ab.set(99000); + bitsetab.set(99000); + ab.set(1000130); + bitsetab.set(1000130); + equal(ab, bitsetab); + EWAHCompressedBitmap32 bb = aa.or(ab); + EWAHCompressedBitmap32 bbAnd = aa.and(ab); + try { + EWAHCompressedBitmap32 abnot = ab.clone(); + abnot.not(); + EWAHCompressedBitmap32 bbAnd2 = aa.andNot(abnot); + assertEquals(bbAnd2, bbAnd); + } catch (CloneNotSupportedException e) { + e.printStackTrace(); + } + BitSet bitsetbb = (BitSet) bitsetaa.clone(); + bitsetbb.or(bitsetab); + BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); + bitsetbbAnd.and(bitsetab); + equal(bbAnd, bitsetbbAnd); + equal(bb, bitsetbb); + } + + @Test + public void testAndResultAppend() { + System.out.println("testing AndResultAppend"); + EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.set(35); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(35); + bitmap2.set(130); + + EWAHCompressedBitmap32 resultBitmap = bitmap1.and(bitmap2); + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + } + + /** + * Test cardinality. 
+ */ + @Test + public void testCardinality() { + System.out.println("testing EWAH cardinality"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(Integer.MAX_VALUE - 32); + // System.out.format("Total Items %d\n", bitmap.cardinality()); + Assert.assertTrue(bitmap.cardinality() == 1); + } + + /** + * Test clear function + */ + @Test + public void testClear() { + System.out.println("testing Clear"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(5); + bitmap.clear(); + bitmap.set(7); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.getPositions().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); + Assert.assertTrue(7 == bitmap.toArray()[0]); + bitmap.clear(); + bitmap.set(5000); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.getPositions().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); + bitmap.set(5001); + bitmap.set(5005); + bitmap.set(5100); + bitmap.set(5500); + bitmap.clear(); + bitmap.set(5); + bitmap.set(7); + bitmap.set(1000); + bitmap.set(1001); + Assert.assertTrue(4 == bitmap.cardinality()); + List positions = bitmap.getPositions(); + Assert.assertTrue(4 == positions.size()); + Assert.assertTrue(5 == positions.get(0).intValue()); + Assert.assertTrue(7 == positions.get(1).intValue()); + Assert.assertTrue(1000 == positions.get(2).intValue()); + Assert.assertTrue(1001 == positions.get(3).intValue()); + } + + /** + * Test ewah compressed bitmap. 
+ */ + @Test + public void testEWAHCompressedBitmap() { + System.out.println("testing EWAH"); + int zero = 0; + int specialval = 1 | (1 << 4) | (1 << 31); + int notzero = ~zero; + EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); + myarray1.add(zero); + myarray1.add(zero); + myarray1.add(zero); + myarray1.add(specialval); + myarray1.add(specialval); + myarray1.add(notzero); + myarray1.add(zero); + Assert.assertEquals(myarray1.getPositions().size(), 6 + 32); + EWAHCompressedBitmap32 myarray2 = new EWAHCompressedBitmap32(); + myarray2.add(zero); + myarray2.add(specialval); + myarray2.add(specialval); + myarray2.add(notzero); + myarray2.add(zero); + myarray2.add(zero); + myarray2.add(zero); + Assert.assertEquals(myarray2.getPositions().size(), 6 + 32); + List data1 = myarray1.getPositions(); + List data2 = myarray2.getPositions(); + Vector logicalor = new Vector(); + { + HashSet tmp = new HashSet(); + tmp.addAll(data1); + tmp.addAll(data2); + logicalor.addAll(tmp); + } + Collections.sort(logicalor); + Vector logicaland = new Vector(); + logicaland.addAll(data1); + logicaland.retainAll(data2); + Collections.sort(logicaland); + EWAHCompressedBitmap32 arrayand = myarray1.and(myarray2); + Assert.assertTrue(arrayand.getPositions().equals(logicaland)); + EWAHCompressedBitmap32 arrayor = myarray1.or(myarray2); + Assert.assertTrue(arrayor.getPositions().equals(logicalor)); + EWAHCompressedBitmap32 arrayandbis = myarray2.and(myarray1); + Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); + EWAHCompressedBitmap32 arrayorbis = myarray2.or(myarray1); + Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); + EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); + for (Integer i : myarray1.getPositions()) { + x.set(i.intValue()); + } + Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); + x = new EWAHCompressedBitmap32(); + for (Integer i : myarray2.getPositions()) { + x.set(i.intValue()); + } + 
Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); + x = new EWAHCompressedBitmap32(); + for (Iterator k = myarray1.iterator(); k.hasNext();) { + x.set(extracted(k).intValue()); + } + Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); + x = new EWAHCompressedBitmap32(); + for (Iterator k = myarray2.iterator(); k.hasNext();) { + x.set(extracted(k).intValue()); + } + Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); + } + + /** + * Test externalization. + * + * @throws IOException + * Signals that an I/O exception has occurred. + */ + @Test + public void testExternalization() throws IOException { + System.out.println("testing EWAH externalization"); + EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); + int[] val = { 5, 4400, 44600, 55400, 1000000 }; + for (int k = 0; k < val.length; ++k) { + ewcb.set(val[k]); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oo = new ObjectOutputStream(bos); + ewcb.writeExternal(oo); + oo.close(); + ewcb = null; + ewcb = new EWAHCompressedBitmap32(); + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + ewcb.readExternal(new ObjectInputStream(bis)); + List result = ewcb.getPositions(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertTrue(result.get(k).intValue() == val[k]); + } + } + + @Test + public void testExtremeRange() { + System.out.println("testing EWAH at its extreme range"); + EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); + int N = 1024; + for (int i = 0; i < N; ++i) { + myarray1.set(Integer.MAX_VALUE - 32 - N + i); + Assert.assertTrue(myarray1.cardinality() == i + 1); + int[] val = myarray1.toArray(); + Assert.assertTrue(val[0] == Integer.MAX_VALUE - 32 - N); + } + } + + /** + * Test the intersects method + */ + @Test + public void testIntersectsMethod() { + System.out.println("testing Intersets Bug"); + EWAHCompressedBitmap32 
bitmap = new EWAHCompressedBitmap32(); + bitmap.set(1); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(1); + bitmap2.set(11); + bitmap2.set(111); + bitmap2.set(1111111); + bitmap2.set(11111111); + Assert.assertTrue(bitmap.intersects(bitmap2)); + Assert.assertTrue(bitmap2.intersects(bitmap)); + + EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(101); + EWAHCompressedBitmap32 bitmap4 = new EWAHCompressedBitmap32(); + for (int i = 0; i < 100; i++) { + bitmap4.set(i); + } + Assert.assertFalse(bitmap3.intersects(bitmap4)); + Assert.assertFalse(bitmap4.intersects(bitmap3)); + + EWAHCompressedBitmap32 bitmap5 = new EWAHCompressedBitmap32(); + bitmap5.set(0); + bitmap5.set(10); + bitmap5.set(20); + EWAHCompressedBitmap32 bitmap6 = new EWAHCompressedBitmap32(); + bitmap6.set(1); + bitmap6.set(11); + bitmap6.set(21); + bitmap6.set(1111111); + bitmap6.set(11111111); + Assert.assertFalse(bitmap5.intersects(bitmap6)); + Assert.assertFalse(bitmap6.intersects(bitmap5)); + + bitmap5.set(21); + Assert.assertTrue(bitmap5.intersects(bitmap6)); + Assert.assertTrue(bitmap6.intersects(bitmap5)); + + EWAHCompressedBitmap32 bitmap7 = new EWAHCompressedBitmap32(); + bitmap7.set(1); + bitmap7.set(10); + bitmap7.set(20); + bitmap7.set(1111111); + bitmap7.set(11111111); + EWAHCompressedBitmap32 bitmap8 = new EWAHCompressedBitmap32(); + for (int i = 0; i < 1000; i++) { + if (i != 1 && i != 10 && i != 20) { + bitmap8.set(i); + } + } + Assert.assertFalse(bitmap7.intersects(bitmap8)); + Assert.assertFalse(bitmap8.intersects(bitmap7)); + } + + /** + * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound + * exception. 
+ */ + @Test + public void testLargeEWAHCompressedBitmap() { + System.out.println("testing EWAH over a large array"); + EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); + int N = 11000000; + for (int i = 0; i < N; ++i) { + myarray1.set(i); + } + Assert.assertTrue(myarray1.sizeInBits() == N); + } + + /** + * Test massive and. + */ + @Test + public void testMassiveAnd() { + System.out.println("testing massive logical and"); + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[1024]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap32 answer = ewah[0]; + for (int k = 1; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + // result should be empty + if (answer.getPositions().size() != 0) + System.out.println(answer.toDebugString()); + Assert.assertTrue(answer.getPositions().size() == 0); + Assert.assertTrue(EWAHCompressedBitmap32.and(ewah).getPositions() + .size() == 0); + } + + /** + * Test massive and not. 
+ */ + @Test + public void testMassiveAndNot() { + System.out.println("testing massive and not"); + final int N = 1024; + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap32 answer = ewah[0]; + EWAHCompressedBitmap32 answer2 = ewah[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.andNot(ewah[k]); + EWAHCompressedBitmap32 copy = null; + try { + copy = ewah[k].clone(); + copy.not(); + answer2.and(copy); + assertEqualsPositions(answer, answer2); + } catch (CloneNotSupportedException e) { + e.printStackTrace(); + } + } + } + + @Test + public void testsetSizeInBits() { + System.out.println("testing setSizeInBits"); + for (int k = 0; k < 4096; ++k) { + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + ewah.setSizeInBits(k,false); + Assert.assertEquals(ewah.sizeinbits, k); + Assert.assertEquals(ewah.cardinality(), 0); + EWAHCompressedBitmap32 ewah2 = new EWAHCompressedBitmap32(); + ewah2.setSizeInBits(k, false); + Assert.assertEquals(ewah2.sizeinbits, k); + Assert.assertEquals(ewah2.cardinality(), 0); + EWAHCompressedBitmap32 ewah3 = new EWAHCompressedBitmap32(); + for (int i = 0; i < k; ++i) { + ewah3.set(i); + } + Assert.assertEquals(ewah3.sizeinbits, k); + Assert.assertEquals(ewah3.cardinality(), k); + EWAHCompressedBitmap32 ewah4 = new EWAHCompressedBitmap32(); + ewah4.setSizeInBits(k, true); + Assert.assertEquals(ewah4.sizeinbits, k); + Assert.assertEquals(ewah4.cardinality(), k); + } + } + + /** + * Test massive or. 
+ */ + @Test + public void testMassiveOr() { + System.out + .println("testing massive logical or (can take a couple of minutes)"); + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + EWAHCompressedBitmap32 answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + EWAHCompressedBitmap32 tmp = answer.or(ewah[k]); + bitsetanswer.or(bset[k]); + answer = tmp; + assertEqualsPositions(bitsetanswer, answer); + } + assertEqualsPositions(bitsetanswer, answer); + assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap32.or(ewah)); + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer.toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + } + + /** + * Test massive xor. 
+ */ + @Test + public void testMassiveXOR() { + System.out + .println("testing massive xor (can take a couple of minutes)"); + final int N = 16; + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap32 answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.xor(ewah[k]); + bitsetanswer.xor(bset[k]); + assertEqualsPositions(bitsetanswer, answer); + } + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer.toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + + @Test + public void testMultiAnd() { + System.out.println("testing MultiAnd"); + // test bitmap3 has a literal word while bitmap1/2 have a run of 1 + EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.addStreamOfEmptyWords(true, 1000); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.addStreamOfEmptyWords(true, 2000); + EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // equal + bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.set(35); + bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(35); + bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(35); + + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // same number of words for each + bitmap3.set(63); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // one word bigger + bitmap3.set(64); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // two words bigger + bitmap3.set(130); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // test that result can 
still be appended to + EWAHCompressedBitmap32 resultBitmap = EWAHCompressedBitmap32.and( + bitmap1, bitmap2, bitmap3); + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; + for (int i = 0; i < k; ++i) + shortewah[i] = ewah[i]; + assertAndEquals(shortewah); + } + } + } + + @Test + public void testMultiOr() { + System.out.println("testing MultiOr"); + // test bitmap3 has a literal word while bitmap1/2 have a run of 0 + EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.set(1000); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(2000); + EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + EWAHCompressedBitmap32 expected = bitmap1.or(bitmap2).or(bitmap3); + + assertEquals(expected, + EWAHCompressedBitmap32.or(bitmap1, bitmap2, bitmap3)); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; + for (int i = 0; i < k; ++i) + shortewah[i] = ewah[i]; + assertOrEquals(shortewah); + } + } + + } + + /** + * Test not. 
(Based on an idea by Ciaran Jessup) + */ + @Test + public void testNot() { + System.out.println("testing not"); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + for (int i = 0; i <= 184; ++i) { + ewah.set(i); + } + Assert.assertEquals(ewah.cardinality(), 185); + ewah.not(); + Assert.assertEquals(ewah.cardinality(), 0); + } + + @Test + public void testOrCardinality() { + System.out.println("testing Or Cardinality"); + for (int N = 0; N < 1024; ++N) { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + for (int i = 0; i < N; i++) { + bitmap.set(i); + } + bitmap.set(1025); + bitmap.set(1026); + Assert.assertEquals(N + 2, bitmap.cardinality()); + EWAHCompressedBitmap32 orbitmap = bitmap.or(bitmap); + assertEquals(orbitmap, bitmap); + Assert.assertEquals(N + 2, orbitmap.cardinality()); + if (N + 2 != bitmap.orCardinality(new EWAHCompressedBitmap32())) { + System.out.println("N = " + N); + System.out.println(bitmap.toDebugString()); + System.out.println("cardinality = " + bitmap.cardinality()); + System.out.println("orCardinality = " + + bitmap.orCardinality(new EWAHCompressedBitmap32())); + } + + Assert.assertEquals(N + 2, + bitmap.orCardinality(new EWAHCompressedBitmap32())); + } + } + + + /** + * Test sets and gets. 
+ */ + @Test + public void testSetGet() { + System.out.println("testing EWAH set/get"); + EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); + int[] val = { 5, 4400, 44600, 55400, 1000000 }; + for (int k = 0; k < val.length; ++k) { + ewcb.set(val[k]); + } + List result = ewcb.getPositions(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertEquals(result.get(k).intValue(), val[k]); + } + } + + @Test + public void testHashCode() { + System.out.println("testing hashCode"); + EWAHCompressedBitmap32 ewcb = EWAHCompressedBitmap32.bitmapOf(50, 70) + .and(EWAHCompressedBitmap32.bitmapOf(50, 1000)); + Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50), ewcb); + Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50).hashCode(), + ewcb.hashCode()); + } + + @Test + public void testSetSizeInBits() { + System.out.println("testing SetSizeInBits"); + testSetSizeInBits(130, 131); + testSetSizeInBits(63, 64); + testSetSizeInBits(64, 65); + testSetSizeInBits(64, 128); + testSetSizeInBits(35, 131); + testSetSizeInBits(130, 400); + testSetSizeInBits(130, 191); + testSetSizeInBits(130, 192); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(31); + bitmap.setSizeInBits(130, false); + bitmap.set(131); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(31); + jdkBitmap.set(131); + assertEquals(jdkBitmap, bitmap); + } - + /** + * Test with parameters. + * + * @throws IOException + * Signals that an I/O exception has occurred. + */ + @Test + public void testWithParameters() throws IOException { + System.out + .println("These tests can run for several minutes. Please be patient."); + for (int k = 2; k < 1 << 24; k *= 8) + shouldSetBits(k); + PolizziTest(64); + PolizziTest(128); + PolizziTest(256); + PolizziTest(2048); + System.out.println("Your code is probably ok."); + } + + /** + * Pseudo-non-deterministic test inspired by S.J.vanSchaik. 
(Yes, + * non-deterministic tests are bad, but the test is actually deterministic.) + */ + @Test + public void vanSchaikTest() { + System.out.println("testing vanSchaikTest (this takes some time)"); + final int totalNumBits = 32768; + final double odds = 0.9; + Random rand = new Random(323232323); + for (int t = 0; t < 100; t++) { + int numBitsSet = 0; + EWAHCompressedBitmap32 cBitMap = new EWAHCompressedBitmap32(); + for (int i = 0; i < totalNumBits; i++) { + if (rand.nextDouble() < odds) { + cBitMap.set(i); + numBitsSet++; + } + } + Assert.assertEquals(cBitMap.cardinality(), numBitsSet); + } + + } + + /** + * Function used in a test inspired by Federico Fissore. + * + * @param size + * the number of set bits + * @param seed + * the random seed + * @return the pseudo-random array int[] + */ + public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { + Random random = new Random(seed); + // build raw int array + int[] bits = new int[size]; + for (int i = 0; i < bits.length; i++) { + bits[i] = random.nextInt(TEST_BS_SIZE); + } + // might generate duplicates + Arrays.sort(bits); + // first count how many distinct values + int counter = 0; + int oldx = -1; + for (int x : bits) { + if (x != oldx) + ++counter; + oldx = x; + } + // then construct new array + int[] answer = new int[counter]; + counter = 0; + oldx = -1; + for (int x : bits) { + if (x != oldx) { + answer[counter] = x; + ++counter; + } + oldx = x; + } + return answer; + } + + /** + * Test inspired by Bilal Tayara + */ + @Test + public void TayaraTest() { + System.out.println("Tayara test"); + for (int offset = 64; offset < (1 << 30); offset *= 2) { + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + for (int k = 0; k < 64; ++k) { + a.set(offset + k); + b.set(offset + k); + } + if (!a.and(b).equals(a)) + throw new RuntimeException("bug"); + if (!a.or(b).equals(a)) + throw new RuntimeException("bug"); + } + } - @Test + @Test 
public void TestCloneEwahCompressedBitArray() throws CloneNotSupportedException { System.out.println("testing EWAH clone"); @@ -918,422 +1037,428 @@ EWAHCompressedBitmap32 b; - b = (EWAHCompressedBitmap32) a.clone(); + b = a.clone(); a.setSizeInBits(487123, false); b.setSizeInBits(487123, false); Assert.assertTrue(a.equals(b)); } - - /** - * a non-deterministic test proposed by Marc Polizzi. - * - * @param maxlength - * the maximum uncompressed size of the bitmap - */ - public static void PolizziTest(int maxlength) { - System.out.println("Polizzi test with max length = " + maxlength); - for (int k = 0; k < 10000; ++k) { - final Random rnd = new Random(); - final EWAHCompressedBitmap32 ewahBitmap1 = new EWAHCompressedBitmap32(); - final BitSet jdkBitmap1 = new BitSet(); - final EWAHCompressedBitmap32 ewahBitmap2 = new EWAHCompressedBitmap32(); - final BitSet jdkBitmap2 = new BitSet(); - final EWAHCompressedBitmap32 ewahBitmap3 = new EWAHCompressedBitmap32(); - final BitSet jdkBitmap3 = new BitSet(); - final int len = rnd.nextInt(maxlength); - for (int pos = 0; pos < len; pos++) { // random *** number of bits set *** - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap1.set(pos); - jdkBitmap1.set(pos); - } - if (rnd.nextInt(11) == 0) { // random *** increasing *** values - ewahBitmap2.set(pos); - jdkBitmap2.set(pos); - } - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap3.set(pos); - jdkBitmap3.set(pos); - } - } - assertEquals(jdkBitmap1, ewahBitmap1); - assertEquals(jdkBitmap2, ewahBitmap2); - assertEquals(jdkBitmap3, ewahBitmap3); - // XOR - { - final EWAHCompressedBitmap32 xorEwahBitmap = ewahBitmap1 - .xor(ewahBitmap2); - final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); - xorJdkBitmap.xor(jdkBitmap2); - assertEquals(xorJdkBitmap, xorEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap1 - .and(ewahBitmap2); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - 
andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap2 - .and(ewahBitmap1); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - assertEquals(andJdkBitmap, - EWAHCompressedBitmap32.and(ewahBitmap1, ewahBitmap2)); - } - // MULTI AND - { - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - andJdkBitmap.and(jdkBitmap3); - assertEquals(andJdkBitmap, - EWAHCompressedBitmap32.and(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(andJdkBitmap, - EWAHCompressedBitmap32.and(ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(andJdkBitmap.cardinality(), EWAHCompressedBitmap32 - .andCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - } - // AND NOT - { - final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap1 - .andNot(ewahBitmap2); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); - andNotJdkBitmap.andNot(jdkBitmap2); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // AND NOT - { - final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap2 - .andNot(ewahBitmap1); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); - andNotJdkBitmap.andNot(jdkBitmap1); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // OR - { - final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap1.or(ewahBitmap2); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - assertEquals(orJdkBitmap, - EWAHCompressedBitmap32.or(ewahBitmap1, ewahBitmap2)); - Assert.assertEquals(orEwahBitmap.cardinality(), - ewahBitmap1.orCardinality(ewahBitmap2)); - } - // OR - { - final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap2.or(ewahBitmap1); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, 
orEwahBitmap); - } - // MULTI OR - { - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - orJdkBitmap.or(jdkBitmap3); - assertEquals(orJdkBitmap, - EWAHCompressedBitmap32.or(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(orJdkBitmap, - EWAHCompressedBitmap32.or(ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(orJdkBitmap.cardinality(), EWAHCompressedBitmap32 - .orCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); - } - } - } - - /** - * Pseudo-non-deterministic test inspired by Federico Fissore. - * - * @param length - * the number of set bits in a bitmap - */ - public static void shouldSetBits(int length) { - System.out.println("testing shouldSetBits " + length); - int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - System.out.println(" ... setting " + bitsToSet.length + " values"); - for (int i : bitsToSet) { - ewah.set(i); - } - System.out.println(" ... verifying " + bitsToSet.length + " values"); - equal(ewah.iterator(), bitsToSet); - System.out.println(" ... 
checking cardinality"); - Assert.assertEquals(bitsToSet.length, ewah.cardinality()); - } - - - @Test - public void testSizeInBits1() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.setSizeInBits(1, false); - bitmap.not(); - Assert.assertEquals(1, bitmap.cardinality()); - } - - @Test - public void testHasNextSafe() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertTrue(it.hasNext()); - Assert.assertEquals(0, it.next()); - } - - - @Test - public void testHasNextSafe2() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertEquals(0, it.next()); - } - - @Test - public void testInfiniteLoop() { - System.out.println("Testing for an infinite loop"); + + /** + * a non-deterministic test proposed by Marc Polizzi. + * + * @param maxlength + * the maximum uncompressed size of the bitmap + */ + public static void PolizziTest(int maxlength) { + System.out.println("Polizzi test with max length = " + maxlength); + for (int k = 0; k < 10000; ++k) { + final Random rnd = new Random(); + final EWAHCompressedBitmap32 ewahBitmap1 = new EWAHCompressedBitmap32(); + final BitSet jdkBitmap1 = new BitSet(); + final EWAHCompressedBitmap32 ewahBitmap2 = new EWAHCompressedBitmap32(); + final BitSet jdkBitmap2 = new BitSet(); + final EWAHCompressedBitmap32 ewahBitmap3 = new EWAHCompressedBitmap32(); + final BitSet jdkBitmap3 = new BitSet(); + final int len = rnd.nextInt(maxlength); + for (int pos = 0; pos < len; pos++) { // random *** number of bits + // set *** + if (rnd.nextInt(7) == 0) { // random *** increasing *** values + ewahBitmap1.set(pos); + jdkBitmap1.set(pos); + } + if (rnd.nextInt(11) == 0) { // random *** increasing *** values + ewahBitmap2.set(pos); + jdkBitmap2.set(pos); + } + if (rnd.nextInt(7) == 0) { // random *** increasing *** values + ewahBitmap3.set(pos); + 
jdkBitmap3.set(pos); + } + } + assertEquals(jdkBitmap1, ewahBitmap1); + assertEquals(jdkBitmap2, ewahBitmap2); + assertEquals(jdkBitmap3, ewahBitmap3); + // XOR + { + final EWAHCompressedBitmap32 xorEwahBitmap = ewahBitmap1 + .xor(ewahBitmap2); + final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); + xorJdkBitmap.xor(jdkBitmap2); + assertEquals(xorJdkBitmap, xorEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap1 + .and(ewahBitmap2); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap2 + .and(ewahBitmap1); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap32.and(ewahBitmap1, ewahBitmap2)); + } + // MULTI AND + { + final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); + andJdkBitmap.and(jdkBitmap2); + andJdkBitmap.and(jdkBitmap3); + assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and( + ewahBitmap1, ewahBitmap2, ewahBitmap3)); + assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and( + ewahBitmap3, ewahBitmap2, ewahBitmap1)); + Assert.assertEquals(andJdkBitmap.cardinality(), + EWAHCompressedBitmap32.andCardinality(ewahBitmap1, + ewahBitmap2, ewahBitmap3)); + } + // AND NOT + { + final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap1 + .andNot(ewahBitmap2); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); + andNotJdkBitmap.andNot(jdkBitmap2); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // AND NOT + { + final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap2 + .andNot(ewahBitmap1); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); + andNotJdkBitmap.andNot(jdkBitmap1); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // OR + { + final EWAHCompressedBitmap32 orEwahBitmap = 
ewahBitmap1 + .or(ewahBitmap2); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap32.or(ewahBitmap1, ewahBitmap2)); + Assert.assertEquals(orEwahBitmap.cardinality(), + ewahBitmap1.orCardinality(ewahBitmap2)); + } + // OR + { + final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap2 + .or(ewahBitmap1); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + } + // MULTI OR + { + final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); + orJdkBitmap.or(jdkBitmap2); + orJdkBitmap.or(jdkBitmap3); + assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or( + ewahBitmap1, ewahBitmap2, ewahBitmap3)); + assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or( + ewahBitmap3, ewahBitmap2, ewahBitmap1)); + Assert.assertEquals(orJdkBitmap.cardinality(), + EWAHCompressedBitmap32.orCardinality(ewahBitmap1, + ewahBitmap2, ewahBitmap3)); + } + } + } + + /** + * Pseudo-non-deterministic test inspired by Federico Fissore. + * + * @param length + * the number of set bits in a bitmap + */ + public static void shouldSetBits(int length) { + System.out.println("testing shouldSetBits " + length); + int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + System.out.println(" ... setting " + bitsToSet.length + " values"); + for (int i : bitsToSet) { + ewah.set(i); + } + System.out.println(" ... verifying " + bitsToSet.length + " values"); + equal(ewah.iterator(), bitsToSet); + System.out.println(" ... 
checking cardinality"); + Assert.assertEquals(bitsToSet.length, ewah.cardinality()); + } + + @Test + public void testSizeInBits1() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.setSizeInBits(1, false); + bitmap.not(); + Assert.assertEquals(1, bitmap.cardinality()); + } + + @Test + public void testHasNextSafe() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertTrue(it.hasNext()); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testHasNextSafe2() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testInfiniteLoop() { + System.out.println("Testing for an infinite loop"); EWAHCompressedBitmap32 b1 = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b2 = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b3 = new EWAHCompressedBitmap32(); - b3.setSizeInBits(5); + b3.setSizeInBits(5,false); b1.set(2); b2.set(4); EWAHCompressedBitmap32.and(b1, b2, b3); EWAHCompressedBitmap32.or(b1, b2, b3); - } - - @Test - public void testSizeInBits2() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.setSizeInBits(1, true); - bitmap.not(); - Assert.assertEquals(0, bitmap.cardinality()); - } - - private static void assertAndEquals(EWAHCompressedBitmap32... bitmaps) { - EWAHCompressedBitmap32 expected = bitmaps[0]; - for (int i = 1; i < bitmaps.length; i++) { - expected = expected.and(bitmaps[i]); - } - assertEquals(expected, EWAHCompressedBitmap32.and(bitmaps)); - } - - private static void assertEquals(EWAHCompressedBitmap32 expected, - EWAHCompressedBitmap32 actual) { - Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); - assertEqualsPositions(expected, actual); - } - - private static void assertOrEquals(EWAHCompressedBitmap32... 
bitmaps) { - EWAHCompressedBitmap32 expected = bitmaps[0]; - for (int i = 1; i < bitmaps.length; i++) { - expected = expected.or(bitmaps[i]); - } - assertEquals(expected, EWAHCompressedBitmap32.or(bitmaps)); - } - - /** - * Extracted. - * - * @param bits - * the bits - * @return the integer - */ - private static Integer extracted(final Iterator bits) { - return bits.next(); - } - - private static void testSetSizeInBits(int size, int nextBit) { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.setSizeInBits(size, false); - bitmap.set(nextBit); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(nextBit); - assertEquals(jdkBitmap, bitmap); - } - - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi - * - * @param jdkBitmap - * the uncompressed bitmap - * @param ewahBitmap - * the compressed bitmap - */ - static void assertCardinality(BitSet jdkBitmap, - EWAHCompressedBitmap32 ewahBitmap) { - final int c1 = jdkBitmap.cardinality(); - final int c2 = ewahBitmap.cardinality(); - Assert.assertEquals(c1, c2); - } - - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi. 
- * - * @param jdkBitmap - * the uncompressed bitmap - * @param ewahBitmap - * the compressed bitmap - */ - static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { - assertEqualsIterator(jdkBitmap, ewahBitmap); - assertEqualsPositions(jdkBitmap, ewahBitmap); - assertCardinality(jdkBitmap, ewahBitmap); - } - - static void assertEquals(int[] v, List p) { - assertEquals(p, v); - } - - static void assertEquals(List p, int[] v) { - if (v.length != p.size()) - throw new RuntimeException("Different lengths " + v.length + " " - + p.size()); - for (int k = 0; k < v.length; ++k) - if (v[k] != p.get(k).intValue()) - throw new RuntimeException("expected equal at " + k + " " + v[k] + " " - + p.get(k)); - } - - // - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi - * - * @param jdkBitmap - * the jdk bitmap - * @param ewahBitmap - * the ewah bitmap - */ - static void assertEqualsIterator(BitSet jdkBitmap, - EWAHCompressedBitmap32 ewahBitmap) { - final Vector positions = new Vector(); - final Iterator bits = ewahBitmap.iterator(); - while (bits.hasNext()) { - final int bit = extracted(bits).intValue(); - Assert.assertTrue(jdkBitmap.get(bit)); - positions.add(new Integer(bit)); - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException("iterator: bitset got different bits"); - } - } - } - - // part of a test contributed by Marc Polizzi - /** - * Assert equals positions. 
- * - * @param jdkBitmap - * the jdk bitmap - * @param ewahBitmap - * the ewah bitmap - */ - static void assertEqualsPositions(BitSet jdkBitmap, - EWAHCompressedBitmap32 ewahBitmap) { - final List positions = ewahBitmap.getPositions(); - for (int position : positions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException("positions: bitset got different bits"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException("positions: bitset got different bits"); - } - } - // we check again - final int[] fastpositions = ewahBitmap.toArray(); - for (int position : fastpositions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - int index = Arrays.binarySearch(fastpositions, pos); - if (index < 0) - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - if (fastpositions[index] != pos) - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - } - } - - /** - * Assert equals positions. 
- * - * @param ewahBitmap1 - * the ewah bitmap1 - * @param ewahBitmap2 - * the ewah bitmap2 - */ - static void assertEqualsPositions(EWAHCompressedBitmap32 ewahBitmap1, - EWAHCompressedBitmap32 ewahBitmap2) { - final List positions1 = ewahBitmap1.getPositions(); - final List positions2 = ewahBitmap2.getPositions(); - if (!positions1.equals(positions2)) - throw new RuntimeException( - "positions: alternative got different bits (two bitmaps)"); - // - final int[] fastpositions1 = ewahBitmap1.toArray(); - assertEquals(fastpositions1, positions1); - final int[] fastpositions2 = ewahBitmap2.toArray(); - assertEquals(fastpositions2, positions2); - if (!Arrays.equals(fastpositions1, fastpositions2)) - throw new RuntimeException( - "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); - } - - /** - * Convenience function to assess equality between a compressed bitset and an - * uncompressed bitset - * - * @param x - * the compressed bitset/bitmap - * @param y - * the uncompressed bitset/bitmap - */ - static void equal(EWAHCompressedBitmap32 x, BitSet y) { - Assert.assertEquals(x.cardinality(), y.cardinality()); - for (int i : x.getPositions()) - Assert.assertTrue(y.get(i)); - } - - /** - * Convenience function to assess equality between an array and an iterator - * over Integers - * - * @param i - * the iterator - * @param array - * the array - */ - static void equal(Iterator i, int[] array) { - int cursor = 0; - while (i.hasNext()) { - int x = extracted(i).intValue(); - int y = array[cursor++]; - Assert.assertEquals(x, y); - } - } + } + + @Test + public void testSizeInBits2() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.setSizeInBits(1, true); + bitmap.not(); + Assert.assertEquals(0, bitmap.cardinality()); + } + + private static void assertAndEquals(EWAHCompressedBitmap32... 
bitmaps) { + EWAHCompressedBitmap32 expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.and(bitmaps[i]); + } + Assert.assertTrue(expected.equals(EWAHCompressedBitmap32.and(bitmaps))); + } + + private static void assertEquals(EWAHCompressedBitmap32 expected, + EWAHCompressedBitmap32 actual) { + Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); + assertEqualsPositions(expected, actual); + } + + private static void assertOrEquals(EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.or(bitmaps[i]); + } + assertEquals(expected, EWAHCompressedBitmap32.or(bitmaps)); + } + + /** + * Extracted. + * + * @param bits + * the bits + * @return the integer + */ + private static Integer extracted(final Iterator bits) { + return bits.next(); + } + + private static void testSetSizeInBits(int size, int nextBit) { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.setSizeInBits(size, false); + bitmap.set(nextBit); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(nextBit); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, part + * of a test contributed by Marc Polizzi + * + * @param jdkBitmap + * the uncompressed bitmap + * @param ewahBitmap + * the compressed bitmap + */ + static void assertCardinality(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + final int c1 = jdkBitmap.cardinality(); + final int c2 = ewahBitmap.cardinality(); + Assert.assertEquals(c1, c2); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, part + * of a test contributed by Marc Polizzi. 
+ * + * @param jdkBitmap + * the uncompressed bitmap + * @param ewahBitmap + * the compressed bitmap + */ + static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { + assertEqualsIterator(jdkBitmap, ewahBitmap); + assertEqualsPositions(jdkBitmap, ewahBitmap); + assertCardinality(jdkBitmap, ewahBitmap); + } + + static void assertEquals(int[] v, List p) { + assertEquals(p, v); + } + + static void assertEquals(List p, int[] v) { + if (v.length != p.size()) + throw new RuntimeException("Different lengths " + v.length + " " + + p.size()); + for (int k = 0; k < v.length; ++k) + if (v[k] != p.get(k).intValue()) + throw new RuntimeException("expected equal at " + k + " " + + v[k] + " " + p.get(k)); + } + + // + /** + * Assess equality between an uncompressed bitmap and a compressed one, part + * of a test contributed by Marc Polizzi + * + * @param jdkBitmap + * the jdk bitmap + * @param ewahBitmap + * the ewah bitmap + */ + static void assertEqualsIterator(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + final Vector positions = new Vector(); + final Iterator bits = ewahBitmap.iterator(); + while (bits.hasNext()) { + final int bit = extracted(bits).intValue(); + Assert.assertTrue(jdkBitmap.get(bit)); + positions.add(new Integer(bit)); + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "iterator: bitset got different bits"); + } + } + } + + // part of a test contributed by Marc Polizzi + /** + * Assert equals positions. 
+ * + * @param jdkBitmap + * the jdk bitmap + * @param ewahBitmap + * the ewah bitmap + */ + static void assertEqualsPositions(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + final List positions = ewahBitmap.getPositions(); + for (int position : positions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + // we check again + final int[] fastpositions = ewahBitmap.toArray(); + for (int position : fastpositions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + int index = Arrays.binarySearch(fastpositions, pos); + if (index < 0) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + if (fastpositions[index] != pos) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + + /** + * Assert equals positions. 
+ * + * @param ewahBitmap1 + * the ewah bitmap1 + * @param ewahBitmap2 + * the ewah bitmap2 + */ + static void assertEqualsPositions(EWAHCompressedBitmap32 ewahBitmap1, + EWAHCompressedBitmap32 ewahBitmap2) { + final List positions1 = ewahBitmap1.getPositions(); + final List positions2 = ewahBitmap2.getPositions(); + if (!positions1.equals(positions2)) + throw new RuntimeException( + "positions: alternative got different bits (two bitmaps)"); + // + final int[] fastpositions1 = ewahBitmap1.toArray(); + assertEquals(fastpositions1, positions1); + final int[] fastpositions2 = ewahBitmap2.toArray(); + assertEquals(fastpositions2, positions2); + if (!Arrays.equals(fastpositions1, fastpositions2)) + throw new RuntimeException( + "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); + } + + /** + * Convenience function to assess equality between a compressed bitset and + * an uncompressed bitset + * + * @param x + * the compressed bitset/bitmap + * @param y + * the uncompressed bitset/bitmap + */ + static void equal(EWAHCompressedBitmap32 x, BitSet y) { + Assert.assertEquals(x.cardinality(), y.cardinality()); + for (int i : x.getPositions()) + Assert.assertTrue(y.get(i)); + } + + /** + * Convenience function to assess equality between an array and an iterator + * over Integers + * + * @param i + * the iterator + * @param array + * the array + */ + static void equal(Iterator i, int[] array) { + int cursor = 0; + while (i.hasNext()) { + int x = extracted(i).intValue(); + int y = array[cursor++]; + Assert.assertEquals(x, y); + } + } - /** The Constant MEGA: a large integer. */ - private static final int MEGA = 8 * 1024 * 1024; + /** The Constant MEGA: a large integer. */ + private static final int MEGA = 8 * 1024 * 1024; - /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. */ - private static final int TEST_BS_SIZE = 8 * MEGA; + /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. 
*/ + private static final int TEST_BS_SIZE = 8 * MEGA; } diff -Nru libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java --- libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,119 @@ +package com.googlecode.javaewah32; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Tests for utility class. Sketchy for now. 
+ * + */ +@SuppressWarnings("javadoc") +public class IntIteratorOverIteratingRLWTest32 { + + @Test + // had problems with bitmaps beginning with two consecutive clean runs + public void testConsecClean() { + System.out + .println("testing int iteration, 2 consec clean runs starting with zeros"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 64; i < 128; ++i) + e.set(i); + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(64, ctr); + } + + @Test + public void testConsecCleanStartOnes() { + System.out + .println("testing int iteration, 2 consec clean runs starting with ones"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 0; i < 2 * 64; ++i) + e.set(i); + for (int i = 4 * 64; i < 5 * 64; ++i) + e.set(i); + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 64, ctr); + } + + @Test + public void testStartDirty() { + System.out.println("testing int iteration, no initial runs"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 1; i < 2 * 64; ++i) + e.set(i); + for (int i = 4 * 64; i < 5 * 64; ++i) + e.set(i); + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 64 - 1, ctr); + } + + @Test + public void testEmpty() { + System.out.println("testing int iteration over empty bitmap"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( + e.getIteratingRLW()); + assertFalse(ii.hasNext()); + } + + @Test + public void testRandomish() { + EWAHCompressedBitmap32 
e = new EWAHCompressedBitmap32(); + + int upperlimit = 100000; + for (int i = 0; i < upperlimit; ++i) { + double probabilityOfOne = i / (double) (upperlimit / 2); + if (probabilityOfOne > 1.0) + probabilityOfOne = 1.0; + if (Math.random() < probabilityOfOne) { + e.set(i); + } + } + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( + e.getIteratingRLW()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + + assertEquals(e.cardinality(), ctr); + System.out + .println("checking int iteration over a var density bitset of size " + + e.cardinality()); + + } + +} diff -Nru libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java --- libjavaewah-java-0.6.12/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,145 @@ +package com.googlecode.javaewah32; + +import static org.junit.Assert.*; +import java.util.Iterator; +import org.junit.Test; +import com.googlecode.javaewah.benchmark.ClusteredDataGenerator; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Tests specifically for iterators. 
+ * + */ +public class IteratorAggregationTest32 { + + /** + * @param N number of bitmaps to generate in each set + * @param nbr parameter determining the size of the arrays (in a log scale) + * @return an iterator over sets of bitmaps + */ + public static Iterator getCollections(final int N, final int nbr) { + final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); + return new Iterator() { + int sparsity = 1; + + @Override + public boolean hasNext() { + return this.sparsity < 5; + } + + @Override + public EWAHCompressedBitmap32[] next() { + int[][] data = new int[N][]; + int Max = (1 << (nbr + this.sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + this.sparsity += 3; + return ewah; + } + + @Override + public void remove() { + // unimplemented + } + + }; + + } + + /** + * + */ + @Test + public void testAnd() { + for (int N = 1; N < 10; ++N) { + System.out.println("testAnd N = " + N); + Iterator i = getCollections(N,3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.and(x); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32.bufferedand(IteratorUtil32 + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + + } + + /** + * + */ + @Test + public void testOr() { + for (int N = 1; N < 10; ++N) { + System.out.println("testOr N = " + N); + Iterator i = getCollections(N,3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.or(x); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32.bufferedor(IteratorUtil32 + .toIterators(x))); + 
assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + + + /** + * + */ + @SuppressWarnings("deprecation") + @Test + public void testWideOr() { + for (int nbr = 3; nbr <= 24; nbr += 3) { + for (int N = 100; N < 1000; N += 100) { + System.out.println("testWideOr N = " + N); + Iterator i = getCollections(N, 3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.or(x); + EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + FastAggregation32.legacy_orWithContainer(container, x); + assertTrue(container.equals(tanswer)); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32 + .bufferedor(IteratorUtil32.toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + } + /** + * + */ + @Test + public void testXor() { + System.out.println("testXor "); + Iterator i = getCollections(2,3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = x[0].xor(x[1]); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32.bufferedxor( + x[0].getIteratingRLW(), x[1].getIteratingRLW())); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + +} diff -Nru libjavaewah-java-0.6.12/.travis.yml libjavaewah-java-0.7.9/.travis.yml --- libjavaewah-java-0.6.12/.travis.yml 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-0.7.9/.travis.yml 2013-11-12 14:31:20.000000000 +0000 @@ -0,0 +1,10 @@ +language: java + +jdk: + - oraclejdk7 + - openjdk7 + - openjdk6 + +install: true + +script: mvn test