diff -Nru libjavaewah-java-0.7.9/CHANGELOG libjavaewah-java-1.1.7/CHANGELOG --- libjavaewah-java-0.7.9/CHANGELOG 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/CHANGELOG 2019-11-08 21:55:59.000000000 +0000 @@ -1,3 +1,120 @@ +version 1.1.6 (April 26th 2016) + - Mostly just better testing + +version 1.1.5 (January 8th 2016) + - faster shift function (gssiyankai) + - ChunkIterator not iterating correctly #61 + +version 1.1.4 (December 17th 2015) + - Fixed issue 60: bitmap shift then or + +version 1.1.3 (December 3rd 2015) + - Fixed issue 59: ChunkIterator not iterating correctly + +version 1.1.2 (November 11th 2015) + - Fixed issue 58: ChunkIterator not iterating correctly? + +version 1.1.1 (November 7th 2015) + - Fixed issue 57: "The function insertLiteralWord in EWAHCompressedBitmap32 should have its last two lines in an else block. Otherwise, we get an IndexOutOfBoundsException when the number of literal words are more than RunningLengthWord32.LARGEST_LITERAL_COUNT" + +version 1.1.0 (October 28th 2015) + - Added "shift" function to shift a whole bitmap by b bits + +version 1.0.8 (October 14th 2015) + - Improving the performance of intersections and differences in some cases. + +version 1.0.7 (August 15th 2015) + - Fixing backward compatibility issue with versions prior to JDK 8. + +version 1.0.6 (August 15th 2015) + - Fixed bug in reverse iterators. + +version 1.0.5 (August 14th 2015) + - Fixed bug in reverse iterators. + +version 1.0.4 (August 12th 2015) + - Using priority queues for OR and XOR aggregation. + +version 1.0.3 (August 12th 2015) + - Simplified the multi-bitmap OR code, this should ensure more predictable performance + - Added functions to aggregate bitmaps provided by iterators. + +version 1.0.2 (April 10th 2015) + - More complete BitSet API. 
+ - Fixed issue #54 clear(int i) would clear ALL map + +version 1.0.1 (March 23rd 2015) + - Fixed: Some bug about EWAHCompressedBitmap.reverseIntIterator() (#53) + +version 1.0.0 (January 6th 2015) + - Both EWAHCompressedBitmap and BitSet can now support memory-file mapping. For best performance, we recommend the very latest OpenJDK. + +version 0.9.2 (December 4th 2014) + - Fixed issue #47 + +version 0.9.1 (December 2nd 2014) + - Fixed issue #28: setting a capacity of 0 breaks the bitmap + +version 0.9.0 (September 8th 2014) + - Fixed bug in setSizeInBits + - We can now set the bits in any order + +version 0.8.12 (August 25th 2014) + - Faster "isEmpty" method + - Introducing reverse iterators + +version 0.8.11 (August 15th 2014) + - Refactoring: unify addStreamOfEmptyWords and fastaddStreamOfEmptyWord + - Optimize setSizeInBits + +version 0.8.10 (August 13th 2014) + - getFirstSetBit + - Capitalized some constants + - Implement composition using chunk iterators + +version 0.8.9 (August 11th 2014) + - Fixed bug in clearIntIterator with bitmap of zeros + +version 0.8.8 (August 11th 2014) + - added compose functions + +version 0.8.7 (July 18th 2014) + - added isEmpty methods + - methods setSizeInBits were renamed setSizeInBitsWithinLastWord and made safer + - we document better the behavior of iterator to solve this issue: https://github.com/lemire/javaewah/issues/35 + +version 0.8.6 (April 29th 2014) + - fixed bug in clone methods (Jean-Marc Astesana, issue 30) + - fixed bug in not methods (Jean-Marc Astesana, issue 31) + +version 0.8.5 (March 24th 2014) + - methods of type "ToContainer" will now clear the container before starting, for convenience + +version 0.8.4 (March 17th 2014) + - we can now iterate over "clear" bits + +version 0.8.3 (February 20th 2014) + - improved BitSet class + - renamed add to addWord (deprecation) + - renamed getPositions to toList (deprecation) + - clone no longer reports throwing an exception + +version 0.8.2 (February 1st 2014) + - 
removed the benchmark code + - cleaned up the documentation + - code reformatting + - optimized the extraction of the set bits (Shen Liang) + - added threshold function for 32-bit EWAH + +version 0.8.1 (January 7th 2014) + - Optimized threshold function + - created a nicer function call for the threshold function + - fixed documentation in README + +version 0.8.0 (January 2nd 2014) + - Introducing package symmetric for symmetric Boolean functions, implemented + a fast threshold function. + version 0.7.9 (November 12th 2013) - Spelled out the license in the source code to avoid any confusion @@ -64,7 +181,7 @@ version 0.6.6 (December 12, 2012) - - Fixed an off-by-one bug in setSizeInBits(final int size, final boolean defaultvalue). + - Fixed an off-by-one bug in setSizeInBits(final int size, final boolean defaultValue). - Added corresponding unit test. version 0.6.5 (November 26, 2012) @@ -137,7 +254,7 @@ version 0.4.3 (April 9, 2012) - fast aggregation through logical AND of many bitmaps using a new method -- fixed a rarely occuring bug in the set method due to faulty bitmap size extension +- fixed a rarely occurring bug in the set method due to faulty bitmap size extension version 0.4.2 (April 5, 2012) diff -Nru libjavaewah-java-0.7.9/debian/changelog libjavaewah-java-1.1.7/debian/changelog --- libjavaewah-java-0.7.9/debian/changelog 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/changelog 2021-02-01 10:33:20.000000000 +0000 @@ -1,3 +1,17 @@ +libjavaewah-java (1.1.7-1) unstable; urgency=medium + + * Team upload. 
+ * New upstream release + - Restored the EWAHCompressedBitmap.wordinbits constant to preserve + the backward compatibility + * Changed the name of the jar installed in /usr/share/java + * Removed the -java-doc package + * Standards-Version updated to 4.5.1 + * Switch to debhelper level 13 + * Fixed the watch file to track the releases with 2-digit minor versions + + -- Emmanuel Bourg Mon, 01 Feb 2021 11:33:20 +0100 + libjavaewah-java (0.7.9-1) unstable; urgency=medium * Team upload. diff -Nru libjavaewah-java-0.7.9/debian/compat libjavaewah-java-1.1.7/debian/compat --- libjavaewah-java-0.7.9/debian/compat 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/compat 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -12 diff -Nru libjavaewah-java-0.7.9/debian/control libjavaewah-java-1.1.7/debian/control --- libjavaewah-java-0.7.9/debian/control 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/control 2021-02-01 10:29:44.000000000 +0000 @@ -6,13 +6,12 @@ Jakub Adam , tony mancill Build-Depends: - debhelper (>= 12), + debhelper-compat (= 13), default-jdk, junit4, libmaven-bundle-plugin-java, - libmaven-javadoc-plugin-java, maven-debian-helper (>= 1.6.3) -Standards-Version: 4.3.0 +Standards-Version: 4.5.1 Vcs-Git: https://salsa.debian.org/java-team/libjavaewah-java.git Vcs-Browser: https://salsa.debian.org/java-team/libjavaewah-java Homepage: https://github.com/lemire/javaewah @@ -21,7 +20,6 @@ Architecture: all Depends: ${maven:Depends}, ${misc:Depends} Recommends: ${maven:OptionalDepends} -Suggests: libjavaewah-java-doc Description: Compressed variant of the Java bitset class The bit array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression. @@ -37,17 +35,3 @@ scheme implemented is always more efficient storage-wise than an uncompressed bitmap as implemented in the BitSet class). Unlike some alternatives, javaewah does not rely on a patented scheme. 
- -Package: libjavaewah-java-doc -Architecture: all -Section: doc -Depends: ${maven:DocDepends}, ${misc:Depends} -Recommends: ${maven:DocOptionalDepends} -Suggests: libjavaewah-java -Description: Compressed variant of the Java bitset class (documentation) - JavaEWAH is a word-aligned compressed variant of the Java bitset class. It uses - a 64-bit run-length encoding (RLE) compression scheme. It trades-off some - compression for better processing speed. It also has a 32-bit version which - compresses better, but is not as fast. - . - This package contains the API documentation of libjavaewah-java. diff -Nru libjavaewah-java-0.7.9/debian/libjavaewah-java-doc.doc-base.api libjavaewah-java-1.1.7/debian/libjavaewah-java-doc.doc-base.api --- libjavaewah-java-0.7.9/debian/libjavaewah-java-doc.doc-base.api 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/libjavaewah-java-doc.doc-base.api 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -Document: libjavaewah-java -Title: API Javadoc for JavaEWAH bitset class -Author: JavaEWAH team -Abstract: This is the API Javadoc provided for the - libjavaewah-java library. 
-Section: Programming - -Format: HTML -Index: /usr/share/doc/libjavaewah-java/api/index.html -Files: /usr/share/doc/libjavaewah-java/api/* diff -Nru libjavaewah-java-0.7.9/debian/libjavaewah-java-doc.install libjavaewah-java-1.1.7/debian/libjavaewah-java-doc.install --- libjavaewah-java-0.7.9/debian/libjavaewah-java-doc.install 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/libjavaewah-java-doc.install 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -target/apidocs/* usr/share/doc/libjavaewah-java/api diff -Nru libjavaewah-java-0.7.9/debian/libjavaewah-java.poms libjavaewah-java-1.1.7/debian/libjavaewah-java.poms --- libjavaewah-java-0.7.9/debian/libjavaewah-java.poms 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/libjavaewah-java.poms 2021-02-01 10:32:32.000000000 +0000 @@ -25,4 +25,4 @@ # --site-xml=: Optional, the location for site.xml if it needs to be installed. # Empty by default. [mh_install] # -pom.xml --no-parent +pom.xml --no-parent --java-lib --usj-name=javaewah diff -Nru libjavaewah-java-0.7.9/debian/maven.ignoreRules libjavaewah-java-1.1.7/debian/maven.ignoreRules --- libjavaewah-java-0.7.9/debian/maven.ignoreRules 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/maven.ignoreRules 2021-02-01 10:29:44.000000000 +0000 @@ -1,3 +1,6 @@ +* animal-sniffer-maven-plugin * * * * +* jacoco-maven-plugin * * * * * maven-gpg-plugin * * * * +* maven-javadoc-plugin * * * * * maven-source-plugin * * * * diff -Nru libjavaewah-java-0.7.9/debian/patches/01-backward-compatibility.patch libjavaewah-java-1.1.7/debian/patches/01-backward-compatibility.patch --- libjavaewah-java-0.7.9/debian/patches/01-backward-compatibility.patch 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/patches/01-backward-compatibility.patch 2021-02-01 10:29:58.000000000 +0000 @@ -0,0 +1,16 @@ +Description: Restore the compatibility with the previous releases +Author: Emmanuel Bourg +Forwarded: not-needed +--- 
a/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java ++++ b/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java +@@ -2133,6 +2133,10 @@ + */ + public static final int WORD_IN_BITS = 64; + ++ /** The Constant wordinbits represents the number of bits in a long. */ ++ @Deprecated ++ public static final int wordinbits = 64; ++ + static final long serialVersionUID = 1L; + + } diff -Nru libjavaewah-java-0.7.9/debian/patches/series libjavaewah-java-1.1.7/debian/patches/series --- libjavaewah-java-0.7.9/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/patches/series 2021-02-01 10:29:44.000000000 +0000 @@ -0,0 +1 @@ +01-backward-compatibility.patch diff -Nru libjavaewah-java-0.7.9/debian/rules libjavaewah-java-1.1.7/debian/rules --- libjavaewah-java-0.7.9/debian/rules 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/rules 2021-02-01 10:29:44.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/make -f %: - dh $@ --buildsystem=maven + dh $@ diff -Nru libjavaewah-java-0.7.9/debian/watch libjavaewah-java-1.1.7/debian/watch --- libjavaewah-java-0.7.9/debian/watch 2019-02-28 22:09:54.000000000 +0000 +++ libjavaewah-java-1.1.7/debian/watch 2021-02-01 10:29:44.000000000 +0000 @@ -1,2 +1,2 @@ version=4 -https://github.com/lemire/javaewah/releases .*/JavaEWAH-(\d+\.\d+\.\d+)\.tar\.gz +https://github.com/lemire/javaewah/releases .*/JavaEWAH-([\d\.]+)\.tar\.gz diff -Nru libjavaewah-java-0.7.9/example.java libjavaewah-java-1.1.7/example.java --- libjavaewah-java-0.7.9/example.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/example.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,52 +1,82 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; + import java.io.*; +import java.nio.ByteBuffer; /** - * @author lemire - * - */ -public class example { - - /** - * @param args arguments from the command line - * @throws IOException if an IO error occurs + * Simple illustrative example. 
+ * + * @author Daniel Lemire + * */ -public static void main(final String[] args) throws java.io.IOException { - EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0,2,64,1<<30); - EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1,3,64,1<<30); - System.out.println("bitmap 1: "+ewahBitmap1); - System.out.println("bitmap 2: "+ewahBitmap2); - // or - EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2); - System.out.println("bitmap 1 OR bitmap 2: "+orbitmap); - System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes"); - // and - EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2); - System.out.println("bitmap 1 AND bitmap 2: "+andbitmap); - System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes"); - // xor - EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2); - System.out.println("bitmap 1 XOR bitmap 2:"+xorbitmap); - System.out.println("memory usage: " + xorbitmap.sizeInBytes() + " bytes"); - // fast aggregation over many bitmaps - EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(55,5,1<<30); - EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4,66,1<<30); - System.out.println("bitmap 3: "+ewahBitmap3); - System.out.println("bitmap 4: "+ewahBitmap4); - andbitmap = EWAHCompressedBitmap.and(ewahBitmap1,ewahBitmap2, - ewahBitmap3,ewahBitmap4); - System.out.println("b1 AND b2 AND b3 AND b4: "+andbitmap); - // serialization - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - // Note: you could use a file output steam instead of ByteArrayOutputStream - ObjectOutputStream oo = new ObjectOutputStream(bos); - ewahBitmap1.writeExternal(oo); - oo.close(); - ewahBitmap1 = null; - ewahBitmap1 = new EWAHCompressedBitmap(); - ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - ewahBitmap1.readExternal(new ObjectInputStream(bis)); - System.out.println("bitmap 1 (recovered) : "+ewahBitmap1); - } +public class example +{ + + /** + * 
@param args + * arguments from the command line + * @throws IOException + * if an IO error occurs + */ + public static void main(final String[] args) throws Exception { + EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0, 2, 55, + 64, 1 << 30); + EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1, 3, 64, + 1 << 30); + System.out.println("bitmap 1: " + ewahBitmap1); + System.out.println("bitmap 2: " + ewahBitmap2); + // or + EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2); + System.out.println("bitmap 1 OR bitmap 2: " + orbitmap); + System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes"); + // and + EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2); + System.out.println("bitmap 1 AND bitmap 2: " + andbitmap); + System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes"); + // xor + EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2); + System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap); + System.out.println("memory usage: " + xorbitmap.sizeInBytes() + " bytes"); + // fast aggregation over many bitmaps + EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(5, 55, + 1 << 30); + EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4, 66, + 1 << 30); + System.out.println("bitmap 3: " + ewahBitmap3); + System.out.println("bitmap 4: " + ewahBitmap4); + andbitmap = EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3, + ewahBitmap4); + System.out.println("b1 AND b2 AND b3 AND b4: " + andbitmap); + // serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: you could use a file output steam instead of ByteArrayOutputStream + ewahBitmap1.serialize(new DataOutputStream(bos)); + EWAHCompressedBitmap ewahBitmap1new = new EWAHCompressedBitmap(); + byte[] bout = bos.toByteArray(); + ewahBitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream( + bout))); + System.out.println("bitmap 1 (recovered) : " + 
ewahBitmap1new); + if (!ewahBitmap1.equals(ewahBitmap1new)) + throw new RuntimeException("Will not happen"); + // + // we can use a ByteBuffer as backend for a bitmap + // which allows memory-mapped bitmaps + // + ByteBuffer bb = ByteBuffer.wrap(bout); + EWAHCompressedBitmap rmap = new EWAHCompressedBitmap(bb); + System.out.println("bitmap 1 (mapped) : " + rmap); + + if (!rmap.equals(ewahBitmap1)) + throw new RuntimeException("Will not happen"); + // + // support for threshold function (new as of version 0.8.0): + // mark as true a bit that occurs at least T times in the source + // bitmaps + // + EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2, + ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4); + System.out.println("threshold 2 : " + threshold2); + + } } diff -Nru libjavaewah-java-0.7.9/examples/BitSetMemoryMappingExample.java libjavaewah-java-1.1.7/examples/BitSetMemoryMappingExample.java --- libjavaewah-java-0.7.9/examples/BitSetMemoryMappingExample.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/examples/BitSetMemoryMappingExample.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,37 @@ +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +import com.googlecode.javaewah.datastructure.BitSet; +import com.googlecode.javaewah.datastructure.ImmutableBitSet; + + +public class BitSetMemoryMappingExample { + + public static void main(String[] args) throws IOException { + File tmpfile = File.createTempFile("javaewah", "bin"); + tmpfile.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(tmpfile); + BitSet Bitmap = BitSet.bitmapOf(0, 2, 55, 64, 512); + System.out.println("Created the bitmap " + Bitmap); + Bitmap.serialize(new 
DataOutputStream(fos)); + long totalcount = fos.getChannel().position(); + System.out.println("Serialized total count = " + totalcount + " bytes"); + fos.close(); + RandomAccessFile memoryMappedFile = new RandomAccessFile(tmpfile, "r"); + ByteBuffer bb = memoryMappedFile.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, totalcount); + ImmutableBitSet mapped = new ImmutableBitSet(bb.asLongBuffer()); + System.out.println("Mapped the bitmap " + mapped); + memoryMappedFile.close(); + if (!mapped.equals(Bitmap)) + throw new RuntimeException("Will not happen"); + } +} diff -Nru libjavaewah-java-0.7.9/examples/BitSetSimpleExample.java libjavaewah-java-1.1.7/examples/BitSetSimpleExample.java --- libjavaewah-java-0.7.9/examples/BitSetSimpleExample.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/examples/BitSetSimpleExample.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,57 @@ +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +import com.googlecode.javaewah.datastructure.BitSet; +import com.googlecode.javaewah.datastructure.ImmutableBitSet; + + +public class BitSetSimpleExample { + public static void main(String[] args) throws IOException { + BitSet Bitmap1 = BitSet.bitmapOf(0, 2, 55, 64, 512); + BitSet Bitmap2 = BitSet.bitmapOf(1, 3, 64, 512); + System.out.println("bitmap 1: " + Bitmap1); + System.out.println("bitmap 2: " + Bitmap2); + // or + BitSet orbitmap = Bitmap1.clone(); + orbitmap.or(Bitmap2); + System.out.println("bitmap 1 OR bitmap 2: " + orbitmap); + // and + BitSet andbitmap = Bitmap1.clone(); + andbitmap.and(Bitmap2); + System.out.println("bitmap 1 AND bitmap 2: " + andbitmap); + // xor + BitSet xorbitmap = Bitmap1.clone(); + xorbitmap.xor(Bitmap2); + 
System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap); + // serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: you could use a file output steam instead of ByteArrayOutputStream + Bitmap1.serialize(new DataOutputStream(bos)); + BitSet Bitmap1new = new BitSet(); + byte[] bout = bos.toByteArray(); + Bitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream(bout))); + System.out.println("bitmap 1 (recovered) : " + Bitmap1new); + if (!Bitmap1.equals(Bitmap1new)) + throw new RuntimeException("Will not happen"); + // + // we can use a ByteBuffer as backend for a bitmap + // which allows memory-mapped bitmaps + // + ByteBuffer bb = ByteBuffer.wrap(bout); + ImmutableBitSet rmap = new ImmutableBitSet(bb.asLongBuffer()); + System.out.println("bitmap 1 (mapped) : " + rmap); + + if (!rmap.equals(Bitmap1)) + throw new RuntimeException("Will not happen"); + + + } +} diff -Nru libjavaewah-java-0.7.9/examples/MemoryMappingExample.java libjavaewah-java-1.1.7/examples/MemoryMappingExample.java --- libjavaewah-java-0.7.9/examples/MemoryMappingExample.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/examples/MemoryMappingExample.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,26 @@ +import com.googlecode.javaewah.EWAHCompressedBitmap; +import java.io.*; +import java.nio.*; +import java.nio.channels.FileChannel; + + +public class MemoryMappingExample { + + public static void main(String[] args) throws IOException { + File tmpfile = File.createTempFile("javaewah", "bin"); + tmpfile.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(tmpfile); + EWAHCompressedBitmap ewahBitmap = EWAHCompressedBitmap.bitmapOf(0, 2, 55, + 64, 1 << 30); + System.out.println("Created the bitmap "+ewahBitmap); + ewahBitmap.serialize(new DataOutputStream(fos)); + long totalcount = fos.getChannel().position(); + System.out.println("Serialized total count = "+totalcount+" bytes"); + fos.close(); + RandomAccessFile 
memoryMappedFile = new RandomAccessFile(tmpfile, "r"); + ByteBuffer bb = memoryMappedFile.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, totalcount); + EWAHCompressedBitmap mapped = new EWAHCompressedBitmap(bb); + System.out.println("Mapped the bitmap "+mapped); + memoryMappedFile.close(); + } +} diff -Nru libjavaewah-java-0.7.9/examples/run.sh libjavaewah-java-1.1.7/examples/run.sh --- libjavaewah-java-0.7.9/examples/run.sh 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/examples/run.sh 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,15 @@ +cd .. && mvn -Dmaven.test.skip=true package && cd examples +echo "Running MemoryMappingExample" +javac -cp "../target/*" MemoryMappingExample.java && java -cp ../target/*:. MemoryMappingExample +echo + +echo "Running BitSetMemoryMappingExample" +javac -cp "../target/*" BitSetMemoryMappingExample.java && java -cp ../target/*:. BitSetMemoryMappingExample +echo + + +echo "Running BitSetSimpleExample" +javac -cp "../target/*" BitSetSimpleExample.java && java -cp ../target/*:. BitSetSimpleExample +echo + +rm *.class diff -Nru libjavaewah-java-0.7.9/LICENSE libjavaewah-java-1.1.7/LICENSE --- libjavaewah-java-0.7.9/LICENSE 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/LICENSE 2019-11-08 21:55:59.000000000 +0000 @@ -176,7 +176,7 @@ the same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] +(c) 2009-2016 Daniel Lemire (http://lemire.me/en/), Cliff Moon, David McIntosh (https://github.com/mctofu), Robert Becho (https://github.com/RBecho), Colby Ranger (https://github.com/crangeratgoogle), Veronika Zenz (https://github.com/veronikazenz), Owen Kaser (https://github.com/owenkaser), Gregory Ssi-Yan-Kai (https://github.com/gssiyankai), and Rory Graves (https://github.com/rorygraves) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff -Nru libjavaewah-java-0.7.9/pom.xml libjavaewah-java-1.1.7/pom.xml --- libjavaewah-java-0.7.9/pom.xml 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/pom.xml 2019-11-08 21:55:59.000000000 +0000 @@ -1,30 +1,28 @@ - 4.0.0 - - com.googlecode.javaewah - JavaEWAH - 0.7.9 - - bundle - - 1.6 - 1.6 - UTF-8 - - - - Apache 2 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - A business-friendly OSS license - - - - scm:git:git@github.com:lemire/javaewah.git - scm:git:git@github.com:lemire/javaewah.git - scm:git:git@github.com:lemire/javaewah.git - - + 4.0.0 + com.googlecode.javaewah + JavaEWAH + 1.1.7 + bundle + + 1.8 + 1.8 + UTF-8 + + + + Apache 2 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + scm:git:git@github.com:lemire/javaewah.git + scm:git:git@github.com:lemire/javaewah.git + scm:git:git@github.com:lemire/javaewah.git + + lemire Daniel Lemire @@ -43,37 +41,46 @@ - - - junit - junit - 4.10 - test - - - - Google Code Issue Tracking - http://code.google.com/p/javaewah/issues/list + + + junit + junit + 4.10 + test + + + + GitHub Issue Tracking + https://github.com/lemire/javaewah/issues - + org.sonatype.oss oss-parent 5 - + - - - org.apache.felix - maven-bundle-plugin - 2.3.7 - true - - - com.googlecode.javaewah.* - * - - + + org.apache.maven.plugins + maven-surefire-plugin + 2.19.1 + + 3 + true + -Xmx1024m + + + + org.apache.felix + maven-bundle-plugin + 2.3.7 + true + + + com.googlecode.javaewah.* + * + + org.apache.maven.plugins @@ -89,41 +96,40 @@ - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.8 - - - - attach-javadocs - - jar - - - - - - org.apache.maven.plugins - maven-source-plugin - 2.1.2 - - - attach-sources - - jar - - - - + + org.apache.maven.plugins + maven-javadoc-plugin + 2.8 + + 8 + + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.1.2 + + + attach-sources + + jar + + + + - JavaEWAH - http://code.google.com/p/javaewah/ - The bit 
array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression. - -JavaEWAH is a word-aligned compressed variant of the Java bitset class. It uses a 64-bit run-length encoding (RLE) compression scheme. - -The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (implemented in Java as the BitSet class). Unlike some alternatives, javaewah does not rely on a patented scheme. + JavaEWAH + https://github.com/lemire/javaewah + The bit array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression. + JavaEWAH is a word-aligned compressed variant of the Java bitset class. It uses a 64-bit run-length encoding (RLE) compression scheme. + The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (implemented in Java as the BitSet class). Unlike some alternatives, javaewah does not rely on a patented scheme. 
diff -Nru libjavaewah-java-0.7.9/README.md libjavaewah-java-1.1.7/README.md --- libjavaewah-java-0.7.9/README.md 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/README.md 2019-11-08 21:55:59.000000000 +0000 @@ -1,26 +1,34 @@ JavaEWAH ========================================================== - -(c) 2009-2013 -Daniel Lemire (http://lemire.me/en/), -Cliff Moon (https://github.com/cliffmoon), +[![Build Status](https://travis-ci.org/lemire/javaewah.png)](https://travis-ci.org/lemire/javaewah) +[![][maven img]][maven] +[![][license img]][license] +[![docs-badge][]][docs] +[![Coverage Status](https://coveralls.io/repos/lemire/javaewah/badge.svg?branch=master)](https://coveralls.io/r/lemire/javaewah?branch=master) +[![Code Quality: Java](https://img.shields.io/lgtm/grade/java/g/lemire/javaewah.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/lemire/javaewah/context:java) + +(c) 2009-2016 +Daniel Lemire (http://lemire.me/en/), +Cliff Moon, David McIntosh (https://github.com/mctofu), Robert Becho (https://github.com/RBecho), -Colby Ranger (https://github.com/crangeratgoogle) -Veronika Zenz (https://github.com/veronikazenz) -and Owen Kaser (https://github.com/owenkaser) +Colby Ranger (https://github.com/crangeratgoogle), +Veronika Zenz (https://github.com/veronikazenz), +Owen Kaser (https://github.com/owenkaser), +Gregory Ssi-Yan-Kai (https://github.com/gssiyankai), +and Rory Graves (https://github.com/rorygraves) This code is licensed under Apache License, Version 2.0 (ASL2.0). (GPL 2.0 derivatives are allowed.) This is a word-aligned compressed variant of -the Java Bitset class. We provide both a 64-bit +the Java Bitset class. We provide both a 64-bit and a 32-bit RLE-like compression scheme. It can be used to implement bitmap indexes. -The goal of word-aligned compression is not to -achieve the best compression, but rather to +The goal of word-aligned compression is not to +achieve the best compression, but rather to improve query processing time. 
Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented @@ -28,21 +36,128 @@ uncompressed bitmap (as implemented in the java BitSet class by Sun). +JavaEWAH offers competitive speed. In an exhaustive +comparison, Guzun et al. (ICDE 2014) found that "EWAH +offers the best query time for all distributions." + +JavaEWAH also supports memory-mapped files: we can +serialize the bitmaps to disk and then map them to +memory using the java.nio classes. This may avoid +wasteful serialization/deserialization routines. + +The library also provides a drop-in replacement for +the standard BitSet class. Like the other bitmap classes +in JavaEWAH, this uncompressed BitSet class supports +memory-mapped files as well as many other conveniences. For better performance, use a 64-bit JVM over 64-bit CPUs when using the 64-bit scheme (javaewah.EWAHCompressedBitmap). - The 32-bit version (javaewah32.EWAHCompressedBitmap32) should -compress better but be comparatively slower. +compress better but be comparatively slower. It is recommended however that you run your own benchmark. + + + +Java 6 or better is required. We found the very latest OpenJDK release +offered the best performance. + +Real-world usage +---------------- + +JavaEWAH is part of Apache Hive and its derivatives (e.g., Apache Spark) and Eclipse JGit. It has been used in production systems for many years. It is part of major Linux distributions. It is part of [Twitter algebird](https://github.com/twitter/algebird). + + +EWAH is used to accelerate the distributed version control system Git (http://githubengineering.com/counting-objects/). You can find the C port of EWAH written by the Git team at https://github.com/git/git/tree/master/ewah + +When should you use a bitmap? +---------------------------------------- + +Sets are a fundamental abstraction in +software. They can be implemented in various +ways, as hash sets, as trees, and so forth. 
+In databases and search engines, sets are often an integral +part of indexes. For example, we may need to maintain a set +of all documents or rows (represented by numerical identifiers) +that satisfy some property. Besides adding or removing +elements from the set, we need fast functions +to compute the intersection, the union, the difference between sets, and so on. + + +To implement a set +of integers, a particularly appealing strategy is the +bitmap (also called bitset or bit vector). Using n bits, +we can represent any set made of the integers from the range +[0,n): it suffices to set the ith bit to one if integer i is present in the set. +Commodity processors use words of W=32 or W=64 bits. By combining many such words, we can +support large values of n. Intersections, unions and differences can then be implemented + as bitwise AND, OR and ANDNOT operations. +More complicated set functions can also be implemented as bitwise operations. + +When the bitset approach is applicable, it can be orders of +magnitude faster than other possible implementations of a set (e.g., as a hash set) +while using several times less memory. + +However, a bitset, even a compressed one, is not always applicable. For example, if +you have 1000 random-looking integers, then a simple array might be the best representation. +We refer to this case as the "sparse" scenario. + +When should you use compressed bitmaps? +---------------------------------------- -Java 6 or better is required. +An uncompressed BitSet can use a lot of memory. For example, if you take a BitSet +and set the bit at position 1,000,000 to true, you have just over 100kB. That's over 100kB +to store the position of one bit. This is wasteful even if you do not care about memory: +suppose that you need to compute the intersection between this BitSet and another one +that has the bit at position 1,000,001 set to true, then you need to go through all these zeroes, +whether you like it or not. 
That can become very wasteful. + +This being said, there are definitely cases where attempting to use compressed bitmaps is wasteful. +For example, if you have a small universe size. E.g., your bitmaps represent sets of integers +from [0,n) where n is small (e.g., n=64 or n=128). If you are able to use an uncompressed BitSet and +it does not blow up your memory usage, then compressed bitmaps are probably not useful +to you. In fact, if you do not need compression, then a BitSet offers remarkable speed. +One of the downsides of a compressed bitmap like those provided by JavaEWAH is slower random access: +checking whether a bit is set to true in a compressed bitmap takes longer. + +The sparse scenario is another use case where compressed bitmaps should not be used. +Keep in mind that random-looking data is usually not compressible. E.g., if you have a small set of +32-bit random integers, it is not mathematically possible to use far less than 32 bits per integer, +and attempts at compression can be counterproductive. + +How does EWAH compare with the alternatives? +------------------------------------------- + +EWAH is part of a larger family of compressed bitmaps that are run-length-encoded +bitmaps. They identify long runs of 1s or 0s and they represent them with a marker word. +If you have a local mix of 1s and 0s, you use an uncompressed word. + +There are many formats in this family besides EWAH: + +* Oracle's BBC is an obsolete format at this point: though it may provide good compression, +it is likely much slower than more recent alternatives due to excessive branching. +* WAH is a patented variation on BBC that provides better performance. +* Concise is a variation on the patented WAH. In some specific instances, it can compress +much better than WAH (up to 2x better), but it is generally slower. +* EWAH is both free of patents and faster than all the above. On the downside, it +does not compress quite as well.
It is faster because it allows some form of "skipping" +over uncompressed words. So though none of these formats are great at random access, EWAH +is better than the alternatives. + +There are other alternatives however. For example, the Roaring +format (https://github.com/lemire/RoaringBitmap) is not a run-length-encoded hybrid. It provides faster random access +than even EWAH. + + +Data format +------------ For more details regarding the compression format, please see Section 3 of the following paper: Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. http://arxiv.org/abs/0901.3751 - + + + (The PDF file is freely available on the arXiv site.) Benchmark @@ -52,7 +167,9 @@ WAH, ConciseSet, BitSet and other options, please see https://github.com/lemire/simplebitmapbenchmark - + +However, this is very naive. It is recommended that you run your own benchmarks. + Unit testing ------------ @@ -61,7 +178,7 @@ mvn test -See +See http://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html for details. 
@@ -69,8 +186,81 @@ Usage ----- +```java + EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0, 2, 55, 64, 1 << 30); + EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1, 3, 64, + 1 << 30); + System.out.println("bitmap 1: " + ewahBitmap1); + System.out.println("bitmap 2: " + ewahBitmap2); + // or + EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2); + System.out.println("bitmap 1 OR bitmap 2: " + orbitmap); + System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes"); + // and + EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2); + System.out.println("bitmap 1 AND bitmap 2: " + andbitmap); + System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes"); + // xor + EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2); + System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap); + System.out.println("memory usage: " + xorbitmap.sizeInBytes() + " bytes"); + // fast aggregation over many bitmaps + EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(5, 55, + 1 << 30); + EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4, 66, + 1 << 30); + System.out.println("bitmap 3: " + ewahBitmap3); + System.out.println("bitmap 4: " + ewahBitmap4); + andbitmap = EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3, + ewahBitmap4); + System.out.println("b1 AND b2 AND b3 AND b4: " + andbitmap); + // serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: you could use a file output steam instead of ByteArrayOutputStream + ewahBitmap1.serialize(new DataOutputStream(bos)); + EWAHCompressedBitmap ewahBitmap1new = new EWAHCompressedBitmap(); + byte[] bout = bos.toByteArray(); + ewahBitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream(bout))); + System.out.println("bitmap 1 (recovered) : " + ewahBitmap1new); + if (!ewahBitmap1.equals(ewahBitmap1new)) throw new RuntimeException("Will not happen"); + // + // we can use a 
ByteBuffer as backend for a bitmap + // which allows memory-mapped bitmaps + // + ByteBuffer bb = ByteBuffer.wrap(bout); + EWAHCompressedBitmap rmap = new EWAHCompressedBitmap(bb); + System.out.println("bitmap 1 (mapped) : " + rmap); + + if (!rmap.equals(ewahBitmap1)) throw new RuntimeException("Will not happen"); + // + // support for threshold function (new as of version 0.8.0): + // mark as true a bit that occurs at least T times in the source + // bitmaps + // + EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2, + ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4); + System.out.println("threshold 2 : " + threshold2); +``` See example.java. +You can use our drop-in replacement for the BitSet class in a memory-mapped file +context as follows: + +```java + final FileOutputStream fos = new FileOutputStream(tmpfile); + BitSet Bitmap = BitSet.bitmapOf(0, 2, 55, 64, 512); + Bitmap.serialize(new DataOutputStream(fos)); + RandomAccessFile memoryMappedFile = new RandomAccessFile(tmpfile, "r"); + ByteBuffer bb = memoryMappedFile.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, totalcount); + ImmutableBitSet mapped = new ImmutableBitSet(bb.asLongBuffer()); +``` + + +There are more examples in the "examples" folder (e.g., +for memory-file mapping). + + Maven central repository ------------------------ @@ -79,24 +269,33 @@ You can also specify the dependency in the Maven "pom.xml" file: +```xml com.googlecode.javaewah JavaEWAH - 0.7.9 + [1.1,) +``` Naturally, you should replace "version" by the version you desire. 
+Ubuntu (Linux) +------------------ + +To install javaewah on Ubuntu, type: + + sudo apt-get install libjavaewah-java + Travis (Continuous integration) ------------------------------- You can check whether the latest version builds on your favorite version -of Java using Travis: https://travis-ci.org/lemire/javaewah/builds/11059867 +of Java using Travis: https://travis-ci.org/lemire/javaewah/builds/ -Clojure +Clojure ------- Joel Boehland wrote Clojure wrappers: @@ -106,12 +305,20 @@ Frequent questions ------------------ +Question: How do I build javaewah without testing or signing? + + mvn clean install -DskipTests -Dgpg.skip=true + +Question: Will JavaEWAH support long values? + +Answer: It might, but it does not at the moment. + Question: How do I check the value of a bit? -Answer: If you need to routinely check the value of a given bit quickly, then +Answer: If you need to routinely check the value of a given bit quickly, then EWAH might not be the right format. However, if you must do it, you can proceed as follows: - +```java /** * Suppose you have the following bitmap: */ @@ -119,5 +326,44 @@ /** * We want to know if bit 64 is set: */ - boolean is64set = (b.and(EWAHCompressedBitmap.bitmapOf(64)).cardinality() == 1); + boolean is64set = b.get(64); +``` + +API Documentation +----------------- + +http://www.javadoc.io/doc/com.googlecode.javaewah/JavaEWAH/ + +Mailing list and discussion group +--------------------------------- + +https://groups.google.com/forum/#!forum/javaewah + + +Further reading +--------------- + +Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes, Data & Knowledge Engineering 69 (1), 2010. +http://arxiv.org/abs/0901.3751 + +Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections, Software: Practice and Experience 46 (2), 2016. +http://arxiv.org/abs/1402.4466 + + + +Acknowledgement +--------------- + +Special thanks to Shen Liang for optimization advice. 
+ +This work was supported by NSERC grant number 26143. + +[maven img]:https://maven-badges.herokuapp.com/maven-central/com.googlecode.javaewah/JavaEWAH/badge.svg +[maven]:http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.googlecode.javaewah%22%20 + +[license]:LICENSE-2.0.txt +[license img]:https://img.shields.io/badge/License-Apache%202-blue.svg + +[docs-badge]:https://img.shields.io/badge/API-docs-blue.svg?style=flat-square +[docs]:http://www.javadoc.io/doc/com.googlecode.javaewah/JavaEWAH/ diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,212 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ - -import java.text.DecimalFormat; -import java.util.List; -import com.googlecode.javaewah32.EWAHCompressedBitmap32; -import com.googlecode.javaewah.FastAggregation; -import com.googlecode.javaewah.IntIterator; -import com.googlecode.javaewah32.IteratingRLW32; -import com.googlecode.javaewah32.IteratorAggregation32; -import com.googlecode.javaewah32.IteratorUtil32; - -/** - * This class is used to benchmark the performance EWAH. 
- * - * @author Daniel Lemire - */ -public class Benchmark32 { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - test(100, 16, 1); -// test(2, 24, 1); - } - - @SuppressWarnings("javadoc") - public static void test(int N, int nbr, int repeat) { - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { - long bogus = 0; - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - System.out.println("# generating random data..."); - int[] inter = cdg.generateClustered(1 << (nbr/2), Max); - for (int k = 0; k < N; ++k) - data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); - System.out.println("# generating random data... ok."); - // building - bef = System.currentTimeMillis(); - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - int size = 0; - for (int r = 0; r < repeat; ++r) { - size = 0; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap32(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - size += ewah[k].sizeInBytes(); - } - } - aft = System.currentTimeMillis(); - line += "\t" + size; - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - int[] array = ewah[k].toArray(); - bogus += array.length; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - int[] array = new int[ewah[k].cardinality()]; - int c = 0; - for (int x : ewah[k]) - array[c++] = x; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 
0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - List L = ewah[k].getPositions(); - int[] array = new int[L.size()]; - int c = 0; - for (int x : L) - array[c++] = x; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IntIterator iter = ewah[k].intIterator(); - while (iter.hasNext()) { - bogus += iter.next(); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - line += "\t\t\t"; - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32 ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.or(ewah[j]); - } - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 - .or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW32[] ewahcp = new 
IteratingRLW32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j].getIteratingRLW(); - } - IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp); - bogus += IteratorUtil32.materialize(ewahor).sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - line += "\t\t\t"; - // logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32 ewahand = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahand = ewahand.and(ewah[j]); - } - bogus += ewahand.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32 - .and(ewahcp); - bogus += ewahand.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - - // fast logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j].getIteratingRLW(); - } - IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp); - bogus += IteratorUtil32.materialize(ewahand).sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - - System.out - .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); - System.out.println(line); - System.out.println("# bogus =" + bogus); - } - } -} diff -Nru 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,130 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -import java.text.DecimalFormat; -import com.googlecode.javaewah32.*; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * To benchmark the logical and (intersection) aggregate. - */ -public class BenchmarkIntersection32 { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - test(10, 18, 1); - } - - @SuppressWarnings({ "javadoc" }) - public static void test(int N, int nbr, int repeat) { - long bogus = 0; - - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { - for (int times = 0; times < 2; ++times) { - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - int[] inter = cdg.generateClustered(1 << (nbr/2), Max); - for (int k = 0; k < N; ++k) - data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); - // building - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap32(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - // sanity check - if (true) { - EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]); - for (int k = 2; k < ewah.length; 
++k) - answer = answer.and(ewah[k]); - - EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah); - if (!answer.equals(ewahand)) - throw new RuntimeException( - "bug EWAHCompressedBitmap.and"); - EWAHCompressedBitmap32 ewahand2 = FastAggregation32 - .bufferedand(65536,ewah); - if (!ewahand.equals(ewahand2)) - throw new RuntimeException( - "bug FastAggregation.bufferedand "); - - } - - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32 ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.and(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 - .and(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = FastAggregation32 - .bufferedand(65536,ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = new IteratingBufferedRunningLengthWord32( - ewah[j]); - } - IteratingRLW32 ewahor = IteratorAggregation32.bufferedand(ewahcp); - 
int wordcounter = IteratorUtil32.cardinality(ewahor); - bogus += wordcounter; - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - System.out - .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); - - System.out.println(line); - } - System.out.println("# bogus =" + bogus); - - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,130 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -import java.text.DecimalFormat; -import com.googlecode.javaewah.*; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * To benchmark the logical and (intersection) aggregate. 
- */ -public class BenchmarkIntersection { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - test(10, 18, 1); - } - - @SuppressWarnings({ "javadoc"}) - public static void test(int N, int nbr, int repeat) { - long bogus = 0; - - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { - for (int times = 0; times < 2; ++times) { - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - int[] inter = cdg.generateClustered(1 << (nbr/2), Max); - for (int k = 0; k < N; ++k) - data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); - // building - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - // sanity check - if (true) { - EWAHCompressedBitmap answer = ewah[0].and(ewah[1]); - for (int k = 2; k < ewah.length; ++k) - answer = answer.and(ewah[k]); - - EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah); - if (!answer.equals(ewahand)) - throw new RuntimeException( - "bug EWAHCompressedBitmap.and"); - EWAHCompressedBitmap ewahand2 = FastAggregation - .bufferedand(65536,ewah); - if (!ewahand.equals(ewahand2)) - throw new RuntimeException( - "bug FastAggregation.bufferedand "); - - } - - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.and(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new 
EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = EWAHCompressedBitmap - .and(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation - .bufferedand(65536,ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = new IteratingBufferedRunningLengthWord( - ewah[j]); - } - IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp); - int wordcounter = IteratorUtil.cardinality(ewahor); - bogus += wordcounter; - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - System.out - .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); - - System.out.println(line); - } - System.out.println("# bogus =" + bogus); - - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,284 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff 
Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ - -import java.text.DecimalFormat; -import java.util.Arrays; -import java.util.List; -import com.googlecode.javaewah.EWAHCompressedBitmap; -import com.googlecode.javaewah.FastAggregation; -import com.googlecode.javaewah.IntIterator; -import com.googlecode.javaewah.IteratingRLW; -import com.googlecode.javaewah.IteratorAggregation; -import com.googlecode.javaewah.IteratorUtil; - -/** - * This class is used to benchmark the performance EWAH. - * - * @author Daniel Lemire - */ -public class Benchmark { - - /** - * Compute the union between two sorted arrays - * @param set1 first sorted array - * @param set2 second sorted array - * @return merged array - */ - static public int[] unite2by2(final int[] set1, final int[] set2) { - int pos = 0; - int k1 = 0, k2 = 0; - if (0 == set1.length) - return Arrays.copyOf(set2, set2.length); - if (0 == set2.length) - return Arrays.copyOf(set1, set1.length); - int[] buffer = new int[set1.length + set2.length]; - while (true) { - if (set1[k1] < set2[k2]) { - buffer[pos++] = set1[k1]; - ++k1; - if (k1 >= set1.length) { - for (; k2 < set2.length; ++k2) - buffer[pos++] = set2[k2]; - break; - } - } else if (set1[k1] == set2[k2]) { - buffer[pos++] = set1[k1]; - ++k1; - ++k2; - if (k1 >= set1.length) { - for (; k2 < set2.length; ++k2) - buffer[pos++] = set2[k2]; - break; - } - if (k2 >= set2.length) { - for (; k1 < set1.length; ++k1) - buffer[pos++] = set1[k1]; - break; - } - } else {// if (set1[k1]>set2[k2]) { - buffer[pos++] = set2[k2]; - ++k2; - if (k2 >= set2.length) { - for (; k1 < set1.length; ++k1) - buffer[pos++] = set1[k1]; - break; - } - } - } - return Arrays.copyOf(buffer, pos); - } - - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - //test(2, 24, 1); - test(100, 16, 1); - } - - @SuppressWarnings("javadoc") - public static void test(int N, int nbr, int repeat) { - 
DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { - long bogus = 0; - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - System.out.println("# generating random data..."); - int[] inter = cdg.generateClustered(1 << (nbr/2), Max); - for (int k = 0; k < N; ++k) - data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter); - System.out.println("# generating random data... ok."); - // building - bef = System.currentTimeMillis(); - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - int size = 0; - for (int r = 0; r < repeat; ++r) { - size = 0; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - size += ewah[k].sizeInBytes(); - } - } - aft = System.currentTimeMillis(); - line += "\t" + size; - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - int[] array = ewah[k].toArray(); - bogus += array.length; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - int[] array = new int[ewah[k].cardinality()]; - int c = 0; - for (int x : ewah[k]) - array[c++] = x; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - List L = ewah[k].getPositions(); - int[] array = new int[L.size()]; - int c = 0; - for (int x : L) - array[c++] = x; - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // uncompressing - bef = 
System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IntIterator iter = ewah[k].intIterator(); - while (iter.hasNext()) { - bogus += iter.next(); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - line += "\t\t\t"; - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.or(ewah[j]); - } - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = EWAHCompressedBitmap - .or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - // run sanity check - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j].getIteratingRLW(); - } - IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); - EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1)); - EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor); - if(!ewahorp.equals(mewahor)) throw 
new RuntimeException("bug"); - } - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j].getIteratingRLW(); - } - IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); - bogus += IteratorUtil.materialize(ewahor).sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - line += "\t\t\t"; - // logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahand = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahand = ewahand.and(ewah[j]); - } - bogus += ewahand.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahand = EWAHCompressedBitmap - .and(ewahcp); - bogus += ewahand.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j].getIteratingRLW(); - } - IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); - EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1)); - EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand); - if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug"); - } - // fast logical and - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = 
ewah[j].getIteratingRLW(); - } - IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); - bogus += IteratorUtil.materialize(ewahand).sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - - System.out - .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); - System.out.println(line); - System.out.println("# bogus =" + bogus); - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,165 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -import java.text.DecimalFormat; - -import com.googlecode.javaewah.FastAggregation; -import com.googlecode.javaewah32.*; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * To benchmark the logical or (union) aggregate. 
- */ -public class BenchmarkUnion32 { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - test(10, 18, 1); - } - - @SuppressWarnings({ "javadoc", "deprecation" }) - public static void test(int N, int nbr, int repeat) { - long bogus = 0; - - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { - for (int times = 0; times < 2; ++times) { - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - // building - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap32(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - // sanity check - if(true){ - EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]); - for(int k = 2; k < ewah.length; ++k) - answer = answer.or(ewah[k]); - - EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 - .or(ewah); - if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or"); - EWAHCompressedBitmap32 ewahor3 = FastAggregation - .or(ewah); - if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); - EWAHCompressedBitmap32 ewahor2 = FastAggregation32 - .bufferedor(65536,ewah); - if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor "); - - } - - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32 ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.or(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - 
for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 - .or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = FastAggregation - .or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = FastAggregation32 - .bufferedor(65536,ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); - FastAggregation32.legacy_orWithContainer(x, ewahcp); - bogus += x.sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = new 
IteratingBufferedRunningLengthWord32(ewah[j]); - } - IteratingRLW32 ewahor = IteratorAggregation32 - .bufferedor(ewahcp); - int wordcounter = IteratorUtil32.cardinality(ewahor); - bogus += wordcounter; - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - - System.out - .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); - - System.out.println(line); - } - System.out.println("# bogus =" + bogus); - - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,164 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -import java.text.DecimalFormat; -import com.googlecode.javaewah.*; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * To benchmark the logical or (union) aggregate. 
- */ -public class BenchmarkUnion { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - test(10, 18, 1); - } - - @SuppressWarnings({ "javadoc", "deprecation" }) - public static void test(int N, int nbr, int repeat) { - long bogus = 0; - - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { - for (int times = 0; times < 2; ++times) { - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - // building - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - // sanity check - if (true) { - EWAHCompressedBitmap answer = ewah[0].or(ewah[1]); - for (int k = 2; k < ewah.length; ++k) - answer = answer.or(ewah[k]); - - EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah); - if (!answer.equals(ewahor)) - throw new RuntimeException( - "bug EWAHCompressedBitmap.or"); - EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah); - if (!ewahor.equals(ewahor3)) - throw new RuntimeException("bug FastAggregation.or"); - EWAHCompressedBitmap ewahor2 = FastAggregation - .bufferedor(65536,ewah); - if (!ewahor.equals(ewahor2)) - throw new RuntimeException( - "bug FastAggregation.bufferedor "); - - } - - // logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.or(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k 
= 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = EWAHCompressedBitmap - .or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation - .or(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation - .bufferedor(65536,ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap x = new EWAHCompressedBitmap(); - FastAggregation.legacy_orWithContainer(x, ewahcp); - bogus += x.sizeInBits(); - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical or - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = new IteratingBufferedRunningLengthWord( - ewah[j]); - 
} - IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); - int wordcounter = IteratorUtil.cardinality(ewahor); - bogus += wordcounter; - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - System.out - .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); - - System.out.println(line); - } - System.out.println("# bogus =" + bogus); - - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,137 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -import java.text.DecimalFormat; - -import com.googlecode.javaewah.FastAggregation; -import com.googlecode.javaewah32.*; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * To benchmark the logical xor aggregate. 
- */ -public class BenchmarkXOR32 { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - test(10, 18, 1); - //test(2, 22, 1); - } - - @SuppressWarnings({ "javadoc" }) - public static void test(int N, int nbr, int repeat) { - long bogus = 0; - - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { - for (int times = 0; times < 2; ++times) { - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - // building - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap32(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - // sanity check - if (true) { - EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]); - for (int k = 2; k < ewah.length; ++k) - answer = answer.xor(ewah[k]); - EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah); - if (!answer.equals(ewahor3)) - throw new RuntimeException("bug FastAggregation.xor"); - EWAHCompressedBitmap32 ewahor2 = FastAggregation32 - .bufferedxor(65536,ewah); - if (!answer.equals(ewahor2)) - throw new RuntimeException( - "bug FastAggregation.bufferedxor "); - EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah))); - if (!answer.equals(iwah)) - throw new RuntimeException( - "bug xor it "); - - } - - // logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32 ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.xor(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical xor - bef = 
System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = FastAggregation - .xor(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - - // fast logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap32 ewahor = FastAggregation32 - .bufferedxor(65536,ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = new IteratingBufferedRunningLengthWord32( - ewah[j]); - } - IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp); - int wordcounter = IteratorUtil32.cardinality(ewahor); - bogus += wordcounter; - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - - System.out - .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); - - System.out.println(line); - } - System.out.println("# bogus =" + bogus); - - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java 2013-11-12 14:31:20.000000000 +0000 +++ 
libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,134 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -import java.text.DecimalFormat; -import com.googlecode.javaewah.*; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * To benchmark the logical xor aggregate. - */ -public class BenchmarkXOR { - - @SuppressWarnings("javadoc") - public static void main(String args[]) { - //test(10, 18, 1); - test(2, 22, 1); - } - - @SuppressWarnings({ "javadoc" }) - public static void test(int N, int nbr, int repeat) { - long bogus = 0; - - DecimalFormat df = new DecimalFormat("0.###"); - ClusteredDataGenerator cdg = new ClusteredDataGenerator(); - for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { - for (int times = 0; times < 2; ++times) { - String line = ""; - long bef, aft; - line += sparsity; - int[][] data = new int[N][]; - int Max = (1 << (nbr + sparsity)); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - // building - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - // sanity check - if (true) { - EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]); - for (int k = 2; k < ewah.length; ++k) - answer = answer.xor(ewah[k]); - EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah); - if (!answer.equals(ewahor3)) - throw new RuntimeException("bug FastAggregation.xor"); - EWAHCompressedBitmap ewahor2 = FastAggregation - .bufferedxor(65536,ewah); - if (!answer.equals(ewahor2)) - throw new RuntimeException( - "bug FastAggregation.bufferedxor "); - EWAHCompressedBitmap iwah = 
IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah))); - if (!answer.equals(iwah)) - throw new RuntimeException( - "bug xor it "); - - - } - - // logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap ewahor = ewah[0]; - for (int j = 1; j < k + 1; ++j) { - ewahor = ewahor.xor(ewah[j]); - } - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - // fast logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation - .xor(ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - - - // fast logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = ewah[j]; - } - EWAHCompressedBitmap ewahor = FastAggregation - .bufferedxor(65536,ewahcp); - bogus += ewahor.sizeInBits(); - } - aft = System.currentTimeMillis(); - line += "\t" + df.format((aft - bef) / 1000.0); - // fast logical xor - bef = System.currentTimeMillis(); - for (int r = 0; r < repeat; ++r) - for (int k = 0; k < N; ++k) { - IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; - for (int j = 0; j < k + 1; ++j) { - ewahcp[j] = new IteratingBufferedRunningLengthWord( - ewah[j]); - } - IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp); - int wordcounter = IteratorUtil.cardinality(ewahor); - bogus += wordcounter; - } - aft = System.currentTimeMillis(); - - line += "\t" + df.format((aft - bef) / 1000.0); - - System.out - .println("# times for: 2by2 FastAggregation.xor bufferedxor 
iterator-based"); - - System.out.println(line); - } - System.out.println("# bogus =" + bogus); - - } - } -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -package com.googlecode.javaewah.benchmark; - - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ - - -/** - * This class will generate lists of random integers with a "clustered" distribution. - * Reference: - * Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147. - * - * @author Daniel Lemire - */ -public class ClusteredDataGenerator { - - /** - * - */ -public ClusteredDataGenerator() { - this.unidg = new UniformDataGenerator(); - } - - /** - * @param seed random seed - */ -public ClusteredDataGenerator(final int seed) { - this.unidg = new UniformDataGenerator(seed); -} - -/** - * generates randomly N distinct integers from 0 to Max. - * @param N number of integers - * @param Max maximum integer value - * @return a randomly generated array - */ - public int[] generateClustered(int N, int Max) { - int[] array = new int[N]; - fillClustered(array, 0, N, 0, Max); - return array; - } - - void fillClustered(int[] array, int offset, int length, int Min, int Max) { - final int range = Max - Min; - if ((range == length) || (length <= 10)) { - fillUniform(array, offset, length, Min, Max); - return; - } - final int cut = length / 2 - + ((range - length - 1 > 0) ? 
this.unidg.rand.nextInt(range - length - 1) : 0); - final double p = this.unidg.rand.nextDouble(); - if (p < 0.25) { - fillUniform(array, offset, length / 2, Min, Min + cut); - fillClustered(array, offset + length / 2, length - length / 2, Min + cut, - Max); - } else if (p < 0.5) { - fillClustered(array, offset, length / 2, Min, Min + cut); - fillUniform(array, offset + length / 2, length - length / 2, Min + cut, - Max); - } else { - fillClustered(array, offset, length / 2, Min, Min + cut); - fillClustered(array, offset + length / 2, length - length / 2, Min + cut, - Max); - } - } - - void fillUniform(int[] array, int offset, int length, int Min, int Max) { - int[] v = this.unidg.generateUniform(length, Max - Min); - for (int k = 0; k < v.length; ++k) - array[k + offset] = Min + v[k]; - } - - UniformDataGenerator unidg; - -} - diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,114 +0,0 @@ -package com.googlecode.javaewah.benchmark; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ - -import java.util.Arrays; -import java.util.BitSet; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Random; - -/** - * This class will generate "uniform" lists of random integers. - * - * @author Daniel Lemire - */ -public class UniformDataGenerator { - /** - * construct generator of random arrays. 
- */ - public UniformDataGenerator() { - this.rand = new Random(); - } - - /** - * @param seed random seed - */ - public UniformDataGenerator(final int seed) { - this.rand = new Random(seed); - } - - /** - * generates randomly N distinct integers from 0 to Max. - */ - int[] generateUniformHash(int N, int Max) { - if (N > Max) - throw new RuntimeException("not possible"); - int[] ans = new int[N]; - HashSet s = new HashSet(); - while (s.size() < N) - s.add(new Integer(this.rand.nextInt(Max))); - Iterator i = s.iterator(); - for (int k = 0; k < N; ++k) - ans[k] = i.next().intValue(); - Arrays.sort(ans); - return ans; - } - - /** - * output all integers from the range [0,Max) that are not - * in the array - */ - static int[] negate(int[] x, int Max) { - int[] ans = new int[Max - x.length]; - int i = 0; - int c = 0; - for (int j = 0; j < x.length; ++j) { - int v = x[j]; - for (; i < v; ++i) - ans[c++] = i; - ++i; - } - while (c < ans.length) - ans[c++] = i++; - return ans; - } - - - /** - * generates randomly N distinct integers from 0 to Max. - * @param N Number of integers to generate - * @param Max Maximum value of the integers - * @return array containing random integers - */ - public int[] generateUniform(int N, int Max) { - if(N * 2 > Max) { - return negate( generateUniform(Max - N, Max), Max ); - } - if (2048 * N > Max) - return generateUniformBitmap(N, Max); - return generateUniformHash(N, Max); - } - - /** - * generates randomly N distinct integers from 0 to Max using a bitmap. 
- * @param N Number of integers to generate - * @param Max Maximum value of the integers - * @return array containing random integers - */ - int[] generateUniformBitmap(int N, int Max) { - if (N > Max) - throw new RuntimeException("not possible"); - int[] ans = new int[N]; - BitSet bs = new BitSet(Max); - int cardinality = 0; - while (cardinality < N) { - int v = this.rand.nextInt(Max); - if (!bs.get(v)) { - bs.set(v); - cardinality++; - } - } - int pos = 0; - for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { - ans[pos++] = i; - } - return ans; - } - - Random rand = new Random(); - -} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitCounter.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BitCounter.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitCounter.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BitCounter.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,106 +1,105 @@ package com.googlecode.javaewah; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** * BitCounter is a fake bitset data structure. Instead of storing the actual * data, it only records the number of set bits. - * - * @since 0.4.0 + * * @author David McIntosh + * @since 0.4.0 */ public final class BitCounter implements BitmapStorage { - /** - * Virtually add words directly to the bitmap - * - * @param newdata - * the word - */ - @Override - public void add(final long newdata) { - this.oneBits += Long.bitCount(newdata); - return; - } - - /** - * virtually add several literal words. 
- * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - @Override - public void addStreamOfLiteralWords(long[] data, int start, int number) { - for (int i = start; i < start + number; i++) { - add(data[i]); - } - return; - } - - /** - * virtually add many zeroes or ones. - * - * @param v - * zeros or ones - * @param number - * how many to words add - */ - @Override -public void addStreamOfEmptyWords(boolean v, long number) { - if (v) { - this.oneBits += number * EWAHCompressedBitmap.wordinbits; - } - return; - } - - /** - * virtually add several negated literal words. - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - // @Override : causes problems with Java 1.5 - @Override -public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { - for (int i = start; i < start + number; i++) { - add(~data[i]); - } - return; - } - - /** - * As you act on this class, it records the number of set (true) bits. - * - * @return number of set bits - */ - public int getCount() { - return this.oneBits; - } - - /** - * should directly set the sizeinbits field, but is effectively ignored in - * this class. - * - * @param bits - * number of bits - */ - // @Override : causes problems with Java 1.5 - @Override -public void setSizeInBits(int bits) { - // no action - } + /** + * Virtually add words directly to the bitmap + * + * @param newData the word + */ + @Override + public void addWord(final long newData) { + this.oneBits += Long.bitCount(newData); + } + + /** + * Virtually add literal words directly to the bitmap + * + * @param newData the word + */ + @Override + public void addLiteralWord(final long newData) { + this.oneBits += Long.bitCount(newData); + } + + /** + * virtually add several literal words. 
+ * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + @Override + public void addStreamOfLiteralWords(final Buffer buffer, int start, int number) { + for (int i = start; i < start + number; i++) { + addLiteralWord(buffer.getWord(i)); + } + } - private int oneBits; + /** + * virtually add many zeroes or ones. + * + * @param v zeros or ones + * @param number how many to words add + */ + @Override + public void addStreamOfEmptyWords(boolean v, long number) { + if (v) { + this.oneBits += (int)(number * EWAHCompressedBitmap.WORD_IN_BITS); + } + } + + /** + * virtually add several negated literal words. + * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + @Override + public void addStreamOfNegatedLiteralWords(final Buffer buffer, int start, int number) { + for (int i = start; i < start + number; i++) { + addLiteralWord(~buffer.getWord(i)); + } + } + + @Override + public void clear() { + this.oneBits = 0; + } + + /** + * As you act on this class, it records the number of set (true) bits. + * + * @return number of set bits + */ + public int getCount() { + return this.oneBits; + } + + /** + * should directly set the sizeInBits field, but is effectively ignored + * in this class. 
+ * + * @param bits number of bits + */ + // @Override : causes problems with Java 1.5 + @Override + public void setSizeInBitsWithinLastWord(int bits) { + // no action + } + private int oneBits; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitmapStorage.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BitmapStorage.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BitmapStorage.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BitmapStorage.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,71 +1,75 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** * Low level bitset writing methods. - * - * @since 0.4.0 + * * @author David McIntosh + * @since 0.4.0 */ public interface BitmapStorage { - /** - * Adding words directly to the bitmap (for expert use). - * - * This is normally how you add data to the array. So you add bits in streams - * of 8*8 bits. - * - * @param newdata - * the word - */ - public void add(final long newdata); - - /** - * if you have several literal words to copy over, this might be faster. - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - public void addStreamOfLiteralWords(final long[] data, final int start, - final int number); - - /** - * For experts: You want to add many zeroes or ones? This is the method you - * use. 
- * - * @param v - * zeros or ones - * @param number - * how many to words add - */ - public void addStreamOfEmptyWords(final boolean v, final long number); - - /** - * Like "addStreamOfLiteralWords" but negates the words being added. - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - public void addStreamOfNegatedLiteralWords(long[] data, final int start, - final int number); - - /** - * directly set the sizeinbits field - * - * @param bits - * number of bits - */ - public void setSizeInBits(final int bits); + /** + * Adding words directly to the bitmap (for expert use). + * + * This is normally how you add data to the array. So you add bits in + * streams of 8*8 bits. + * + * @param newData the word + */ + void addWord(final long newData); + + /** + * Adding literal words directly to the bitmap (for expert use). + * + * @param newData the word + */ + void addLiteralWord(final long newData); + + /** + * if you have several literal words to copy over, this might be faster. + * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + void addStreamOfLiteralWords(final Buffer buffer, final int start, final int number); + + /** + * For experts: You want to add many zeroes or ones? This is the method + * you use. + * + * @param v zeros or ones + * @param number how many to words add + */ + void addStreamOfEmptyWords(final boolean v, final long number); + + /** + * Like "addStreamOfLiteralWords" but negates the words being added. + * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + void addStreamOfNegatedLiteralWords(final Buffer buffer, final int start, final int number); + + /** + * Empties the container. 
+ */ + void clear(); + + /** + * Sets the size in bits of the bitmap as an *uncompressed* bitmap. + * Normally, this is used to reduce the size of the bitmaps within + * the scope of the last word. Specifically, this means that + * (sizeInBits()+63)/64 must be equal to (size +63)/64. + * If needed, the bitmap can be further padded with zeroes. + * + * @param size the size in bits + */ + void setSizeInBitsWithinLastWord(final int size); } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BufferedIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedIterator.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BufferedIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,151 +1,148 @@ package com.googlecode.javaewah; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** * This class can be used to iterate over blocks of bitmap data. - * - * @author Daniel Lemire * + * @author Daniel Lemire */ -public class BufferedIterator implements IteratingRLW { - /** - * Instantiates a new iterating buffered running length word. 
- * - * @param iterator iterator - */ - public BufferedIterator(final CloneableIterator iterator) { - this.masteriterator = iterator; - if(this.masteriterator.hasNext()) { - this.iterator = this.masteriterator.next(); - this.brlw = new BufferedRunningLengthWord(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; - this.buffer = this.iterator.buffer(); - } - } - - - /** - * Discard first words, iterating to the next running length word if needed. - * - * @param x the number of words to be discarded - */ - @Override - public void discardFirstWords(long x) { - while (x > 0) { - if (this.brlw.RunningLength > x) { - this.brlw.RunningLength -= x; - return; - } - x -= this.brlw.RunningLength; - this.brlw.RunningLength = 0; - long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; - - this.literalWordStartPosition += toDiscard; - this.brlw.NumberOfLiteralWords -= toDiscard; - x -= toDiscard; - if ((x > 0) || (this.brlw.size() == 0)) { - if (!this.next()) { - break; - } - } - } - } - /** - * Move to the next RunningLengthWord - * @return whether the move was possible - */ - @Override - public boolean next() { - if (!this.iterator.hasNext()) { - if(!reload()) { - this.brlw.NumberOfLiteralWords = 0; - this.brlw.RunningLength = 0; - return false; - } - } - this.brlw.reset(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 - return true; - } - private boolean reload() { - if(!this.masteriterator.hasNext()) { - return false; - } - this.iterator = this.masteriterator.next(); - this.buffer = this.iterator.buffer(); - return true; - } - - - /** - * Get the nth literal word for the current running length word - * @param index zero based index - * @return the literal word - */ - @Override - public long getLiteralWordAt(int index) { - return this.buffer[this.literalWordStartPosition + index]; - } - - /** - * Gets 
the number of literal words for the current running length word. - * - * @return the number of literal words - */ - @Override - public int getNumberOfLiteralWords() { - return this.brlw.NumberOfLiteralWords; - } - - /** - * Gets the running bit. - * - * @return the running bit - */ - @Override - public boolean getRunningBit() { - return this.brlw.RunningBit; - } - - /** - * Gets the running length. - * - * @return the running length - */ - @Override - public long getRunningLength() { - return this.brlw.RunningLength; - } - - /** - * Size in uncompressed words of the current running length word. - * - * @return the size - */ - @Override - public long size() { - return this.brlw.size(); - } - - - @Override - public BufferedIterator clone() throws CloneNotSupportedException { - BufferedIterator answer = (BufferedIterator) super.clone(); - answer.brlw = this.brlw.clone(); - answer.buffer = this.buffer; - answer.iterator = this.iterator.clone(); - answer.literalWordStartPosition = this.literalWordStartPosition; - answer.masteriterator = this.masteriterator.clone(); - return answer; - } - - private BufferedRunningLengthWord brlw; - private long[] buffer; - private int literalWordStartPosition; - private EWAHIterator iterator; - private CloneableIterator masteriterator; - } \ No newline at end of file +public class BufferedIterator implements IteratingRLW, Cloneable { + + /** + * Instantiates a new iterating buffered running length word. + * + * @param iterator iterator + */ + public BufferedIterator(final CloneableIterator iterator) { + this.masterIterator = iterator; + if (this.masterIterator.hasNext()) { + iteratingBrlw = new IteratingBufferedRunningLengthWord(this.masterIterator.next()); + } + } + + /** + * Discard first words, iterating to the next running length word if + * needed. 
+ * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(long x) { + while (x > 0) { + if (this.iteratingBrlw.getRunningLength() > x) { + this.iteratingBrlw.discardFirstWords(x); + return; + } + this.iteratingBrlw.discardFirstWords(this.iteratingBrlw.getRunningLength()); + x -= this.iteratingBrlw.getRunningLength(); + long toDiscard = x > this.iteratingBrlw.getNumberOfLiteralWords() + ? this.iteratingBrlw.getNumberOfLiteralWords() + : x; + + this.iteratingBrlw.discardFirstWords(toDiscard); + x -= toDiscard; + if ((x > 0) || (this.iteratingBrlw.size() == 0)) { + if (!this.next()) { + break; + } + } + } + } + @Override + public void discardLiteralWords(long x) { + this.iteratingBrlw.discardLiteralWords(x); + if (this.iteratingBrlw.getNumberOfLiteralWords() == 0) + this.next(); + } + + @Override + public void discardRunningWords() { + this.iteratingBrlw.discardRunningWords(); + if (this.iteratingBrlw.getNumberOfLiteralWords() == 0) + this.next(); + } + + /** + * Move to the next RunningLengthWord + * + * @return whether the move was possible + */ + @Override + public boolean next() { + if (!this.iteratingBrlw.next()) { + if (!this.masterIterator.hasNext()) { + return false; + } else { + this.iteratingBrlw = new IteratingBufferedRunningLengthWord(this.masterIterator.next()); + } + } + return true; + } + + /** + * Get the nth literal word for the current running length word + * + * @param index zero based index + * @return the literal word + */ + @Override + public long getLiteralWordAt(int index) { + return this.iteratingBrlw.getLiteralWordAt(index); + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.iteratingBrlw.getNumberOfLiteralWords(); + } + + /** + * Gets the running bit. 
+ * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.iteratingBrlw.getRunningBit(); + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public long getRunningLength() { + return this.iteratingBrlw.getRunningLength(); + } + + /** + * Size in uncompressed words of the current running length word. + * + * @return the size + */ + @Override + public long size() { + return this.iteratingBrlw.size(); + } + + @Override + public BufferedIterator clone() throws CloneNotSupportedException { + BufferedIterator answer = (BufferedIterator) super.clone(); + answer.iteratingBrlw = this.iteratingBrlw.clone(); + answer.masterIterator = this.masterIterator.clone(); + return answer; + } + + private IteratingBufferedRunningLengthWord iteratingBrlw; + private CloneableIterator masterIterator; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,175 +1,177 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ - - /** - * Mostly for internal use. Similar to RunningLengthWord, but can - * be modified without access to the array, and has faster access. + * Mostly for internal use. Similar to RunningLengthWord, but can be modified + * without access to the array, and has faster access. 
* * @author Daniel Lemire * @since 0.1.0 - * */ public final class BufferedRunningLengthWord implements Cloneable { - /** - * Instantiates a new buffered running length word. - * - * @param a the word - */ - public BufferedRunningLengthWord(final long a) { - this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); - this.RunningBit = (a & 1) != 0; - this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); - } - - /** - * Instantiates a new buffered running length word. - * - * @param rlw the rlw - */ - public BufferedRunningLengthWord(final RunningLengthWord rlw) { - this(rlw.parent.buffer[rlw.position]); - } - - /** - * Discard first words. - * - * @param x the x - */ - public void discardFirstWords(long x) { - if (this.RunningLength >= x) { - this.RunningLength -= x; - return; - } - x -= this.RunningLength; - this.RunningLength = 0; - this.literalwordoffset += x; - this.NumberOfLiteralWords -= x; - } - - /** - * Gets the number of literal words. - * - * @return the number of literal words - */ - public int getNumberOfLiteralWords() { - return this.NumberOfLiteralWords; - } - - /** - * Gets the running bit. - * - * @return the running bit - */ - public boolean getRunningBit() { - return this.RunningBit; - } - - /** - * Gets the running length. - * - * @return the running length - */ - public long getRunningLength() { - return this.RunningLength; - } - - /** - * Reset the values using the provided word. - * - * @param a the word - */ - public void reset(final long a) { - this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); - this.RunningBit = (a & 1) != 0; - this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); - this.literalwordoffset = 0; - } - - /** - * Reset the values of this running length word so that it has the same values - * as the other running length word. 
- * - * @param rlw the other running length word - */ - public void reset(final RunningLengthWord rlw) { - reset(rlw.parent.buffer[rlw.position]); - } - - /** - * Sets the number of literal words. - * - * @param number the new number of literal words - */ - public void setNumberOfLiteralWords(final int number) { - this.NumberOfLiteralWords = number; - } - - /** - * Sets the running bit. - * - * @param b the new running bit - */ - public void setRunningBit(final boolean b) { - this.RunningBit = b; - } - - /** - * Sets the running length. - * - * @param number the new running length - */ - public void setRunningLength(final long number) { - this.RunningLength = number; - } - - /** - * Size in uncompressed words. - * - * @return the long - */ - public long size() { - return this.RunningLength + this.NumberOfLiteralWords; - } - - /* - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "running bit = " + getRunningBit() + " running length = " - + getRunningLength() + " number of lit. words " - + getNumberOfLiteralWords(); - } - - @Override -public BufferedRunningLengthWord clone() throws CloneNotSupportedException { - BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); - answer.literalwordoffset = this.literalwordoffset; - answer.NumberOfLiteralWords = this.NumberOfLiteralWords; - answer.RunningBit = this.RunningBit; - answer.RunningLength = this.RunningLength; - return answer; - } - - - /** how many literal words have we read so far? */ - public int literalwordoffset = 0; - - /** The Number of literal words. */ - public int NumberOfLiteralWords; - - /** The Running bit. */ - public boolean RunningBit; - - /** The Running length. */ - public long RunningLength; + /** + * Instantiates a new buffered running length word. 
+ * + * @param a the word + */ + public BufferedRunningLengthWord(final long a) { + this.numberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.RUNNING_LENGTH_BITS)); + this.runningBit = (a & 1) != 0; + this.runningLength = (int) ((a >>> 1) & RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); + } + + /** + * Instantiates a new buffered running length word. + * + * @param rlw the rlw + */ + public BufferedRunningLengthWord(final RunningLengthWord rlw) { + this(rlw.buffer.getWord(rlw.position)); + } + + /** + * Discard first words. + * + * @param x the x + */ + public void discardFirstWords(long x) { + if (this.runningLength >= x) { + this.runningLength -= x; + return; + } + x -= this.runningLength; + this.runningLength = 0; + this.literalWordOffset += (int) x; + this.numberOfLiteralWords -= (int) x; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return this.numberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return this.runningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public long getRunningLength() { + return this.runningLength; + } + + /** + * Reset the values using the provided word. + * + * @param a the word + */ + public void reset(final long a) { + this.numberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.RUNNING_LENGTH_BITS)); + this.runningBit = (a & 1) != 0; + this.runningLength = (int) ((a >>> 1) & RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); + this.literalWordOffset = 0; + } + + /** + * Reset the values of this running length word so that it has the same + * values as the other running length word. + * + * @param rlw the other running length word + */ + public void reset(final RunningLengthWord rlw) { + reset(rlw.buffer.getWord(rlw.position)); + } + + /** + * Sets the number of literal words. 
+ * + * @param number the new number of literal words + */ + public void setNumberOfLiteralWords(final int number) { + this.numberOfLiteralWords = number; + } + + /** + * Sets the running bit. + * + * @param b the new running bit + */ + public void setRunningBit(final boolean b) { + this.runningBit = b; + } + + /** + * Sets the running length. + * + * @param number the new running length + */ + public void setRunningLength(final long number) { + this.runningLength = number; + } + + /** + * Size in uncompressed words. + * + * @return the long + */ + public long size() { + return this.runningLength + this.numberOfLiteralWords; + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public BufferedRunningLengthWord clone() throws CloneNotSupportedException { + BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); + answer.literalWordOffset = this.literalWordOffset; + answer.numberOfLiteralWords = this.numberOfLiteralWords; + answer.runningBit = this.runningBit; + answer.runningLength = this.runningLength; + return answer; + } - -} \ No newline at end of file + /** + * how many literal words have we read so far? + */ + protected int literalWordOffset = 0; + + /** + * The Number of literal words. + */ + protected int numberOfLiteralWords; + + /** + * The Running bit. + */ + protected boolean runningBit; + + /** + * The Running length. 
+ */ + protected long runningLength; +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/Buffer.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/Buffer.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/Buffer.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/Buffer.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,159 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Buffer interface. + * Users should not be concerned by this class. + * + * @author Gregory Ssi-Yan-Kai + */ +interface Buffer { + + /** + * Returns the actual size in words + */ + int sizeInWords(); + + /** + * Increases the size of the buffer if necessary + */ + void ensureCapacity(int capacity); + + /** + * Returns the word at a given position + * @param position + * @return the word + */ + long getWord(int position); + + /** + * Returns the last word of the buffer + * @return the last word + */ + long getLastWord(); + + /** + * Resets the buffer + * The buffer is not fully cleared and any new set operations should + * overwrite stale data + */ + void clear(); + + /** + * Reduces the internal buffer to its minimal allowable size. + * This can free memory. + */ + void trim(); + + /** + * Replaces the word at the given position in the buffer with + * the specified word. + * @param position + * @param word + */ + void setWord(int position, long word); + + /** + * Replaces the last word in the buffer with + * the specified word. + * @param word + */ + void setLastWord(long word); + + /** + * Appends the specified word to the end of the buffer + * @param word + */ + void push_back(long word); + + /** + * Appends the specified buffer words to the end of the buffer. 
+ * @param buffer the buffer + * @param start the position of the first word to add + * @param number the number of words to add + */ + void push_back(Buffer buffer, int start, int number); + + /** + * Same as push_back, but the words are negated. + * + * @param buffer the buffer + * @param start the position of the first word to add + * @param number the number of words to add + */ + void negative_push_back(Buffer buffer, int start, int number); + + /** + * Removes the last word from the buffer + */ + void removeLastWord(); + + /** + * Negates the word at the given position in the buffer + * @param position + */ + void negateWord(int position); + + /** + * Replaces the word at the given position in the buffer + * with its bitwise-and with the given mask. + * @param position + * @param mask + */ + void andWord(int position, long mask); + + /** + * Replaces the word at the given position in the buffer + * with its bitwise-or with the given mask. + * @param position + * @param mask + */ + void orWord(int position, long mask); + + /** + * Replaces the last word position in the buffer + * with its bitwise-and with the given mask. + * @param mask + */ + void andLastWord(long mask); + + /** + * Replaces the last word position in the buffer + * with its bitwise-or with the given mask. + * @param mask + */ + void orLastWord(long mask); + + /** + * Expands the buffer by adding the given number of words at the given position. + * The added words may contain stale data. + * @param position the position of the buffer where to add words + * @param length the number of words to add + */ + void expand(int position, int length); + + /** + * Removes a given number of words at the given position in the buffer. + * The freed words at the end of the buffer are properly cleaned. 
+ * @param position the position of the buffer where to add words + * @param length the number of words to add + */ + void collapse(int position, int length); + + /** + * Creates and returns a copy of the buffer + */ + Buffer clone() throws CloneNotSupportedException; + + /** + * Swap the content of the buffer with another. + * + * @param other buffer to swap with + */ + void swap(Buffer other); + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ChunkIteratorImpl.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ChunkIteratorImpl.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ChunkIteratorImpl.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ChunkIteratorImpl.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,151 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; + +/** + * The ChunkIteratorImpl is the 64 bit implementation of the ChunkIterator + * interface, which efficiently returns the chunks of ones and zeros represented by an + * EWAHIterator. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +final class ChunkIteratorImpl implements ChunkIterator { + + private final EWAHIterator ewahIter; + private final int sizeInBits; + private final Buffer buffer; + private int position; + private boolean runningBit; + private int runningLength; + private long word; + private long wordMask; + private int wordPosition; + private int wordLength; + private boolean hasNext; + private Boolean nextBit; + private int nextLength; + + ChunkIteratorImpl(EWAHIterator ewahIter, int sizeInBits) { + this.ewahIter = ewahIter; + this.sizeInBits = sizeInBits; + this.buffer = ewahIter.buffer(); + this.hasNext = moveToNextRLW(); + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public boolean nextBit() { + return this.nextBit; + } + + @Override + public int nextLength() { + return this.nextLength; + } + + @Override + public void move() { + move(this.nextLength); + } + + @Override + public void move(int bits) { + this.nextLength -= bits; + if(this.nextLength <= 0) { + do { + this.nextBit = null; + updateNext(); + this.hasNext = moveToNextRLW(); + } while(this.nextLength <= 0 && this.hasNext); + } + } + + private boolean moveToNextRLW() { + while (!runningHasNext() && !literalHasNext()) { + if (!hasNextRLW()) { + return this.nextBit!=null; + } + setRLW(nextRLW()); + updateNext(); + } + return true; + } + + private void setRLW(RunningLengthWord rlw) { + this.runningLength = Math.min(this.sizeInBits, + this.position + WORD_IN_BITS * (int) rlw.getRunningLength()); + this.runningBit = rlw.getRunningBit(); + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordMask == 0 && this.wordPosition < this.wordLength) { + this.word = this.buffer.getWord(this.wordPosition++); + this.wordMask 
= 1l; + } + return (this.word != 0 || this.wordMask != 0 || !hasNextRLW()) && this.position < this.sizeInBits; + } + + private boolean hasNextRLW() { + return this.ewahIter.hasNext(); + } + + private RunningLengthWord nextRLW() { + return this.ewahIter.next(); + } + + private void updateNext() { + if(runningHasNext()) { + if(this.nextBit == null || this.nextBit == this.runningBit) { + this.nextBit = this.runningBit; + int offset = runningOffset(); + this.nextLength += offset; + movePosition(offset); + updateNext(); + } + } else if (literalHasNext()) { + boolean b = currentWordBit(); + if(this.nextBit == null || this.nextBit == b) { + this.nextBit = b; + this.nextLength++; + movePosition(1); + shiftWordMask(); + updateNext(); + } + } else { + moveToNextRLW(); + } + } + + private int runningOffset() { + return this.runningLength - this.position; + } + + private void movePosition(int offset) { + this.position += offset; + } + + private boolean currentWordBit() { + return (this.word & this.wordMask) != 0; + } + + private void shiftWordMask() { + this.word &= ~this.wordMask; + this.wordMask = this.wordMask << 1; + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ChunkIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ChunkIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ChunkIterator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ChunkIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,48 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +/** + * The ChunkIterator interface is used to iterate over chunks of ones or zeros. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +public interface ChunkIterator { + + /** + * Is there more? + * + * @return true, if there is more, false otherwise + */ + boolean hasNext(); + + /** + * Return the next bit + * + * @return the bit + */ + boolean nextBit(); + + /** + * Return the length of the next bit + * + * @return the length + */ + int nextLength(); + + /** + * Move the iterator at the next different bit + */ + void move(); + + /** + * Move the iterator at the next ith bit + * + * @param bits the number of bits to skip + */ + void move(int bits); + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ClearIntIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ClearIntIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ClearIntIterator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ClearIntIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,94 @@ +package com.googlecode.javaewah; + +import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +/** + * This class is equivalent to IntIteratorImpl, except that it allows + * use to iterate over "clear" bits (bits set to 0). 
+ * + * @author Gregory Ssi-Yan-Kai + */ +final class ClearIntIterator implements IntIterator { + + private final EWAHIterator ewahIter; + private final int sizeInBits; + private final Buffer buffer; + private int position; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasNext; + + ClearIntIterator(EWAHIterator ewahIter, int sizeInBits) { + this.ewahIter = ewahIter; + this.sizeInBits = sizeInBits; + this.buffer = ewahIter.buffer(); + this.hasNext = this.moveToNext(); + } + + public boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (!this.ewahIter.hasNext()) { + return false; + } + setRunningLengthWord(this.ewahIter.next()); + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final long t = this.word & -this.word; + answer = this.literalPosition + Long.bitCount(t - 1); + this.word ^= t; + } + this.hasNext = this.moveToNext(); + return answer; + } + + private void setRunningLengthWord(RunningLengthWord rlw) { + this.runningLength = Math.min(this.sizeInBits, + WORD_IN_BITS * (int) rlw.getRunningLength() + this.position); + if (rlw.getRunningBit()) { + this.position = this.runningLength; + } + + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = ~this.buffer.getWord(this.wordPosition++); + if (this.wordPosition == this.wordLength && !this.ewahIter.hasNext()) { + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + if (usedBitsInLast > 0) { + this.word &= ((~0l) >>> (WORD_IN_BITS - 
usedBitsInLast)); + } + } + this.literalPosition = this.position; + this.position += WORD_IN_BITS; + } + return this.word != 0; + } +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/CloneableIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/CloneableIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/CloneableIterator.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/CloneableIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,24 +1,31 @@ package com.googlecode.javaewah; +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + /** * Like a standard Java iterator, except that you can clone it. * * @param the data type of the iterator */ public interface CloneableIterator extends Cloneable { - - /** - * @return whether there is more - */ - public boolean hasNext(); - /** - * @return the next element - */ - public E next(); - /** - * @return a copy - * @throws CloneNotSupportedException this should never happen in practice - */ - public CloneableIterator clone() throws CloneNotSupportedException; -} \ No newline at end of file + /** + * @return whether there is more + */ + boolean hasNext(); + + /** + * @return the next element + */ + E next(); + + /** + * @return a copy + * @throws CloneNotSupportedException this should never happen in practice + */ + CloneableIterator clone() throws CloneNotSupportedException; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java 1970-01-01 00:00:00.000000000 +0000 +++ 
libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/BitSet.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,673 @@ +package com.googlecode.javaewah.datastructure; + +import com.googlecode.javaewah.IntIterator; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; +import java.util.Iterator; + + +/** + *

<p>This is an optimized version of Java's BitSet. In many cases, it can be used
+ * as a drop-in replacement.</p>
+ *
+ * <p>It differs from the basic Java BitSet class in the following ways:</p>
+ * <ul>
+ * <li>You can iterate over set bits using a simpler syntax for(int bs: myBitset).</li>
+ * <li>You can compute the cardinality of an intersection and union without writing it out
+ * or modifying your BitSets (see methods such as andcardinality).</li>
+ * <li>You can recover wasted memory with trim().</li>
+ * <li>It does not implicitly expand: you have to explicitly call resize. This helps to keep memory usage in check.</li>
+ * <li>It supports memory-file mapping (see the ImmutableBitSet class).</li>
+ * <li>It supports faster and more efficient serialization functions (serialize and deserialize).</li>
+ * </ul>
+ * + * @author Daniel Lemire + * @since 0.8.0 + */ +public class BitSet implements Cloneable, Iterable, Externalizable ,WordArray { + /** + * Construct a bitset with the specified number of bits (initially all + * false). The number of bits is rounded up to the nearest multiple of + * 64. + * + * @param sizeInBits the size in bits + */ + public BitSet(final int sizeInBits) { + this.data = new long[(sizeInBits + 63) / 64]; + } + + public BitSet() { + this.data = new long[0]; + } + + /** + * Compute bitwise AND. + * + * @param bs other bitset + */ + public void and(WordArray bs) { + for (int k = 0; k < Math.min(this.getNumberOfWords(), bs.getNumberOfWords()); ++k) { + this.data[k] &= bs.getWord(k); + } + } + + /** + * Compute cardinality of bitwise AND. + * + * The current bitmap is modified. Consider calling trim() + * to recover wasted memory afterward. + * + * @param bs other bitset + * @return cardinality + */ + public int andcardinality(WordArray bs) { + int sum = 0; + for (int k = 0; k < Math.min(this.getNumberOfWords(), bs.getNumberOfWords()); ++k) { + sum += Long.bitCount(this.getWord(k) & bs.getWord(k)); + } + return sum; + } + + /** + * Compute bitwise AND NOT. + * + * The current bitmap is modified. Consider calling trim() + * to recover wasted memory afterward. + * + * @param bs other bitset + */ + public void andNot(WordArray bs) { + for (int k = 0; k < Math.min(this.getNumberOfWords(), bs.getNumberOfWords()); ++k) { + this.data[k] &= ~bs.getWord(k); + } + } + + /** + * Compute cardinality of bitwise AND NOT. 
+ * + * @param bs other bitset + * @return cardinality + */ + public int andNotcardinality(WordArray bs) { + int sum = 0; + for (int k = 0; k < Math.min(this.getNumberOfWords(), bs.getNumberOfWords()); ++k) { + sum += Long.bitCount(this.getWord(k) & (~bs.getWord(k))); + } + return sum; + } + + /** + * Compute the number of bits set to 1 + * + * @return the number of bits + */ + public int cardinality() { + int sum = 0; + for (long l : this.data) + sum += Long.bitCount(l); + return sum; + } + + /** + * Reset all bits to false. This might be wasteful: a better + * approach is to create a new empty bitmap. + */ + public void clear() { + Arrays.fill(this.data, 0); + } + + /** + * Set the bit to false. + * See {@link #unset(int)} + * + * @param index location of the bit + */ + public void clear(int index) { + unset(index); + } + + /** + * Set the bits in the range of indexes to false. + * This might throw an exception if size() is insufficient, consider calling resize(). + * + * @param start location of the first bit to set to zero + * @param end location of the last bit to set to zero (not included) + */ + public void clear(int start, int end) { + if (start == end) return; + int firstword = start / 64; + int endword = (end - 1 ) / 64; + if(firstword == endword) { + this.data[firstword] &= ~((~0L << start) & (~0L >>> -end)); + return; + } + this.data[firstword] &= ~(~0L << start); + for (int i = firstword+1; i < endword; i++) + this.data[i] = 0; + this.data[endword] &= ~(~0L >>> -end); + } + + @Override + public BitSet clone() { + BitSet b; + try { + b = (BitSet) super.clone(); + b.data = Arrays.copyOf(this.data, this.getNumberOfWords()); + return b; + } catch (CloneNotSupportedException e) { + return null; + } + } + + @Override + public boolean equals(Object o) { + if (o instanceof WordArray) { + WordArray bs = (WordArray) o; + for (int k = 0; k < Math.min(this.getNumberOfWords(), + bs.getNumberOfWords()); ++k) { + if (this.getWord(k) != bs.getWord(k)) + return false; 
+ } + WordArray longer = bs.getNumberOfWords() < this.getNumberOfWords() ? this + : bs; + for (int k = Math.min(this.getNumberOfWords(), + bs.getNumberOfWords()); k < Math.max(this.getNumberOfWords(), + bs.getNumberOfWords()); ++k) { + if (longer.getWord(k) != 0) { + return false; + } + } + return true; + } + return false; + } + + /** + * Check whether a bitset contains a set bit. + * + * @return true if no set bit is found + */ + public boolean empty() { + for (long l : this.data) + if (l != 0) return false; + return true; + } + /** + * Flip the bit. This might throw an exception if size() is insufficient, consider calling resize(). + * + * @param i index of the bit + */ + public void flip(final int i) { + this.data[i / 64] ^= (1l << (i % 64)); + } + + /** + * Flip the bits in the range of indexes. + * This might throw an exception if size() is insufficient, consider calling resize(). + * + * @param start location of the first bit + * @param end location of the last bit (not included) + */ + public void flip(int start, int end) { + if (start == end) return; + int firstword = start / 64; + int endword = (end - 1 ) / 64; + this.data[firstword] ^= ~(~0L << start); + for (int i = firstword ; i < endword; i++) + this.data[i] = ~this.data[i]; + this.data[endword] ^= ~0L >>> -end; + } + + + /** + * Get the value of the bit. This might throw an exception if size() is insufficient, consider calling resize(). 
+ * @param i index + * @return value of the bit + */ + public boolean get(final int i) { + return (this.data[i / 64] & (1l << (i % 64))) != 0; + } + + @Override + public int hashCode() { + int b = 31; + long hash = 0; + for(int k = 0; k < data.length; ++k) { + long aData = this.getWord(k); + hash = hash * b + aData; + } + return (int) hash; + } + + /** + * Iterate over the set bits + * + * @return an iterator + */ + public IntIterator intIterator() { + return new IntIterator() { + @Override + public boolean hasNext() { + return this.i >= 0; + } + + @Override + public int next() { + this.j = this.i; + this.i = BitSet.this.nextSetBit(this.i + 1); + return this.j; + } + + private int i = BitSet.this.nextSetBit(0); + + private int j; + + }; + } + + @Override + public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return this.i >= 0; + } + + @Override + public Integer next() { + this.j = this.i; + this.i = BitSet.this.nextSetBit(this.i + 1); + return this.j; + } + + @Override + public void remove() { + BitSet.this.unset(this.j); + } + + private int i = BitSet.this.nextSetBit(0); + + private int j; + }; + } + + /** + * Checks whether two bitsets intersect. 
+     *
+     * @param bs other bitset
+     * @return true if they have a non-empty intersection (result of AND)
+     */
+    public boolean intersects(WordArray bs) {
+        // Only the words present in both bitsets can contribute to an intersection.
+        final int common = Math.min(this.getNumberOfWords(), bs.getNumberOfWords());
+        for (int k = 0; k < common; ++k) {
+            if ((this.getWord(k) & bs.getWord(k)) != 0) return true;
+        }
+        return false;
+    }
+
+    /**
+     * Find the first set bit at or after the given index.
+     *
+     * Usage: for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)) {
+     * operate on index i here }
+     *
+     * @param i index at which the search starts (inclusive)
+     * @return index of the next set bit, or -1 if there is none
+     */
+    public int nextSetBit(final int i) {
+        int x = i / 64;
+        if (x >= this.getNumberOfWords())
+            return -1;
+        long w = this.data[x];
+        // For a long operand Java only uses the low 6 bits of the shift
+        // distance, so this shifts by (i % 64) and drops the bits below i.
+        w >>>= i;
+        if (w != 0) {
+            return i + Long.numberOfTrailingZeros(w);
+        }
+        // Nothing left in the starting word: scan the following words.
+        ++x;
+        for (; x < this.getNumberOfWords(); ++x) {
+            if (this.data[x] != 0) {
+                return x * 64 + Long.numberOfTrailingZeros(this.data[x]);
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Find the first unset bit at or after the given index.
+     *
+     * Usage: for(int i=bs.nextUnsetBit(0); i>=0; i=bs.nextUnsetBit(i+1))
+     * { operate on index i here }
+     *
+     * @param i index at which the search starts (inclusive)
+     * @return index of the next unset bit, or -1 if there is none
+     */
+    public int nextUnsetBit(final int i) {
+        int x = i / 64;
+        if (x >= this.getNumberOfWords())
+            return -1;
+        // Work on the complement so that unset bits become set bits.
+        long w = ~this.data[x];
+        // Shift distance is taken modulo 64 for long operands.
+        w >>>= i;
+        if (w != 0) {
+            return i + Long.numberOfTrailingZeros(w);
+        }
+        ++x;
+        for (; x < this.getNumberOfWords(); ++x) {
+            if (this.data[x] != ~0L) {
+                return x * 64 + Long.numberOfTrailingZeros(~this.data[x]);
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Compute bitwise OR.
+     *
+     * The current bitmap is modified. Consider calling trim()
+     * to recover wasted memory afterward.
+     *
+     * If the other bitset has more words than this one, this bitset is
+     * first resized to the other's word count. Words of this bitset that
+     * lie beyond the end of the other bitset are left unchanged.
+     *
+     * @param bs other bitset
+     */
+    public void or(WordArray bs) {
+        final int otherWords = bs.getNumberOfWords();
+        if (this.getNumberOfWords() < otherWords)
+            this.resize(otherWords * 64);
+        // Iterate over the words of bs only: looping up to this bitset's
+        // own length read bs.getWord(k) past the end of a shorter bs and
+        // threw an index-out-of-bounds exception.
+        for (int k = 0; k < otherWords; ++k) {
+            this.data[k] |= bs.getWord(k);
+        }
+    }
+
+    /**
+     * Compute cardinality of bitwise OR.
+     *
+     * BitSets are not modified.
+     *
+     * @param bs other bitset
+     * @return cardinality
+     */
+    public int orcardinality(WordArray bs) {
+        final int shorter = Math.min(this.getNumberOfWords(), bs.getNumberOfWords());
+        final int longest = Math.max(this.getNumberOfWords(), bs.getNumberOfWords());
+        int sum = 0;
+        for (int k = 0; k < shorter; ++k) {
+            sum += Long.bitCount(this.getWord(k) | bs.getWord(k));
+        }
+        // Past the shorter bitset, the OR is just the longer bitset's words.
+        WordArray longer = bs.getNumberOfWords() < this.getNumberOfWords() ? this : bs;
+        for (int k = shorter; k < longest; ++k) {
+            sum += Long.bitCount(longer.getWord(k));
+        }
+        return sum;
+    }
+
+    /**
+     * Remove the word at the given index. All following words move down
+     * by one position and the backing array shrinks by one word.
+     *
+     * @param i index of the word to be removed
+     *          (0 &lt;= i &lt; getNumberOfWords())
+     * @throws IndexOutOfBoundsException if i is out of range
+     */
+    public void removeWord(int i) {
+        if (i < 0 || i >= data.length) {
+            throw new IndexOutOfBoundsException(
+                    "word index " + i + " out of range [0, " + data.length + ")");
+        }
+        // The previous version copied (i - 1) leading words: that always
+        // failed for i == 0 (negative copy length) and dropped word i - 1
+        // instead of word i for every other index.
+        long[] newdata = new long[data.length - 1];
+        System.arraycopy(data, 0, newdata, 0, i);
+        System.arraycopy(data, i + 1, newdata, i, data.length - i - 1);
+        data = newdata;
+    }
+
+    /**
+     * Resize the bitset. The number of bits is rounded up to a multiple
+     * of 64; existing words that still fit are kept.
+     *
+     * @param sizeInBits new number of bits
+     */
+    public void resize(int sizeInBits) {
+        this.data = Arrays.copyOf(this.data, (sizeInBits + 63) / 64);
+    }
+
+    /**
+     * Set to true. This might throw an exception if size() is insufficient, consider calling resize().
+     *
+     * @param i index of the bit
+     */
+    public void set(final int i) {
+        this.data[i / 64] |= (1L << (i % 64));
+    }
+
+    /**
+     * Set to some value. This might throw an exception if size() is insufficient, consider calling resize().
+     *
+     * @param i index
+     * @param b value of the bit
+     */
+    public void set(final int i, final boolean b) {
+        if (b)
+            set(i);
+        else
+            unset(i);
+    }
+
+
+    /**
+     * Set the bits in the range of indexes true.
+     * This might throw an exception if size() is insufficient, consider calling resize().
+ * + * @param start location of the first bit + * @param end location of the last bit (not included) + */ + public void set(int start, int end) { + if (start == end) return; + int firstword = start / 64; + int endword = (end - 1 ) / 64; + if(firstword == endword) { + this.data[firstword] |= (~0L << start) & (~0L >>> -end); + return; + } + this.data[firstword] |= ~0L << start; + for (int i = firstword+1; i < endword; i++) + this.data[i] = ~0; + this.data[endword] |= ~0L >>> -end; + } + + + + /** + * Set the bits in the range of indexes to the specified Boolean value. + * This might throw an exception if size() is insufficient, consider calling resize(). + * + * @param start location of the first bit + * @param end location of the last bit (not included) + * @param v Boolean value + */ + public void set(int start, int end, boolean v) { + if(v) + set(start,end); + else + clear(start,end); + } + + + /** + * Query the size + * + * @return the size in bits. + */ + public int size() { + return this.getNumberOfWords() * 64; + } + + /** + * Recovers wasted memory + */ + public void trim() { + for (int k = this.getNumberOfWords() - 1; k >= 0; --k) + if (this.getWord(k) != 0) { + if (k + 1 < this.getNumberOfWords()) + this.data = Arrays.copyOf(this.data, k + 1); + return; + } + this.data = new long[0]; + } + + /** + * Set to false + * + * @param i index of the bit + */ + public void unset(final int i) { + this.data[i / 64] &= ~(1l << (i % 64)); + } + + /** + * Iterate over the unset bits + * + * @return an iterator + */ + public IntIterator unsetIntIterator() { + return new IntIterator() { + @Override + public boolean hasNext() { + return this.i >= 0; + } + + @Override + public int next() { + this.j = this.i; + this.i = BitSet.this.nextUnsetBit(this.i + 1); + return this.j; + } + + private int i = BitSet.this.nextUnsetBit(0); + + private int j; + }; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } + + @Override + 
public void readExternal(ObjectInput in) throws IOException,
+            ClassNotFoundException {
+        deserialize(in);
+    }
+
+    /**
+     * Serialize.
+     *
+     * The current bitmap is not modified. The output is the number of
+     * words written as a long, followed by each word in order.
+     *
+     * @param out the DataOutput stream
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    public void serialize(DataOutput out) throws IOException {
+        out.writeLong(this.getNumberOfWords());
+        for (long w : this.data)
+            out.writeLong(w);
+    }
+
+    /**
+     * Deserialize. Replaces the content of this bitset with the words
+     * read from the stream, in the layout produced by serialize.
+     *
+     * @param in the DataInput stream
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    public void deserialize(DataInput in) throws IOException {
+        // The word count is stored as a long but used as an int array length.
+        int length = (int) in.readLong();
+        this.data = new long[length];
+        for (int k = 0; k < length; ++k)
+            this.data[k] = in.readLong();
+    }
+    /**
+     * Compute bitwise XOR.
+     *
+     * The current bitmap is modified. Consider calling trim()
+     * to recover wasted memory afterward.
+     *
+     * If the other bitset has more words than this one, this bitset is
+     * first resized to the other's word count. Words of this bitset that
+     * lie beyond the end of the other bitset are left unchanged.
+     *
+     * @param bs other bitset
+     */
+    public void xor(WordArray bs) {
+        final int otherWords = bs.getNumberOfWords();
+        if (this.getNumberOfWords() < otherWords)
+            this.resize(otherWords * 64);
+        // Iterate over the words of bs only: looping up to this bitset's
+        // own length read bs.getWord(k) past the end of a shorter bs and
+        // threw an index-out-of-bounds exception.
+        for (int k = 0; k < otherWords; ++k) {
+            this.data[k] ^= bs.getWord(k);
+        }
+    }
+
+    /**
+     * Compute cardinality of bitwise XOR.
+     *
+     * BitSets are not modified.
+     *
+     * @param bs other bitset
+     * @return cardinality
+     */
+    public int xorcardinality(WordArray bs) {
+        int sum = 0;
+        for (int k = 0; k < Math.min(this.getNumberOfWords(), bs.getNumberOfWords()); ++k) {
+            sum += Long.bitCount(this.getWord(k) ^ bs.getWord(k));
+        }
+        WordArray longer = bs.getNumberOfWords() < this.getNumberOfWords() ?
this : bs; + + int start = Math.min(this.getNumberOfWords(), bs.getNumberOfWords()); + int end = Math.max(this.getNumberOfWords(), bs.getNumberOfWords()); + for (int k = start; k < end; ++k) { + sum += Long.bitCount(longer.getWord(k)); + } + + return sum; + } + @Override + public int getNumberOfWords() { + return data.length; + } + + @Override + public long getWord(int index) { + return this.data[index]; + } + + /** + * Return a bitmap with the bit set to true at the given positions. + * + * (This is a convenience method.) + * + * @param setBits list of set bit positions + * @return the bitmap + */ + public static BitSet bitmapOf(int... setBits) { + int maxv = 0; + for (int k : setBits) + if(maxv < k) maxv = k; + BitSet a = new BitSet(maxv + 1); + for (int k : setBits) + a.set(k); + return a; + } + + @Override + public String toString() { + StringBuilder answer = new StringBuilder(); + IntIterator i = this.intIterator(); + answer.append("{"); + if (i.hasNext()) + answer.append(i.next()); + while (i.hasNext()) { + answer.append(","); + answer.append(i.next()); + } + answer.append("}"); + return answer.toString(); + } + + long[] data; + + static final long serialVersionUID = 7997698588986878754L; + + + + + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/ImmutableBitSet.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/ImmutableBitSet.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/ImmutableBitSet.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/ImmutableBitSet.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,320 @@ +package com.googlecode.javaewah.datastructure; + +import java.nio.LongBuffer; +import java.util.Iterator; + +import com.googlecode.javaewah.IntIterator; + +/** + * + * This is an immutable version of the BitSet class in this same package. 
+ * It is meant to be used with memory-file mapping. + * + *
final FileOutputStream fos = new FileOutputStream(tmpfile);
+ *	BitSet Bitmap = BitSet.bitmapOf(0, 2, 55, 64, 512);
+ *	Bitmap.serialize(new DataOutputStream(fos));
+ *		RandomAccessFile memoryMappedFile = new RandomAccessFile(tmpfile, "r");
+ *		ByteBuffer bb = memoryMappedFile.getChannel().map(
+ *				FileChannel.MapMode.READ_ONLY, 0, totalcount);
+ *		ImmutableBitSet mapped = new ImmutableBitSet(bb.asLongBuffer());
+ * + */ +public class ImmutableBitSet implements Cloneable, Iterable,WordArray { + /** + * Construct a ImmutableBitSet from the content of the LongBuffer + * which should have been initialized with BitSet.serialize (from the BitSet in this + * same package). + * + * The input is not modified. + * + * @param bs the data source + */ + public ImmutableBitSet(final LongBuffer bs) { + int length = (int) bs.get(0); + LongBuffer copy = bs.slice(); + copy.position(1); + data = copy.slice(); + data.limit(length); + } + + /** + * Get a copy of this ImmutableBitSet as a mutable BitSet. + * @return a copy + */ + public BitSet asBitSet() { + BitSet bs = new BitSet(this.size()); + this.data.rewind(); + this.data.get(bs.data, 0, bs.data.length); + return bs; + } + + /** + * Compute the number of bits set to 1 + * + * @return the number of bits + */ + public int cardinality() { + int sum = 0; + int length = this.data.limit(); + for(int k = 0; k < length; ++k) + sum += Long.bitCount(this.data.get(k)); + return sum; + } + + @Override + public ImmutableBitSet clone() { + ImmutableBitSet b; + try { + b = (ImmutableBitSet) super.clone(); + b.data = this.data.duplicate(); + return b; + } catch (CloneNotSupportedException e) { + return null; + } + } + + @Override + public boolean equals(Object o) { + if (o instanceof WordArray) { + WordArray bs = (WordArray) o; + for (int k = 0; k < Math.min(this.getNumberOfWords(), + bs.getNumberOfWords()); ++k) { + if (this.getWord(k) != bs.getWord(k)) + return false; + } + WordArray longer = bs.getNumberOfWords() < this.getNumberOfWords() ? this + : bs; + for (int k = Math.min(this.getNumberOfWords(), + bs.getNumberOfWords()); k < Math.max(this.getNumberOfWords(), + bs.getNumberOfWords()); ++k) { + if (longer.getWord(k) != 0) { + return false; + } + } + return true; + } + return false; + } + + /** + * Check whether a bitset contains a set bit. 
+ * + * @return true if no set bit is found + */ + public boolean empty() { + int length = this.data.limit(); + for(int k = 0; k < length; ++k) + if (this.data.get(k) != 0) return false; + return true; + } + + /** + * get value of bit i + * + * @param i index + * @return value of the bit + */ + public boolean get(final int i) { + return (this.data.get(i / 64) & (1l << (i % 64))) != 0; + } + + @Override + public int hashCode() { + int b = 31; + long hash = 0; + int length = this.data.limit(); + for(int k = 0; k < length; ++k) { + long aData = this.data.get(k); + hash = hash * b + aData; + } + return (int) hash; + } + + /** + * Iterate over the set bits + * + * @return an iterator + */ + public IntIterator intIterator() { + return new IntIterator() { + @Override + public boolean hasNext() { + return this.i >= 0; + } + + @Override + public int next() { + this.j = this.i; + this.i = ImmutableBitSet.this.nextSetBit(this.i + 1); + return this.j; + } + + private int i = ImmutableBitSet.this.nextSetBit(0); + + private int j; + + }; + } + + @Override + public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return this.i >= 0; + } + + @Override + public Integer next() { + this.j = this.i; + this.i = ImmutableBitSet.this.nextSetBit(this.i + 1); + return this.j; + } + + @Override + public void remove() { + throw new RuntimeException("Object is immutable"); + } + + private int i = ImmutableBitSet.this.nextSetBit(0); + + private int j; + }; + } + + /** + * Checks whether two bitsets intersect. 
+ * + * @param bs other bitset + * @return true if they have a non-empty intersection (result of AND) + */ + public boolean intersects(BitSet bs) { + for (int k = 0; k < Math.min(this.data.limit(), bs.data.length); ++k) { + if ((this.data.get(k) & bs.data[k]) != 0) return true; + } + return false; + } + + /** + * Usage: for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)) { + * operate on index i here } + * + * @param i current set bit + * @return next set bit or -1 + */ + public int nextSetBit(final int i) { + int x = i / 64; + if (x >= this.data.limit()) + return -1; + long w = this.data.get(x); + w >>>= (i % 64); + if (w != 0) { + return i + Long.numberOfTrailingZeros(w); + } + ++x; + for (; x < this.data.limit(); ++x) { + if (this.data.get(x) != 0) { + return x + * 64 + + Long.numberOfTrailingZeros(this.data.get(x)); + } + } + return -1; + } + + /** + * Usage: for(int i=bs.nextUnsetBit(0); i>=0; i=bs.nextUnsetBit(i+1)) + * { operate on index i here } + * + * @param i current unset bit + * @return next unset bit or -1 + */ + public int nextUnsetBit(final int i) { + int x = i / 64; + if (x >= this.data.limit()) + return -1; + long w = ~this.data.get(x); + w >>>= (i % 64); + if (w != 0) { + return i + Long.numberOfTrailingZeros(w); + } + ++x; + for (; x < this.data.limit(); ++x) { + if (this.data.get(x) != ~0) { + return x + * 64 + + Long.numberOfTrailingZeros(~this.data.get(x)); + } + } + return -1; + } + + + + /** + * Query the size + * + * @return the size in bits. 
+ */ + public int size() { + return this.data.limit() * 64; + } + + + /** + * Iterate over the unset bits + * + * @return an iterator + */ + public IntIterator unsetIntIterator() { + return new IntIterator() { + @Override + public boolean hasNext() { + return this.i >= 0; + } + + @Override + public int next() { + this.j = this.i; + this.i = ImmutableBitSet.this.nextUnsetBit(this.i + 1); + return this.j; + } + + private int i = ImmutableBitSet.this.nextUnsetBit(0); + + private int j; + }; + } + + + @Override + public int getNumberOfWords() { + return data.limit(); + } + + @Override + public long getWord(int index) { + return data.get(index); + } + @Override + public String toString() { + StringBuilder answer = new StringBuilder(); + IntIterator i = this.intIterator(); + answer.append("{"); + if (i.hasNext()) + answer.append(i.next()); + while (i.hasNext()) { + answer.append(","); + answer.append(i.next()); + } + answer.append("}"); + return answer.toString(); + } + + private LongBuffer data; + + + + + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/PriorityQ.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/PriorityQ.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/PriorityQ.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/PriorityQ.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,132 @@ +package com.googlecode.javaewah.datastructure; + +import java.util.Comparator; + +/** + * Special-purpose priority queue. Does limited error checking and supports + * toss, buildHeap, poll, peek, percolateDown. It is faster than the equivalent + * class from java.util. 
+ * + * @param object type + * @author Owen Kaser + * @since 0.8.0 + */ +public final class PriorityQ { + final T[] a; + int lastIndex; + final Comparator comp; + + /** + * Construct a priority queue with a given capacity + * + * @param maxSize capacity + * @param c comparator + */ + @SuppressWarnings("unchecked") + public PriorityQ(final int maxSize, final Comparator c) { + this.a = (T[]) new Object[maxSize + 1]; + this.lastIndex = 0; + this.comp = c; + } + + /** + * @return the size of the queue + */ + public int size() { + return this.lastIndex; + } + + private int compare(T a, T b) { + return this.comp.compare(a, b); + } + + /** + * Add an element at the end of the queue + * + * @param t element to be added + */ + public void toss(final T t) { + this.a[++this.lastIndex] = t; + } + + /** + * Look at the top of the heap + * + * @return the element on top + */ + public T peek() { + return this.a[1]; + } + + /** + * build the heap... + */ + public void buildHeap() { + for (int i = this.lastIndex / 2; i > 0; --i) { + percolateDown(i); + } + } + + /** + * Signals that the element on top of the heap has been updated + */ + public void percolateDown() { + percolateDown(1); + } + + private void percolateDown(int i) { + T ai = this.a[i]; + while (true) { + int l = 2 * i; + int r = l + 1; + int smallest = i; + + if (r <= this.lastIndex) { // then l also okay + if (compare(this.a[l], ai) < 0) { // l beats i + smallest = l; + if (compare(this.a[r], this.a[smallest]) < 0) + smallest = r; + } else if (compare(this.a[r], ai) < 0) + smallest = r; + } else {// may have a l, don't have a r + if ((l <= this.lastIndex) + && (compare(this.a[l], ai) < 0)) + smallest = l; + } + if (i != smallest) { + // conceptually, swap a[i]& a[smallest] + // but as an opt., we use ai and just save at + // end + // temp = a[i]; + this.a[i] = this.a[smallest]; // move smallest + // one up into + // place of i + i = smallest; + } else { + this.a[smallest] = ai; + return; + } + } + } + + /** + * Remove 
the element on top of the heap + * + * @return the element being removed + */ + public T poll() { + T ans = this.a[1]; + this.a[1] = this.a[this.lastIndex--]; + percolateDown(1); + return ans; + } + + /** + * Check whether the heap is empty. + * + * @return true if empty + */ + public boolean isEmpty() { + return this.lastIndex == 0; + } +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/WordArray.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/WordArray.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/datastructure/WordArray.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/datastructure/WordArray.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,16 @@ +package com.googlecode.javaewah.datastructure; + +interface WordArray { + /** + * Get the total number of words contained in this data structure. + * @return the number + */ + int getNumberOfWords(); + /** + * Get the word at the given index + * @param index the index + * @return the word + */ + long getWord(int index); + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,13 +1,19 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. 
*/ -import java.util.*; +import com.googlecode.javaewah.symmetric.RunningBitmapMerge; +import com.googlecode.javaewah.symmetric.ThresholdFuncBitmap; import java.io.*; - +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; /** @@ -22,1610 +28,2111 @@ * reducing as much as possible the CPU cycle usage. *

* + *

+ * Once constructed, the bitmap is essentially immutable (unless you call the + * "set" or "add" methods). Thus, it can be safely used in multi-threaded + * programs. + *

* *

* This implementation being 64-bit, it assumes a 64-bit CPU together with a * 64-bit Java Virtual Machine. This same code on a 32-bit machine may not be as - * fast. - *

+ * fast. There is also a 32-bit version of this code in the class + * javaewah32.EWAHCompressedBitmap32. + *

* *

- * There is also a 32-bit version of this code in the class - * javaewah32.EWAHCompressedBitmap32 + * Here is a code sample to illustrate usage: *

* - * @see com.googlecode.javaewah32.EWAHCompressedBitmap32 - * - *

- * For more details, see the following paper: - *

- * - *
    - *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves - * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages - * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • - *
+ *
+ * EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0, 2, 55, 64,
+ *         1 << 30);
+ * EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1, 3, 64,
+ *         1 << 30);
+ * EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap
+ *         .bitmapOf(5, 55, 1 << 30);
+ * EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap
+ *         .bitmapOf(4, 66, 1 << 30);
+ * EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2);
+ * EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2);
+ * EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2);
+ * andbitmap = EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3,
+ *         ewahBitmap4);
+ * ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ * ObjectOutputStream oo = new ObjectOutputStream(bos);
+ * ewahBitmap1.writeExternal(oo);
+ * oo.close();
+ * ewahBitmap1 = null;
+ * ewahBitmap1 = new EWAHCompressedBitmap();
+ * ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
+ * ewahBitmap1.readExternal(new ObjectInputStream(bis));
+ * EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2,
+ *         ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4);
+ * 
+ *

+ * For more details, see the following papers: + *

* - *

- * A 32-bit version of the compressed format was described by Wu et al. and - * named WBC: - *

+ *
    + *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned + * bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. http://arxiv.org/abs/0901.3751
  • + *
  • Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections + * http://arxiv.org/abs/1402.4466
  • + *
* - *
    - *
  • K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and - * implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161, - * Lawrence Berkeley National Laboratory, available from http://crd.lbl. - * gov/~kewu/ps/PUB-3161.html (2001).
  • - *
+ *

+ * A 32-bit version of the compressed format was described by Wu et al. and + * named WBC: + *

* - *

- * Probably, the best prior art is the Oracle bitmap compression scheme - * (BBC): - *

- *
    - *
  • G. Antoshenkov, Byte-Aligned Bitmap Compression, DCC'95, 1995.
  • - *
+ *
    + *
  • K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and + * implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161, Lawrence + * Berkeley National Laboratory, available from http://crd.lbl. + * gov/~kewu/ps/PUB-3161.html (2001).
  • + *
* - *

- * 1- The authors do not know of any patent infringed by the following - * implementation. However, similar schemes, like WAH are covered by - * patents. - *

+ *

+ * Probably, the best prior art is the Oracle bitmap compression scheme (BBC): + *

+ *
    + *
  • G. Antoshenkov, Byte-Aligned Bitmap Compression, DCC'95, 1995.
  • + *
* + *

+ * 1- The authors do not know of any patent infringed by the following + * implementation. However, similar schemes, like WAH are covered by patents. + *

+ * + * @see com.googlecode.javaewah32.EWAHCompressedBitmap32 EWAHCompressedBitmap32 * @since 0.1.0 */ public final class EWAHCompressedBitmap implements Cloneable, Externalizable, - Iterable, BitmapStorage, LogicalElement { + Iterable, BitmapStorage, LogicalElement { - /** - * Creates an empty bitmap (no bit set to true). - */ - public EWAHCompressedBitmap() { - this.buffer = new long[defaultbuffersize]; - this.rlw = new RunningLengthWord(this, 0); - } - - /** - * Sets explicitly the buffer size (in 64-bit words). The initial memory usage - * will be "buffersize * 64". For large poorly compressible bitmaps, using - * large values may improve performance. - * - * @param buffersize - * number of 64-bit words reserved when the object is created) - */ - public EWAHCompressedBitmap(final int buffersize) { - this.buffer = new long[buffersize]; - this.rlw = new RunningLengthWord(this, 0); - } - - /** - * Adding words directly to the bitmap (for expert use). - * - * This is normally how you add data to the array. So you add bits in streams - * of 8*8 bits. - * - * Example: if you add 321, you are have added (in binary notation) - * 0b101000001, so you have effectively called set(0), set(6), set(8) - * in sequence. - * - * @param newdata - * the word - */ - @Override -public void add(final long newdata) { - add(newdata, wordinbits); - } - - /** - * Adding words directly to the bitmap (for expert use). - * - * @param newdata - * the word - * @param bitsthatmatter - * the number of significant bits (by default it should be 64) - */ - public void add(final long newdata, final int bitsthatmatter) { - this.sizeinbits += bitsthatmatter; - if (newdata == 0) { - addEmptyWord(false); - } else if (newdata == ~0l) { - addEmptyWord(true); - } else { - addLiteralWord(newdata); - } - } - - /** - * For internal use. 
- * - * @param v - * the boolean value - */ - private void addEmptyWord(final boolean v) { - final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); - final long runlen = this.rlw.getRunningLength(); - if ((noliteralword) && (runlen == 0)) { - this.rlw.setRunningBit(v); - } - if ((noliteralword) && (this.rlw.getRunningBit() == v) - && (runlen < RunningLengthWord.largestrunninglengthcount)) { - this.rlw.setRunningLength(runlen + 1); - return; - } - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(1); - return; - } - - /** - * For internal use. - * - * @param newdata - * the literal word - */ - private void addLiteralWord(final long newdata) { - final int numbersofar = this.rlw.getNumberOfLiteralWords(); - if (numbersofar >= RunningLengthWord.largestliteralcount) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - this.rlw.setNumberOfLiteralWords(1); - push_back(newdata); - } - this.rlw.setNumberOfLiteralWords(numbersofar + 1); - push_back(newdata); - } - - /** - * if you have several literal words to copy over, this might be faster. - * - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - @Override -public void addStreamOfLiteralWords(final long[] data, final int start, - final int number) { - int leftovernumber = number; - while(leftovernumber > 0) { - final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount - - NumberOfLiteralWords ? 
leftovernumber : RunningLengthWord.largestliteralcount - - NumberOfLiteralWords; - this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); - leftovernumber -= whatwecanadd; - push_back(data, start, whatwecanadd); - this.sizeinbits += whatwecanadd * wordinbits; - if (leftovernumber > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - } - } - } - - /** - * For experts: You want to add many zeroes or ones? This is the method you - * use. - * - * @param v - * the boolean value - * @param number - * the number - */ - @Override -public void addStreamOfEmptyWords(final boolean v, long number) { - if (number == 0) - return; - this.sizeinbits += number * wordinbits; - if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { - this.rlw.setRunningBit(v); - } else if ((this.rlw.getNumberOfLiteralWords() != 0) - || (this.rlw.getRunningBit() != v)) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - } - final long runlen = this.rlw.getRunningLength(); - final long whatwecanadd = number < RunningLengthWord.largestrunninglengthcount - - runlen ? number : RunningLengthWord.largestrunninglengthcount - runlen; - this.rlw.setRunningLength(runlen + whatwecanadd); - number -= whatwecanadd; - while (number >= RunningLengthWord.largestrunninglengthcount) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(RunningLengthWord.largestrunninglengthcount); - number -= RunningLengthWord.largestrunninglengthcount; + /** + * Creates an empty bitmap (no bit set to true). + */ + public EWAHCompressedBitmap() { + this(new LongArray()); + } + + /** + * Sets explicitly the buffer size (in 64-bit words). The initial memory + * usage will be "bufferSize * 64". For large poorly compressible + * bitmaps, using large values may improve performance. 
+ * + * If the requested bufferSize is less than 1, a value of 1 is used + * by default. In particular, negative values of bufferSize are + * effectively ignored. + * + * @param bufferSize number of 64-bit words reserved when the object is + * created) + */ + public EWAHCompressedBitmap(int bufferSize) { + this(new LongArray(bufferSize)); + } + + /** + * Creates a bitmap with the specified ByteBuffer backend. It assumes + * that a bitmap was serialized at this location. It is effectively "deserialized" + * though the actual content is not copied. + * This might be useful for implementing memory-mapped bitmaps. + * + * @param buffer data source + */ + public EWAHCompressedBitmap(ByteBuffer buffer) { + IntBuffer ib = buffer.asIntBuffer(); + this.sizeInBits = ib.get(0); + int sizeInWords = ib.get(1); + int rlwposition = ib.get(2 + sizeInWords * 2); + LongBuffer lb = buffer.asLongBuffer(); + lb.position(1); + this.buffer = new LongBufferWrapper(lb.slice(), sizeInWords); + this.rlw = new RunningLengthWord(this.buffer, rlwposition); + } + + /** + * Creates a bitmap with the specified java.nio.LongBuffer backend. + * The content of the LongBuffer is discarded. + * + * @param buffer data source + */ + public EWAHCompressedBitmap(LongBuffer buffer) { + this(new LongBufferWrapper(buffer)); + } + + private EWAHCompressedBitmap(Buffer buffer) { + this.buffer = buffer; + this.rlw = new RunningLengthWord(this.buffer, 0); + } + + /** + * @param newData the word + * @deprecated use addWord() instead. + */ + @Deprecated + public void add(final long newData) { + addWord(newData); + } + + /** + * @param newData the word + * @param bitsThatMatter the number of significant bits (by default it should + * be 64) + * @deprecated use addWord() instead. + */ + @Deprecated + public void add(final long newData, final int bitsThatMatter) { + addWord(newData, bitsThatMatter); + } + + + /** + * Adding words directly to the bitmap (for expert use). 
+ * + * This method adds bits in words of 8*8 bits. It is not to + * be confused with the set method which sets individual bits. + * + * Most users will want the set method. + * + * Example: if you add word 321 to an empty bitmap, you have + * added (in binary notation) 0b101000001, so you have effectively + * called set(0), set(6), set(8) in sequence. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * API change: prior to version 0.8.3, this method was called add. + * + * @param newData the word + */ + @Override + public void addWord(final long newData) { + addWord(newData, WORD_IN_BITS); + } + + /** + * Adding words directly to the bitmap (for expert use). + * Since this modifies the bitmap, this method is not thread-safe. + * + * API change: prior to version 0.8.3, this method was called add. + * + * @param newData the word + * @param bitsThatMatter the number of significant bits (by default it should + * be 64) + */ + public void addWord(final long newData, final int bitsThatMatter) { + this.sizeInBits += bitsThatMatter; + if (newData == 0) { + insertEmptyWord(false); + } else if (newData == ~0l) { + insertEmptyWord(true); + } else { + insertLiteralWord(newData); + } } - if (number > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) + + /** + * For internal use. 
+ * + * @param v the boolean value + */ + private void insertEmptyWord(final boolean v) { + final boolean noLiteralWords = (this.rlw.getNumberOfLiteralWords() == 0); + final long runningLength = this.rlw.getRunningLength(); + if (noLiteralWords && runningLength == 0) { + this.rlw.setRunningBit(v); + } + if (noLiteralWords && this.rlw.getRunningBit() == v + && (runningLength < RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT)) { + this.rlw.setRunningLength(runningLength + 1); + return; + } + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; this.rlw.setRunningBit(v); - this.rlw.setRunningLength(number); + this.rlw.setRunningLength(1); } - } - /** - * Same as addStreamOfLiteralWords, but the words are negated. - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - @Override - public void addStreamOfNegatedLiteralWords(final long[] data, - final int start, final int number) { - int leftovernumber = number; - while (leftovernumber > 0) { - final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount - - NumberOfLiteralWords ? leftovernumber - : RunningLengthWord.largestliteralcount - - NumberOfLiteralWords; - this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords - + whatwecanadd); - leftovernumber -= whatwecanadd; - negative_push_back(data, start, whatwecanadd); - this.sizeinbits += whatwecanadd * wordinbits; - if (leftovernumber > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - } - } - } - - /** - * Returns a new compressed bitmap containing the bitwise AND values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). 
- * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @since 0.4.3 - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) { - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - container - .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords - : a.actualsizeinwords); - andToContainer(a, container); - return container; - } - /** - * Computes new compressed bitmap containing the bitwise AND values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * @since 0.4.0 - * @param a - * the other bitmap - * @param container - * where we store the result - */ - public void andToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { - final EWAHIterator i = a.getEWAHIterator(); - final EWAHIterator j = getEWAHIterator(); - final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); - final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; - final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == false) { - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); + /** + * Adding literal word directly to the bitmap (for expert use). + * Since this modifies the bitmap, this method is not thread-safe. 
+ * + * @param newData the word + */ + @Override + public void addLiteralWord(final long newData) { + this.sizeInBits += WORD_IN_BITS; + insertLiteralWord(newData); + } + + /** + * For internal use. + * + * @param newData the literal word + */ + private void insertLiteralWord(final long newData) { + final int numberSoFar = this.rlw.getNumberOfLiteralWords(); + if (numberSoFar >= RunningLengthWord.LARGEST_LITERAL_COUNT) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + this.rlw.setNumberOfLiteralWords(1); + this.buffer.push_back(newData); } else { - final long index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - if(adjustContainerSizeWhenAggregating) { - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; - remaining.dischargeAsEmpty(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + this.rlw.setNumberOfLiteralWords(numberSoFar + 1); + this.buffer.push_back(newData); + } } - } - + + /** + * if you have several literal words to copy over, this might be faster. + * + * Since this modifies the bitmap, this method is not thread-safe. 
+ * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + @Override + public void addStreamOfLiteralWords(final Buffer buffer, final int start, + final int number) { + int leftOverNumber = number; + while (leftOverNumber > 0) { + final int numberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatWeCanAdd = leftOverNumber < RunningLengthWord.LARGEST_LITERAL_COUNT + - numberOfLiteralWords ? leftOverNumber + : RunningLengthWord.LARGEST_LITERAL_COUNT - numberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(numberOfLiteralWords+ whatWeCanAdd); + leftOverNumber -= whatWeCanAdd; + this.buffer.push_back(buffer, start, whatWeCanAdd); + this.sizeInBits += whatWeCanAdd * WORD_IN_BITS; + if (leftOverNumber > 0) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + } + } + } + + /** + * For experts: You want to add many zeroes or ones? This is the method + * you use. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param v the boolean value + * @param number the number + */ + @Override + public void addStreamOfEmptyWords(final boolean v, long number) { + if (number == 0) + return; + this.sizeInBits += (int)(number * WORD_IN_BITS); + fastaddStreamOfEmptyWords(v, number); + } + + /** + * Same as addStreamOfLiteralWords, but the words are negated. + * + * Since this modifies the bitmap, this method is not thread-safe. 
+ * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + @Override + public void addStreamOfNegatedLiteralWords(final Buffer buffer, + final int start, final int number) { + int leftOverNumber = number; + while (leftOverNumber > 0) { + final int numberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatWeCanAdd = leftOverNumber < RunningLengthWord.LARGEST_LITERAL_COUNT + - numberOfLiteralWords ? leftOverNumber + : RunningLengthWord.LARGEST_LITERAL_COUNT + - numberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(numberOfLiteralWords + whatWeCanAdd); + leftOverNumber -= whatWeCanAdd; + this.buffer.negative_push_back(buffer, start, whatWeCanAdd); + this.sizeInBits += whatWeCanAdd * WORD_IN_BITS; + if (leftOverNumber > 0) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + } + } + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + * @since 0.4.3 + */ + @Override + public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) { + int size = this.buffer.sizeInWords() > a.buffer.sizeInWords() ? this.buffer.sizeInWords() + : a.buffer.sizeInWords(); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(size); + andToContainer(a, container); + return container; + } + + /** + * Computes new compressed bitmap containing the bitwise AND values of + * the current bitmap with some other bitmap. 
+ * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * The current bitmap is not modified. + * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + * @since 0.4.0 + */ + public void andToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { + container.clear(); + final EWAHIterator i = a.getEWAHIterator(); + final EWAHIterator j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi; + if (!predator.getRunningBit()) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + } else { + final long index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); + } + predator.discardRunningWords(); + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) { + container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + } + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } - /** - * Returns the cardinality of the result of a bitwise AND of the values of the - * current bitmap with some other bitmap. 
Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @since 0.4.0 - * @param a - * the other bitmap - * @return the cardinality - */ - public int andCardinality(final EWAHCompressedBitmap a) { - final BitCounter counter = new BitCounter(); - andToContainer(a, counter); - return counter.getCount(); - } - - /** - * Returns a new compressed bitmap containing the bitwise AND NOT values of - * the current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) { - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - container - .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords - : a.actualsizeinwords); - andNotToContainer(a, container); - return container; - } - - /** - * Returns a new compressed bitmap containing the bitwise AND NOT values of - * the current bitmap with some other bitmap. This method is expected to - * be faster than doing A.and(B.clone().not()). - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). 
- * - * @since 0.4.0 - * @param a the other bitmap - * @param container where to store the result - */ - public void andNotToContainer(final EWAHCompressedBitmap a, - final BitmapStorage container) { - final EWAHIterator i = getEWAHIterator(); - final EWAHIterator j = a.getEWAHIterator(); - final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); - final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; - final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj - : rlwi; - if ( ((predator.getRunningBit() == true) && (i_is_prey)) - || ((predator.getRunningBit() == false) && (!i_is_prey))){ - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else if (i_is_prey) { - long index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } else { - long index = prey.dischargeNegated(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(true, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - final boolean i_remains = rlwi.size()>0; - final 
IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; - if(i_remains) - remaining.discharge(container); - else if(adjustContainerSizeWhenAggregating) - remaining.dischargeAsEmpty(container); - if(adjustContainerSizeWhenAggregating) - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - } - - /** - * Returns the cardinality of the result of a bitwise AND NOT of the values of - * the current bitmap with some other bitmap. Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @since 0.4.0 - * @param a - * the other bitmap - * @return the cardinality - */ - public int andNotCardinality(final EWAHCompressedBitmap a) { - final BitCounter counter = new BitCounter(); - andNotToContainer(a, counter); - return counter.getCount(); - } - - /** - * reports the number of bits set to true. Running time is proportional to - * compressed size (as reported by sizeInBytes). - * - * @return the number of bits set to true - */ - public int cardinality() { - int counter = 0; - final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); - while (i.hasNext()) { - RunningLengthWord localrlw = i.next(); - if (localrlw.getRunningBit()) { - counter += wordinbits * localrlw.getRunningLength(); - } - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - counter += Long.bitCount(i.buffer()[i.literalWords() + j]); - } - } - return counter; - } - - /** - * Clear any set bits and set size in bits back to 0 - */ - public void clear() { - this.sizeinbits = 0; - this.actualsizeinwords = 1; - this.rlw.position = 0; - // buffer is not fully cleared but any new set operations should overwrite - // stale data - this.buffer[0] = 0; - } - - /* - * @see java.lang.Object#clone() - */ - @Override - public EWAHCompressedBitmap clone() throws java.lang.CloneNotSupportedException { - final EWAHCompressedBitmap clone = (EWAHCompressedBitmap) super.clone(); - clone.buffer = this.buffer.clone(); - clone.rlw = new 
RunningLengthWord(clone, this.rlw.position); - clone.actualsizeinwords = this.actualsizeinwords; - clone.sizeinbits = this.sizeinbits; - return clone; - } - - /** - * Deserialize. - * - * @param in - * the DataInput stream - * @throws IOException - * Signals that an I/O exception has occurred. - */ - public void deserialize(DataInput in) throws IOException { - this.sizeinbits = in.readInt(); - this.actualsizeinwords = in.readInt(); - if (this.buffer.length < this.actualsizeinwords) { - this.buffer = new long[this.actualsizeinwords]; - } - for (int k = 0; k < this.actualsizeinwords; ++k) - this.buffer[k] = in.readLong(); - this.rlw = new RunningLengthWord(this, in.readInt()); - } - - /** - * Check to see whether the two compressed bitmaps contain the same set bits. - * - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object o) { - if (o instanceof EWAHCompressedBitmap) { - try { - this.xorToContainer((EWAHCompressedBitmap) o, new NonEmptyVirtualStorage()); - return true; - } catch (NonEmptyVirtualStorage.NonEmptyException e) { + if (ADJUST_CONTAINER_SIZE_WHEN_AGGREGATING) { + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), a.sizeInBits())); + } + } + + /** + * Returns the cardinality of the result of a bitwise AND of the values + * of the current bitmap with some other bitmap. Avoids + * allocating an intermediate bitmap to hold the result of the AND. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + * @since 0.4.0 + */ + public int andCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + andToContainer(a, counter); + return counter.getCount(); + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values + * of the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). 
+ * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) { + int size = this.buffer.sizeInWords() > a.buffer.sizeInWords() ? + this.buffer.sizeInWords() : a.buffer.sizeInWords(); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(size); + andNotToContainer(a, container); + return container; + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values + * of the current bitmap with some other bitmap. This method is expected + * to be faster than doing A.and(B.clone().not()). + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * The current bitmap is not modified. + * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where to store the result + * @since 0.4.0 + */ + public void andNotToContainer(final EWAHCompressedBitmap a, + final BitmapStorage container) { + container.clear(); + final EWAHIterator i = getEWAHIterator(); + final EWAHIterator j = a.getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? 
 rlwj : rlwi; + if (((predator.getRunningBit()) && (i_is_prey)) + || ((!predator.getRunningBit()) && (!i_is_prey))) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + } else if (i_is_prey) { + final long index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); + } else { + final long index = prey.dischargeNegated(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); + } + predator.discardRunningWords(); + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))); + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; + if (i_remains) + remaining.discharge(container); + if (ADJUST_CONTAINER_SIZE_WHEN_AGGREGATING) + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), + a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise AND NOT of the + * values of the current bitmap with some other bitmap. Avoids + * allocating an intermediate bitmap to hold the result of the AND NOT. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + * @since 0.4.0 + */ + public int andNotCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + andNotToContainer(a, counter); + return counter.getCount(); + } + + /** + * reports the number of bits set to true. Running time is proportional + * to compressed size (as reported by sizeInBytes). 
+ * + * @return the number of bits set to true + */ + public int cardinality() { + int counter = 0; + final EWAHIterator i = this.getEWAHIterator(); + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + counter += (int)(WORD_IN_BITS * localrlw.getRunningLength()); + } + final int numberOfLiteralWords = localrlw.getNumberOfLiteralWords(); + final int literalWords = i.literalWords(); + for (int j = 0; j < numberOfLiteralWords; ++j) { + counter += Long.bitCount(i.buffer().getWord(literalWords + j)); + } + } + return counter; + } + + /** + * Clear any set bits and set size in bits back to 0 + */ + @Override + public void clear() { + this.sizeInBits = 0; + this.buffer.clear(); + this.rlw.position = 0; + } + + /* + * @see java.lang.Object#clone() + */ + @Override + public EWAHCompressedBitmap clone() throws CloneNotSupportedException { + EWAHCompressedBitmap clone = new EWAHCompressedBitmap(this.buffer.clone()); + clone.sizeInBits = this.sizeInBits; + clone.rlw = new RunningLengthWord(clone.buffer, this.rlw.position); + return clone; + } + + /** + * Serialize. + * + * The current bitmap is not modified. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void serialize(DataOutput out) throws IOException { + out.writeInt(this.sizeInBits); + final int siw = this.buffer.sizeInWords(); + out.writeInt(siw); + for(int i = 0; i < siw; ++i) { + out.writeLong(this.buffer.getWord(i)); + } + out.writeInt(this.rlw.position); + } + + /** + * Deserialize. + * + * @param in the DataInput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void deserialize(DataInput in) throws IOException { + this.sizeInBits = in.readInt(); + int sizeInWords = in.readInt(); + this.buffer.clear();//This creates a buffer with 1 word in it already! 
+ this.buffer.removeLastWord(); + this.buffer.ensureCapacity(sizeInWords); + for(int i = 0; i < sizeInWords; ++i) { + this.buffer.push_back(in.readLong()); + } + this.rlw = new RunningLengthWord(this.buffer, in.readInt()); + } + + /** + * Check to see whether the two compressed bitmaps contain the same set + * bits. + * + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object o) { + if (o instanceof EWAHCompressedBitmap) { + try { + this.xorToContainer((EWAHCompressedBitmap) o, + new NonEmptyVirtualStorage()); + return true; + } catch (NonEmptyVirtualStorage.NonEmptyException e) { + return false; + } + } return false; - } } - return false; - } - /** - * For experts: You want to add many zeroes or ones faster? - * - * This method does not update sizeinbits. - * - * @param v - * the boolean value - * @param number - * the number (must be greater than 0) - */ - private void fastaddStreamOfEmptyWords(final boolean v, long number) { - if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { - this.rlw.setRunningBit(v); - } else if ((this.rlw.getNumberOfLiteralWords() != 0) - || (this.rlw.getRunningBit() != v)) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); + /** + * For experts: You want to add many zeroes or ones faster? + * + * This method does not update sizeInBits. 
+ * + * @param v the boolean value + * @param number the number (must be greater than 0) + */ + private void fastaddStreamOfEmptyWords(final boolean v, long number) { + if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { + this.rlw.setRunningBit(v); + } else if ((this.rlw.getNumberOfLiteralWords() != 0) || (this.rlw.getRunningBit() != v)) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + if (v) + this.rlw.setRunningBit(true); + } + + final long runLen = this.rlw.getRunningLength(); + final long whatWeCanAdd = number < RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT + - runLen ? number + : RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT - runLen; + this.rlw.setRunningLength(runLen + whatWeCanAdd); + number -= whatWeCanAdd; + + while (number >= RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + if (v) + this.rlw.setRunningBit(true); + this.rlw.setRunningLength(RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT); + number -= RunningLengthWord.LARGEST_RUNNING_LENGTH_COUNT; + } + if (number > 0) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + if (v) + this.rlw.setRunningBit(true); + this.rlw.setRunningLength(number); + } } - final long runlen = this.rlw.getRunningLength(); - final long whatwecanadd = number < RunningLengthWord.largestrunninglengthcount - - runlen ? number : RunningLengthWord.largestrunninglengthcount - runlen; - this.rlw.setRunningLength(runlen + whatwecanadd); - number -= whatwecanadd; - - while (number >= RunningLengthWord.largestrunninglengthcount) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(RunningLengthWord.largestrunninglengthcount); - number -= RunningLengthWord.largestrunninglengthcount; + /** + * Gets an EWAHIterator over the data. 
This is a customized iterator + * which iterates over run length words. For experts only. + * + * The current bitmap is not modified. + * + * @return the EWAHIterator + */ + public EWAHIterator getEWAHIterator() { + return new EWAHIterator(this.buffer); + } + + /** + * Gets a ReverseEWAHIterator over the data. This is a customized iterator + * which iterates over run length words in reverse order. For experts only. + * + * The current bitmap is not modified. + * + * @return the ReverseEWAHIterator + */ + private ReverseEWAHIterator getReverseEWAHIterator() { + return new ReverseEWAHIterator(this.buffer); + } + + /** + * Gets an IteratingRLW to iterate over the data. For experts only. + * + * Note that iterator does not know about the size in bits of the + * bitmap: the size in bits is effectively rounded up to the nearest + * multiple of 64. However, if you materialize a bitmap from + * an iterator, you can set the desired size in bits using the + * setSizeInBitsWithinLastWord methods: + * + * + * EWAHCompressedBitmap n = IteratorUtil.materialize(bitmap.getIteratingRLW())); + * n.setSizeInBitsWithinLastWord(bitmap.sizeInBits()); + * + + * + * The current bitmap is not modified. + * + * @return the IteratingRLW iterator corresponding to this bitmap + */ + public IteratingRLW getIteratingRLW() { + return new IteratingBufferedRunningLengthWord(this); + } + + /** + * @return a list + * @deprecated use toList() instead. + */ + @Deprecated + public List getPositions() { + return toList(); + } + + /** + * Gets the locations of the true values as one list. (May use more + * memory than iterator().) + * + * The current bitmap is not modified. + * + * API change: prior to version 0.8.3, this method was called getPositions. 
+ * + * @return the positions in a list + */ + public List toList() { + final ArrayList v = new ArrayList(); + final EWAHIterator i = this.getEWAHIterator(); + int pos = 0; + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + final long N = localrlw.getRunningLength(); + for (long j = 0; j < N; ++j) { + for (int c = 0; c < WORD_IN_BITS; ++c) + v.add(pos++); + } + } else { + pos += (int)(WORD_IN_BITS * localrlw.getRunningLength()); + } + final int nlw = localrlw.getNumberOfLiteralWords(); + for (int j = 0; j < nlw; ++j) { + long data = i.buffer().getWord(i.literalWords() + j); + while (data != 0) { + final long T = data & -data; + v.add(Long.bitCount(T - 1) + pos); + data ^= T; + } + pos += WORD_IN_BITS; + } + } + while ((v.size() > 0) && (v.get(v.size() - 1) >= this.sizeInBits)) + v.remove(v.size() - 1); + return v; + } + + /** + * Returns a customized hash code (based on Karp-Rabin). Naturally, if + * the bitmaps are equal, they will hash to the same value. + * + * The current bitmap is not modified. + */ + @Override + public int hashCode() { + int karprabin = 0; + final int B = 0x9e3779b1; + final EWAHIterator i = this.getEWAHIterator(); + while (i.hasNext()) { + i.next(); + if (i.rlw.getRunningBit()) { + final long rl = i.rlw.getRunningLength(); + karprabin += (int)(B * (rl & 0xFFFFFFFF)); + karprabin += (int)(B * ((rl>>>32) & 0xFFFFFFFF)); + } + final int nlw = i.rlw.getNumberOfLiteralWords(); + final int lw = i.literalWords(); + for (int k = 0; k < nlw ; ++k) { + long W = this.buffer.getWord(lw + k); + karprabin += B * (W & 0xFFFFFFFF); + karprabin += B * ((W>>>32) & 0xFFFFFFFF); + } + } + return karprabin; } - if (number > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(number); + + /** + * Return true if the two EWAHCompressedBitmap have both at least one + * true bit in the same position. 
Equivalently, you could call "and" and + * check whether there is a set bit, but intersects will run faster if + * you don't need the result of the "and" operation. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return whether they intersect + * @since 0.3.2 + */ + public boolean intersects(final EWAHCompressedBitmap a) { + NonEmptyVirtualStorage nevs = new NonEmptyVirtualStorage(); + try { + this.andToContainer(a, nevs); + } catch (NonEmptyVirtualStorage.NonEmptyException nee) { + return true; + } + return false; } - } - /** - * Gets an EWAHIterator over the data. This is a customized iterator which - * iterates over run length word. For experts only. - * - * @return the EWAHIterator - */ - public EWAHIterator getEWAHIterator() { - return new EWAHIterator(this, this.actualsizeinwords); - } - - /** - * @return the IteratingRLW iterator corresponding to this bitmap - */ - public IteratingRLW getIteratingRLW() { - return new IteratingBufferedRunningLengthWord(this); - } - /** - * get the locations of the true values as one vector. 
(may use more memory - * than iterator()) - * - * @return the positions - */ - public List getPositions() { - final ArrayList v = new ArrayList(); - final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); - int pos = 0; - while (i.hasNext()) { - RunningLengthWord localrlw = i.next(); - if (localrlw.getRunningBit()) { - for (int j = 0; j < localrlw.getRunningLength(); ++j) { - for (int c = 0; c < wordinbits; ++c) - v.add(new Integer(pos++)); - } - } else { - pos += wordinbits * localrlw.getRunningLength(); - } - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - long data = i.buffer()[i.literalWords() + j]; - while (data != 0) { - final int ntz = Long.numberOfTrailingZeros(data); - data ^= (1l << ntz); - v.add(new Integer(ntz + pos)); - } - pos += wordinbits; - } - } - while ((v.size() > 0) - && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) - v.remove(v.size() - 1); - return v; - } - - /** - * Returns a customized hash code (based on Karp-Rabin). Naturally, if the - * bitmaps are equal, they will hash to the same value. - * - */ - @Override - public int hashCode() { - int karprabin = 0; - final int B = 31; - final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); - while( i.hasNext() ) { - i.next(); - if (i.rlw.getRunningBit() == true) { - karprabin += B * karprabin - + (i.rlw.getRunningLength() & ((1l << 32) - 1)); - karprabin += B * karprabin + (i.rlw.getRunningLength() >>> 32); - } - for (int k = 0; k < i.rlw.getNumberOfLiteralWords(); ++k) { - karprabin += B * karprabin + (this.buffer[i.literalWords() + k] & ((1l << 32) - 1)); - karprabin += B * karprabin + (this.buffer[i.literalWords() + k] >>> 32); - } - } - return karprabin; - } - - /** - * Return true if the two EWAHCompressedBitmap have both at least one true bit - * in the same position. Equivalently, you could call "and" and check whether - * there is a set bit, but intersects will run faster if you don't need the - * result of the "and" operation. 
- * - * @since 0.3.2 - * @param a - * the other bitmap - * @return whether they intersect - */ - public boolean intersects(final EWAHCompressedBitmap a) { - NonEmptyVirtualStorage nevs = new NonEmptyVirtualStorage(); - try { - this.andToContainer(a, nevs); - } catch (NonEmptyVirtualStorage.NonEmptyException nee) { - return true; - } - return false; - } - - /** - * Iterator over the set bits (this is what most people will want to use to - * browse the content if they want an iterator). The location of the set bits - * is returned, in increasing order. - * - * @return the int iterator - */ - public IntIterator intIterator() { - return new IntIteratorImpl( - new EWAHIterator(this, this.actualsizeinwords)); - } - - /** - * iterate over the positions of the true values. This is similar to - * intIterator(), but it uses Java generics. - * - * @return the iterator - */ - @Override -public Iterator iterator() { - return new Iterator() { - @Override - public boolean hasNext() { - return this.under.hasNext(); - } - - @Override - public Integer next() { - return new Integer(this.under.next()); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("bitsets do not support remove"); - } - - final private IntIterator under = intIterator(); - }; - } - - /** - * For internal use. 
- * - * @param data - * the array of words to be added - * @param start - * the starting point - * @param number - * the number of words to add - */ - private void negative_push_back(final long[] data, final int start, - final int number) { - while (this.actualsizeinwords + number >= this.buffer.length) { - final long oldbuffer[] = this.buffer; - if((this.actualsizeinwords + number) < 32768) - this.buffer = new long[ (this.actualsizeinwords + number) * 2]; - else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow - this.buffer = new long[Integer.MAX_VALUE]; - else - this.buffer = new long[(this.actualsizeinwords + number) * 3 / 2]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - } - for (int k = 0; k < number; ++k) - this.buffer[this.actualsizeinwords + k] = ~data[start + k]; - this.actualsizeinwords += number; - } - - /** - * Negate (bitwise) the current bitmap. To get a negated copy, do - * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not(); - * - * The running time is proportional to the compressed size (as reported by - * sizeInBytes()). - * - */ - @Override -public void not() { - final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); - if (!i.hasNext()) - return; - - while (true) { - final RunningLengthWord rlw1 = i.next(); - rlw1.setRunningBit(!rlw1.getRunningBit()); - for (int j = 0; j < rlw1.getNumberOfLiteralWords(); ++j) { - i.buffer()[i.literalWords() + j] = ~i.buffer()[i.literalWords() + j]; - } - - if (!i.hasNext()) {// must potentially adjust the last literal word - final int usedbitsinlast = this.sizeinbits % wordinbits; - if (usedbitsinlast == 0) + /** + * Iterator over the set bits (this is what most people will want to use + * to browse the content if they want an iterator). The location of the + * set bits is returned, in increasing order. + * + * The current bitmap is not modified. 
+ * + * @return the int iterator + */ + public IntIterator intIterator() { + return new IntIteratorImpl(this.getEWAHIterator()); + } + + /** + * Iterator over the set bits in reverse order. + * + * The current bitmap is not modified. + * + * @return the int iterator + */ + public IntIterator reverseIntIterator() { + return new ReverseIntIterator(this.getReverseEWAHIterator(), this.sizeInBits); + } + + /** + * Checks whether this bitmap is empty (has a cardinality of zero). + * + * @return true if no bit is set + */ + public boolean isEmpty() { + return getFirstSetBit() < 0; + } + + /** + * Iterator over the clear bits. The location of the clear bits is + * returned, in increasing order. + * + * The current bitmap is not modified. + * + * @return the int iterator + */ + public IntIterator clearIntIterator() { + return new ClearIntIterator(this.getEWAHIterator(), this.sizeInBits); + } + + /** + * Iterator over the chunk of bits. + * + * The current bitmap is not modified. + * + * @return the chunk iterator + */ + public ChunkIterator chunkIterator() { + return new ChunkIteratorImpl(this.getEWAHIterator(), this.sizeInBits); + } + + /** + * Iterates over the positions of the true values. This is similar to + * intIterator(), but it uses Java generics. + * + * The current bitmap is not modified. + * + * @return the iterator + */ + @Override + public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return this.under.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException( + "bitsets do not support remove"); + } + + private final IntIterator under = intIterator(); + }; + } + + /** + * Negate (bitwise) the current bitmap. 
To get a negated copy, do + * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); + * x.not(); + * + * The running time is proportional to the compressed size (as reported + * by sizeInBytes()). + * + * Because this modifies the bitmap, this method is not thread-safe. + */ + @Override + public void not() { + final EWAHIterator i = this.getEWAHIterator(); + if (!i.hasNext()) return; - if (rlw1.getNumberOfLiteralWords() == 0) { - if((rlw1.getRunningLength()>0) && (rlw1.getRunningBit())) { - rlw1.setRunningLength(rlw1.getRunningLength()-1); - this.addLiteralWord((~0l) >>> (wordinbits - usedbitsinlast)); - } - return; - } - i.buffer()[i.literalWords() + rlw1.getNumberOfLiteralWords() - 1] &= ((~0l) >>> (wordinbits - usedbitsinlast)); - return; - } - } - } - - /** - * Returns a new compressed bitmap containing the bitwise OR values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) { - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - container.reserve(this.actualsizeinwords + a.actualsizeinwords); - orToContainer(a, container); - return container; - } - - - - /** - * Computes the bitwise or between the current bitmap and the bitmap "a". - * Stores the result in the container. 
- * - * @since 0.4.0 - * @param a - * the other bitmap - * @param container - * where we store the result - */ - public void orToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { - final EWAHIterator i = a.getEWAHIterator(); - final EWAHIterator j = getEWAHIterator(); - final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); - final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi - : rlwj; - final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == true) { - container.addStreamOfEmptyWords(true, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else { - long index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) { - container.add(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)); - } - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; - remaining.discharge(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - } - - /** - * Returns the cardinality of the result of a bitwise OR of the values of the - * current bitmap with some other bitmap. 
Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @since 0.4.0 - * @param a - * the other bitmap - * @return the cardinality - */ - public int orCardinality(final EWAHCompressedBitmap a) { - final BitCounter counter = new BitCounter(); - orToContainer(a, counter); - return counter.getCount(); - } - - /** - * For internal use. - * - * @param data - * the word to be added - */ - private void push_back(final long data) { - if (this.actualsizeinwords == this.buffer.length) { - final long oldbuffer[] = this.buffer; - if(oldbuffer.length < 32768) - this.buffer = new long[ oldbuffer.length * 2]; - else if(oldbuffer.length * 3 / 2 < oldbuffer.length) // overflow - this.buffer = new long[Integer.MAX_VALUE]; - else - this.buffer = new long[oldbuffer.length * 3 / 2]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - } - this.buffer[this.actualsizeinwords++] = data; - } - - /** - * For internal use. 
- * - * @param data - * the array of words to be added - * @param start - * the starting point - * @param number - * the number of words to add - */ - private void push_back(final long[] data, final int start, final int number) { - if (this.actualsizeinwords + number >= this.buffer.length) { - final long oldbuffer[] = this.buffer; - if(this.actualsizeinwords + number < 32768) - this.buffer = new long[(this.actualsizeinwords + number) * 2]; - else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow - this.buffer = new long[Integer.MAX_VALUE]; - else - this.buffer = new long[( this.actualsizeinwords + number) * 3 / 2]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - } - System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); - this.actualsizeinwords += number; - } - - /* - * @see java.io.Externalizable#readExternal(java.io.ObjectInput) - */ - @Override -public void readExternal(ObjectInput in) throws IOException { - deserialize(in); - } - - /** - * For internal use (trading off memory for speed). - * - * @param size - * the number of words to allocate - * @return True if the operation was a success. - */ - private boolean reserve(final int size) { - if (size > this.buffer.length) { - final long oldbuffer[] = this.buffer; - this.buffer = new long[size]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - return true; - } - return false; - } - - /** - * Serialize. - * - * @param out - * the DataOutput stream - * @throws IOException - * Signals that an I/O exception has occurred. 
- */ - public void serialize(DataOutput out) throws IOException { - out.writeInt(this.sizeinbits); - out.writeInt(this.actualsizeinwords); - for (int k = 0; k < this.actualsizeinwords; ++k) - out.writeLong(this.buffer[k]); - out.writeInt(this.rlw.position); - } - - /** - * Report the size required to serialize this bitmap - * - * @return the size in bytes - */ - public int serializedSizeInBytes() { - return this.sizeInBytes() + 3 * 4; - } - - - /** - * Query the value of a single bit. Relying on this method when speed is - * needed is discouraged. The complexity is linear with the size of the - * bitmap. - * - * (This implementation is based on zhenjl's Go version of JavaEWAH.) - * - * @param i - * the bit we are interested in - * @return whether the bit is set to true - */ - public boolean get(final int i) { - if ((i < 0) || (i >= this.sizeinbits)) - return false; - int WordChecked = 0; - final IteratingRLW j = getIteratingRLW(); - final int wordi = i/wordinbits; - while (WordChecked <= wordi ) { - WordChecked += j.getRunningLength(); - if (wordi < WordChecked) { - return j.getRunningBit(); - } - if (wordi < WordChecked + j.getNumberOfLiteralWords()) { - final long w = j.getLiteralWordAt(wordi - WordChecked); - return (w & (1l << i)) != 0; + while (true) { + final RunningLengthWord rlw1 = i.next(); + rlw1.setRunningBit(!rlw1.getRunningBit()); + int nlw = rlw1.getNumberOfLiteralWords(); + for (int j = 0; j < nlw; ++j) { + int position = i.literalWords() + j; + i.buffer().negateWord(position); + } + + if (!i.hasNext()) {// must potentially adjust the last + // literal word + final int usedBitsInLast = this.sizeInBits + % WORD_IN_BITS; + if (usedBitsInLast == 0) + return; + + if (rlw1.getNumberOfLiteralWords() == 0) { + if ((rlw1.getRunningLength() > 0) + && (rlw1.getRunningBit())) { + if((rlw1.getRunningLength() == 1) && (rlw1.position > 0)) { + // we need to prune ending + final EWAHIterator j = this.getEWAHIterator(); + int newrlwpos = this.rlw.position; + while 
(j.hasNext()) { + RunningLengthWord r = j.next(); + if (r.position < rlw1.position) { + newrlwpos = r.position; + } else break; + } + this.rlw.position = newrlwpos; + this.buffer.removeLastWord(); + } else { + rlw1.setRunningLength(rlw1 + .getRunningLength() - 1); } - WordChecked += j.getNumberOfLiteralWords(); - j.next(); + this.insertLiteralWord((~0l) >>> (WORD_IN_BITS - usedBitsInLast)); + } + return; } - return false; + i.buffer().andWord(i.literalWords() + rlw1.getNumberOfLiteralWords() - 1, + (~0l) >>> (WORD_IN_BITS - usedBitsInLast)); + return; + } } + } - /** - * Set the bit at position i to true, the bits must be set in (strictly) increasing - * order. For example, set(15) and then set(7) will fail. You must do set(7) - * and then set(15). - * - * @param i - * the index - * @return true if the value was set (always true when i greater or equal to sizeInBits()). - * @throws IndexOutOfBoundsException - * if i is negative or greater than Integer.MAX_VALUE - 64 - */ - public boolean set(final int i) { - if ((i > Integer.MAX_VALUE - wordinbits) || (i < 0)) - throw new IndexOutOfBoundsException("Set values should be between 0 and " - + (Integer.MAX_VALUE - wordinbits)); - if (i < this.sizeinbits) - return false; - // distance in words: - final int dist = (i + wordinbits) / wordinbits - - (this.sizeinbits + wordinbits - 1) / wordinbits; - this.sizeinbits = i + 1; - if (dist > 0) {// easy - if (dist > 1) - fastaddStreamOfEmptyWords(false, dist - 1); - addLiteralWord(1l << (i % wordinbits)); - return true; - } - if (this.rlw.getNumberOfLiteralWords() == 0) { - this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); - addLiteralWord(1l << (i % wordinbits)); - return true; - } - this.buffer[this.actualsizeinwords - 1] |= 1l << (i % wordinbits); - if (this.buffer[this.actualsizeinwords - 1] == ~0l) { - this.buffer[this.actualsizeinwords - 1] = 0; - --this.actualsizeinwords; - this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); - // next we 
add one clean word - addEmptyWord(true); - } - return true; - } - - /** - * Set the size in bits. This does not change the compressed bitmap. - * - * @since 0.4.0 - */ - @Override -public void setSizeInBits(final int size) { - if((size+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits) - throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean)."); - this.sizeinbits = size; - } - - /** - * Change the reported size in bits of the *uncompressed* bitmap represented - * by this compressed bitmap. It may change the underlying compressed bitmap. - * It is not possible to reduce the sizeInBits, but - * it can be extended. The new bits are set to false or true depending on the - * value of defaultvalue. - * - * @param size - * the size in bits - * @param defaultvalue - * the default boolean value - * @return true if the update was possible - */ - public boolean setSizeInBits(final int size, final boolean defaultvalue) { - if (size < this.sizeinbits) - return false; - if (defaultvalue == false) - extendEmptyBits(this, this.sizeinbits, size); - else { - // next bit could be optimized - while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) { - this.set(this.sizeinbits); - } - this.addStreamOfEmptyWords(defaultvalue, (size / wordinbits) - - this.sizeinbits / wordinbits); - // next bit could be optimized - while (this.sizeinbits < size) { - this.set(this.sizeinbits); - } - } - this.sizeinbits = size; - return true; - } - - /** - * Returns the size in bits of the *uncompressed* bitmap represented by this - * compressed bitmap. Initially, the sizeInBits is zero. It is extended - * automatically when you set bits to true. 
- * - * @return the size in bits - */ - @Override -public int sizeInBits() { - return this.sizeinbits; - } - - /** - * Report the *compressed* size of the bitmap (equivalent to memory usage, - * after accounting for some overhead). - * - * @return the size in bytes - */ - @Override -public int sizeInBytes() { - return this.actualsizeinwords * (wordinbits / 8); - } - - /** - * Populate an array of (sorted integers) corresponding to the location of the - * set bits. - * - * @return the array containing the location of the set bits - */ - public int[] toArray() { - int[] ans = new int[this.cardinality()]; - int inanspos = 0; - int pos = 0; - final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); - while (i.hasNext()) { - RunningLengthWord localrlw = i.next(); - if (localrlw.getRunningBit()) { - for (int j = 0; j < localrlw.getRunningLength(); ++j) { - for (int c = 0; c < wordinbits; ++c) { - ans[inanspos++] = pos++; - } - } - } else { - pos += wordinbits * localrlw.getRunningLength(); - } - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - long data = i.buffer()[i.literalWords() + j]; - if (!usetrailingzeros) { - for (int c = 0; c < wordinbits; ++c) { - if ((data & (1l << c)) != 0) - ans[inanspos++] = c + pos; - } - pos += wordinbits; + /** + * Returns a new compressed bitmap containing the bitwise OR values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. 
+ * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) { + int size = this.buffer.sizeInWords() + a.buffer.sizeInWords(); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(size); + orToContainer(a, container); + return container; + } + + /** + * Computes the bitwise or between the current bitmap and the bitmap + * "a". Stores the result in the container. + * + * The current bitmap is not modified. + * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + * @since 0.4.0 + */ + public void orToContainer(final EWAHCompressedBitmap a, + final BitmapStorage container) { + container.clear(); + final EWAHIterator i = a.getEWAHIterator(); + final EWAHIterator j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit()) { + container.addStreamOfEmptyWords(true, + predator.getRunningLength()); + prey.discardFirstWords(predator + .getRunningLength()); + } else { + final long index = prey.discharge(container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + } + predator.discardRunningWords(); + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) { + container.addWord(rlwi.getLiteralWordAt(k) + | rlwj.getLiteralWordAt(k)); + } + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi + : rlwj; + remaining.discharge(container); + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values + * of the current bitmap with some other bitmap. Avoids + * allocating an intermediate bitmap to hold the result of the OR. + * + * The current bitmap is not modified. 
+ * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + * @since 0.4.0 + */ + public int orCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + orToContainer(a, counter); + return counter.getCount(); + } + + /* + * @see java.io.Externalizable#readExternal(java.io.ObjectInput) + */ + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + deserialize(in); + } + + /* + * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) + */ + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } + + /** + * Report the number of bytes required to serialize this bitmap + * + * The current bitmap is not modified. + * + * @return the size in bytes + */ + public int serializedSizeInBytes() { + return this.sizeInBytes() + 3 * 4; + } + + /** + * Query the value of a single bit. Relying on this method when speed is + * needed is discouraged. The complexity is linear with the size of the + * bitmap. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) + * + * The current bitmap is not modified. + * + * @param i the bit we are interested in + * @return whether the bit is set to true + */ + public boolean get(final int i) { + if ((i < 0) || (i >= this.sizeInBits)) + return false; + int wordChecked = 0; + final IteratingRLW j = getIteratingRLW(); + final int wordi = i / WORD_IN_BITS; + while (wordChecked <= wordi) { + wordChecked += (int) j.getRunningLength(); + if (wordi < wordChecked) { + return j.getRunningBit(); + } + if (wordi < wordChecked + j.getNumberOfLiteralWords()) { + final long w = j.getLiteralWordAt(wordi + - wordChecked); + return (w & (1l << i)) != 0; + } + wordChecked += j.getNumberOfLiteralWords(); + j.next(); + } + return false; + } + + /** + * getFirstSetBit is a light-weight method that returns the + * location of the set bit (=1) or -1 if there is none. 
+ * + * @return location of the first set bit or -1 + */ + public int getFirstSetBit() { + int nword = 0; + final int siw = this.buffer.sizeInWords(); + for(int pos = 0; pos < siw; ++pos) { + long rl = RunningLengthWord.getRunningLength(this.buffer, pos); + boolean rb = RunningLengthWord.getRunningBit(this.buffer, pos); + if((rl > 0) && rb) { + return nword * WORD_IN_BITS; + } + nword += (int) rl; + long lw = RunningLengthWord.getNumberOfLiteralWords(this.buffer, pos); + if(lw > 0) { + long word = this.buffer.getWord(pos + 1); + if(word != 0l) { + long T = word & -word; + return nword * WORD_IN_BITS + Long.bitCount(T - 1); + } + } + } + return -1; + } + + /** + * Set the bit at position i to false. + * + * Though you can clear the bits in any order (e.g., clear(100), clear(10), clear(1), + * you will typically get better performance if you clear the bits in increasing order (e.g., clear(1), clear(10), clear(100)). + * + * Clearing a bit that is larger than the biggest bit is a constant time operation. + * Clearing a bit that is smaller than the biggest bit can require time proportional + * to the compressed size of the bitmap, as the bitmap may need to be rewritten. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param i the index + * @return true if the value was unset + * @throws IndexOutOfBoundsException if i is negative or greater than Integer.MAX_VALUE - 64 + */ + public boolean clear(final int i) { + return set(i, false); + } + + /** + * Set the bit at position i to true. + * + * Though you can set the bits in any order (e.g., set(100), set(10), set(1), + * you will typically get better performance if you set the bits in increasing order (e.g., set(1), set(10), set(100)). + * + * Setting a bit that is larger than any of the current set bit + * is a constant time operation. 
Setting a bit that is smaller than an + * already set bit can require time proportional to the compressed + * size of the bitmap, as the bitmap may need to be rewritten. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param i the index + * @return true if the value was set + * @throws IndexOutOfBoundsException if i is negative or greater than Integer.MAX_VALUE - 64 + */ + public boolean set(final int i) { + return set(i, true); + } + + /** + * For internal use. + * + * @param i the index + * @param value the value + */ + private boolean set(int i, boolean value) { + if ((i > Integer.MAX_VALUE - WORD_IN_BITS) || (i < 0)) + throw new IndexOutOfBoundsException( + "Position should be between 0 and " + + (Integer.MAX_VALUE - WORD_IN_BITS) + ); + if (i < this.sizeInBits) { + locateAndSet(i, value); } else { - while (data != 0) { - final int ntz = Long.numberOfTrailingZeros(data); - data ^= (1l << ntz); - ans[inanspos++] = ntz + pos; - } - pos += wordinbits; - } - } - } - return ans; - - } - - /** - * A more detailed string describing the bitmap (useful for debugging). - * - * @return the string - */ - public String toDebugString() { - String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits - + " size in words = " + this.actualsizeinwords + "\n"; - final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); - while (i.hasNext()) { - RunningLengthWord localrlw = i.next(); - if (localrlw.getRunningBit()) { - ans += localrlw.getRunningLength() + " 1x11\n"; - } else { - ans += localrlw.getRunningLength() + " 0x00\n"; - } - ans += localrlw.getNumberOfLiteralWords() + " dirties\n"; - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - long data = i.buffer()[i.literalWords() + j]; - ans += "\t" + data + "\n"; - } - } - return ans; - } - - /** - * A string describing the bitmap. 
- * - * @return the string - */ - @Override - public String toString() { - StringBuffer answer = new StringBuffer(); - IntIterator i = this.intIterator(); - answer.append("{"); - if (i.hasNext()) - answer.append(i.next()); - while (i.hasNext()) { - answer.append(","); - answer.append(i.next()); - } - answer.append("}"); - return answer.toString(); - } - - /** - * swap the content of the bitmap with another. - * @param other bitmap to swap with - */ -public void swap(final EWAHCompressedBitmap other) { - long[] tmp = this.buffer; - this.buffer = other.buffer; - other.buffer = tmp; - - - int tmp2 = this.rlw.position; - this.rlw.position = other.rlw.position; - other.rlw.position = tmp2; - - int tmp3 = this.actualsizeinwords; - this.actualsizeinwords = other.actualsizeinwords; - other.actualsizeinwords = tmp3; - - int tmp4 = this.sizeinbits; - this.sizeinbits = other.sizeinbits; - other.sizeinbits = tmp4; - } - - /** - * Reduce the internal buffer to its minimal allowable size (given - * by this.actualsizeinwords). This can free memory. - */ - public void trim() { - this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords); - } - - /* - * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) - */ - @Override -public void writeExternal(ObjectOutput out) throws IOException { - serialize(out); - } - - /** - * Returns a new compressed bitmap containing the bitwise XOR values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. 
- * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) { - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - container.reserve(this.actualsizeinwords + a.actualsizeinwords); - xorToContainer(a, container); - return container; - } - - /** - * Computes a new compressed bitmap containing the bitwise XOR values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * @since 0.4.0 - * @param a - * the other bitmap - * @param container - * where we store the result - */ - public void xorToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { - final EWAHIterator i = a.getEWAHIterator(); - final EWAHIterator j = getEWAHIterator(); - final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); - final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; - final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == false) { - long index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); + extendAndSet(i, value); + } + return true; + } + + /** + * For internal use. 
+ * + * @param i the index + * @param value the value + */ + private void extendAndSet(int i, boolean value) { + final int dist = distanceInWords(i); + this.sizeInBits = i + 1; + if (value) { + if (dist > 0) { + if (dist > 1) { + fastaddStreamOfEmptyWords(false, dist - 1); + } + insertLiteralWord(1l << (i % WORD_IN_BITS)); + return; + } + if (this.rlw.getNumberOfLiteralWords() == 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + insertLiteralWord(1l << (i % WORD_IN_BITS)); + return; + } + this.buffer.orLastWord(1l << (i % WORD_IN_BITS)); + if (this.buffer.getLastWord() == ~0l) { + this.buffer.removeLastWord(); + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + // next we add one clean word + insertEmptyWord(true); + } } else { - long index = prey.dischargeNegated(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(true, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; - remaining.discharge(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - } - - /** - * Returns the cardinality of the result of a bitwise XOR of the values of the - * current bitmap with some other bitmap. Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. 
- * - * @since 0.4.0 - * @param a - * the other bitmap - * @return the cardinality - */ - public int xorCardinality(final EWAHCompressedBitmap a) { - final BitCounter counter = new BitCounter(); - xorToContainer(a, counter); - return counter.getCount(); - } - - /** - * For internal use. Computes the bitwise and of the provided bitmaps and - * stores the result in the container. - * - * @param container - * where the result is stored - * @param bitmaps - * bitmaps to AND - * @since 0.4.3 - */ - public static void andWithContainer(final BitmapStorage container, - final EWAHCompressedBitmap... bitmaps) { - if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); - if(bitmaps.length == 2) { - bitmaps[0].andToContainer(bitmaps[1],container); - return; - } - EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); - EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); - bitmaps[0].andToContainer(bitmaps[1], answer); - for(int k = 2; k < bitmaps.length - 1; ++k) { - answer.andToContainer(bitmaps[k], tmp); - tmp.swap(answer); - tmp.clear(); - } - answer.andToContainer(bitmaps[bitmaps.length - 1], container); - } - - /** - * Returns a new compressed bitmap containing the bitwise AND values of the - * provided bitmaps. - * - * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). - * - * If only one bitmap is provided, it is returned as is. - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @since 0.4.3 - * @param bitmaps - * bitmaps to AND together - * @return result of the AND - */ - public static EWAHCompressedBitmap and(final EWAHCompressedBitmap... 
bitmaps) { - if(bitmaps.length == 1) return bitmaps[0]; - if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); - EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); - EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); - bitmaps[0].andToContainer(bitmaps[1], answer); - for(int k = 2; k < bitmaps.length; ++k) { - answer.andToContainer(bitmaps[k], tmp); - tmp.swap(answer); - tmp.clear(); - } - return answer; - } - - /** - * Returns the cardinality of the result of a bitwise AND of the values of the - * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold - * the result of the AND. - * - * @since 0.4.3 - * @param bitmaps - * bitmaps to AND - * @return the cardinality - */ - public static int andCardinality(final EWAHCompressedBitmap... bitmaps) { - if(bitmaps.length == 1) return bitmaps[0].cardinality(); - final BitCounter counter = new BitCounter(); - andWithContainer(counter, bitmaps); - return counter.getCount(); - } - - /** - * Return a bitmap with the bit set to true at the given - * positions. The positions should be given in sorted order. - * - * (This is a convenience method.) - * - * @since 0.4.5 - * @param setbits list of set bit positions - * @return the bitmap - */ - public static EWAHCompressedBitmap bitmapOf(int ... setbits) { - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - for (int k : setbits) - a.set(k); - return a; - } + if (dist > 0) { + fastaddStreamOfEmptyWords(false, dist); + } + } + } + /** + * For internal use. 
+ * + * @param i the index + * @param value the value + */ + private void locateAndSet(int i, boolean value) { + int nbits = 0; + final int siw = this.buffer.sizeInWords(); + for(int pos = 0; pos < siw; ) { + long rl = RunningLengthWord.getRunningLength(this.buffer, pos); + boolean rb = RunningLengthWord.getRunningBit(this.buffer, pos); + long lw = RunningLengthWord.getNumberOfLiteralWords(this.buffer, pos); + long rbits = rl * WORD_IN_BITS; + if(i < nbits + rbits) { + setInRunningLength(value, i, nbits, pos, rl, rb, lw); + return; + } + nbits += (int) rbits; + long lbits = lw * WORD_IN_BITS; + if(i < nbits + lbits) { + setInLiteralWords(value, i, nbits, pos, rl, rb, lw); + return; + } + nbits += (int) lbits; + pos += (int) (lw + 1); + } + } + private void setInRunningLength(boolean value, int i, int nbits, int pos, long rl, boolean rb, long lw) { + if(value != rb) { + int wordPosition = (i - nbits) / WORD_IN_BITS + 1; + int addedWords = (wordPosition==rl) ? 1 : 2; + this.buffer.expand(pos+1, addedWords); + long mask = 1l << i % WORD_IN_BITS; + this.buffer.setWord(pos+1, value ? mask : ~mask); + if(this.rlw.position >= pos+1) { + this.rlw.position += addedWords; + } + if(addedWords==1) { + setRLWInfo(pos, rb, rl-1, lw+1); + } else { + setRLWInfo(pos, rb, wordPosition-1, 1l); + setRLWInfo(pos+2, rb, rl-wordPosition, lw); + if(this.rlw.position == pos) { + this.rlw.position += 2; + } + } + } + } + private void setInLiteralWords(boolean value, int i, int nbits, int pos, long rl, boolean rb, long lw) { + int wordPosition = (i - nbits) / WORD_IN_BITS + 1; + long mask = 1l << i % WORD_IN_BITS; + if(value) { + this.buffer.orWord(pos + wordPosition, mask); + } else { + this.buffer.andWord(pos + wordPosition, ~mask); + } + long emptyWord = value ? 
~0l : 0l; + if(this.buffer.getWord(pos + wordPosition) == emptyWord) { + boolean canMergeInCurrentRLW = mergeLiteralWordInCurrentRunningLength(value, rb, rl, wordPosition); + boolean canMergeInNextRLW = mergeLiteralWordInNextRunningLength(value, lw, pos, wordPosition); + if(canMergeInCurrentRLW && canMergeInNextRLW) { + long nextRl = RunningLengthWord.getRunningLength(this.buffer, pos + 2); + long nextLw = RunningLengthWord.getNumberOfLiteralWords(this.buffer, pos + 2); + this.buffer.collapse(pos, 2); + setRLWInfo(pos, value, rl + 1 + nextRl, nextLw); + if(this.rlw.position >= pos+2) { + this.rlw.position -= 2; + } + } else if(canMergeInCurrentRLW) { + this.buffer.collapse(pos + 1, 1); + setRLWInfo(pos, value, rl+1, lw-1); + if(this.rlw.position >= pos+2) { + this.rlw.position--; + } + } else if(canMergeInNextRLW) { + int nextRLWPos = (int) (pos + lw + 1); + long nextRl = RunningLengthWord.getRunningLength(this.buffer, nextRLWPos); + long nextLw = RunningLengthWord.getNumberOfLiteralWords(this.buffer, nextRLWPos); + this.buffer.collapse(pos+wordPosition, 1); + setRLWInfo(pos, rb, rl, lw-1); + setRLWInfo(pos+wordPosition, value, nextRl+1, nextLw); + if(this.rlw.position >= nextRLWPos) { + this.rlw.position -= lw + 1 - wordPosition; + } + } else { + setRLWInfo(pos, rb, rl, wordPosition-1); + setRLWInfo(pos+wordPosition, value, 1l, lw-wordPosition); + if(this.rlw.position == pos) { + this.rlw.position += wordPosition; + } + } + } + } - /** - * For internal use. This simply adds a stream of words made of zeroes so that - * we pad to the desired size. 
- * - * @param storage - * bitmap to extend - * @param currentSize - * current size (in bits) - * @param newSize - * new desired size (in bits) - * @since 0.4.3 - */ - private static void extendEmptyBits(final BitmapStorage storage, - final int currentSize, final int newSize) { - final int currentLeftover = currentSize % wordinbits; - final int finalLeftover = newSize % wordinbits; - storage.addStreamOfEmptyWords(false, (newSize / wordinbits) - currentSize - / wordinbits + (finalLeftover != 0 ? 1 : 0) - + (currentLeftover != 0 ? -1 : 0)); - } + private boolean mergeLiteralWordInCurrentRunningLength(boolean value, boolean rb, long rl, int wordPosition) { + return (value==rb || rl==0) && wordPosition==1; + } - - /** - * Uses an adaptive technique to compute the logical OR. - * Mostly for internal use. - * - * @param container where the aggregate is written. - * @param bitmaps to be aggregated - */ - public static void orWithContainer(final BitmapStorage container, - final EWAHCompressedBitmap... bitmaps) { - if (bitmaps.length < 2) - throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); - long size = 0L; - long sinbits = 0L; - for (EWAHCompressedBitmap b : bitmaps) { - size += b.sizeInBytes(); - if (sinbits < b.sizeInBits()) - sinbits = b.sizeInBits(); - } - if (size * 8 > sinbits) { - FastAggregation.bufferedorWithContainer(container, 65536, bitmaps); - } else { - FastAggregation.orToContainer(container, bitmaps); - } - } - - - /** - * Uses an adaptive technique to compute the logical XOR. - * Mostly for internal use. - * - * @param container where the aggregate is written. - * @param bitmaps to be aggregated - */ - public static void xorWithContainer(final BitmapStorage container, - final EWAHCompressedBitmap... 
bitmaps) { - if (bitmaps.length < 2) - throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); - long size = 0L; - long sinbits = 0L; - for (EWAHCompressedBitmap b : bitmaps) { - size += b.sizeInBytes(); - if (sinbits < b.sizeInBits()) - sinbits = b.sizeInBits(); - } - if (size * 8 > sinbits) { - FastAggregation.bufferedxorWithContainer(container, 65536, bitmaps); - } else { - FastAggregation.xorToContainer(container, bitmaps); - } - } - /** - * Returns a new compressed bitmap containing the bitwise OR values of the - * provided bitmaps. This is typically faster than doing the aggregation - * two-by-two (A.or(B).or(C).or(D)). - * - * If only one bitmap is provided, it is returned as is. - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @since 0.4.0 - * @param bitmaps - * bitmaps to OR together - * @return result of the OR - */ - public static EWAHCompressedBitmap or(final EWAHCompressedBitmap... bitmaps) { - if(bitmaps.length == 1) - return bitmaps[0]; - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - int largestSize = 0; - for (EWAHCompressedBitmap bitmap : bitmaps) { - largestSize = Math.max(bitmap.actualsizeinwords, largestSize); - } - container.reserve((int) (largestSize * 1.5)); - orWithContainer(container, bitmaps); - return container; - } - /** - * Returns a new compressed bitmap containing the bitwise XOR values of the - * provided bitmaps. This is typically faster than doing the aggregation - * two-by-two (A.xor(B).xor(C).xor(D)). - * - * If only one bitmap is provided, it is returned as is. - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param bitmaps - * bitmaps to XOR together - * @return result of the XOR - */ - public static EWAHCompressedBitmap xor(final EWAHCompressedBitmap... 
bitmaps) { - if(bitmaps.length == 1) - return bitmaps[0]; - final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - int largestSize = 0; - for (EWAHCompressedBitmap bitmap : bitmaps) { - largestSize = Math.max(bitmap.actualsizeinwords, largestSize); - } - container.reserve((int) (largestSize * 1.5)); - xorWithContainer(container, bitmaps); - return container; - } - - /** - * Returns the cardinality of the result of a bitwise OR of the values of the - * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold - * the result of the OR. - * - * @since 0.4.0 - * @param bitmaps - * bitmaps to OR - * @return the cardinality - */ - public static int orCardinality(final EWAHCompressedBitmap... bitmaps) { - if(bitmaps.length == 1) return bitmaps[0].cardinality(); - final BitCounter counter = new BitCounter(); - orWithContainer(counter, bitmaps); - return counter.getCount(); - } - - /** The actual size in words. */ - int actualsizeinwords = 1; - - /** The buffer (array of 64-bit words) */ - long buffer[] = null; - - /** The current (last) running length word. */ - RunningLengthWord rlw = null; - - /** sizeinbits: number of bits in the (uncompressed) bitmap. */ - int sizeinbits = 0; - - /** - * The Constant defaultbuffersize: default memory allocation when the object - * is constructed. - */ - static final int defaultbuffersize = 4; + private boolean mergeLiteralWordInNextRunningLength(boolean value, long lw, int pos, int wordPosition) { + int nextRLWPos = (int) (pos + lw + 1); + if(lw==wordPosition && nextRLWPos (this.sizeInBits + WORD_IN_BITS - 1) / WORD_IN_BITS) { + setSizeInBits(size,false); + return; + } + if ((size + WORD_IN_BITS - 1) / WORD_IN_BITS != (this.sizeInBits + WORD_IN_BITS - 1) / WORD_IN_BITS) + throw new RuntimeException( + "You can only reduce the size of the bitmap within the scope of the last word. 
To extend the bitmap, please call setSizeInBits(int,boolean)."); + this.sizeInBits = size; + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + if (usedBitsInLast == 0) + return; + if (this.rlw.getNumberOfLiteralWords() == 0) { + if (this.rlw.getRunningLength() > 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + final long word = this.rlw.getRunningBit() ? (~0l) >>> (WORD_IN_BITS - usedBitsInLast) : 0l; + this.insertLiteralWord(word); + } + return; + } + this.buffer.andLastWord((~0l) >>> (WORD_IN_BITS - usedBitsInLast)); + } + + /** + * Change the reported size in bits of the *uncompressed* bitmap + * represented by this compressed bitmap. It may change the underlying + * compressed bitmap. It is not possible to reduce the sizeInBits, but + * it can be extended. The new bits are set to false or true depending + * on the value of defaultValue. + * + * This method is not thread-safe. + * + * @param size the size in bits + * @param defaultValue the default boolean value + * @return true if the update was possible + */ + public boolean setSizeInBits(final int size, final boolean defaultValue) { + if (size <= this.sizeInBits) { + return false; + } + if ((this.sizeInBits % WORD_IN_BITS) != 0) { + if (!defaultValue) { + if (this.rlw.getNumberOfLiteralWords() > 0) { + final int bitsToAdd = size - this.sizeInBits; + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + final int freeBitsInLast = WORD_IN_BITS - usedBitsInLast; + if (this.buffer.getLastWord() == 0l) { + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + this.buffer.removeLastWord(); + this.sizeInBits -= usedBitsInLast; + } else if (usedBitsInLast > 0) { + this.sizeInBits += Math.min(bitsToAdd, freeBitsInLast); + } + } + } else { + if (this.rlw.getNumberOfLiteralWords() == 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + insertLiteralWord(0); + } + final int maskWidth = Math.min(WORD_IN_BITS - this.sizeInBits % WORD_IN_BITS, + 
size - this.sizeInBits); + final int maskShift = this.sizeInBits % WORD_IN_BITS; + final long mask = ((~0l) >>> (WORD_IN_BITS - maskWidth)) << maskShift; + this.buffer.orLastWord(mask); + if (this.buffer.getLastWord() == ~0l) { + this.buffer.removeLastWord(); + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + insertEmptyWord(true); + } + this.sizeInBits += maskWidth; + } + } + this.addStreamOfEmptyWords(defaultValue, + (size / WORD_IN_BITS) - (this.sizeInBits / WORD_IN_BITS) + ); + if (this.sizeInBits < size) { + final int dist = distanceInWords(size - 1); + if (dist > 0) { + insertLiteralWord(0); + } + if (defaultValue) { + final int maskWidth = size - this.sizeInBits; + final int maskShift = this.sizeInBits % WORD_IN_BITS; + final long mask = ((~0l) >>> (WORD_IN_BITS - maskWidth)) << maskShift; + this.buffer.orLastWord(mask); + } + this.sizeInBits = size; + } + return true; + } + + /** + * For internal use. + * + * @param i the index + */ + private int distanceInWords(int i) { + return (i + WORD_IN_BITS) / WORD_IN_BITS + - (this.sizeInBits + WORD_IN_BITS - 1) / WORD_IN_BITS; + } + + /** + * Returns the size in bits of the *uncompressed* bitmap represented by + * this compressed bitmap. Initially, the sizeInBits is zero. It is + * extended automatically when you set bits to true. + * + * The current bitmap is not modified. + * + * @return the size in bits + */ + @Override + public int sizeInBits() { + return this.sizeInBits; + } + + /** + * Report the *compressed* size of the bitmap (equivalent to memory + * usage, after accounting for some overhead). + * + * @return the size in bytes + */ + @Override + public int sizeInBytes() { + return this.buffer.sizeInWords() * (WORD_IN_BITS / 8); + } + + /** + * Compute a Boolean threshold function: bits are true where at least t + * bitmaps have a true bit. 
+ * + * @param t the threshold + * @param bitmaps input data + * @return the aggregated bitmap + * @since 0.8.1 + */ + public static EWAHCompressedBitmap threshold(final int t, + final EWAHCompressedBitmap... bitmaps) { + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + thresholdWithContainer(container, t, bitmaps); + return container; + } + + static int maxSizeInBits(final EWAHCompressedBitmap... bitmaps) { + int maxSizeInBits = 0; + for(EWAHCompressedBitmap bitmap : bitmaps) { + maxSizeInBits = Math.max(maxSizeInBits, bitmap.sizeInBits()); + } + return maxSizeInBits; + } + + /** + * Compute a Boolean threshold function: bits are true where at least T + * bitmaps have a true bit. + * + * The content of the container is overwritten. + * + * @param t the threshold + * @param bitmaps input data + * @param container where we write the aggregated bitmap + * @since 0.8.1 + */ + public static void thresholdWithContainer(final BitmapStorage container, final int t, + final EWAHCompressedBitmap... bitmaps) { + (new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(t), container, bitmaps); + } + + /** + * Populate an array of (sorted integers) corresponding to the location + * of the set bits. 
+ * + * @return the array containing the location of the set bits + */ + public int[] toArray() { + int[] ans = new int[this.cardinality()]; + int inAnsPos = 0; + int pos = 0; + final EWAHIterator i = this.getEWAHIterator(); + while (i.hasNext()) { + RunningLengthWord localRlw = i.next(); + final long runningLength = localRlw.getRunningLength(); + if (localRlw.getRunningBit()) { + for (int j = 0; j < runningLength; ++j) { + for (int c = 0; c < WORD_IN_BITS; ++c) { + ans[inAnsPos++] = pos++; + } + } + } else { + pos += WORD_IN_BITS * runningLength; + } + final int numberOfLiteralWords = localRlw.getNumberOfLiteralWords(); + final int literalWords = i.literalWords(); + for (int j = 0; j < numberOfLiteralWords; ++j) { + long data = i.buffer().getWord(literalWords + j); + while (data != 0) { + final long T = data & -data; + ans[inAnsPos++] = Long.bitCount(T - 1) + + pos; + data ^= T; + } + pos += WORD_IN_BITS; + } + } + return ans; + + } + + /** + * A more detailed string describing the bitmap (useful for debugging). + * + * @return the string + */ + public String toDebugString() { + StringBuilder ans = new StringBuilder(); + ans.append(" EWAHCompressedBitmap, size in bits = "); + ans.append(this.sizeInBits).append(" size in words = "); + ans.append(this.buffer.sizeInWords()).append("\n"); + final EWAHIterator i = this.getEWAHIterator(); + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + ans.append(localrlw.getRunningLength()).append(" 1x11\n"); + } else { + ans.append(localrlw.getRunningLength()).append(" 0x00\n"); + } + ans.append(localrlw.getNumberOfLiteralWords()).append(" dirties\n"); + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + long data = i.buffer().getWord(i.literalWords() + j); + ans.append("\t").append(data).append("\n"); + } + } + return ans.toString(); + } + + /** + * A string describing the bitmap. 
+ * + * @return the string + */ + @Override + public String toString() { + StringBuilder answer = new StringBuilder(); + IntIterator i = this.intIterator(); + answer.append("{"); + if (i.hasNext()) + answer.append(i.next()); + while (i.hasNext()) { + answer.append(","); + answer.append(i.next()); + } + answer.append("}"); + return answer.toString(); + } + + /** + * Swap the content of the bitmap with another. + * + * @param other bitmap to swap with + */ + public void swap(final EWAHCompressedBitmap other) { + this.buffer.swap(other.buffer); + + int tmp2 = this.rlw.position; + this.rlw.position = other.rlw.position; + other.rlw.position = tmp2; + + int tmp3 = this.sizeInBits; + this.sizeInBits = other.sizeInBits; + other.sizeInBits = tmp3; + } + + /** + * Reduce the internal buffer to its minimal allowable size. + * This can free memory. + */ + public void trim() { + this.buffer.trim(); + } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) { + int size = this.buffer.sizeInWords() + a.buffer.sizeInWords(); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(size); + xorToContainer(a, container); + return container; + } + + /** + * Computes a new compressed bitmap containing the bitwise XOR values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * The current bitmap is not modified. 
+ * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + * @since 0.4.0 + */ + public void xorToContainer(final EWAHCompressedBitmap a, + final BitmapStorage container) { + container.clear(); + final EWAHIterator i = a.getEWAHIterator(); + final EWAHIterator j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi; + final long index = (!predator.getRunningBit()) ? prey.discharge(container, + predator.getRunningLength()) : prey.dischargeNegated(container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(predator.getRunningBit(), predator.getRunningLength() - index); + predator.discardRunningWords(); + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise XOR of the values + * of the current bitmap with some other bitmap. 
Avoids + * allocating an intermediate bitmap to hold the result of the XOR. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + * @since 0.4.0 + */ + public int xorCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + xorToContainer(a, counter); + return counter.getCount(); + } + + /** + * Returns a new compressed bitmap containing the composition of + * the current bitmap with some other bitmap. + * + * The composition A.compose(B) is defined as follows: we retain + * the ith set bit of A only if the ith bit of B is set. For example, + * if you have the following bitmap A = { 0, 1, 0, 1, 1, 0 } and want + * to keep only the second and third ones, you can call A.compose(B) + * with B = { 0, 1, 1 } and you will get C = { 0, 0, 0, 1, 1, 0 }. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap compose(EWAHCompressedBitmap a) { + int size = this.buffer.sizeInWords(); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(size); + composeToContainer(a, container); + return container; + } + + /** + * Computes a new compressed bitmap containing the composition of + * the current bitmap with some other bitmap. + * + * The composition A.compose(B) is defined as follows: we retain + * the ith set bit of A only if the ith bit of B is set. For example, + * if you have the following bitmap A = { 0, 1, 0, 1, 1, 0 } and want + * to keep only the second and third ones, you can call A.compose(B) + * with B = { 0, 1, 1 } and you will get C = { 0, 0, 0, 1, 1, 0 }. + * + * The current bitmap is not modified. + * + * The content of the container is overwritten.
+ * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + */ + public void composeToContainer(final EWAHCompressedBitmap a, + final EWAHCompressedBitmap container) { + container.clear(); + final ChunkIterator iterator = chunkIterator(); + final ChunkIterator aIterator = a.chunkIterator(); + int index = 0; + while(iterator.hasNext() && aIterator.hasNext()) { + if (!iterator.nextBit()) { + int length = iterator.nextLength(); + index += length; + container.setSizeInBits(index, false); + iterator.move(length); + } else { + int length = Math.min(iterator.nextLength(), aIterator.nextLength()); + index += length; + container.setSizeInBits(index, aIterator.nextBit()); + iterator.move(length); + aIterator.move(length); + } + } + container.setSizeInBits(sizeInBits, false); + } + + /** + * For internal use. Computes the bitwise and of the provided bitmaps + * and stores the result in the container. + * + * The content of the container is overwritten. + * + * @param container where the result is stored + * @param bitmaps bitmaps to AND; at least two are required, otherwise an IllegalArgumentException is thrown + * @since 0.4.3 + */ + public static void andWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("Need at least two bitmaps"); + if (bitmaps.length == 2) { + bitmaps[0].andToContainer(bitmaps[1], container); + return; + } + + int initialSize = calculateInitialSize(bitmaps); + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(initialSize); + EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(initialSize); + + bitmaps[0].andToContainer(bitmaps[1], answer); + for (int k = 2; k < bitmaps.length - 1; ++k) { + answer.andToContainer(bitmaps[k], tmp); + EWAHCompressedBitmap tmp2 = answer; + answer = tmp; + tmp = tmp2; + tmp.clear(); + } + answer.andToContainer(bitmaps[bitmaps.length - 1], container); + } + + private static int calculateInitialSize(final EWAHCompressedBitmap...
bitmaps) { + int initialSize = 0; + for (EWAHCompressedBitmap bitmap : bitmaps) + initialSize = Math.max(bitmap.buffer.sizeInWords(), initialSize); + return initialSize; + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of + * the provided bitmaps. + * + * It may or may not be faster than doing the aggregation two-by-two + * (A.and(B).and(C)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param bitmaps bitmaps to AND together + * @return result of the AND + * @since 0.4.3 + */ + public static EWAHCompressedBitmap and( + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0]; + if (bitmaps.length == 2) + return bitmaps[0].and(bitmaps[1]); + + int initialSize = calculateInitialSize(bitmaps); + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(initialSize); + EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(initialSize); + bitmaps[0].andToContainer(bitmaps[1], answer); + for (int k = 2; k < bitmaps.length; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + return answer; + } + + /** + * Returns the cardinality of the result of a bitwise AND of the values + * of the provided bitmaps. Avoids allocating an intermediate + * bitmap to hold the result of the AND. + * + * @param bitmaps bitmaps to AND + * @return the cardinality + * @since 0.4.3 + */ + public static int andCardinality(final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0].cardinality(); + final BitCounter counter = new BitCounter(); + andWithContainer(counter, bitmaps); + return counter.getCount(); + } + + /** + * Return a bitmap with the bit set to true at the given positions. The + * positions should be given in sorted order. + * + * (This is a convenience method.) 
+ * + * @param setBits list of set bit positions + * @return the bitmap + * @since 0.4.5 + */ + public static EWAHCompressedBitmap bitmapOf(int... setBits) { + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + for (int k : setBits) + a.set(k); + return a; + } + + /** + * Uses an adaptive technique to compute the logical OR. Mostly for + * internal use. + * + * The content of the container is overwritten. + * + * @param container where the aggregate is written. + * @param bitmaps to be aggregated + */ + public static void orWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "You should provide at least two bitmaps, provided " + + bitmaps.length + ); + FastAggregation.orToContainer(container, bitmaps); + } + + /** + * Uses an adaptive technique to compute the logical XOR. Mostly for + * internal use. + * + * The content of the container is overwritten. + * + * @param container where the aggregate is written. + * @param bitmaps to be aggregated + */ + public static void xorWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "You should provide at least two bitmaps, provided " + + bitmaps.length + ); + FastAggregation.xorToContainer(container, bitmaps); + } + + /** + * Returns a new compressed bitmap containing the bitwise OR values of + * the provided bitmaps. This is typically faster than doing the + * aggregation two-by-two (A.or(B).or(C).or(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param bitmaps bitmaps to OR together + * @return result of the OR + * @since 0.4.0 + */ + public static EWAHCompressedBitmap or( + final EWAHCompressedBitmap... 
bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0]; + + int largestSize = calculateInitialSize(bitmaps); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap((int) (largestSize * 1.5)); + orWithContainer(container, bitmaps); + return container; + } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of + * the provided bitmaps. This is typically faster than doing the + * aggregation two-by-two (A.xor(B).xor(C).xor(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param bitmaps bitmaps to XOR together + * @return result of the XOR + */ + public static EWAHCompressedBitmap xor( + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0]; + + int largestSize = calculateInitialSize(bitmaps); + + int size = (int) (largestSize * 1.5); + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(size); + xorWithContainer(container, bitmaps); + return container; + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values + * of the provided bitmaps. Avoids allocating an intermediate + * bitmap to hold the result of the OR. + * + * @param bitmaps bitmaps to OR + * @return the cardinality + * @since 0.4.0 + */ + public static int orCardinality(final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0].cardinality(); + final BitCounter counter = new BitCounter(); + orWithContainer(counter, bitmaps); + return counter.getCount(); + } + + + + /** + * Generates a new bitmap shifted by "b" bits. + * If b is positive, the position of all set bits is increased by + * b. The negative case is not supported. 
+ * + * @param b number of bits + * @return new shifted bitmap + */ + public EWAHCompressedBitmap shift(final int b) { + if (b < 0) + throw new IllegalArgumentException( + "Negative shifts unsupported at the moment."); // TODO: add + // support + int sz = this.buffer.sizeInWords(); + int newsz = b > 0 ? sz + (b + (WORD_IN_BITS - 1)) / WORD_IN_BITS : sz; + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(newsz); + IteratingRLW i = this.getIteratingRLW(); + int fullwords = b / WORD_IN_BITS; + int shift = b % WORD_IN_BITS; + answer.addStreamOfEmptyWords(false, fullwords); + if (shift == 0) { + answer.buffer.push_back(this.buffer, 0, sz); + } else { + // whether the shift should justify a new word + final boolean shiftextension = ((this.sizeInBits + WORD_IN_BITS - 1) % WORD_IN_BITS) + shift >= WORD_IN_BITS; + long w = 0; + while (true) { + long rl = i.getRunningLength(); + if (rl > 0) { + if (i.getRunningBit()) { + long sw = w | (-1l << shift); + answer.addWord(sw); + w = -1l >>> (WORD_IN_BITS - shift); + } else { + answer.addWord(w); + w = 0; + } + if (rl > 1) { + answer.addStreamOfEmptyWords(i.getRunningBit(), rl - 1); + } + } + int x = i.getNumberOfLiteralWords(); + for (int k = 0; k < x; ++k) { + long neww = i.getLiteralWordAt(k); + long sw = w | (neww << shift); + answer.addWord(sw); + w = neww >>> (WORD_IN_BITS - shift); + } + if (!i.next()) { + if(shiftextension) answer.addWord(w); + break; + } + } + } + answer.sizeInBits = this.sizeInBits + b; + return answer; + } + + /** + * The buffer + */ + final Buffer buffer; + + /** + * The current (last) running length word. + */ + private RunningLengthWord rlw = null; + + /** + * sizeInBits: number of bits in the (uncompressed) bitmap. + */ + private int sizeInBits = 0; + + /** + * whether we adjust after some aggregation by adding in zeroes * + */ + public static final boolean ADJUST_CONTAINER_SIZE_WHEN_AGGREGATING = true; + + /** + * The Constant WORD_IN_BITS represents the number of bits in a long. 
+ */ + public static final int WORD_IN_BITS = 64; - /** The Constant wordinbits represents the number of bits in a long. */ - public static final int wordinbits = 64; + static final long serialVersionUID = 1L; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/EWAHIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/EWAHIterator.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/EWAHIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,98 +1,104 @@ package com.googlecode.javaewah; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** - * The class EWAHIterator represents a special type of - * efficient iterator iterating over (uncompressed) words of bits. - * It is not meant for end users. + * The class EWAHIterator represents a special type of efficient iterator + * iterating over (uncompressed) words of bits. It is not meant for end users. + * * @author Daniel Lemire * @since 0.1.0 - * */ public final class EWAHIterator implements Cloneable { - - /** - * Instantiates a new EWAH iterator. - * - * @param a the array of words - * @param sizeinwords the number of words that are significant in the array of words - */ - public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) { - this.rlw = new RunningLengthWord(a, 0); - this.size = sizeinwords; - this.pointer = 0; - } - - /** - * Allow expert developers to instantiate an EWAHIterator. 
- * - * @param bitmap we want to iterate over - * @return an iterator - */ - public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) { - return bitmap.getEWAHIterator(); - } - - - /** - * Access to the array of words - * - * @return the long[] - */ - public long[] buffer() { - return this.rlw.parent.buffer; - } - - /** - * Position of the literal words represented by this running length word. - * - * @return the int - */ - public int literalWords() { - return this.pointer - this.rlw.getNumberOfLiteralWords(); - } - - /** - * Checks for next. - * - * @return true, if successful - */ - public boolean hasNext() { - return this.pointer < this.size; - } - - /** - * Next running length word. - * - * @return the running length word - */ - public RunningLengthWord next() { - this.rlw.position = this.pointer; - this.pointer += this.rlw.getNumberOfLiteralWords() + 1; - return this.rlw; - } - - @Override - public EWAHIterator clone() throws CloneNotSupportedException { - EWAHIterator ans = (EWAHIterator) super.clone(); - ans.rlw = this.rlw.clone(); - ans.size = this.size; - ans.pointer = this.pointer; - return ans; - } - /** The pointer represent the location of the current running length - * word in the array of words (embedded in the rlw attribute). */ - int pointer; - - /** The current running length word. */ - RunningLengthWord rlw; - /** The size in words. */ - int size; + /** + * Instantiates a new EWAH iterator. + * + * @param buffer the buffer + */ + public EWAHIterator(final Buffer buffer) { + this.rlw = new RunningLengthWord(buffer, 0); + this.size = buffer.sizeInWords(); + this.pointer = 0; + } + + private EWAHIterator(int pointer, RunningLengthWord rlw, int size){ + this.pointer = pointer; + this.rlw = rlw; + this.size = size; + } + + /** + * Allow expert developers to instantiate an EWAHIterator. 
+ * + * @param bitmap we want to iterate over + * @return an iterator + */ + public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) { + return bitmap.getEWAHIterator(); + } + + /** + * Access to the buffer + * + * @return the buffer + */ + public Buffer buffer() { + return this.rlw.buffer; + } + + /** + * Position of the literal words represented by this running length + * word. + * + * @return the pointer minus the number of literal words of the current running length word + */ + public int literalWords() { + return this.pointer - this.rlw.getNumberOfLiteralWords(); + } + + /** + * Checks for next. + * + * @return true if the pointer has not yet reached the size in words + */ + public boolean hasNext() { + return this.pointer < this.size; + } + + /** + * Next running length word. + * + * @return the running length word + */ + public RunningLengthWord next() { + this.rlw.position = this.pointer; + this.pointer += this.rlw.getNumberOfLiteralWords() + 1; + return this.rlw; + } + + @Override + public EWAHIterator clone() throws CloneNotSupportedException { + return new EWAHIterator(pointer,rlw.clone(),size); + } + + /** + * The pointer represents the location of the current running length word + * in the array of words (embedded in the rlw attribute). + */ + private int pointer; + + /** + * The current running length word. + */ + final RunningLengthWord rlw; + + /** + * The size in words.
+ */ + private final int size; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/FastAggregation.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/FastAggregation.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/FastAggregation.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/FastAggregation.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,12 +1,13 @@ package com.googlecode.javaewah; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.PriorityQueue; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ @@ -14,423 +15,439 @@ * Fast algorithms to aggregate many bitmaps. These algorithms are just given as * reference. They may not be faster than the corresponding methods in the * EWAHCompressedBitmap class. - * + * * @author Daniel Lemire - * */ -public class FastAggregation { - /** - * Compute the and aggregate using a temporary uncompressed bitmap. - * @param bitmaps the source bitmaps - * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) - * @return the or aggregate. - */ - public static EWAHCompressedBitmap bufferedand(final int bufsize, - final EWAHCompressedBitmap... bitmaps) { - EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); - bufferedandWithContainer(answer,bufsize, bitmaps); - return answer; - } - /** - * Compute the and aggregate using a temporary uncompressed bitmap. 
- * - * @param container where the aggregate is written - * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) - * @param bitmaps the source bitmaps - */ - public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize, - final EWAHCompressedBitmap... bitmaps) { - - java.util.LinkedList al = new java.util.LinkedList(); - for (EWAHCompressedBitmap bitmap : bitmaps) { - al.add(new IteratingBufferedRunningLengthWord(bitmap)); - } - - long[] hardbitmap = new long[bufsize*bitmaps.length]; - - for(IteratingRLW i : al) - if (i.size() == 0) { - al.clear(); - break; - } - - while (!al.isEmpty()) { - Arrays.fill(hardbitmap, ~0l); - long effective = Integer.MAX_VALUE; - for(IteratingRLW i : al) { - int eff = IteratorAggregation.inplaceand(hardbitmap, i); - if (eff < effective) - effective = eff; - } - for (int k = 0; k < effective; ++k) - container.add(hardbitmap[k]); - for(IteratingRLW i : al) - if (i.size() == 0) { - al.clear(); - break; - } - } - } - - /** - * Compute the or aggregate using a temporary uncompressed bitmap. - * @param bitmaps the source bitmaps - * @param bufsize buffer size used during the computation in 64-bit words - * @return the or aggregate. - */ - public static EWAHCompressedBitmap bufferedor(final int bufsize, - final EWAHCompressedBitmap... bitmaps) { - EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); - bufferedorWithContainer(answer, bufsize, bitmaps); - return answer; - } - - /** - * Compute the or aggregate using a temporary uncompressed bitmap. - * - * @param container where the aggregate is written - * @param bufsize buffer size used during the computation in 64-bit words - * @param bitmaps the source bitmaps - */ - public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize, - final EWAHCompressedBitmap... 
bitmaps) { - int range = 0; - EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); - Arrays.sort(sbitmaps, new Comparator() { - @Override - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return b.sizeinbits - a.sizeinbits; - } - }); - - java.util.ArrayList al = new java.util.ArrayList(); - for (EWAHCompressedBitmap bitmap : sbitmaps) { - if (bitmap.sizeinbits > range) - range = bitmap.sizeinbits; - al.add(new IteratingBufferedRunningLengthWord(bitmap)); - } - long[] hardbitmap = new long[bufsize]; - int maxr = al.size(); - while (maxr > 0) { - long effective = 0; - for (int k = 0; k < maxr; ++k) { - if (al.get(k).size() > 0) { - int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k)); - if (eff > effective) - effective = eff; - } else - maxr = k; - } - for (int k = 0; k < effective; ++k) - container.add(hardbitmap[k]); - Arrays.fill(hardbitmap, 0); - - } - container.setSizeInBits(range); - } - - /** - * Compute the xor aggregate using a temporary uncompressed bitmap. - * @param bitmaps the source bitmaps - * @param bufsize buffer size used during the computation in 64-bit words - * @return the xor aggregate. - */ - public static EWAHCompressedBitmap bufferedxor(final int bufsize, - final EWAHCompressedBitmap... bitmaps) { - EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); - bufferedxorWithContainer(answer, bufsize,bitmaps); - return answer; - } - - - /** - * Compute the xor aggregate using a temporary uncompressed bitmap. - * - * @param container where the aggregate is written - * @param bufsize buffer size used during the computation in 64-bit words - * @param bitmaps the source bitmaps - */ - public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize, - final EWAHCompressedBitmap... 
bitmaps) { - int range = 0; - EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); - Arrays.sort(sbitmaps, new Comparator() { - @Override - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return b.sizeinbits - a.sizeinbits; - } - }); - - java.util.ArrayList al = new java.util.ArrayList(); - for (EWAHCompressedBitmap bitmap : sbitmaps) { - if (bitmap.sizeinbits > range) - range = bitmap.sizeinbits; - al.add(new IteratingBufferedRunningLengthWord(bitmap)); - } - long[] hardbitmap = new long[bufsize]; - int maxr = al.size(); - while (maxr > 0) { - long effective = 0; - for (int k = 0; k < maxr; ++k) { - if (al.get(k).size() > 0) { - int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); - if (eff > effective) - effective = eff; - } else - maxr = k; - } - for (int k = 0; k < effective; ++k) - container.add(hardbitmap[k]); - Arrays.fill(hardbitmap, 0); - } - container.setSizeInBits(range); - } - - /** - * Uses a priority queue to compute the or aggregate. - * @param a class extending LogicalElement (like a compressed bitmap) - * @param bitmaps - * bitmaps to be aggregated - * @return the or aggregate - */ - @SuppressWarnings({ "rawtypes", "unchecked" }) - public static T or(T... bitmaps) { - PriorityQueue pq = new PriorityQueue(bitmaps.length, - new Comparator() { - @Override - public int compare(T a, T b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (T x : bitmaps) { - pq.add(x); - } - while (pq.size() > 1) { - T x1 = pq.poll(); - T x2 = pq.poll(); - pq.add((T) x1.or(x2)); - } - return pq.poll(); - } - /** - * Uses a priority queue to compute the or aggregate. - * @param container where we write the result - * @param bitmaps to be aggregated - */ - public static void orToContainer(final BitmapStorage container, - final EWAHCompressedBitmap ... 
bitmaps) { - if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); - PriorityQueue pq = new PriorityQueue(bitmaps.length, - new Comparator() { - @Override - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (EWAHCompressedBitmap x : bitmaps) { - pq.add(x); - } - while (pq.size() > 2) { - EWAHCompressedBitmap x1 = pq.poll(); - EWAHCompressedBitmap x2 = pq.poll(); - pq.add(x1.or(x2)); - } - pq.poll().orToContainer(pq.poll(), container); - } - - - /** - * Uses a priority queue to compute the xor aggregate. - * - * @param a class extending LogicalElement (like a compressed bitmap) - * @param bitmaps - * bitmaps to be aggregated - * @return the xor aggregate - */ - @SuppressWarnings({ "rawtypes", "unchecked" }) - public static T xor(T... bitmaps) { - PriorityQueue pq = new PriorityQueue(bitmaps.length, - new Comparator() { - - @Override - public int compare(T a, T b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (T x : bitmaps) - pq.add(x); - while (pq.size() > 1) { - T x1 = pq.poll(); - T x2 = pq.poll(); - pq.add((T) x1.xor(x2)); - } - return pq.poll(); - } - - /** - * Uses a priority queue to compute the xor aggregate. - * @param container where we write the result - * @param bitmaps to be aggregated - */ - public static void xorToContainer(final BitmapStorage container, - final EWAHCompressedBitmap ... 
bitmaps) { - if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); - PriorityQueue pq = new PriorityQueue(bitmaps.length, - new Comparator() { - @Override - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (EWAHCompressedBitmap x : bitmaps) { - pq.add(x); - } - while (pq.size() > 2) { - EWAHCompressedBitmap x1 = pq.poll(); - EWAHCompressedBitmap x2 = pq.poll(); - pq.add(x1.xor(x2)); - } - pq.poll().xorToContainer(pq.poll(), container); - } - - /** - * For internal use. Computes the bitwise or of the provided bitmaps and - * stores the result in the container. (This used to be the default.) - * - * @deprecated use EWAHCompressedBitmap.or instead - * @since 0.4.0 - * @param container where store the result - * @param bitmaps to be aggregated - */ - @Deprecated - public static void legacy_orWithContainer(final BitmapStorage container, - final EWAHCompressedBitmap... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].orToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in descending order by sizeinbits. We will exhaust the - // sorted bitmaps from right to left. - final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, new Comparator() { - @Override - public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { - return a.sizeinbits < b.sizeinbits ? 1 - : a.sizeinbits == b.sizeinbits ? 0 : -1; - } - }); - - final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; - int maxAvailablePos = 0; - for (EWAHCompressedBitmap bitmap : sortedBitmaps) { - EWAHIterator iterator = bitmap.getEWAHIterator(); - if (iterator.hasNext()) { - rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord( - iterator); - } - } - - if (maxAvailablePos == 0) { // this never happens... 
- container.setSizeInBits(0); - return; - } - - int maxSize = sortedBitmaps[0].sizeinbits; - - while (true) { - long maxOneRl = 0; - long minZeroRl = Long.MAX_VALUE; - long minSize = Long.MAX_VALUE; - int numEmptyRl = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - long size = rlw.size(); - if (size == 0) { - maxAvailablePos = i; - break; - } - minSize = Math.min(minSize, size); - - if (rlw.getRunningBit()) { - long rl = rlw.getRunningLength(); - maxOneRl = Math.max(maxOneRl, rl); - minZeroRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - long rl = rlw.getRunningLength(); - minZeroRl = Math.min(minZeroRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (maxAvailablePos == 0) { - break; - } else if (maxAvailablePos == 1) { - // only one bitmap is left so just write the rest of it out - rlws[0].discharge(container); - break; - } - - if (maxOneRl > 0) { - container.addStreamOfEmptyWords(true, maxOneRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(maxOneRl); - } - } else if (minZeroRl > 0) { - container.addStreamOfEmptyWords(false, minZeroRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(minZeroRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to process and the rest have a run of - // 0's we can write them out here - IteratingBufferedRunningLengthWord emptyRl = null; - long minNonEmptyRl = Long.MAX_VALUE; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - long rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math.min(minNonEmptyRl, rl); - } - } - long wordsToWrite = minNonEmptyRl > minSize ? 
- minSize : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords((int) wordsToWrite, container); - index += wordsToWrite; - } - - while (index < minSize) { - long word = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - if (rlw.getRunningLength() <= index) { - word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); - } - } - container.add(word); - index++; - } - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord rlw = rlws[i]; - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBits(maxSize); - } - +public final class FastAggregation { + + /** Private constructor to prevent instantiation */ + private FastAggregation() {} + + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * + * @param bitmaps the source bitmaps + * @param bufSize buffer size used during the computation in 64-bit + * words (per input bitmap) + * @return the and aggregate. + */ + public static EWAHCompressedBitmap bufferedand(final int bufSize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedandWithContainer(answer, bufSize, bitmaps); + return answer; + } + + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * + * @param container where the aggregate is written + * @param bufSize buffer size used during the computation in 64-bit + * words (per input bitmap) + * @param bitmaps the source bitmaps + */ + public static void bufferedandWithContainer( + final BitmapStorage container, final int bufSize, + final EWAHCompressedBitmap...
bitmaps) { + + java.util.LinkedList al = new java.util.LinkedList(); + for (EWAHCompressedBitmap bitmap : bitmaps) { + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + + long[] hardbitmap = new long[bufSize * bitmaps.length]; + + for (IteratingRLW i : al) + if (i.size() == 0) { + al.clear(); + break; + } + + while (!al.isEmpty()) { + Arrays.fill(hardbitmap, ~0l); + long effective = Integer.MAX_VALUE; + for (IteratingRLW i : al) { + int eff = IteratorAggregation.inplaceand( + hardbitmap, i); + if (eff < effective) + effective = eff; + } + for (int k = 0; k < effective; ++k) + container.addWord(hardbitmap[k]); + for (IteratingRLW i : al) + if (i.size() == 0) { + al.clear(); + break; + } + } + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param bitmaps the source bitmaps + * @param bufSize buffer size used during the computation in 64-bit + * words + * @return the or aggregate. + */ + public static EWAHCompressedBitmap bufferedor(final int bufSize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedorWithContainer(answer, bufSize, bitmaps); + return answer; + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufSize buffer size used during the computation in 64-bit + * words + * @param bitmaps the source bitmaps + */ + public static void bufferedorWithContainer( + final BitmapStorage container, final int bufSize, + final EWAHCompressedBitmap... 
bitmaps) { + int range = 0; + EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, + EWAHCompressedBitmap b) { + return b.sizeInBits() - a.sizeInBits(); + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap bitmap : sbitmaps) { + if (bitmap.sizeInBits() > range) + range = bitmap.sizeInBits(); + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + long[] hardbitmap = new long[bufSize]; + int maxr = al.size(); + while (maxr > 0) { + long effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation + .inplaceor(hardbitmap, + al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.addWord(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + + } + container.setSizeInBitsWithinLastWord(range); + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * @param bitmaps the source bitmaps + * @param bufSize buffer size used during the computation in 64-bit + * words + * @return the xor aggregate. + */ + public static EWAHCompressedBitmap bufferedxor(final int bufSize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedxorWithContainer(answer, bufSize, bitmaps); + return answer; + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * + * @param container where the aggregate is written + * @param bufSize buffer size used during the computation in 64-bit + * words + * @param bitmaps the source bitmaps + */ + public static void bufferedxorWithContainer( + final BitmapStorage container, final int bufSize, + final EWAHCompressedBitmap... 
bitmaps) { + int range = 0; + EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, + EWAHCompressedBitmap b) { + return b.sizeInBits() - a.sizeInBits(); + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap bitmap : sbitmaps) { + if (bitmap.sizeInBits() > range) + range = bitmap.sizeInBits(); + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + long[] hardbitmap = new long[bufSize]; + int maxr = al.size(); + while (maxr > 0) { + long effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.addWord(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + } + container.setSizeInBitsWithinLastWord(range); + } + + /** + * Uses a priority queue to compute the or aggregate. + * + * This algorithm runs in linearithmic time (O(n log n)) with respect to the number of bitmaps. + * + * @param a class extending LogicalElement (like a compressed + * bitmap) + * @param bitmaps bitmaps to be aggregated + * @return the or aggregate + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + public static T or(T... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(T a, T b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + while (pq.size() > 1) { + T x1 = pq.poll(); + T x2 = pq.poll(); + pq.add((T) x1.or(x2)); + } + return pq.poll(); + } + + /** + * Uses a priority queue to compute the or aggregate. + * + * The content of the container is overwritten. + * + * This algorithm runs in linearithmic time (O(n log n)) with respect to the number of bitmaps. 
+ * + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void orToContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue( + bitmaps.length, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, + EWAHCompressedBitmap b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + while (pq.size() > 2) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.or(x2)); + } + pq.poll().orToContainer(pq.poll(), container); + } + + /** + * Simple algorithm that computes the OR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap or(final EWAHCompressedBitmap... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + if(pq.isEmpty()) return new EWAHCompressedBitmap(); + while (pq.size() > 1) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.or(x2)); + } + return pq.poll(); + } + + /** + * Simple algorithm that computes the XOR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap xor(final EWAHCompressedBitmap... 
bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + if(pq.isEmpty()) return new EWAHCompressedBitmap(); + while (pq.size() > 1) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + return pq.poll(); + } + + /** + * Simple algorithm that computes the OR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap or(final Iterator bitmaps) { + PriorityQueue pq = new PriorityQueue(32, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + while(bitmaps.hasNext()) + pq.add(bitmaps.next()); + if(pq.isEmpty()) return new EWAHCompressedBitmap(); + while (pq.size() > 1) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.or(x2)); + } + return pq.poll(); + } + + /** + * Simple algorithm that computes the XOR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap xor(final Iterator bitmaps) { + PriorityQueue pq = new PriorityQueue(32, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + while(bitmaps.hasNext()) + pq.add(bitmaps.next()); + if(pq.isEmpty()) return new EWAHCompressedBitmap(); + while (pq.size() > 1) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + return pq.poll(); + } + + + /** + * Uses a priority queue to compute the xor aggregate. 
+ * + * This algorithm runs in linearithmic time (O(n log n)) with respect to the number of bitmaps. + * + * @param a class extending LogicalElement (like a compressed + * bitmap) + * @param bitmaps bitmaps to be aggregated + * @return the xor aggregate + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + public static T xor(T... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + + @Override + public int compare(T a, T b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + while (pq.size() > 1) { + T x1 = pq.poll(); + T x2 = pq.poll(); + pq.add((T) x1.xor(x2)); + } + return pq.poll(); + } + + /** + * Uses a priority queue to compute the xor aggregate. + * + * The content of the container is overwritten. + * + * This algorithm runs in linearithmic time (O(n log n)) with respect to the number of bitmaps. + * + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void xorToContainer(final BitmapStorage container, + final EWAHCompressedBitmap... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue( + bitmaps.length, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, + EWAHCompressedBitmap b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + while (pq.size() > 2) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + pq.poll().xorToContainer(pq.poll(), container); + } + } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java 2019-11-08 21:55:59.000000000 +0000 @@ -5,83 +5,83 @@ * Licensed under the Apache License, Version 2.0. */ -import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; +import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; /** - * The IntIteratorImpl is the 64 bit implementation of the - * IntIterator interface, which efficiently returns the stream of integers - * represented by an EWAHIterator. + * The IntIteratorImpl is the 64 bit implementation of the IntIterator + * interface, which efficiently returns the stream of integers represented by an + * EWAHIterator. 
* * @author Colby Ranger * @since 0.5.6 */ final class IntIteratorImpl implements IntIterator { - private final EWAHIterator ewahIter; - private final long[] ewahBuffer; - private int position; - private int runningLength; - private long word; - private int wordPosition; - private int wordLength; - private int literalPosition; - private boolean hasnext; - - IntIteratorImpl(EWAHIterator ewahIter) { - this.ewahIter = ewahIter; - this.ewahBuffer = ewahIter.buffer(); - this.hasnext = this.moveToNext(); - } - - public final boolean moveToNext() { - while (!runningHasNext() && !literalHasNext()) { - if (!this.ewahIter.hasNext()) { - return false; - } - setRunningLengthWord(this.ewahIter.next()); - } - return true; - } - - @Override -public boolean hasNext() { - return this.hasnext; - } - - @Override -public final int next() { - final int answer; - if (runningHasNext()) { - answer = this.position++; - } else { - final int bit = Long.numberOfTrailingZeros(this.word); - this.word ^= (1l << bit); - answer = this.literalPosition + bit; - } - this.hasnext = this.moveToNext(); - return answer; - } - - private final void setRunningLengthWord(RunningLengthWord rlw) { - this.runningLength = wordinbits * (int) rlw.getRunningLength() + this.position; - if (!rlw.getRunningBit()) { - this.position = this.runningLength; - } - - this.wordPosition = this.ewahIter.literalWords(); - this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); - } - - private final boolean runningHasNext() { - return this.position < this.runningLength; - } - - private final boolean literalHasNext() { - while (this.word == 0 && this.wordPosition < this.wordLength) { - this.word = this.ewahBuffer[this.wordPosition++]; - this.literalPosition = this.position; - this.position += wordinbits; + private final EWAHIterator ewahIter; + private final Buffer buffer; + private int position; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int 
literalPosition; + private boolean hasNext; + + IntIteratorImpl(EWAHIterator ewahIter) { + this.ewahIter = ewahIter; + this.buffer = ewahIter.buffer(); + this.hasNext = this.moveToNext(); + } + + public boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (!this.ewahIter.hasNext()) { + return false; + } + setRunningLengthWord(this.ewahIter.next()); + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final long t = this.word & -this.word; + answer = this.literalPosition + Long.bitCount(t - 1); + this.word ^= t; + } + this.hasNext = this.moveToNext(); + return answer; + } + + private void setRunningLengthWord(RunningLengthWord rlw) { + this.runningLength = WORD_IN_BITS * (int) rlw.getRunningLength() + this.position; + if (!rlw.getRunningBit()) { + this.position = this.runningLength; + } + + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.buffer.getWord(this.wordPosition++); + this.literalPosition = this.position; + this.position += WORD_IN_BITS; + } + return this.word != 0; } - return this.word != 0; - } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IntIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIterator.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IntIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,31 +1,29 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, 
Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** - * - * The IntIterator interface is used to iterate over a stream of integers. - * + * The IntIterator interface is used to iterate over a stream of integers. + * * @author Daniel Lemire * @since 0.2.0 - * */ public interface IntIterator { - - /** - * Is there more? - * - * @return true, if there is more, false otherwise - */ - public boolean hasNext(); - /** - * Return the next integer - * - * @return the integer - */ - public int next(); + /** + * Is there more? + * + * @return true, if there is more, false otherwise + */ + boolean hasNext(); + + /** + * Return the next integer + * + * @return the integer + */ + int next(); } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,89 +1,88 @@ package com.googlecode.javaewah; -import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; - +import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** * Implementation of an IntIterator over an IteratingRLW. 
- * - * */ public class IntIteratorOverIteratingRLW implements IntIterator { - IteratingRLW parent; - private int position; - private int runningLength; - private long word; - private int wordPosition; - private int wordLength; - private int literalPosition; - private boolean hasnext; - - /** - * @param p iterator we wish to iterate over - */ - public IntIteratorOverIteratingRLW(final IteratingRLW p) { - this.parent = p; - this.position = 0; + final IteratingRLW parent; + private int position; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasNext; + + /** + * @param p iterator we wish to iterate over + */ + public IntIteratorOverIteratingRLW(final IteratingRLW p) { + this.parent = p; + this.position = 0; + setupForCurrentRunningLengthWord(); + this.hasNext = moveToNext(); + } + + /** + * @return whether we could find another set bit; don't move if there is + * an unprocessed value + */ + private boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (this.parent.next()) setupForCurrentRunningLengthWord(); - this.hasnext = moveToNext(); - } - - /** - * @return whether we could find another set bit; don't move if there is an unprocessed value - */ - private final boolean moveToNext() { - while (!runningHasNext() && !literalHasNext()) { - if (this.parent.next()) - setupForCurrentRunningLengthWord(); - else return false; - } - return true; - } - - @Override - public boolean hasNext() { - return this.hasnext; - } - - @Override - public final int next() { - final int answer; - if (runningHasNext()) { - answer = this.position++; - } else { - final int bit = Long.numberOfTrailingZeros(this.word); - this.word ^= (1l << bit); - answer = this.literalPosition + bit; - } - this.hasnext = this.moveToNext(); - return answer; - } - - private final void setupForCurrentRunningLengthWord() { - this.runningLength = wordinbits * (int) 
this.parent.getRunningLength() - + this.position; - - if (!this.parent.getRunningBit()) { - this.position = this.runningLength; - } - this.wordPosition = 0; - this.wordLength = this.parent.getNumberOfLiteralWords(); - } - - private final boolean runningHasNext() { - return this.position < this.runningLength; - } - - private final boolean literalHasNext() { - while (this.word == 0 && this.wordPosition < this.wordLength) { - this.word = this.parent.getLiteralWordAt(this.wordPosition++); - this.literalPosition = this.position; - this.position += wordinbits; - } - return this.word != 0; - } + else + return false; + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final long t = this.word & -this.word; + answer = this.literalPosition + Long.bitCount(t - 1); + this.word ^= t; + } + this.hasNext = this.moveToNext(); + return answer; + } + + private void setupForCurrentRunningLengthWord() { + this.runningLength = WORD_IN_BITS * (int) this.parent.getRunningLength() + this.position; + + if (!this.parent.getRunningBit()) { + this.position = this.runningLength; + } + this.wordPosition = 0; + this.wordLength = this.parent.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.parent.getLiteralWordAt(this.wordPosition++); + this.literalPosition = this.position; + this.position += WORD_IN_BITS; + } + return this.word != 0; + } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java --- 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratingBufferedRunningLengthWord.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,276 +1,303 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** - * Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically - * advances to the next BufferedRunningLengthWord as words are discarded. + * Mostly for internal use. Similar to BufferedRunningLengthWord, but + * automatically advances to the next BufferedRunningLengthWord as words are + * discarded. * - * @since 0.4.0 * @author David McIntosh + * @since 0.4.0 */ -public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{ - /** - * Instantiates a new iterating buffered running length word. - * - * @param iterator iterator - */ - public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) { - this.iterator = iterator; - this.brlw = new BufferedRunningLengthWord(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; - this.buffer = this.iterator.buffer(); - } - - - +public final class IteratingBufferedRunningLengthWord implements IteratingRLW, + Cloneable { /** * Instantiates a new iterating buffered running length word. 
- * @param bitmap over which we want to iterate * - */ -public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { - this.iterator = EWAHIterator.getEWAHIterator(bitmap); - this.brlw = new BufferedRunningLengthWord(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; - this.buffer = this.iterator.buffer(); - } + * @param iterator iterator + */ + public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) { + this.iterator = iterator; + this.brlw = new BufferedRunningLengthWord(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + + this.brlw.literalWordOffset; + this.buffer = this.iterator.buffer(); + } + /** + * Instantiates a new iterating buffered running length word. + * + * @param bitmap over which we want to iterate + */ + public IteratingBufferedRunningLengthWord( + final EWAHCompressedBitmap bitmap) { + this(EWAHIterator.getEWAHIterator(bitmap)); + } + /** + * Discard first words, iterating to the next running length word if + * needed. + * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(long x) { + while (x > 0) { + if (this.brlw.runningLength > x) { + this.brlw.runningLength -= x; + return; + } + x -= this.brlw.runningLength; + this.brlw.runningLength = 0; + long toDiscard = x > this.brlw.numberOfLiteralWords ? 
this.brlw.numberOfLiteralWords + : x; + + this.literalWordStartPosition += (int) toDiscard; + this.brlw.numberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.iterator.hasNext()) { + break; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator + .literalWords(); + } + } + } + @Override + public void discardLiteralWords(long x) { + this.literalWordStartPosition += x; + this.brlw.numberOfLiteralWords -= x; + if (this.brlw.numberOfLiteralWords == 0) { + if (!this.iterator.hasNext()) { + return; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); + } + } + @Override + public void discardRunningWords() { + this.brlw.runningLength = 0; + if (this.brlw.getNumberOfLiteralWords() == 0) + this.next(); + } - - /** - * Discard first words, iterating to the next running length word if needed. - * - * @param x the number of words to be discarded - */ - @Override -public void discardFirstWords(long x) { - while (x > 0) { - if (this.brlw.RunningLength > x) { - this.brlw.RunningLength -= x; - return; - } - x -= this.brlw.RunningLength; - this.brlw.RunningLength = 0; - long toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; - - this.literalWordStartPosition += toDiscard; - this.brlw.NumberOfLiteralWords -= toDiscard; - x -= toDiscard; - if ((x > 0) || (this.brlw.size() == 0)) { + /** + * Move to the next RunningLengthWord + * + * @return whether the move was possible + */ + @Override + public boolean next() { if (!this.iterator.hasNext()) { - break; + this.brlw.numberOfLiteralWords = 0; + this.brlw.runningLength = 0; + return false; } this.brlw.reset(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 - } + this.literalWordStartPosition = this.iterator.literalWords(); // + + // this.brlw.literalWordOffset + // ==0 + return true; + } + + /** + * Write out up to max words, returns how many were written + * + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public long discharge(BitmapStorage container, long max) { + long index = 0; + while (true) { + if (index + getRunningLength() > max) { + final int offset = (int) (max - index); + container.addStreamOfEmptyWords(getRunningBit(), offset); + this.brlw.runningLength -= offset; + return max; + } + container.addStreamOfEmptyWords(getRunningBit(), getRunningLength()); + index += getRunningLength(); + if (getNumberOfLiteralWords() + index > max) { + final int offset =(int) (max - index); + writeLiteralWords(offset, container); + this.brlw.runningLength = 0; + this.brlw.numberOfLiteralWords -= offset; + this.literalWordStartPosition += offset; + return max; + } + writeLiteralWords(getNumberOfLiteralWords(), container); + index += getNumberOfLiteralWords(); + if(!next()) break; + } + return index; + } + + /** + * Write out up to max words (negated), returns how many were written + * + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public long dischargeNegated(BitmapStorage container, long max) { + long index = 
0; + while ((index < max) && (size() > 0)) { + // first run + long pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.addStreamOfEmptyWords(!getRunningBit(), pl); + index += pl; + int pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = (int) (max - index); + } + writeNegatedLiteralWords(pd, container); + discardFirstWords(pl + pd); + index += pd; + } + return index; + } + + /** + * Write out the remain words, transforming them to zeroes. + * + * @param container target for writes + */ + public void dischargeAsEmpty(BitmapStorage container) { + while (size() > 0) { + container.addStreamOfEmptyWords(false, size()); + discardFirstWords(size()); + } + } + + /** + * Write out the remaining words + * + * @param container target for writes + */ + public void discharge(BitmapStorage container) { + this.brlw.literalWordOffset = this.literalWordStartPosition - this.iterator.literalWords(); + discharge(this.brlw, this.iterator, container); } - } - /** - * Move to the next RunningLengthWord - * @return whether the move was possible - */ - @Override -public boolean next() { - if (!this.iterator.hasNext()) { - this.brlw.NumberOfLiteralWords = 0; - this.brlw.RunningLength = 0; - return false; - } - this.brlw.reset(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 - return true; - } - - /** - * Write out up to max words, returns how many were written - * @param container target for writes - * @param max maximal number of writes - * @return how many written - */ - public long discharge(BitmapStorage container, long max) { - long index = 0; - while ((index < max) && (size() > 0)) { - // first run - long pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - container.addStreamOfEmptyWords(getRunningBit(), pl); - index += pl; - int pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = (int) (max - index); - } - 
writeLiteralWords(pd, container); - discardFirstWords(pl+pd); - index += pd; - } - return index; - } - - /** - * Write out up to max words (negated), returns how many were written - * @param container target for writes - * @param max maximal number of writes - * @return how many written - */ - public long dischargeNegated(BitmapStorage container, long max) { - long index = 0; - while ((index < max) && (size() > 0)) { - // first run - long pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - container.addStreamOfEmptyWords(!getRunningBit(), pl); - index += pl; - int pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = (int) (max - index); - } - writeNegatedLiteralWords(pd, container); - discardFirstWords(pl+pd); - index += pd; - } - return index; - } - - - /** - * Write out the remain words, transforming them to zeroes. - * @param container target for writes - */ - public void dischargeAsEmpty(BitmapStorage container) { - while(size()>0) { - container.addStreamOfEmptyWords(false, size()); - discardFirstWords(size()); - } - } - - - - /** - * Write out the remaining words - * @param container target for writes - */ - public void discharge(BitmapStorage container) { - this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); - discharge(this.brlw, this.iterator, container); - } - - /** - * Get the nth literal word for the current running length word - * @param index zero based index - * @return the literal word - */ - @Override -public long getLiteralWordAt(int index) { - return this.buffer[this.literalWordStartPosition + index]; - } - - /** - * Gets the number of literal words for the current running length word. - * - * @return the number of literal words - */ - @Override -public int getNumberOfLiteralWords() { - return this.brlw.NumberOfLiteralWords; - } - - /** - * Gets the running bit. 
- * - * @return the running bit - */ - @Override -public boolean getRunningBit() { - return this.brlw.RunningBit; - } - - /** - * Gets the running length. - * - * @return the running length - */ - @Override -public long getRunningLength() { - return this.brlw.RunningLength; - } - - /** - * Size in uncompressed words of the current running length word. - * - * @return the long - */ - @Override -public long size() { - return this.brlw.size(); - } - - /** - * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. - * @param numWords number of words to be written - * @param container where we write - */ - public void writeLiteralWords(int numWords, BitmapStorage container) { - container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); - } - - /** - * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. - * @param numWords number of words to be written - * @param container where we write - */ - public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { - container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); - } - - /** - * For internal use. (One could use the non-static discharge method instead, - * but we expect them to be slower.) 
- * - * @param initialWord - * the initial word - * @param iterator - * the iterator - * @param container - * the container - */ - private static void discharge(final BufferedRunningLengthWord initialWord, - final EWAHIterator iterator, final BitmapStorage container) { - BufferedRunningLengthWord runningLengthWord = initialWord; - for (;;) { - final long runningLength = runningLengthWord.getRunningLength(); - container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), - runningLength); - container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() - + runningLengthWord.literalwordoffset, - runningLengthWord.getNumberOfLiteralWords()); - if (!iterator.hasNext()) - break; - runningLengthWord = new BufferedRunningLengthWord(iterator.next()); - } - } - - - @Override - public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { - IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super.clone(); - answer.brlw = this.brlw.clone(); - answer.buffer = this.buffer; - answer.iterator = this.iterator.clone(); - answer.literalWordStartPosition = this.literalWordStartPosition; - return answer; - } - - - private BufferedRunningLengthWord brlw; - private long[] buffer; - private int literalWordStartPosition; - private EWAHIterator iterator; + + /** + * Get the nth literal word for the current running length word + * + * @param index zero based index + * @return the literal word + */ + @Override + public long getLiteralWordAt(int index) { + return this.buffer.getWord(this.literalWordStartPosition + index); + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.brlw.numberOfLiteralWords; + } + + /** + * Gets the running bit. 
+ * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.brlw.runningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public long getRunningLength() { + return this.brlw.runningLength; + } + + /** + * Size in uncompressed words of the current running length word. + * + * @return the long + */ + @Override + public long size() { + return this.brlw.size(); + } + + /** + * write the first N literal words to the target bitmap. Does not + * discard the words or perform iteration. + * + * @param numWords number of words to be written + * @param container where we write + */ + public void writeLiteralWords(int numWords, BitmapStorage container) { + container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); + } + + /** + * write the first N literal words (negated) to the target bitmap. Does + * not discard the words or perform iteration. + * + * @param numWords number of words to be written + * @param container where we write + */ + public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { + container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); + } + + /** + * For internal use. (One could use the non-static discharge method + * instead, but we expect them to be slower.) 
+ * + * @param initialWord the initial word + * @param iterator the iterator + * @param container the container + */ + private static void discharge(final BufferedRunningLengthWord initialWord, + final EWAHIterator iterator, final BitmapStorage container) { + BufferedRunningLengthWord runningLengthWord = initialWord; + for (; ; ) { + final long runningLength = runningLengthWord.getRunningLength(); + container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), runningLength); + container.addStreamOfLiteralWords(iterator.buffer(), + iterator.literalWords() + runningLengthWord.literalWordOffset, + runningLengthWord.getNumberOfLiteralWords() + ); + if (!iterator.hasNext()) + break; + runningLengthWord = new BufferedRunningLengthWord(iterator.next()); + } + } + + @Override + public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { + IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super .clone(); + answer.brlw = this.brlw.clone(); + answer.iterator = this.iterator.clone(); + return answer; + } + + private BufferedRunningLengthWord brlw; + private final Buffer buffer; + private int literalWordStartPosition; + private EWAHIterator iterator; + } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingRLW.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratingRLW.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratingRLW.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratingRLW.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,49 +1,64 @@ package com.googlecode.javaewah; - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache 
License, Version 2.0. */ /** * High-level iterator over a compressed bitmap. - * */ public interface IteratingRLW { - /** - * @return whether there is more - */ - public boolean next() ; - /** - * @param index where the literal word is - * @return the literal word at the given index. - */ - public long getLiteralWordAt(int index); - /** - * @return the number of literal (non-fill) words - */ - public int getNumberOfLiteralWords() ; - /** - * @return the bit used for the fill bits - */ - public boolean getRunningBit() ; - /** - * @return sum of getRunningLength() and getNumberOfLiteralWords() - */ - public long size() ; - /** - * @return length of the run of fill words - */ - public long getRunningLength() ; - /** - * @param x the number of words to discard - */ - public void discardFirstWords(long x); + /** + * @return whether there is more + */ + boolean next(); + + /** + * @param index where the literal word is + * @return the literal word at the given index. + */ + long getLiteralWordAt(int index); + + /** + * @return the number of literal (non-fill) words + */ + int getNumberOfLiteralWords(); + + /** + * @return the bit used for the fill bits + */ + boolean getRunningBit(); + + /** + * @return sum of getRunningLength() and getNumberOfLiteralWords() + */ + long size(); + + /** + * @return length of the run of fill words + */ + long getRunningLength(); + + /** + * @param x the number of words to discard + */ + void discardFirstWords(long x); + + /** + * Discard all running words + */ + void discardRunningWords(); + + /** + * Discard x literal words (assumes that there is no running word) + * @param x the number of words to discard + */ + void discardLiteralWords(long x); /** * @return a copy of the iterator * @throws CloneNotSupportedException this should not be thrown in theory */ - public IteratingRLW clone() throws CloneNotSupportedException; + IteratingRLW clone() throws CloneNotSupportedException; } diff -Nru 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorAggregation.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratorAggregation.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorAggregation.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratorAggregation.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,616 +1,608 @@ package com.googlecode.javaewah; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** * Set of helper functions to aggregate bitmaps. - * */ -public class IteratorAggregation { +public final class IteratorAggregation { - /** - * @param x iterator to negate - * @return negated version of the iterator - */ - public static IteratingRLW not(final IteratingRLW x) { - return new IteratingRLW() { + /** Private constructor to prevent instantiation */ + private IteratorAggregation() {} - @Override - public boolean next() { - return x.next(); - } - - @Override - public long getLiteralWordAt(int index) { - return ~x.getLiteralWordAt(index); - } - - @Override - public int getNumberOfLiteralWords() { - return x.getNumberOfLiteralWords(); - } - - @Override - public boolean getRunningBit() { - return ! 
x.getRunningBit(); - } + /** + * @param x iterator to negate + * @return negated version of the iterator + */ + public static IteratingRLW not(final IteratingRLW x) { + return new IteratingRLW() { + + @Override + public boolean next() { + return x.next(); + } + + @Override + public long getLiteralWordAt(int index) { + return ~x.getLiteralWordAt(index); + } + + @Override + public int getNumberOfLiteralWords() { + return x.getNumberOfLiteralWords(); + } + + @Override + public boolean getRunningBit() { + return !x.getRunningBit(); + } + + @Override + public long size() { + return x.size(); + } + + @Override + public long getRunningLength() { + return x.getRunningLength(); + } + + @Override + public void discardFirstWords(long y) { + x.discardFirstWords(y); + } + + @Override + public void discardRunningWords() { + x.discardRunningWords(); + } + + @Override + public IteratingRLW clone() + throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } @Override - public long size() { - return x.size(); + public void discardLiteralWords(long y) { + x.discardLiteralWords(y); } + }; + } - @Override - public long getRunningLength() { - return x.getRunningLength(); - } - - @Override - public void discardFirstWords(long y) { - x.discardFirstWords(y); - } - - @Override - public IteratingRLW clone() throws CloneNotSupportedException { - throw new CloneNotSupportedException(); - } - - - }; - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al set of iterators to aggregate - * @return and aggregate - */ - public static IteratingRLW bufferedand(final IteratingRLW... al) { - return bufferedand(DEFAULTMAXBUFSIZE,al); - } - - /** - * Aggregate the iterators using a bitmap buffer. 
- * - * @param al set of iterators to aggregate - * @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator) - * @return and aggregate - */ - public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) { - if (al.length == 0) - throw new IllegalArgumentException("Need at least one iterator"); - if (al.length == 1) - return al[0]; - final LinkedList basell = new LinkedList(); - for (IteratingRLW i : al) - basell.add(i); - return new BufferedIterator(new BufferedAndIterator(basell,bufsize)); - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al set of iterators to aggregate - * @return or aggregate - */ - public static IteratingRLW bufferedor(final IteratingRLW... al) { - return bufferedor(DEFAULTMAXBUFSIZE,al); - } - - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @param bufsize size of the internal buffer used by the iterator in 64-bit words - * @return or aggregate - */ - public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) { - if (al.length == 0) - throw new IllegalArgumentException("Need at least one iterator"); - if (al.length == 1) - return al[0]; - - final LinkedList basell = new LinkedList(); - for (IteratingRLW i : al) - basell.add(i); - return new BufferedIterator(new BufferedORIterator(basell,bufsize)); - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al set of iterators to aggregate - * @return xor aggregate - */ - public static IteratingRLW bufferedxor(final IteratingRLW... al) { - return bufferedxor(DEFAULTMAXBUFSIZE,al); - } - - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @param bufsize size of the internal buffer used by the iterator in 64-bit words - * @return xor aggregate - */ - public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... 
al) { - if (al.length == 0) - throw new IllegalArgumentException("Need at least one iterator"); - if (al.length == 1) - return al[0]; - - - final LinkedList basell = new LinkedList(); - for (IteratingRLW i : al) - basell.add(i); - - return new BufferedIterator(new BufferedXORIterator(basell, bufsize)); - } - - - /** - * Write out the content of the iterator, but as if it were all zeros. - * - * @param container - * where we write - * @param i - * the iterator - */ - protected static void dischargeAsEmpty(final BitmapStorage container, - final IteratingRLW i) { - while (i.size() > 0) { - container.addStreamOfEmptyWords(false, i.size()); - i.next(); - - } - } - - /** - * Write out up to max words, returns how many were written - * @param container target for writes - * @param i source of data - * @param max maximal number of writes - * @return how many written - */ - - protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { - long counter = 0; - while (i.size() > 0 && counter < max) { - long L1 = i.getRunningLength(); - if (L1 > 0) { - if (L1 + counter > max) - L1 = max - counter; - container.addStreamOfEmptyWords(i.getRunningBit(), L1); - counter += L1; - } - long L = i.getNumberOfLiteralWords(); - if(L + counter > max) L = max - counter; - for (int k = 0; k < L; ++k) { - container.add(i.getLiteralWordAt(k)); - } - counter += L; - i.discardFirstWords(L+L1); - } - return counter; - } - - - /** - * Write out up to max negated words, returns how many were written - * @param container target for writes - * @param i source of data - * @param max maximal number of writes - * @return how many written - */ - protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { - long counter = 0; - while (i.size() > 0 && counter < max) { - long L1 = i.getRunningLength(); - if (L1 > 0) { - if (L1 + counter > max) - L1 = max - counter; - container.addStreamOfEmptyWords(!i.getRunningBit(), L1); - counter += L1; - } 
- long L = i.getNumberOfLiteralWords(); - if(L + counter > max) L = max - counter; - for (int k = 0; k < L; ++k) { - container.add(~i.getLiteralWordAt(k)); - } - counter += L; - i.discardFirstWords(L+L1); - } - return counter; - } - - static void andToContainer(final BitmapStorage container, - int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { - while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingRLW prey = i_is_prey ? rlwi : rlwj; - final IteratingRLW predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == false) { - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else { - final long index = discharge(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - desiredrlwcount -= nbre_literal; - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - } - - static void andToContainer(final BitmapStorage container, - final IteratingRLW rlwi, IteratingRLW rlwj) { - while ((rlwi.size()>0) && (rlwj.size()>0) ) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingRLW prey = i_is_prey ? rlwi : rlwj; - final IteratingRLW predator = i_is_prey ? 
rlwj - : rlwi; - if (predator.getRunningBit() == false) { - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else { - final long index = discharge(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - } - - - /** - * Compute the first few words of the XOR aggregate between two iterators. - * - * @param container where to write - * @param desiredrlwcount number of words to be written (max) - * @param rlwi first iterator to aggregate - * @param rlwj second iterator to aggregate - */ - public static void xorToContainer(final BitmapStorage container, - int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) { - while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingRLW prey = i_is_prey ? rlwi : rlwj; - final IteratingRLW predator = i_is_prey ? 
rlwj - : rlwi; - if (predator.getRunningBit() == false) { - long index = discharge(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } else { - long index = dischargeNegated(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(true, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - desiredrlwcount -= nbre_literal; - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - } - - protected static int inplaceor(long[] bitmap, - IteratingRLW i) { - - int pos = 0; - long s; - while ((s = i.size()) > 0) { - if (pos + s < bitmap.length) { - final int L = (int) i.getRunningLength(); - if (i.getRunningBit()) - java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); - pos += L; - final int LR = i.getNumberOfLiteralWords(); - - for (int k = 0; k < LR; ++k) - bitmap[pos++] |= i.getLiteralWordAt(k); - if (!i.next()) { - return pos; - } - } else { - int howmany = bitmap.length - pos; - int L = (int) i.getRunningLength(); - - if (pos + L > bitmap.length) { - if (i.getRunningBit()) { - java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0l); - } - i.discardFirstWords(howmany); - return bitmap.length; - } - if (i.getRunningBit()) - java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); - pos += L; - for (int k = 0; pos < bitmap.length; ++k) - bitmap[pos++] |= i.getLiteralWordAt(k); - i.discardFirstWords(howmany); - return pos; - } - } - return pos; - } - - protected static int inplacexor(long[] bitmap, - IteratingRLW i) { - int pos = 0; - long s; - while ((s = i.size()) > 0) { - if (pos + s < 
bitmap.length) { - final int L = (int) i.getRunningLength(); - if (i.getRunningBit()) { - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = ~bitmap[k]; - } - pos += L; - final int LR = i.getNumberOfLiteralWords(); - for (int k = 0; k < LR; ++k) - bitmap[pos++] ^= i.getLiteralWordAt(k); - if (!i.next()) { - return pos; - } - } else { - int howmany = bitmap.length - pos; - int L = (int) i.getRunningLength(); - if (pos + L > bitmap.length) { - if (i.getRunningBit()) { - for(int k = pos ; k < bitmap.length; ++k) - bitmap[k] = ~bitmap[k]; - } - i.discardFirstWords(howmany); - return bitmap.length; - } - if (i.getRunningBit()) - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = ~bitmap[k]; - pos += L; - for (int k = 0; pos < bitmap.length; ++k) - bitmap[pos++] ^= i.getLiteralWordAt(k); - i.discardFirstWords(howmany); - return pos; - } - } - return pos; - } - protected static int inplaceand(long[] bitmap, - IteratingRLW i) { - int pos = 0; - long s; - while ((s = i.size()) > 0) { - if (pos + s < bitmap.length) { - final int L = (int) i.getRunningLength(); - if (!i.getRunningBit()) { - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = 0; - } - pos += L; - final int LR = i.getNumberOfLiteralWords(); - for (int k = 0; k < LR; ++k) - bitmap[pos++] &= i.getLiteralWordAt(k); - if (!i.next()) { - return pos; - } - } else { - int howmany = bitmap.length - pos; - int L = (int) i.getRunningLength(); - if (pos + L > bitmap.length) { - if (!i.getRunningBit()) { - for(int k = pos ; k < bitmap.length; ++k) - bitmap[k] = 0; - } - i.discardFirstWords(howmany); - return bitmap.length; - } - if (!i.getRunningBit()) - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = 0; - pos += L; - for (int k = 0; pos < bitmap.length; ++k) - bitmap[pos++] &= i.getLiteralWordAt(k); - i.discardFirstWords(howmany); - return pos; - } - } - return pos; - } - - /** - * An optimization option. Larger values may improve speed, but at - * the expense of memory. 
- */ - public final static int DEFAULTMAXBUFSIZE = 65536; + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return and aggregate + */ + public static IteratingRLW bufferedand(final IteratingRLW... al) { + return bufferedand(DEFAULT_MAX_BUF_SIZE, al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @param bufSize size of the internal buffer used by the iterator in + * 64-bit words (per input iterator) + * @return and aggregate + */ + public static IteratingRLW bufferedand(final int bufSize, final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + final LinkedList basell = new LinkedList(); + Collections.addAll(basell, al); + return new BufferedIterator(new BufferedAndIterator(basell, + bufSize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return or aggregate + */ + public static IteratingRLW bufferedor(final IteratingRLW... al) { + return bufferedor(DEFAULT_MAX_BUF_SIZE, al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufSize size of the internal buffer used by the iterator in + * 64-bit words + * @return or aggregate + */ + public static IteratingRLW bufferedor(final int bufSize, + final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + Collections.addAll(basell, al); + return new BufferedIterator(new BufferedORIterator(basell, bufSize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return xor aggregate + */ + public static IteratingRLW bufferedxor(final IteratingRLW... 
al) { + return bufferedxor(DEFAULT_MAX_BUF_SIZE, al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufSize size of the internal buffer used by the iterator in 64-bit words + * @return xor aggregate + */ + public static IteratingRLW bufferedxor(final int bufSize, final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + Collections.addAll(basell, al); + + return new BufferedIterator(new BufferedXORIterator(basell, bufSize)); + } + + /** + * Write out the content of the iterator, but as if it were all zeros. + * + * @param container where we write + * @param i the iterator + */ + protected static void dischargeAsEmpty(final BitmapStorage container, + final IteratingRLW i) { + while (i.size() > 0) { + container.addStreamOfEmptyWords(false, i.size()); + i.next(); + } + } + + /** + * Write out up to max words, returns how many were written + * + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + + protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { + long counter = 0; + while (i.size() > 0 && counter < max) { + long l1 = i.getRunningLength(); + if (l1 > 0) { + if (l1 + counter > max) + l1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), l1); + counter += l1; + } + long l = i.getNumberOfLiteralWords(); + if (l + counter > max) + l = max - counter; + for (int k = 0; k < l; ++k) { + container.addWord(i.getLiteralWordAt(k)); + } + counter += l; + i.discardFirstWords(l + l1); + } + return counter; + } + + /** + * Write out up to max negated words, returns how many were written + * + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written 
+ */ + protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { + long counter = 0; + while (i.size() > 0 && counter < max) { + long l1 = i.getRunningLength(); + if (l1 > 0) { + if (l1 + counter > max) + l1 = max - counter; + container.addStreamOfEmptyWords(!i.getRunningBit(), l1); + counter += l1; + } + long l = i.getNumberOfLiteralWords(); + if (l + counter > max) + l = max - counter; + for (int k = 0; k < l; ++k) { + container.addWord(~i.getLiteralWordAt(k)); + } + counter += l; + i.discardFirstWords(l + l1); + } + return counter; + } + + static void andToContainer(final BitmapStorage container, + int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { + while ((rlwi.size() > 0) && (rlwj.size() > 0) + && (desiredrlwcount-- > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj : rlwi; + if (!predator.getRunningBit()) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + static void andToContainer(final BitmapStorage container, + final IteratingRLW rlwi, IteratingRLW rlwj) { + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj : rlwi; + if (!predator.getRunningBit()) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + /** + * Compute the first few words of the XOR aggregate between two + * iterators. + * + * @param container where to write + * @param desiredrlwcount number of words to be written (max) + * @param rlwi first iterator to aggregate + * @param rlwj second iterator to aggregate + */ + public static void xorToContainer(final BitmapStorage container, + int desiredrlwcount, final IteratingRLW rlwi, + final IteratingRLW rlwj) { + while ((rlwi.size() > 0) && (rlwj.size() > 0) + && (desiredrlwcount-- > 0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj : rlwi; + if (!predator.getRunningBit()) { + long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + long index = dischargeNegated(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + protected static int inplaceor(long[] bitmap, IteratingRLW i) { + + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) i.getRunningLength(); + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); + pos += L; + final int LR = i.getNumberOfLiteralWords(); + + for (int k = 0; k < LR; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int l = (int) i.getRunningLength(); + + if (pos + l > bitmap.length) { + if (i.getRunningBit()) { + java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0l); + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + l, ~0l); + pos += l; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + protected static int inplacexor(long[] bitmap, IteratingRLW i) { + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int 
L = (int) i.getRunningLength(); + if (i.getRunningBit()) { + for (int k = pos; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howMany = bitmap.length - pos; + int l = (int) i.getRunningLength(); + if (pos + l > bitmap.length) { + if (i.getRunningBit()) { + for (int k = pos; k < bitmap.length; ++k) + bitmap[k] = ~bitmap[k]; + } + i.discardFirstWords(howMany); + return bitmap.length; + } + if (i.getRunningBit()) + for (int k = pos; k < pos + l; ++k) + bitmap[k] = ~bitmap[k]; + pos += l; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + i.discardFirstWords(howMany); + return pos; + } + } + return pos; + } + + protected static int inplaceand(long[] bitmap, IteratingRLW i) { + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) i.getRunningLength(); + if (!i.getRunningBit()) { + for (int k = pos; k < pos + L; ++k) + bitmap[k] = 0; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int l = (int) i.getRunningLength(); + if (pos + l > bitmap.length) { + if (!i.getRunningBit()) { + for (int k = pos; k < bitmap.length; ++k) + bitmap[k] = 0; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (!i.getRunningBit()) + for (int k = pos; k < pos + l; ++k) + bitmap[k] = 0; + pos += l; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + /** + * An optimization option. Larger values may improve speed, but at the + * expense of memory. 
+ */ + public static final int DEFAULT_MAX_BUF_SIZE = 65536; } + class BufferedORIterator implements CloneableIterator { - EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); - long[] hardbitmap; - LinkedList ll; - int buffersize; - - BufferedORIterator(LinkedList basell, int bufsize) { - this.ll = basell; - this.hardbitmap = new long[bufsize]; - } - - @Override - public BufferedXORIterator clone() throws CloneNotSupportedException { - BufferedXORIterator answer = (BufferedXORIterator) super.clone(); - answer.buffer = this.buffer.clone(); - answer.hardbitmap = this.hardbitmap.clone(); - answer.ll = (LinkedList) this.ll.clone(); - return answer; - } - - @Override - public boolean hasNext() { - return !this.ll.isEmpty(); - } - - @Override - public EWAHIterator next() { - this.buffer.clear(); - long effective = 0; - Iterator i = this.ll.iterator(); - while (i.hasNext()) { - IteratingRLW rlw = i.next(); - if (rlw.size() > 0) { - int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw); - if (eff > effective) - effective = eff; - } else - i.remove(); - } - for (int k = 0; k < effective; ++k) { - this.buffer.add(this.hardbitmap[k]); - } - - Arrays.fill(this.hardbitmap, 0); - return this.buffer.getEWAHIterator(); - } + final EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + final long[] hardBitmap; + final LinkedList ll; + + BufferedORIterator(LinkedList basell, int bufSize) { + this.ll = basell; + this.hardBitmap = new long[bufSize]; + } + + @Override + public BufferedXORIterator clone() throws CloneNotSupportedException { + BufferedXORIterator answer = (BufferedXORIterator) super + .clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardBitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + long effective = 0; + Iterator i = this.ll.iterator(); + while 
(i.hasNext()) { + IteratingRLW rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation.inplaceor(this.hardBitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) { + this.buffer.addWord(this.hardBitmap[k]); + } + + Arrays.fill(this.hardBitmap, 0); + return this.buffer.getEWAHIterator(); + } } class BufferedXORIterator implements CloneableIterator { - EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); - long[] hardbitmap; + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + long[] hardbitmap; LinkedList ll; - int buffersize; - - BufferedXORIterator(LinkedList basell, int bufsize) { - this.ll = basell; - this.hardbitmap = new long[bufsize]; - } - - @Override - public BufferedXORIterator clone() throws CloneNotSupportedException { - BufferedXORIterator answer = (BufferedXORIterator) super.clone(); - answer.buffer = this.buffer.clone(); - answer.hardbitmap = this.hardbitmap.clone(); - answer.ll = (LinkedList) this.ll.clone(); - return answer; - } - - @Override - public boolean hasNext() { - return !this.ll.isEmpty(); - } - - @Override - public EWAHIterator next() { - this.buffer.clear(); - long effective = 0; - Iterator i = this.ll.iterator(); - while (i.hasNext()) { - IteratingRLW rlw = i.next(); - if (rlw.size() > 0) { - int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw); - if (eff > effective) - effective = eff; - } else - i.remove(); - } - for (int k = 0; k < effective; ++k) - this.buffer.add(this.hardbitmap[k]); - Arrays.fill(this.hardbitmap, 0); - return this.buffer.getEWAHIterator(); - } -} + BufferedXORIterator(LinkedList basell, int bufSize) { + this.ll = basell; + this.hardbitmap = new long[bufSize]; + } + + @Override + public BufferedXORIterator clone() throws CloneNotSupportedException { + BufferedXORIterator answer = (BufferedXORIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll 
= (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + long effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.addWord(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} class BufferedAndIterator implements CloneableIterator { - EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); - LinkedList ll; - int buffersize; - - public BufferedAndIterator(LinkedList basell, int bufsize) { - this.ll = basell; - this.buffersize = bufsize; - - } - - @Override - public boolean hasNext() { - return !this.ll.isEmpty(); - } - - @Override - public BufferedAndIterator clone() throws CloneNotSupportedException { - BufferedAndIterator answer = (BufferedAndIterator) super.clone(); - answer.buffer = this.buffer.clone(); - answer.ll = (LinkedList) this.ll.clone(); - return answer; - } - - @Override - public EWAHIterator next() { - this.buffer.clear(); - IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(), - this.ll.get(0), this.ll.get(1)); - if (this.ll.size() > 2) { - Iterator i = this.ll.iterator(); - i.next(); - i.next(); - EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); - while (i.hasNext() && this.buffer.sizeInBytes() > 0) { - IteratorAggregation.andToContainer(tmpbuffer, - this.buffer.getIteratingRLW(), i.next()); - this.buffer.swap(tmpbuffer); - tmpbuffer.clear(); - } - } - Iterator i = this.ll.iterator(); - while(i.hasNext()) { - if(i.next().size() == 0) { - this.ll.clear(); - break; - } - } - return this.buffer.getEWAHIterator(); - } + EWAHCompressedBitmap buffer = new 
EWAHCompressedBitmap(); + LinkedList ll; + final int bufferSize; + public BufferedAndIterator(LinkedList basell, int bufSize) { + this.ll = basell; + this.bufferSize = bufSize; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public BufferedAndIterator clone() throws CloneNotSupportedException { + BufferedAndIterator answer = (BufferedAndIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + IteratorAggregation.andToContainer(this.buffer, this.bufferSize * this.ll.size(), + this.ll.get(0), this.ll.get(1)); + if (this.ll.size() > 2) { + Iterator i = this.ll.iterator(); + i.next(); + i.next(); + EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); + while (i.hasNext() && this.buffer.sizeInBytes() > 0) { + IteratorAggregation.andToContainer(tmpbuffer, this.buffer.getIteratingRLW(), i.next()); + this.buffer.swap(tmpbuffer); + tmpbuffer.clear(); + } + } + for (IteratingRLW aLl : this.ll) { + if (aLl.size() == 0) { + this.ll.clear(); + break; + } + } + return this.buffer.getEWAHIterator(); + } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorUtil.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratorUtil.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/IteratorUtil.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/IteratorUtil.java 2019-11-08 21:55:59.000000000 +0000 @@ -3,130 +3,153 @@ import java.util.Iterator; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. 
*/ + /** * Convenience functions for working over iterators - * */ -public class IteratorUtil { - - /** - * @param i iterator we wish to iterate over - * @return an iterator over the set bits corresponding to the iterator - */ - public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { - return new IntIteratorOverIteratingRLW(i); - } - - /** - * @param i iterator we wish to iterate over - * @return an iterator over the set bits corresponding to the iterator - */ - public static Iterator toSetBitsIterator(final IteratingRLW i) { - return new Iterator() { - @Override - public boolean hasNext() { - return this.under.hasNext(); - } - - @Override - public Integer next() { - return new Integer(this.under.next()); - } - - @Override - public void remove() { - } - - final private IntIterator under = toSetBitsIntIterator(i); - }; - - } - - /** - * Generate a bitmap from an iterator - * - * @param i iterator we wish to materialize - * @param c where we write - */ - public static void materialize(final IteratingRLW i, final BitmapStorage c) { - while (true) { - if (i.getRunningLength() > 0) { - c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); - } - for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) - c.add(i.getLiteralWordAt(k)); - if (!i.next()) - break; - } - } - - /** - * @param i iterator we wish to iterate over - * @return the cardinality (number of set bits) corresponding to the iterator - */ - public static int cardinality(final IteratingRLW i) { - int answer = 0; - while (true) { - if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits; - for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) - answer += Long.bitCount(i.getLiteralWordAt(k)); - if(!i.next()) break; - } - return answer; - } - - /** - * @param x set of bitmaps - * @return an array of iterators corresponding to the array of bitmaps - */ - public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... 
x) { - IteratingRLW[] X = new IteratingRLW[x.length]; - for (int k = 0; k < X.length; ++k) { - X[k] = new IteratingBufferedRunningLengthWord(x[k]); - } - return X; - } - /** - * Turn an iterator into a bitmap. - * - * @param i iterator we wish to materialize - * @param c where we write - * @param Max maximum number of words we wish to materialize - * @return how many words were actually materialized - */ - public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) { - final long origMax = Max; - while (true) { - if (i.getRunningLength() > 0) { - long L = i.getRunningLength(); - if(L > Max) L = Max; - c.addStreamOfEmptyWords(i.getRunningBit(), L); - Max -= L; - } - long L = i.getNumberOfLiteralWords(); - for (int k = 0; k < L; ++k) - c.add(i.getLiteralWordAt(k)); - if(Max>0) { - if (!i.next()) - break; - } - else break; - } - return origMax - Max; - } - /** - * Turn an iterator into a bitmap - * - * @param i iterator we wish to materialize - * @return materialized version of the iterator - */ - public static EWAHCompressedBitmap materialize(final IteratingRLW i) { - EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); - materialize(i, ewah); - return ewah; - } +public final class IteratorUtil { + + /** Private constructor to prevent instantiation */ + private IteratorUtil() {} + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { + return new IntIteratorOverIteratingRLW(i); + } + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static Iterator toSetBitsIterator(final IteratingRLW i) { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return this.under.next(); + } + + @Override + public void remove() { + } + + 
private final IntIterator under = toSetBitsIntIterator(i); + }; + + } + + /** + * Generate a bitmap from an iterator. + * + * + * + * @param i iterator we wish to materialize + * @param c where we write + */ + public static void materialize(final IteratingRLW i, + final BitmapStorage c) { + while (true) { + if (i.getRunningLength() > 0) { + c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); + } + int il = i.getNumberOfLiteralWords(); + for (int k = 0; k < il ; ++k) + c.addWord(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + } + + /** + * @param i iterator we wish to iterate over + * @return the cardinality (number of set bits) corresponding to the + * iterator + */ + public static int cardinality(final IteratingRLW i) { + int answer = 0; + while (true) { + if (i.getRunningBit()) + answer += (int) (i.getRunningLength() * EWAHCompressedBitmap.WORD_IN_BITS); + int lw = i.getNumberOfLiteralWords(); + for (int k = 0; k < lw ; ++k) + answer += Long.bitCount(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + return answer; + } + + /** + * @param x set of bitmaps + * @return an array of iterators corresponding to the array of bitmaps + */ + public static IteratingRLW[] toIterators( + final EWAHCompressedBitmap... x) { + IteratingRLW[] X = new IteratingRLW[x.length]; + for (int k = 0; k < X.length; ++k) { + X[k] = new IteratingBufferedRunningLengthWord(x[k]); + } + return X; + } + + /** + * Turn an iterator into a bitmap. 
+ * + * @param i iterator we wish to materialize + * @param c where we write + * @param max maximum number of words we wish to materialize + * @return how many words were actually materialized + */ + public static long materialize(final IteratingRLW i, + final BitmapStorage c, long max) { + final long origMax = max; + while (true) { + if (i.getRunningLength() > 0) { + long L = i.getRunningLength(); + if (L > max) + L = max; + c.addStreamOfEmptyWords(i.getRunningBit(), L); + max -= L; + } + long L = i.getNumberOfLiteralWords(); + for (int k = 0; k < L; ++k) + c.addWord(i.getLiteralWordAt(k)); + if (max > 0) { + if (!i.next()) + break; + } else + break; + } + return origMax - max; + } + + /** + * Turn an iterator into a bitmap. + * + * This can be used to effectively clone a bitmap in the following + * manner: + * + * + * EWAHCompressedBitmap n = IteratorUtil.materialize(bitmap.getIteratingRLW())); + * n.setSizeInBitsWithinLastWord(bitmap.sizeInBits()); + * + * + * @param i iterator we wish to materialize + * @return materialized version of the iterator + */ + public static EWAHCompressedBitmap materialize(final IteratingRLW i) { + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + materialize(i, ewah); + return ewah; + } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LogicalElement.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/LogicalElement.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LogicalElement.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/LogicalElement.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,61 +1,74 @@ package com.googlecode.javaewah; +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + /** - * A prototypical model for bitmaps. 
Used by the - * class FastAggregation. Users should probably not - * be concerned by this class. - * - * @author Daniel Lemire - * @param the type of element (e.g., a bitmap class) + * A prototypical model for bitmaps. Used by the class FastAggregation. Users + * should probably not be concerned by this class. * + * @param the type of element (e.g., a bitmap class) + * @author Daniel Lemire */ public interface LogicalElement { - /** - * Compute the bitwise logical and - * @param le element - * @return the result of the operation - */ - public T and(T le); - - /** - * Compute the bitwise logical and not - * @param le element - * @return the result of the operation - */ - public T andNot(T le); - - /** - * Compute the bitwise logical not (in place) - */ - public void not(); - - - @SuppressWarnings({ "rawtypes", "javadoc" }) - /** - * Compute the bitwise logical or - * @param le another element - * @return the result of the operation - */ - public LogicalElement or(T le); - - /** - * How many logical bits does this element represent? - * - * @return the number of bits represented by this element - */ - public int sizeInBits(); - - /** - * Should report the storage requirement - * @return How many bytes - * @since 0.6.2 - */ - public int sizeInBytes(); - - /** - * Compute the bitwise logical Xor - * @param le element - * @return the results of the operation - */ - public T xor(T le); + /** + * Compute the bitwise logical and + * + * @param le element + * @return the result of the operation + */ + T and(T le); + + /** + * Compute the bitwise logical and not + * + * @param le element + * @return the result of the operation + */ + T andNot(T le); + + /** + * Compute the bitwise logical not (in place) + */ + void not(); + + /** + * Compute the bitwise logical or + * @param le another element + * @return the result of the operation + */ + T or(T le); + + /** + * How many logical bits does this element represent? 
+ * + * @return the number of bits represented by this element + */ + int sizeInBits(); + + /** + * Should report the storage requirement + * + * @return How many bytes + * @since 0.6.2 + */ + int sizeInBytes(); + + /** + * Compute the bitwise logical Xor + * + * @param le element + * @return the result of the operation + */ + T xor(T le); + + /** + * Compute the composition + * + * @param le another element + * @return the result of the operation + */ + T compose(T le); } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LongArray.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/LongArray.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LongArray.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/LongArray.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,236 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.Arrays; + +/** + * Long array wrapper. + * Users should not be concerned by this class. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +final class LongArray implements Buffer, Cloneable { + + /** + * Creates a buffer with default size + */ + public LongArray() { + this(DEFAULT_BUFFER_SIZE); + } + + /** + * Creates a buffer with explicit size + * @param bufferSize + */ + public LongArray(int bufferSize) { + if(bufferSize < 1) { + bufferSize = 1; + } + this.buffer = new long[bufferSize]; + } + + @Override + public int sizeInWords() { + return this.actualSizeInWords; + } + + @Override + public void ensureCapacity(int capacity) { + resizeBuffer(capacity - this.actualSizeInWords); + } + + @Override + public long getWord(int position) { + return this.buffer[position]; + } + + @Override + public long getLastWord() { + return getWord(this.actualSizeInWords - 1); + } + + @Override + public void clear() { + this.actualSizeInWords = 1; + this.buffer[0] = 0; + } + + @Override + public void trim() { + this.buffer = Arrays.copyOf(this.buffer, this.actualSizeInWords); + } + + @Override + public void setWord(int position, long word) { + this.buffer[position] = word; + } + + @Override + public void setLastWord(long word) { + setWord(this.actualSizeInWords - 1, word); + } + + @Override + public void push_back(long word) { + resizeBuffer(1); + this.buffer[this.actualSizeInWords++] = word; + } + + @Override + public void push_back(Buffer buffer, int start, int number) { + resizeBuffer(number); + if(buffer instanceof LongArray) { + long[] data = ((LongArray)buffer).buffer; + System.arraycopy(data, start, this.buffer, this.actualSizeInWords, number); + } else { + for(int i = 0; i < number; ++i) { + this.buffer[this.actualSizeInWords + i] = buffer.getWord(start + i); + } + } + this.actualSizeInWords += number; + } + + @Override + public void negative_push_back(Buffer buffer, int start, int number) { + resizeBuffer(number); + for (int i = 0; i < number; ++i) { + this.buffer[this.actualSizeInWords + i] = ~buffer.getWord(start + i); + } + this.actualSizeInWords += number; + } + + 
@Override + public void removeLastWord() { + setWord(--this.actualSizeInWords, 0l); + } + + @Override + public void negateWord(int position) { + this.buffer[position] = ~this.buffer[position]; + } + + @Override + public void andWord(int position, long mask) { + this.buffer[position] &= mask; + } + + @Override + public void orWord(int position, long mask) { + this.buffer[position] |= mask; + } + + @Override + public void andLastWord(long mask) { + andWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void orLastWord(long mask) { + orWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void expand(int position, int length) { + resizeBuffer(length); + System.arraycopy(this.buffer, position, this.buffer, position + length, this.actualSizeInWords - position); + this.actualSizeInWords += length; + } + + @Override + public void collapse(int position, int length) { + System.arraycopy(this.buffer, position + length, this.buffer, position, this.actualSizeInWords - position - length); + for(int i = 0; i < length; ++i) { + removeLastWord(); + } + } + + @Override + public LongArray clone() { + LongArray clone = null; + try { + clone = (LongArray) super.clone(); + clone.buffer = this.buffer.clone(); + clone.actualSizeInWords = this.actualSizeInWords; + } catch (CloneNotSupportedException e) { + e.printStackTrace(); // cannot happen + } + return clone; + } + + @Override + public void swap(final Buffer other) { + if(other instanceof LongArray) { + long[] tmp = this.buffer; + this.buffer = ((LongArray)other).buffer; + ((LongArray)other).buffer = tmp; + + int tmp2 = this.actualSizeInWords; + this.actualSizeInWords = ((LongArray)other).actualSizeInWords; + ((LongArray)other).actualSizeInWords = tmp2; + } else { + long[] tmp = new long[other.sizeInWords()]; + for(int i = 0; i < other.sizeInWords(); ++i) { + tmp[i] = other.getWord(i); + } + int tmp2 = other.sizeInWords(); + + other.clear(); + other.removeLastWord(); + other.push_back(this, 0, 
this.sizeInWords()); + + this.buffer = tmp; + this.actualSizeInWords = tmp2; + } + } + + /** + * Resizes the buffer if the number of words to add exceeds the buffer capacity. + * @param number the number of words to add + */ + private void resizeBuffer(int number) { + int size = newSizeInWords(number); + if (size >= this.buffer.length) { + long oldBuffer[] = this.buffer; + this.buffer = new long[size]; + System.arraycopy(oldBuffer, 0, this.buffer, 0, oldBuffer.length); + } + } + + /** + * Returns the resulting buffer size in words given the number of words to add. + * @param number the number of words to add + */ + private int newSizeInWords(int number) { + int size = this.actualSizeInWords + number; + if (size >= this.buffer.length) { + if (size < 32768) + size = size * 2; + else if (size * 3 / 2 < size) // overflow + size = Integer.MAX_VALUE; + else + size = size * 3 / 2; + } + return size; + } + + /** + * The actual size in words. + */ + private int actualSizeInWords = 1; + + /** + * The buffer (array of 64-bit words) + */ + private long buffer[] = null; + + /** + * The Constant DEFAULT_BUFFER_SIZE: default memory allocation when the + * object is constructed. + */ + private static final int DEFAULT_BUFFER_SIZE = 4; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LongBufferWrapper.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/LongBufferWrapper.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/LongBufferWrapper.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/LongBufferWrapper.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,166 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. 
+ */ + +import java.nio.LongBuffer; + +/** + * java.nio.LongBuffer wrapper. + * Users should not be concerned by this class. + * + * @author Gregory Ssi-Yan-Kai + */ +final class LongBufferWrapper implements Buffer, Cloneable { + + public LongBufferWrapper(LongBuffer buffer) { + this.buffer = buffer; + } + + public LongBufferWrapper(LongBuffer slice, int sizeInWords) { + this.buffer = slice; + this.actualSizeInWords = sizeInWords; + } + + @Override + public int sizeInWords() { + return this.actualSizeInWords; + } + + @Override + public void ensureCapacity(int capacity) { + if(capacity > buffer.capacity()) { + throw new RuntimeException("Cannot increase buffer capacity. Current capacity: " + buffer.capacity() + ". New capacity: " + capacity); + } + } + + @Override + public long getWord(int position) { + return this.buffer.get(position); + } + + @Override + public long getLastWord() { + return getWord(this.actualSizeInWords - 1); + } + + @Override + public void clear() { + this.actualSizeInWords = 1; + setWord(0, 0); + } + + @Override + public void trim() { + } + + @Override + public void setWord(int position, long word) { + this.buffer.put(position, word); + } + + @Override + public void setLastWord(long word) { + setWord(this.actualSizeInWords - 1, word); + } + + @Override + public void push_back(long word) { + setWord(this.actualSizeInWords++, word); + } + + @Override + public void push_back(Buffer buffer, int start, int number) { + for(int i = 0; i < number; ++i) { + push_back(buffer.getWord(start + i)); + } + } + + @Override + public void negative_push_back(Buffer buffer, int start, int number) { + for(int i = 0; i < number; ++i) { + push_back(~buffer.getWord(start + i)); + } + } + + @Override + public void removeLastWord() { + setWord(--this.actualSizeInWords, 0l); + } + + @Override + public void negateWord(int position) { + setWord(position, ~getWord(position)); + } + + @Override + public void andWord(int position, long mask) { + setWord(position, 
getWord(position) & mask); + } + + @Override + public void orWord(int position, long mask) { + setWord(position, getWord(position) | mask); + } + + @Override + public void andLastWord(long mask) { + andWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void orLastWord(long mask) { + orWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void expand(int position, int length) { + for(int i = this.actualSizeInWords - position - 1; i >= 0; --i) { + setWord(position + length + i, getWord(position + i)); + } + this.actualSizeInWords += length; + } + + @Override + public void collapse(int position, int length) { + for(int i = 0; i < this.actualSizeInWords - position - length; ++i) { + setWord(position + i, getWord(position + length + i)); + } + for(int i = 0; i < length; ++i) { + removeLastWord(); + } + } + + @Override + public LongBufferWrapper clone() throws CloneNotSupportedException { + return new LongBufferWrapper(this.buffer, this.actualSizeInWords); + } + + @Override + public void swap(final Buffer other) { + if (other instanceof LongBufferWrapper) {// optimized version + LongBufferWrapper o = (LongBufferWrapper) other; + LongBuffer tmp = this.buffer; + int tmp2 = this.actualSizeInWords; + this.actualSizeInWords = o.actualSizeInWords; + this.buffer = o.buffer; + o.actualSizeInWords = tmp2; + o.buffer = tmp; + } else { + other.swap(this); + } + } + + /** + * The actual size in words. 
+ */ + private int actualSizeInWords = 1; + + /** + * The buffer + */ + private LongBuffer buffer; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,92 +1,105 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** * This is a BitmapStorage that can be used to determine quickly if the result * of an operation is non-trivial... that is, whether there will be at least on * set bit. - * - * @since 0.4.2 + * * @author Daniel Lemire and Veronika Zenz - * + * @since 0.4.2 */ public class NonEmptyVirtualStorage implements BitmapStorage { - static class NonEmptyException extends RuntimeException { - private static final long serialVersionUID = 1L; + private static final NonEmptyException nonEmptyException = new NonEmptyException(); + + /** + * If the word to be added is non-zero, a NonEmptyException exception is + * thrown. + * + * @see com.googlecode.javaewah.BitmapStorage#addWord(long) + */ + @Override + public void addWord(long newData) { + if (newData != 0) + throw nonEmptyException; + } /** - * Do not fill in the stack trace for this exception - * for performance reasons. + * If the word to be added is non-zero, a NonEmptyException exception is + * thrown. 
* - * @return this instance - * @see java.lang.Throwable#fillInStackTrace() + * @see com.googlecode.javaewah.BitmapStorage#addWord(long) */ @Override - public synchronized Throwable fillInStackTrace() { - return this; + public void addLiteralWord(long newData) { + if (newData != 0) + throw nonEmptyException; } - } - - private static final NonEmptyException nonEmptyException = new NonEmptyException(); - - /** - * If the word to be added is non-zero, a NonEmptyException exception is - * thrown. - * - * @see com.googlecode.javaewah.BitmapStorage#add(long) - */ - @Override -public void add(long newdata) { - if (newdata != 0) - throw nonEmptyException; - return; - } - - /** - * throws a NonEmptyException exception when number is greater than 0 - * - */ - @Override -public void addStreamOfLiteralWords(long[] data, int start, int number) { - if(number>0){ - throw nonEmptyException; - } - } - - /** - * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, - * otherwise, nothing happens. - * - * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long) - */ - @Override -public void addStreamOfEmptyWords(boolean v, long number) { - if (v && (number>0)) - throw nonEmptyException; - return; - } - - /** - * throws a NonEmptyException exception when number is greater than 0 - * - */ - @Override -public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { - if(number>0){ - throw nonEmptyException; - } - } - - /** - * Does nothing. 
- * - * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) - */ - @Override -public void setSizeInBits(int bits) { - } + /** + * throws a NonEmptyException exception when number is greater than 0 + */ + @Override + public void addStreamOfLiteralWords(Buffer buffer, int start, int number) { + for(int x = start; x < start + number ; ++x) + if(buffer.getWord(x)!=0) throw nonEmptyException; + } + + /** + * If the boolean value is true and number is greater than 0, then it + * throws a NonEmptyException exception, otherwise, nothing happens. + * + * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, + * long) + */ + @Override + public void addStreamOfEmptyWords(boolean v, long number) { + if (v && (number > 0)) + throw nonEmptyException; + } + + /** + * throws a NonEmptyException exception when number is greater than 0 + */ + @Override + public void addStreamOfNegatedLiteralWords(Buffer buffer, int start, + int number) { + if (number > 0) { + throw nonEmptyException; + } + } + + @Override + public void clear() { + } + + + /** + * Does nothing. + * + * @see com.googlecode.javaewah.BitmapStorage#setSizeInBitsWithinLastWord(int) + */ + @Override + public void setSizeInBitsWithinLastWord(int bits) { + } + + static class NonEmptyException extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** + * Do not fill in the stack trace for this exception for + * performance reasons. 
+ * + * @return this instance + * @see java.lang.Throwable#fillInStackTrace() + */ + @Override + public synchronized Throwable fillInStackTrace() { + return this; + } + } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ReverseEWAHIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ReverseEWAHIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ReverseEWAHIterator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ReverseEWAHIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,89 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.Stack; + +/** + * The class ReverseEWAHIterator represents a special type of efficient iterator + * iterating over (uncompressed) words of bits in reverse order. + * + * @author Gregory Ssi-Yan-Kai + */ +final class ReverseEWAHIterator { + + /** + * Instantiates a new reverse EWAH iterator. + * + * @param buffer the buffer + */ + public ReverseEWAHIterator(final Buffer buffer) { + this.pointer = 0; + this.rlw = new RunningLengthWord(buffer, this.pointer); + this.positions = new Stack(); + this.positions.ensureCapacity(buffer.sizeInWords()); + while(this.pointer < buffer.sizeInWords()) { + this.positions.push(this.pointer); + this.rlw.position = this.pointer; + this.pointer += this.rlw.getNumberOfLiteralWords() + 1; + } + } + + /** + * Access to the buffer + * + * @return the buffer + */ + public Buffer buffer() { + return this.rlw.buffer; + } + + /** + * Position of the current running length word. + * + * @return the int + */ + public int position() { + return this.pointer; + } + + /** + * Checks for previous. 
+ * + * @return true, if successful + */ + public boolean hasPrevious() { + return !this.positions.isEmpty(); + } + + /** + * Previous running length word. + * + * @return the running length word + */ + public RunningLengthWord previous() { + this.pointer = this.positions.pop(); + this.rlw.position = this.pointer; + return this.rlw; + } + + /** + * The positions of running length words (embedded in the rlw attribute). + */ + private Stack positions; + + /** + * The pointer representing the location of the current running length word + * in the array of words (embedded in the rlw attribute). + */ + private int pointer; + + /** + * The current running length word. + */ + protected RunningLengthWord rlw; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ReverseIntIterator.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ReverseIntIterator.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/ReverseIntIterator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/ReverseIntIterator.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,101 @@ +package com.googlecode.javaewah; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; + +/** + * The ReverseIntIterator is the 64 bit implementation of the IntIterator + * interface, which efficiently returns the stream of integers represented by a + * ReverseEWAHIterator in reverse order. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +final class ReverseIntIterator implements IntIterator { + + private final ReverseEWAHIterator ewahIter; + private final int sizeInBits; + private final Buffer buffer; + private int position; + private boolean runningBit; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasNext; + + ReverseIntIterator(ReverseEWAHIterator ewahIter, int sizeInBits) { + this.ewahIter = ewahIter; + this.sizeInBits = sizeInBits; + this.buffer = ewahIter.buffer(); + this.runningLength = sizeInBits - 1; + this.hasNext = this.moveToPreviousRLW(); + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public int next() { + final int answer; + if (literalHasNext()) { + final long t = this.word & -this.word; + answer = this.literalPosition - Long.bitCount(t - 1); + this.word ^= t; + } else { + answer = this.position--; + } + this.hasNext = this.moveToPreviousRLW(); + return answer; + } + + private boolean moveToPreviousRLW() { + while (!literalHasNext() && !runningHasNext()) { + if (!this.ewahIter.hasPrevious()) { + return false; + } + setRLW(this.ewahIter.previous()); + } + return true; + } + + private void setRLW(RunningLengthWord rlw) { + this.wordLength = rlw.getNumberOfLiteralWords(); + this.wordPosition = this.ewahIter.position(); + this.position = this.runningLength; + this.runningLength -= WORD_IN_BITS * (rlw.getRunningLength() + this.wordLength); + if (this.position == this.sizeInBits - 1) { + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + if(usedBitsInLast > 0) { + this.runningLength += WORD_IN_BITS - usedBitsInLast; + if(this.wordLength > 0) { + this.word = Long.reverse(this.buffer.getWord(this.wordPosition + this.wordLength--)); + this.word = (this.word >>> (WORD_IN_BITS - usedBitsInLast)); + this.literalPosition = this.position; + this.position -= usedBitsInLast; + } + } + } + 
this.runningBit = rlw.getRunningBit(); + } + + private boolean runningHasNext() { + return this.runningBit && this.runningLength < this.position; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordLength > 0) { + this.word = Long.reverse(this.buffer.getWord(this.wordPosition + this.wordLength--)); + this.literalPosition = this.position; + this.position -= WORD_IN_BITS; + } + return this.word != 0; + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/RunningLengthWord.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/RunningLengthWord.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/RunningLengthWord.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/RunningLengthWord.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,152 +1,172 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. - * - * @since 0.1.0 + * * @author Daniel Lemire + * @since 0.1.0 */ public final class RunningLengthWord implements Cloneable { - /** - * Instantiates a new running length word. - * - * @param a - * an array of 64-bit words - * @param p - * position in the array where the running length word is - * located. - */ - RunningLengthWord(final EWAHCompressedBitmap a, final int p) { - this.parent = a; - this.position = p; - } - - /** - * Gets the number of literal words. - * - * @return the number of literal words - */ - public int getNumberOfLiteralWords() { - return (int) (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); - } - - /** - * Gets the running bit. 
- * - * @return the running bit - */ - public boolean getRunningBit() { - return (this.parent.buffer[this.position] & 1) != 0; - } - - /** - * Gets the running length. - * - * @return the running length - */ - public long getRunningLength() { - return (this.parent.buffer[this.position] >>> 1) - & largestrunninglengthcount; - } - - /** - * Sets the number of literal words. - * - * @param number - * the new number of literal words - */ - public void setNumberOfLiteralWords(final long number) { - this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; - this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) - | runninglengthplusrunningbit; - } - - /** - * Sets the running bit. - * - * @param b - * the new running bit - */ - public void setRunningBit(final boolean b) { - if (b) - this.parent.buffer[this.position] |= 1l; - else - this.parent.buffer[this.position] &= ~1l; - } - - /** - * Sets the running length. - * - * @param number - * the new running length - */ - public void setRunningLength(final long number) { - this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; - this.parent.buffer[this.position] &= (number << 1) - | notshiftedlargestrunninglengthcount; - } - - /** - * Return the size in uncompressed words represented by this running - * length word. - * - * @return the size - */ - public long size() { - return getRunningLength() + getNumberOfLiteralWords(); - } - - /* - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "running bit = " + getRunningBit() - + " running length = " + getRunningLength() - + " number of lit. words " + getNumberOfLiteralWords(); - } - - @Override - public RunningLengthWord clone() throws CloneNotSupportedException { - RunningLengthWord answer; - answer = (RunningLengthWord) super.clone(); - answer.parent = this.parent; - answer.position = this.position; - return answer; - } - - /** The array of words. 
*/ - public EWAHCompressedBitmap parent; - - /** The position in array. */ - public int position; - - /** - * number of bits dedicated to marking of the running length of clean - * words - */ - public static final int runninglengthbits = 32; - - private static final int literalbits = 64 - 1 - runninglengthbits; - - /** largest number of literal words in a run. */ - public static final int largestliteralcount = (1 << literalbits) - 1; - - /** largest number of clean words in a run */ - public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; - - private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; + /** + * Instantiates a new running length word. + * + * @param buffer the buffer + * @param p position in the buffer where the running length word is + * located. + */ + RunningLengthWord(final Buffer buffer, final int p) { + this.buffer = buffer; + this.position = p; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return getNumberOfLiteralWords(this.buffer, this.position); + } + + static int getNumberOfLiteralWords(final Buffer buffer, final int position) { + return (int) (buffer.getWord(position) >>> (1 + RUNNING_LENGTH_BITS)); + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return getRunningBit(this.buffer, this.position); + } + + static boolean getRunningBit(final Buffer buffer, final int position) { + return (buffer.getWord(position) & 1) != 0; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public long getRunningLength() { + return getRunningLength(this.buffer, this.position); + } + + static long getRunningLength(final Buffer buffer, final int position) { + return (buffer.getWord(position) >>> 1) & LARGEST_RUNNING_LENGTH_COUNT; + } + + /** + * Sets the number of literal words. 
+ * + * @param number the new number of literal words + */ + public void setNumberOfLiteralWords(final long number) { + setNumberOfLiteralWords(this.buffer, this.position, number); + } + + static void setNumberOfLiteralWords(final Buffer buffer, final int position, final long number) { + buffer.orWord(position, NOT_RUNNING_LENGTH_PLUS_RUNNING_BIT); + buffer.andWord(position, (number << (RUNNING_LENGTH_BITS + 1)) | RUNNING_LENGTH_PLUS_RUNNING_BIT); + } + + /** + * Sets the running bit. + * + * @param b the new running bit + */ + public void setRunningBit(final boolean b) { + setRunningBit(this.buffer, this.position, b); + } + + static void setRunningBit(final Buffer buffer, final int position, final boolean b) { + if (b) + buffer.orWord(position, 1l); + else + buffer.andWord(position, ~1l); + } + + /** + * Sets the running length. + * + * @param number the new running length + */ + public void setRunningLength(final long number) { + setRunningLength(this.buffer, this.position, number); + } + + static void setRunningLength(final Buffer buffer, final int position, final long number) { + buffer.orWord(position, SHIFTED_LARGEST_RUNNING_LENGTH_COUNT); + buffer.andWord(position, (number << 1) | NOT_SHIFTED_LARGEST_RUNNING_LENGTH_COUNT); + } + + /** + * Return the size in uncompressed words represented by this running + * length word. + * + * @return the size + */ + public long size() { + return getRunningLength() + getNumberOfLiteralWords(); + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public RunningLengthWord clone() throws CloneNotSupportedException { + return (RunningLengthWord) super.clone(); + } + + /** + * The array of words. + */ + final Buffer buffer; + + /** + * The position in array. 
+ */ + int position; + + /** + * number of bits dedicated to marking of the running length of clean + * words + */ + public static final int RUNNING_LENGTH_BITS = 32; + + private static final int LITERAL_BITS = 64 - 1 - RUNNING_LENGTH_BITS; + + /** + * largest number of literal words in a run. + */ + public static final int LARGEST_LITERAL_COUNT = (1 << LITERAL_BITS) - 1; + + /** + * largest number of clean words in a run + */ + public static final long LARGEST_RUNNING_LENGTH_COUNT = (1l << RUNNING_LENGTH_BITS) - 1; + + private static final long RUNNING_LENGTH_PLUS_RUNNING_BIT = (1l << (RUNNING_LENGTH_BITS + 1)) - 1; - private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; + private static final long SHIFTED_LARGEST_RUNNING_LENGTH_COUNT = LARGEST_RUNNING_LENGTH_COUNT << 1; - private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; + private static final long NOT_RUNNING_LENGTH_PLUS_RUNNING_BIT = ~RUNNING_LENGTH_PLUS_RUNNING_BIT; - private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; + private static final long NOT_SHIFTED_LARGEST_RUNNING_LENGTH_COUNT = ~SHIFTED_LARGEST_RUNNING_LENGTH_COUNT; -} \ No newline at end of file +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/BitmapSymmetricAlgorithm.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/BitmapSymmetricAlgorithm.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/BitmapSymmetricAlgorithm.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/BitmapSymmetricAlgorithm.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,23 @@ +package com.googlecode.javaewah.symmetric; + +import com.googlecode.javaewah.BitmapStorage; +import com.googlecode.javaewah.EWAHCompressedBitmap; + +/** + * Generic interface to compute symmetric Boolean functions. 
+ * + * @author Daniel Lemire + * @see http://en.wikipedia.org/wiki/Symmetric_Boolean_function + * @since 0.8.0 + */ +public interface BitmapSymmetricAlgorithm { + /** + * Compute a Boolean symmetric query. + * + * @param f symmetric boolean function to be processed + * @param out the result of the query + * @param set the inputs + */ + void symmetric(UpdateableBitmapFunction f, BitmapStorage out, EWAHCompressedBitmap... set); +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/EWAHPointer.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/EWAHPointer.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/EWAHPointer.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/EWAHPointer.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,114 @@ +package com.googlecode.javaewah.symmetric; + +import com.googlecode.javaewah.IteratingBufferedRunningLengthWord; + +/** + * Wrapper around an IteratingBufferedRunningLengthWord used by the + * RunningBitmapMerge class. + * + * @author Daniel Lemire + * @since 0.8.0 + */ +public final class EWAHPointer implements Comparable { + private int endrun; + private final int pos; + private boolean isLiteral; + private boolean value; + private boolean dead = false; + /** + * Underlying iterator + */ + public final IteratingBufferedRunningLengthWord iterator; + + /** + * Construct a pointer over an IteratingBufferedRunningLengthWord. 
+ * + * @param previousEndRun word where the previous run ended + * @param rw the iterator + * @param pos current position (in word) + */ + public EWAHPointer(final int previousEndRun, + final IteratingBufferedRunningLengthWord rw, final int pos) { + this.pos = pos; + this.iterator = rw; + if (this.iterator.getRunningLength() > 0) { + this.endrun = previousEndRun + (int) this.iterator.getRunningLength(); + this.isLiteral = false; + this.value = this.iterator.getRunningBit(); + } else if (this.iterator.getNumberOfLiteralWords() > 0) { + this.isLiteral = true; + this.endrun = previousEndRun + this.iterator.getNumberOfLiteralWords(); + } else { + this.endrun = previousEndRun; + this.dead = true; + } + } + + /** + * @return the end of the current run + */ + public int endOfRun() { + return this.endrun; + } + + /** + * @return the beginning of the current run + */ + public int beginOfRun() { + if (this.isLiteral) + return this.endrun - this.iterator.getNumberOfLiteralWords(); + return (int) (this.endrun - this.iterator.getRunningLength()); + } + + /** + * Process the next run + */ + public void parseNextRun() { + if ((this.isLiteral) + || (this.iterator.getNumberOfLiteralWords() == 0)) { + // no choice, must load next runs + this.iterator.discardFirstWords(this.iterator.size()); + if (this.iterator.getRunningLength() > 0) { + this.endrun += (int) this.iterator.getRunningLength(); + this.isLiteral = false; + this.value = this.iterator.getRunningBit(); + } else if (this.iterator.getNumberOfLiteralWords() > 0) { + this.isLiteral = true; + this.endrun += this.iterator.getNumberOfLiteralWords(); + } else { + this.dead = true; + } + + } else { + this.isLiteral = true; + this.endrun += this.iterator.getNumberOfLiteralWords(); + } + + } + + /** + * @return true if there is no more data + */ + public boolean hasNoData() { + return this.dead; + } + + /** + * @param f call the function with the current information + */ + public void callbackUpdate(final UpdateableBitmapFunction f) 
{ + if (this.dead) + f.setZero(this.pos); + else if (this.isLiteral) + f.setLiteral(this.pos); + else if (this.value) + f.setOne(this.pos); + else + f.setZero(this.pos); + } + + @Override + public int compareTo(EWAHPointer other) { + return this.endrun - other.endrun; + } +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/RunningBitmapMerge.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/RunningBitmapMerge.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/RunningBitmapMerge.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/RunningBitmapMerge.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,76 @@ +package com.googlecode.javaewah.symmetric; + +import com.googlecode.javaewah.BitmapStorage; +import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.googlecode.javaewah.IteratingBufferedRunningLengthWord; +import com.googlecode.javaewah.datastructure.PriorityQ; + +import java.util.Comparator; + +/** + * This is an implementation of the RunningBitmapMerge algorithm running on top + * of JavaEWAH. It is well suited to computing symmetric Boolean queries. + * + * It is a revised version of an algorithm described in the following reference: + *
  • + * Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned + * bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. + *
+ * + * @author Daniel Lemire + * @since 0.8.0 + */ +public class RunningBitmapMerge implements BitmapSymmetricAlgorithm { + + @Override + public void symmetric(UpdateableBitmapFunction f, BitmapStorage out, + EWAHCompressedBitmap... set) { + out.clear(); + final PriorityQ h = new PriorityQ( + set.length, new Comparator() { + @Override + public int compare(EWAHPointer arg0, + EWAHPointer arg1) { + return arg0.compareTo(arg1); + } + } + ); + f.resize(set.length); + + for (int k = 0; k < set.length; ++k) { + final EWAHPointer x = new EWAHPointer(0, new IteratingBufferedRunningLengthWord(set[k]), k); + if (x.hasNoData()) + continue; + f.rw[k] = x; + x.callbackUpdate(f); + h.toss(x); + } + h.buildHeap(); // just in case we use an insane number of inputs + + int lasta = 0; + if (h.isEmpty()) + return; + mainloop: + while (true) { // goes until no more active inputs + final int a = h.peek().endOfRun(); + // I suppose we have a run of length a - lasta here. + f.dispatch(out, lasta, a); + lasta = a; + + while (h.peek().endOfRun() == a) { + final EWAHPointer p = h.peek(); + p.parseNextRun(); + p.callbackUpdate(f); + if (p.hasNoData()) { + h.poll(); // we just remove it + if (h.isEmpty()) + break mainloop; + } else { + h.percolateDown(); // since we have + // increased the key + } + } + } + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/ThresholdFuncBitmap.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/ThresholdFuncBitmap.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/ThresholdFuncBitmap.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/ThresholdFuncBitmap.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,143 @@ +package com.googlecode.javaewah.symmetric; + +import com.googlecode.javaewah.BitmapStorage; + +import java.util.Arrays; + +/** + * A threshold Boolean function returns true if the number of true 
values exceed + * a threshold. It is a symmetric Boolean function. + * + * This class implements an algorithm described in the following paper: + * + * Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections + * http://arxiv.org/abs/1402.4466 + * + * It is not thread safe: you should use one object per thread. + * + * @author Daniel Lemire + * @see http://en.wikipedia.org/wiki/Symmetric_Boolean_function + * @since 0.8.0 + */ +public final class ThresholdFuncBitmap extends UpdateableBitmapFunction { + private final int min; + private long[] buffers; + private int bufferUsed; + private final int[] bufCounters = new int[64]; + private static final int[] zeroes64 = new int[64]; + + /** + * Construction a threshold function with a given threshold + * + * @param min threshold + */ + public ThresholdFuncBitmap(final int min) { + super(); + this.min = min; + this.buffers = new long[16]; + this.bufferUsed = 0; + } + + @Override + public void dispatch(BitmapStorage out, int runBegin, int runEnd) { + final int runLength = runEnd - runBegin; + if (this.hammingWeight >= this.min) { + out.addStreamOfEmptyWords(true, runLength); + } else if (this.litWeight + this.hammingWeight < this.min) { + out.addStreamOfEmptyWords(false, runLength); + } else { + final int deficit = this.min - this.hammingWeight; + if (deficit == 1) { + orLiterals(out, runBegin, runLength); + return; + } + this.bufferUsed = this.getNumberOfLiterals(); + if (this.bufferUsed == deficit) { + andLiterals(out, runBegin, runLength); + } else { + generalLiterals(deficit, out, runBegin, runLength); + } + } + } + + private long threshold2buf(final int t, final long[] buf, final int bufUsed) { + long result = 0L; + final int[] counters = this.bufCounters; + System.arraycopy(zeroes64, 0, counters, 0, 64); + for (int k = 0; k < bufUsed; ++k) { + long bitset = buf[k]; + while (bitset != 0) { + long t2 = bitset & -bitset; + counters[Long.bitCount(t2 - 1)]++; + bitset ^= t2; + } + } + for 
(int pos = 0; pos < 64; ++pos) { + if (counters[pos] >= t) + result |= (1L << pos); + } + return result; + } + + private static long threshold3(final int t, final long[] buffers, final int bufUsed) { + if (buffers.length == 0) + return 0; + final long[] v = new long[t]; + v[0] = buffers[0]; + for (int k = 1; k < bufUsed; ++k) { + final long c = buffers[k]; + final int m = Math.min(t - 1, k); + for (int j = m; j >= 1; --j) { + v[j] |= (c & v[j - 1]); + } + v[0] |= c; + } + return v[t - 1]; + } + + private long threshold4(final int T, final long[] buf, final int bufUsed) { + if (T >= 128) + return threshold2buf(T, buf, bufUsed); + int B = 0; + for (int k = 0; k < bufUsed; ++k) + B += Long.bitCount(buf[k]); + if (2 * B >= bufUsed * T) + return threshold3(T, buf, bufUsed);//looped + return threshold2buf(T, buf, bufUsed);//scancount + } + + private void orLiterals(final BitmapStorage out, final int runBegin, final int runLength) { + for (int i = 0; i < runLength; ++i) { + long w = 0; + for (EWAHPointer R : this.getLiterals()) { + w |= R.iterator.getLiteralWordAt(i + runBegin - R.beginOfRun()); + } + out.addWord(w); + } + } + + private void andLiterals(final BitmapStorage out, final int runBegin, final int runLength) { + for (int i = 0; i < runLength; ++i) { + long w = ~0; + for (EWAHPointer R : this.getLiterals()) { + w &= R.iterator.getLiteralWordAt(i + runBegin - R.beginOfRun()); + } + out.addWord(w); + } + } + + private void generalLiterals(final int deficit, + final BitmapStorage out, final int runBegin, final int runLength) { + if (this.bufferUsed > this.buffers.length) + this.buffers = Arrays.copyOf(this.buffers, 2 * this.bufferUsed); + for (int i = 0; i < runLength; ++i) { + int p = 0; + for (EWAHPointer R : this.getLiterals()) { + this.buffers[p++] = R.iterator.getLiteralWordAt(i + runBegin - R.beginOfRun()); + } + out.addWord(threshold4(deficit, this.buffers, this.bufferUsed)); + } + } + +} diff -Nru 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/UpdateableBitmapFunction.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/UpdateableBitmapFunction.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah/symmetric/UpdateableBitmapFunction.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah/symmetric/UpdateableBitmapFunction.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,148 @@ +package com.googlecode.javaewah.symmetric; + +import com.googlecode.javaewah.BitmapStorage; +import com.googlecode.javaewah.datastructure.BitSet; + +import java.util.Iterator; +import java.util.List; + +/** + * This is a Java specification for an "updatable" Boolean function meant to run + * over EWAH bitmaps. + * + * Reference: + * + * Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned + * bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. + * + * @author Daniel Lemire + * @since 0.8.0 + */ +public abstract class UpdateableBitmapFunction { + EWAHPointer[] rw = new EWAHPointer[0]; + int hammingWeight = 0; + int litWeight = 0; + boolean[] b = new boolean[0]; + final BitSet litwlist = new BitSet(0); + + UpdateableBitmapFunction() { + } + + /** + * @return the current number of literal words + */ + public final int getNumberOfLiterals() { + return this.litwlist.cardinality(); + } + + /** + * Goes through the literals. 
+ * + * @return an iterator + */ + public final Iterable getLiterals() { + return new Iterable() { + + @Override + public Iterator iterator() { + return new Iterator() { + int k = UpdateableBitmapFunction.this.litwlist.nextSetBit(0); + + @Override + public boolean hasNext() { + return this.k >= 0; + } + + @Override + public EWAHPointer next() { + EWAHPointer answer = UpdateableBitmapFunction.this.rw[this.k]; + this.k = UpdateableBitmapFunction.this.litwlist.nextSetBit(this.k + 1); + return answer; + } + + @Override + public void remove() { + throw new RuntimeException("N/A"); + } + }; + } + }; + } + + /** + * append to the list the literal words as EWAHPointer + * + * @param container where we write + */ + public final void fillWithLiterals(final List container) { + for (int k = this.litwlist.nextSetBit(0); k >= 0; k = this.litwlist.nextSetBit(k + 1)) { + container.add(this.rw[k]); + } + } + + /** + * @param newsize the number of inputs + */ + public final void resize(final int newsize) { + this.rw = java.util.Arrays.copyOf(this.rw, newsize); + this.litwlist.resize(newsize); + this.b = java.util.Arrays.copyOf(this.b, newsize); + } + + /** + * @param pos position of a literal + */ + public void setLiteral(final int pos) { + if (!this.litwlist.get(pos)) { + this.litwlist.set(pos); + this.litWeight++; + if (this.b[pos]) { + this.b[pos] = false; + --this.hammingWeight; + } + } + } + + /** + * @param pos position where a literal was removed + */ + public void clearLiteral(final int pos) { + if (this.litwlist.get(pos)) { + // litwlist.unset(pos); + this.litwlist.set(pos, false); + this.litWeight--; + } + } + + /** + * @param pos position where a zero word was added + */ + public final void setZero(final int pos) { + if (this.b[pos]) { + this.b[pos] = false; + --this.hammingWeight; + } else { + clearLiteral(pos); + } + } + + /** + * @param pos position were a 11...1 word was added + */ + public final void setOne(final int pos) { + if (!this.b[pos]) { + clearLiteral(pos); 
+ this.b[pos] = true; + ++this.hammingWeight; + } + } + + /** + * Writes out the answer. + * + * @param out output buffer + * @param runBegin beginning of the run + * @param runEnd end of the run + */ + public abstract void dispatch(BitmapStorage out, int runBegin, int runEnd); +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BitCounter32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/BitCounter32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BitCounter32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/BitCounter32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,102 +1,103 @@ package com.googlecode.javaewah32; - - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** - * BitCounter is a fake bitset data structure. Instead of storing the actual data, - * it only records the number of set bits. + * BitCounter is a fake bitset data structure. Instead of storing the actual + * data, it only records the number of set bits. * - * @since 0.5.0 * @author Daniel Lemire and David McIntosh + * @since 0.5.0 */ public final class BitCounter32 implements BitmapStorage32 { - /** - * Virtually add words directly to the bitmap - * - * @param newdata the word - */ - // @Override : causes problems with Java 1.5 - @Override -public void add(final int newdata) { - this.oneBits += Integer.bitCount(newdata); - } - - - /** - * virtually add several literal words. 
- * - * @param data the literal words - * @param start the starting point in the array - * @param number the number of literal words to add - */ - // @Override : causes problems with Java 1.5 - @Override -public void addStreamOfLiteralWords(int[] data, int start, int number) { - for(int i=start;i iterator) { - this.masteriterator = iterator; - if(this.masteriterator.hasNext()) { - this.iterator = this.masteriterator.next(); - this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; - this.buffer = this.iterator.buffer(); - } - } - - - /** - * Discard first words, iterating to the next running length word if needed. - * - * @param x the number of words to be discarded - */ - @Override - public void discardFirstWords(int x) { - while (x > 0) { - if (this.brlw.RunningLength > x) { - this.brlw.RunningLength -= x; - return; - } - x -= this.brlw.RunningLength; - this.brlw.RunningLength = 0; - int toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; - - this.literalWordStartPosition += toDiscard; - this.brlw.NumberOfLiteralWords -= toDiscard; - x -= toDiscard; - if ((x > 0) || (this.brlw.size() == 0)) { - if (!this.next()) { - break; - } - } - } - } - /** - * Move to the next RunningLengthWord - * @return whether the move was possible - */ - @Override - public boolean next() { - if (!this.iterator.hasNext()) { - if(!reload()) { - this.brlw.NumberOfLiteralWords = 0; - this.brlw.RunningLength = 0; - return false; - } - } - this.brlw.reset(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 - return true; - } - private boolean reload() { - if(!this.masteriterator.hasNext()) { - return false; - } - this.iterator = this.masteriterator.next(); - this.buffer = this.iterator.buffer(); - return true; - } - - - /** - * Get the nth literal word for the current running length word - * @param index zero based index - * @return the literal word - */ - @Override - public int getLiteralWordAt(int index) { - return this.buffer[this.literalWordStartPosition + index]; - } - - /** - * Gets the number of literal words for the current running length word. - * - * @return the number of literal words - */ - @Override - public int getNumberOfLiteralWords() { - return this.brlw.NumberOfLiteralWords; - } - - /** - * Gets the running bit. - * - * @return the running bit - */ - @Override - public boolean getRunningBit() { - return this.brlw.RunningBit; - } - - /** - * Gets the running length. - * - * @return the running length - */ - @Override - public int getRunningLength() { - return this.brlw.RunningLength; - } - - /** - * Size in uncompressed words of the current running length word. 
- * - * @return the size - */ - @Override - public int size() { - return this.brlw.size(); - } - - @Override - public BufferedIterator32 clone() throws CloneNotSupportedException { - BufferedIterator32 answer = (BufferedIterator32) super.clone(); - answer.brlw = this.brlw.clone(); - answer.buffer = this.buffer; - answer.iterator = this.iterator.clone(); - answer.literalWordStartPosition = this.literalWordStartPosition; - answer.masteriterator = this.masteriterator.clone(); - return answer; - } - - private BufferedRunningLengthWord32 brlw; - private int[] buffer; - private int literalWordStartPosition; - private EWAHIterator32 iterator; - private CloneableIterator masteriterator; - } \ No newline at end of file +public class BufferedIterator32 implements IteratingRLW32, Cloneable { + /** + * Instantiates a new iterating buffered running length word. + * + * @param iterator iterator + */ + public BufferedIterator32( + final CloneableIterator iterator) { + this.masterIterator = iterator; + if (this.masterIterator.hasNext()) { + iteratingBrlw = new IteratingBufferedRunningLengthWord32(this.masterIterator.next()); + } + } + + /** + * Discard first words, iterating to the next running length word if + * needed. + * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(int x) { + while (x > 0) { + if (this.iteratingBrlw.getRunningLength() > x) { + this.iteratingBrlw.discardFirstWords(x); + return; + } + this.iteratingBrlw.discardFirstWords(this.iteratingBrlw.getRunningLength()); + x -= this.iteratingBrlw.getRunningLength(); + int toDiscard = x > this.iteratingBrlw.getNumberOfLiteralWords() + ? 
this.iteratingBrlw.getNumberOfLiteralWords() + : x; + + this.iteratingBrlw.discardFirstWords(toDiscard); + x -= toDiscard; + if ((x > 0) || (this.iteratingBrlw.size() == 0)) { + if (!this.next()) { + break; + } + } + } + } + + @Override + public void discardLiteralWords(int x) { + this.iteratingBrlw.discardLiteralWords(x); + if (this.iteratingBrlw.getNumberOfLiteralWords() == 0) + this.next(); + } + + + @Override + public void discardRunningWords() { + this.iteratingBrlw.discardRunningWords(); + if (this.iteratingBrlw.getNumberOfLiteralWords() == 0) + this.next(); + } + + /** + * Move to the next RunningLengthWord + * + * @return whether the move was possible + */ + @Override + public boolean next() { + if (!this.iteratingBrlw.next()) { + if (!this.masterIterator.hasNext()) { + return false; + } else { + this.iteratingBrlw = new IteratingBufferedRunningLengthWord32(this.masterIterator.next()); + } + } + return true; + } + + /** + * Get the nth literal word for the current running length word + * + * @param index zero based index + * @return the literal word + */ + @Override + public int getLiteralWordAt(int index) { + return this.iteratingBrlw.getLiteralWordAt(index); + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.iteratingBrlw.getNumberOfLiteralWords(); + } + + /** + * Gets the running bit. + *RunningBit + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.iteratingBrlw.getRunningBit(); + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public int getRunningLength() { + return this.iteratingBrlw.getRunningLength(); + } + + /** + * Size in uncompressed words of the current running length word. 
+ * + * @return the size + */ + @Override + public int size() { + return this.iteratingBrlw.size(); + } + + @Override + public BufferedIterator32 clone() throws CloneNotSupportedException { + BufferedIterator32 answer = (BufferedIterator32) super.clone(); + answer.iteratingBrlw = this.iteratingBrlw.clone(); + answer.masterIterator = this.masterIterator.clone(); + return answer; + } + + private IteratingBufferedRunningLengthWord32 iteratingBrlw; + private CloneableIterator masterIterator; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,174 +1,180 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ - - /** - * Mostly for internal use. Similar to RunningLengthWord, but can - * be modified without access to the array, and has faster access. + * Mostly for internal use. Similar to RunningLengthWord, but can be modified + * without access to the array, and has faster access. * * @author Daniel Lemire * @since 0.5.0 - * */ -public final class BufferedRunningLengthWord32 implements Cloneable { +public final class BufferedRunningLengthWord32 implements Cloneable { + + /** + * Instantiates a new buffered running length word. 
+ * + * @param a the word + */ + public BufferedRunningLengthWord32(final int a) { + this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.RUNNING_LENGTH_BITS)); + this.RunningBit = (a & 1) != 0; + this.RunningLength = ((a >>> 1) & RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT); + } + + /** + * Instantiates a new buffered running length word. + * + * @param rlw the rlw + */ + public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { + this(rlw.buffer.getWord(rlw.position)); + } + + /** + * Discard first words. + * + * @param x the number of words to be discarded + */ + public void discardFirstWords(int x) { + if (this.RunningLength >= x) { + this.RunningLength -= x; + return; + } + x -= this.RunningLength; + this.RunningLength = 0; + this.literalWordOffset += x; + this.NumberOfLiteralWords -= x; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return this.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return this.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public int getRunningLength() { + return this.RunningLength; + } + + /** + * Reset the values using the provided word. + * + * @param a the word + */ + public void reset(final int a) { + this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.RUNNING_LENGTH_BITS)); + this.RunningBit = (a & 1) != 0; + this.RunningLength = ((a >>> 1) & RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT); + this.literalWordOffset = 0; + } + + /** + * Reset the values of this running length word so that it has the same + * values as the other running length word. + * + * @param rlw the other running length word + */ + public void reset(final RunningLengthWord32 rlw) { + reset(rlw.buffer.getWord(rlw.position)); + } + + /** + * Sets the number of literal words. 
+ * + * @param number the new number of literal words + */ + public void setNumberOfLiteralWords(final int number) { + this.NumberOfLiteralWords = number; + } + + /** + * Sets the running bit. + * + * @param b the new running bit + */ + public void setRunningBit(final boolean b) { + this.RunningBit = b; + } + + /** + * Sets the running length. + * + * @param number the new running length + */ + public void setRunningLength(final int number) { + this.RunningLength = number; + } + + /** + * Size in uncompressed words. + * + * @return the int + */ + public int size() { + return this.RunningLength + this.NumberOfLiteralWords; + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public BufferedRunningLengthWord32 clone() + throws CloneNotSupportedException { + BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super + .clone(); + answer.literalWordOffset = this.literalWordOffset; + answer.NumberOfLiteralWords = this.NumberOfLiteralWords; + answer.RunningBit = this.RunningBit; + answer.RunningLength = this.RunningLength; + return answer; + } - /** - * Instantiates a new buffered running length word. - * - * @param a the word - */ - public BufferedRunningLengthWord32(final int a) { - this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); - this.RunningBit = (a & 1) != 0; - this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); - } - - /** - * Instantiates a new buffered running length word. - * - * @param rlw the rlw - */ - public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { - this(rlw.parent.buffer[rlw.position]); - } - - /** - * Discard first words. 
- * - * @param x the number of words to be discarded - */ - public void discardFirstWords(int x) { - if (this.RunningLength >= x) { - this.RunningLength -= x; - return; - } - x -= this.RunningLength; - this.RunningLength = 0; - this.literalwordoffset += x; - this.NumberOfLiteralWords -= x; - } - - /** - * Gets the number of literal words. - * - * @return the number of literal words - */ - public int getNumberOfLiteralWords() { - return this.NumberOfLiteralWords; - } - - /** - * Gets the running bit. - * - * @return the running bit - */ - public boolean getRunningBit() { - return this.RunningBit; - } - - /** - * Gets the running length. - * - * @return the running length - */ - public int getRunningLength() { - return this.RunningLength; - } - - /** - * Reset the values using the provided word. - * - * @param a the word - */ - public void reset(final int a) { - this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); - this.RunningBit = (a & 1) != 0; - this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); - this.literalwordoffset = 0; - } - - /** - * Reset the values of this running length word so that it has the same values - * as the other running length word. - * - * @param rlw the other running length word - */ - public void reset(final RunningLengthWord32 rlw) { - reset(rlw.parent.buffer[rlw.position]); - } - - /** - * Sets the number of literal words. - * - * @param number the new number of literal words - */ - public void setNumberOfLiteralWords(final int number) { - this.NumberOfLiteralWords = number; - } - - /** - * Sets the running bit. - * - * @param b the new running bit - */ - public void setRunningBit(final boolean b) { - this.RunningBit = b; - } - - /** - * Sets the running length. - * - * @param number the new running length - */ - public void setRunningLength(final int number) { - this.RunningLength = number; - } - - /** - * Size in uncompressed words. 
- * - * @return the int - */ - public int size() { - return this.RunningLength + this.NumberOfLiteralWords; - } - - /* - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "running bit = " + getRunningBit() + " running length = " - + getRunningLength() + " number of lit. words " - + getNumberOfLiteralWords(); - } - - @Override -public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException { - BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone(); - answer.literalwordoffset = this.literalwordoffset; - answer.NumberOfLiteralWords = this.NumberOfLiteralWords; - answer.RunningBit = this.RunningBit; - answer.RunningLength = this.RunningLength; - return answer; - } - - /** how many literal words have we read so far? */ - public int literalwordoffset = 0; - - /** The Number of literal words. */ - public int NumberOfLiteralWords; - - /** The Running bit. */ - public boolean RunningBit; - - /** The Running length. */ - public int RunningLength; + /** + * how many literal words have we read so far? + */ + public int literalWordOffset = 0; + + /** + * The Number of literal words. + */ + protected int NumberOfLiteralWords; + + /** + * The Running bit. + */ + public boolean RunningBit; + + /** + * The Running length. 
+ */ + public int RunningLength; - -} \ No newline at end of file +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ChunkIteratorImpl32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ChunkIteratorImpl32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ChunkIteratorImpl32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ChunkIteratorImpl32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,153 @@ +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import com.googlecode.javaewah.ChunkIterator; + +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; + +/** + * The ChunkIteratorImpl is the 32 bit implementation of the ChunkIterator + * interface, which efficiently returns the chunks of ones and zeros represented by an + * EWAHIterator. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +final class ChunkIteratorImpl32 implements ChunkIterator { + + private final EWAHIterator32 ewahIter; + private final int sizeInBits; + private final Buffer32 buffer; + private int position; + private boolean runningBit; + private int runningLength; + private int word; + private int wordMask; + private int wordPosition; + private int wordLength; + private boolean hasNext; + private Boolean nextBit; + private int nextLength; + + ChunkIteratorImpl32(EWAHIterator32 ewahIter, int sizeInBits) { + this.ewahIter = ewahIter; + this.sizeInBits = sizeInBits; + this.buffer = ewahIter.buffer(); + this.hasNext = moveToNextRLW(); + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public boolean nextBit() { + return this.nextBit; + } + + @Override + public int nextLength() { + return this.nextLength; + } + + @Override + public void move() { + move(this.nextLength); + } + + @Override + public void move(int bits) { + this.nextLength -= bits; + if(this.nextLength <= 0) { + do { + this.nextBit = null; + updateNext(); + this.hasNext = moveToNextRLW(); + } while(this.nextLength <= 0 && this.hasNext); + } + } + + private boolean moveToNextRLW() { + while (!runningHasNext() && !literalHasNext()) { + if (!hasNextRLW()) { + return this.nextBit!=null; + } + setRLW(nextRLW()); + updateNext(); + } + return true; + } + + private void setRLW(RunningLengthWord32 rlw) { + this.runningLength = Math.min(this.sizeInBits, + this.position + WORD_IN_BITS * rlw.getRunningLength()); + this.runningBit = rlw.getRunningBit(); + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordMask == 0 && this.wordPosition < this.wordLength) { + this.word = this.buffer.getWord(this.wordPosition++); + 
this.wordMask = 1; + } + return (this.word != 0 || this.wordMask != 0 || !hasNextRLW()) && this.position < this.sizeInBits; + } + + private boolean hasNextRLW() { + return this.ewahIter.hasNext(); + } + + private RunningLengthWord32 nextRLW() { + return this.ewahIter.next(); + } + + private void updateNext() { + if(runningHasNext()) { + if(this.nextBit == null || this.nextBit == this.runningBit) { + this.nextBit = this.runningBit; + int offset = runningOffset(); + this.nextLength += offset; + movePosition(offset); + updateNext(); + } + } else if (literalHasNext()) { + boolean b = currentWordBit(); + if(this.nextBit == null || this.nextBit == b) { + this.nextBit = b; + this.nextLength++; + movePosition(1); + shiftWordMask(); + updateNext(); + } + } else { + moveToNextRLW(); + } + } + + private int runningOffset() { + return this.runningLength - this.position; + } + + private void movePosition(int offset) { + this.position += offset; + } + + private boolean currentWordBit() { + return (this.word & this.wordMask) != 0; + } + + private void shiftWordMask() { + this.word &= ~this.wordMask; + this.wordMask = this.wordMask << 1; + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ClearIntIterator32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ClearIntIterator32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ClearIntIterator32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ClearIntIterator32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,97 @@ +package com.googlecode.javaewah32; + +import com.googlecode.javaewah.IntIterator; + +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. 
+ */ + +/** + * This class is equivalent to IntIteratorImpl, except that it allows + * use to iterate over "clear" bits (bits set to 0). + * + * @author Gregory Ssi-Yan-Kai + */ +final class ClearIntIterator32 implements IntIterator { + + private final EWAHIterator32 ewahIter; + private final int sizeInBits; + private final Buffer32 buffer; + private int position; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasNext; + + ClearIntIterator32(EWAHIterator32 ewahIter, int sizeInBits) { + this.ewahIter = ewahIter; + this.sizeInBits = sizeInBits; + this.buffer = ewahIter.buffer(); + this.hasNext = this.moveToNext(); + } + + public boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (!this.ewahIter.hasNext()) { + return false; + } + setRunningLengthWord(this.ewahIter.next()); + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int t = this.word & -this.word; + answer = this.literalPosition + Integer.bitCount(t - 1); + this.word ^= t; + } + this.hasNext = this.moveToNext(); + return answer; + } + + private void setRunningLengthWord(RunningLengthWord32 rlw) { + this.runningLength = Math.min(this.sizeInBits, + WORD_IN_BITS * rlw.getRunningLength() + this.position); + if (rlw.getRunningBit()) { + this.position = this.runningLength; + } + + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + + rlw.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = ~this.buffer.getWord(this.wordPosition++); + if (this.wordPosition == this.wordLength && 
!this.ewahIter.hasNext()) { + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + if (usedBitsInLast > 0) { + this.word &= ((~0) >>> (WORD_IN_BITS - usedBitsInLast)); + } + } + this.literalPosition = this.position; + this.position += WORD_IN_BITS; + } + return this.word != 0; + } +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,15 +1,21 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ -import java.util.*; -import java.io.*; - +import com.googlecode.javaewah.ChunkIterator; import com.googlecode.javaewah.IntIterator; import com.googlecode.javaewah.LogicalElement; +import com.googlecode.javaewah32.symmetric.RunningBitmapMerge32; +import com.googlecode.javaewah32.symmetric.ThresholdFuncBitmap32; +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; /** @@ -24,1585 +30,2112 @@ * data. In effect, there is a trade-off between memory usage and performances. *

* - * @see com.googlecode.javaewah.EWAHCompressedBitmap + *

Here is a code sample to illustrate usage:

+ *
+ * EWAHCompressedBitmap32 ewahBitmap1 = EWAHCompressedBitmap32.bitmapOf(0, 2, 55, 64,
+ *         1 << 30);
+ * EWAHCompressedBitmap32 ewahBitmap2 = EWAHCompressedBitmap32.bitmapOf(1, 3, 64,
+ *         1 << 30);
+ * EWAHCompressedBitmap32 ewahBitmap3 = EWAHCompressedBitmap32
+ *         .bitmapOf(5, 55, 1 << 30);
+ * EWAHCompressedBitmap32 ewahBitmap4 = EWAHCompressedBitmap32
+ *         .bitmapOf(4, 66, 1 << 30);
+ * EWAHCompressedBitmap32 orBitmap = ewahBitmap1.or(ewahBitmap2);
+ * EWAHCompressedBitmap32 andbitmap = ewahBitmap1.and(ewahBitmap2);
+ * EWAHCompressedBitmap32 xorbitmap = ewahBitmap1.xor(ewahBitmap2);
+ * andbitmap = EWAHCompressedBitmap32.and(ewahBitmap1, ewahBitmap2, ewahBitmap3,
+ *         ewahBitmap4);
+ * ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ * ObjectOutputStream oo = new ObjectOutputStream(bos);
+ * ewahBitmap1.writeExternal(oo);
+ * oo.close();
+ * ewahBitmap1 = null;
+ * ewahBitmap1 = new EWAHCompressedBitmap32();
+ * ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
+ * ewahBitmap1.readExternal(new ObjectInputStream(bis));
+ * EWAHCompressedBitmap32 threshold2 = EWAHCompressedBitmap32.threshold(2,
+ *         ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4);
+ * 
* - *

- * The objective of this compression type is to provide some compression, - * while reducing as much as possible the CPU cycle usage. - *

- * - * - *

- * For more details, see the following paper: - *

+ *

+ * The objective of this compression type is to provide some compression, while + * reducing as much as possible the CPU cycle usage. + *

* - *
    - *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves - * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages - * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • - *
+ *

+ * Once constructed, the bitmap is essentially immutable (unless you call the + * "set" or "add" methods). Thus, it can be safely used in multi-threaded + * programs. + *

+ *

+ * For more details, see the following papers: + *

* + * + * + * @see com.googlecode.javaewah.EWAHCompressedBitmap EWAHCompressedBitmap * @since 0.5.0 */ public final class EWAHCompressedBitmap32 implements Cloneable, Externalizable, - Iterable, BitmapStorage32, LogicalElement { + Iterable, BitmapStorage32, + LogicalElement { + + /** + * Creates an empty bitmap (no bit set to true). + */ + public EWAHCompressedBitmap32() { + this(new IntArray()); - /** - * Creates an empty bitmap (no bit set to true). - */ - public EWAHCompressedBitmap32() { - this.buffer = new int[defaultbuffersize]; - this.rlw = new RunningLengthWord32(this, 0); - } - - /** - * Sets explicitly the buffer size (in 32-bit words). The initial memory usage - * will be "buffersize * 32". For large poorly compressible bitmaps, using - * large values may improve performance. - * - * @param buffersize - * number of 32-bit words reserved when the object is created) - */ - public EWAHCompressedBitmap32(final int buffersize) { - this.buffer = new int[buffersize]; - this.rlw = new RunningLengthWord32(this, 0); - } - - /** - * Adding words directly to the bitmap (for expert use). - * - * This is normally how you add data to the array. So you add bits in streams - * of 4*8 bits. - * - * Example: if you add 321, you are have added (in binary notation) - * 0b101000001, so you have effectively called set(0), set(6), set(8) - * in sequence. - * - * @param newdata - * the word - */ - @Override -public void add(final int newdata) { - add(newdata, wordinbits); - } - - /** - * Adding words directly to the bitmap (for expert use). - * - * @param newdata - * the word - * @param bitsthatmatter - * the number of significant bits (by default it should be 32) - */ - public void add(final int newdata, final int bitsthatmatter) { - this.sizeinbits += bitsthatmatter; - if (newdata == 0) { - addEmptyWord(false); - } else if (newdata == ~0) { - addEmptyWord(true); - } else { - addLiteralWord(newdata); - } - } - - /** - * For internal use. 
- * - * @param v - * the boolean value - * @return the storage cost of the addition - */ - private int addEmptyWord(final boolean v) { - final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); - final int runlen = this.rlw.getRunningLength(); - if ((noliteralword) && (runlen == 0)) { - this.rlw.setRunningBit(v); - } - if ((noliteralword) && (this.rlw.getRunningBit() == v) - && (runlen < RunningLengthWord32.largestrunninglengthcount)) { - this.rlw.setRunningLength(runlen + 1); - return 0; - } - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(1); - return 1; - } - - /** - * For internal use. - * - * @param newdata - * the literal word - * @return the storage cost of the addition - */ - private int addLiteralWord(final int newdata) { - final int numbersofar = this.rlw.getNumberOfLiteralWords(); - if (numbersofar >= RunningLengthWord32.largestliteralcount) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - this.rlw.setNumberOfLiteralWords(1); - push_back(newdata); - return 2; - } - this.rlw.setNumberOfLiteralWords(numbersofar + 1); - push_back(newdata); - return 1; - } - - /** - * if you have several literal words to copy over, this might be faster. - * - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - @Override -public void addStreamOfLiteralWords(final int[] data, final int start, - final int number) { - int leftovernumber = number; - while (leftovernumber > 0) { - final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount - - NumberOfLiteralWords ? 
leftovernumber - : RunningLengthWord32.largestliteralcount - - NumberOfLiteralWords; - this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords - + whatwecanadd); - leftovernumber -= whatwecanadd; - push_back(data, start, whatwecanadd); - this.sizeinbits += whatwecanadd * wordinbits; - if (leftovernumber > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - } - } - } - - /** - * For experts: You want to add many zeroes or ones? This is the method you - * use. - * - * @param v - * the boolean value - * @param number - * the number - */ - @Override -public void addStreamOfEmptyWords(final boolean v, int number) { - if (number == 0) - return; - this.sizeinbits += number * wordinbits; - if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { - this.rlw.setRunningBit(v); - } else if ((this.rlw.getNumberOfLiteralWords() != 0) - || (this.rlw.getRunningBit() != v)) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); } - final int runlen = this.rlw.getRunningLength(); - final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount - - runlen ? number : RunningLengthWord32.largestrunninglengthcount - - runlen; - this.rlw.setRunningLength(runlen + whatwecanadd); - number -= whatwecanadd; - while (number >= RunningLengthWord32.largestrunninglengthcount) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(RunningLengthWord32.largestrunninglengthcount); - number -= RunningLengthWord32.largestrunninglengthcount; + + /** + * Sets explicitly the buffer size (in 32-bit words). The initial memory + * usage will be "bufferSize * 32". For large poorly compressible + * bitmaps, using large values may improve performance. + * + * If the requested bufferSize is less than 1, a value of 1 is used + * by default. In particular, negative values of bufferSize are + * effectively ignored. 
+ * + * @param bufferSize number of 32-bit words reserved when the object is + * created) + */ + public EWAHCompressedBitmap32(int bufferSize) { + this(new IntArray(bufferSize)); } - if (number > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(number); + + /** + * Creates a bitmap with the specified ByteBuffer backend. It assumes + * that a bitmap was serialized at this location. It is effectively "deserialized" + * though the actual content is not copied. + * This might be useful for implementing memory-mapped bitmaps. + * + * @param buffer data source + */ + public EWAHCompressedBitmap32(ByteBuffer buffer) { + IntBuffer ib = buffer.asIntBuffer(); + this.sizeInBits = ib.get(0); + int sizeInWords = ib.get(1); + int rlwposition = ib.get(2 + sizeInWords); + ib.position(2); + this.buffer = new IntBufferWrapper(ib.slice(), sizeInWords); + this.rlw = new RunningLengthWord32(this.buffer, rlwposition); + } + + /** + * Creates a bitmap with the specified java.nio.IntBuffer backend. + * The content of the IntBuffer is discarded. + * + * @param buffer data source + */ + public EWAHCompressedBitmap32(IntBuffer buffer) { + this(new IntBufferWrapper(buffer)); } - } - /** - * Same as addStreamOfLiteralWords, but the words are negated. - * - * @param data - * the literal words - * @param start - * the starting point in the array - * @param number - * the number of literal words to add - */ - @Override -public void addStreamOfNegatedLiteralWords(final int[] data, final int start, - final int number) { - int leftovernumber = number; - while (leftovernumber > 0) { - final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); - final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount - - NumberOfLiteralWords ? 
leftovernumber - : RunningLengthWord32.largestliteralcount - - NumberOfLiteralWords; - this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords - + whatwecanadd); - leftovernumber -= whatwecanadd; - negative_push_back(data, start, whatwecanadd); - this.sizeinbits += whatwecanadd * wordinbits; - if (leftovernumber > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - } - } - } - - /** - * Returns a new compressed bitmap containing the bitwise AND values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap32 and(final EWAHCompressedBitmap32 a) { - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - container - .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords - : a.actualsizeinwords); - andToContainer(a, container); - return container; - } - - /** - * Computes new compressed bitmap containing the bitwise AND values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * @param a - * the other bitmap - * @param container - * where we store the result - */ - /** - * Computes new compressed bitmap containing the bitwise AND values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). 
- * - * @since 0.4.0 - * @param a - * the other bitmap - * @param container - * where we store the result - */ - public void andToContainer(final EWAHCompressedBitmap32 a, final BitmapStorage32 container) { - final EWAHIterator32 i = a.getEWAHIterator(); - final EWAHIterator32 j = getEWAHIterator(); - final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); - final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi - : rlwj; - final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == false) { - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); + private EWAHCompressedBitmap32(Buffer32 buffer) { + this.buffer = buffer; + this.rlw = new RunningLengthWord32(this.buffer, 0); + } + + /** + * @param newData the word + * @deprecated use addWord() instead. + */ + @Deprecated + public void add(final int newData) { + addWord(newData); + } + + /** + * @param newData the word + * @param bitsThatMatter the number of significant bits (by default it should + * be 64) + * @deprecated use addWord() instead. + */ + @Deprecated + public void add(final int newData, final int bitsThatMatter) { + addWord(newData, bitsThatMatter); + } + + + /** + * Adding words directly to the bitmap (for expert use). + * + * This method adds bits in words of 4*8 bits. It is not to + * be confused with the set method which sets individual bits. + * + * Most users will want the set method. 
+ * + * Example: if you add word 321 to an empty bitmap, you are have + * added (in binary notation) 0b101000001, so you have effectively + * called set(0), set(6), set(8) in sequence. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * API change: prior to version 0.8.3, this method was called add. + * + * @param newData the word + */ + @Override + public void addWord(final int newData) { + addWord(newData, WORD_IN_BITS); + } + + /** + * Adding words directly to the bitmap (for expert use). Since this + * modifies the bitmap, this method is not thread-safe. + * + * API change: prior to version 0.8.3, this method was called add. + * + * @param newData the word + * @param bitsThatMatter the number of significant bits (by default it should + * be 32) + */ + public void addWord(final int newData, final int bitsThatMatter) { + this.sizeInBits += bitsThatMatter; + if (newData == 0) { + insertEmptyWord(false); + } else if (newData == ~0) { + insertEmptyWord(true); } else { - final int index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - if (adjustContainerSizeWhenAggregating) { - final boolean i_remains = rlwi.size() > 0; - final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi - : rlwj; - remaining.dischargeAsEmpty(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - } - } - - - /** - * Returns the cardinality of the result of a bitwise AND of the values of the - * current bitmap with some other bitmap. 
Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @param a - * the other bitmap - * @return the cardinality - */ - public int andCardinality(final EWAHCompressedBitmap32 a) { - final BitCounter32 counter = new BitCounter32(); - andToContainer(a, counter); - return counter.getCount(); - } - - /** - * Returns a new compressed bitmap containing the bitwise AND NOT values of - * the current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap32 andNot(final EWAHCompressedBitmap32 a) { - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - container - .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords - : a.actualsizeinwords); - andNotToContainer(a, container); - return container; - } - - /** - * Returns a new compressed bitmap containing the bitwise AND NOT values of - * the current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). 
- * - * @param a the other bitmap - * @param container where we store the result - */ - public void andNotToContainer(final EWAHCompressedBitmap32 a, - final BitmapStorage32 container) { - final EWAHIterator32 i = getEWAHIterator(); - final EWAHIterator32 j = a.getEWAHIterator(); - final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); - final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi : rlwj; - final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj - : rlwi; - if ( ((predator.getRunningBit() == true) && (i_is_prey)) - || ((predator.getRunningBit() == false) && (!i_is_prey))){ - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else if (i_is_prey) { - int index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); + insertLiteralWord(newData); + } + } + + /** + * For internal use. 
+ * + * @param v the boolean value + */ + private void insertEmptyWord(final boolean v) { + final boolean noliteralword = (this.rlw + .getNumberOfLiteralWords() == 0); + final int runlen = this.rlw.getRunningLength(); + if ((noliteralword) && (runlen == 0)) { + this.rlw.setRunningBit(v); + } + if ((noliteralword) + && (this.rlw.getRunningBit() == v) + && (runlen < RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT)) { + this.rlw.setRunningLength(runlen + 1); + return; + } + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(1); + } + + /** + * Adding literal words directly to the bitmap (for expert use). + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param newData the literal word + */ + @Override + public void addLiteralWord(final int newData) { + this.sizeInBits += WORD_IN_BITS; + insertLiteralWord(newData); + } + + /** + * For internal use. + * + * @param newData the literal word + */ + private void insertLiteralWord(final int newData) { + final int numberSoFar = this.rlw.getNumberOfLiteralWords(); + if (numberSoFar >= RunningLengthWord32.LARGEST_LITERAL_COUNT) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + this.rlw.setNumberOfLiteralWords(1); + this.buffer.push_back(newData); } else { - int index = prey.dischargeNegated(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(true, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - final boolean i_remains = rlwi.size()>0; - final 
IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; - if(i_remains) - remaining.discharge(container); - else if (adjustContainerSizeWhenAggregating) - remaining.dischargeAsEmpty(container); - if (adjustContainerSizeWhenAggregating) - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - - } - - /** - * Returns the cardinality of the result of a bitwise AND NOT of the values of - * the current bitmap with some other bitmap. Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @param a - * the other bitmap - * @return the cardinality - */ - public int andNotCardinality(final EWAHCompressedBitmap32 a) { - final BitCounter32 counter = new BitCounter32(); - andNotToContainer(a, counter); - return counter.getCount(); - } - - /** - * reports the number of bits set to true. Running time is proportional to - * compressed size (as reported by sizeInBytes). - * - * @return the number of bits set to true - */ - public int cardinality() { - int counter = 0; - final EWAHIterator32 i = new EWAHIterator32(this, - this.actualsizeinwords); - while (i.hasNext()) { - RunningLengthWord32 localrlw = i.next(); - if (localrlw.getRunningBit()) { - counter += wordinbits * localrlw.getRunningLength(); - } - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - counter += Integer.bitCount(i.buffer()[i.literalWords() + j]); - } - } - return counter; - } - - /** - * Clear any set bits and set size in bits back to 0 - */ - public void clear() { - this.sizeinbits = 0; - this.actualsizeinwords = 1; - this.rlw.position = 0; - // buffer is not fully cleared but any new set operations should overwrite - // stale data - this.buffer[0] = 0; - } - - /* - * @see java.lang.Object#clone() - */ - @Override - public EWAHCompressedBitmap32 clone() throws java.lang.CloneNotSupportedException { - final EWAHCompressedBitmap32 clone = (EWAHCompressedBitmap32) super.clone(); - clone.buffer = this.buffer.clone(); - 
clone.actualsizeinwords = this.actualsizeinwords; - clone.sizeinbits = this.sizeinbits; - return clone; - } - - /** - * Deserialize. - * - * @param in - * the DataInput stream - * @throws IOException - * Signals that an I/O exception has occurred. - */ - public void deserialize(DataInput in) throws IOException { - this.sizeinbits = in.readInt(); - this.actualsizeinwords = in.readInt(); - if (this.buffer.length < this.actualsizeinwords) { - this.buffer = new int[this.actualsizeinwords]; - } - for (int k = 0; k < this.actualsizeinwords; ++k) - this.buffer[k] = in.readInt(); - this.rlw = new RunningLengthWord32(this, in.readInt()); - } - - /** - * Check to see whether the two compressed bitmaps contain the same set bits. - * - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object o) { - if (o instanceof EWAHCompressedBitmap32) { - try { - this.xorToContainer((EWAHCompressedBitmap32) o, new NonEmptyVirtualStorage32()); - return true; - } catch (NonEmptyVirtualStorage32.NonEmptyException e) { + this.rlw.setNumberOfLiteralWords(numberSoFar + 1); + this.buffer.push_back(newData); + } + } + + /** + * if you have several literal words to copy over, this might be faster. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + @Override + public void addStreamOfLiteralWords(final Buffer32 buffer, final int start, + final int number) { + int leftOverNumber = number; + while (leftOverNumber > 0) { + final int numberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatWeCanAdd = leftOverNumber < RunningLengthWord32.LARGEST_LITERAL_COUNT + - numberOfLiteralWords ? 
leftOverNumber + : RunningLengthWord32.LARGEST_LITERAL_COUNT + - numberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(numberOfLiteralWords + whatWeCanAdd); + leftOverNumber -= whatWeCanAdd; + this.buffer.push_back(buffer, start, whatWeCanAdd); + this.sizeInBits += whatWeCanAdd * WORD_IN_BITS; + if (leftOverNumber > 0) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + } + } + } + + /** + * For experts: You want to add many zeroes or ones? This is the method + * you use. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param v the boolean value + * @param number the number + */ + @Override + public void addStreamOfEmptyWords(final boolean v, int number) { + if (number == 0) + return; + this.sizeInBits += number * WORD_IN_BITS; + fastaddStreamOfEmptyWords(v, number); + } + + /** + * Same as addStreamOfLiteralWords, but the words are negated. + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param buffer the buffer wrapping the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + @Override + public void addStreamOfNegatedLiteralWords(final Buffer32 buffer, + final int start, final int number) { + int leftOverNumber = number; + while (leftOverNumber > 0) { + final int NumberOfLiteralWords = this.rlw + .getNumberOfLiteralWords(); + final int whatwecanadd = leftOverNumber < RunningLengthWord32.LARGEST_LITERAL_COUNT + - NumberOfLiteralWords ? 
leftOverNumber + : RunningLengthWord32.LARGEST_LITERAL_COUNT + - NumberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + + whatwecanadd); + leftOverNumber -= whatwecanadd; + this.buffer.negative_push_back(buffer, start, whatwecanadd); + this.sizeInBits += whatwecanadd * WORD_IN_BITS; + if (leftOverNumber > 0) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + } + } + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of + * the current bitmap with some other bitmap. The current bitmap + * is not modified. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap32 and(final EWAHCompressedBitmap32 a) { + int size = this.buffer.sizeInWords() > a.buffer.sizeInWords() ? this.buffer.sizeInWords() + : a.buffer.sizeInWords(); + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(size); + andToContainer(a, container); + return container; + } + + /** + * Computes new compressed bitmap containing the bitwise AND values of + * the current bitmap with some other bitmap. + * The current bitmap is not modified. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * The content of the container is overwritten. 
+ * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + * @since 0.4.0 + */ + public void andToContainer(final EWAHCompressedBitmap32 a, + final BitmapStorage32 container) { + container.clear(); + final EWAHIterator32 i = a.getEWAHIterator(); + final EWAHIterator32 j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32( + i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32( + j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj + : rlwi; + if (!predator.getRunningBit()) { + container.addStreamOfEmptyWords(false, + predator.getRunningLength()); + prey.discardFirstWords(predator + .getRunningLength()); + } else { + final int index = prey.discharge( + container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + } + predator.discardRunningWords(); + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) + & rlwj.getLiteralWordAt(k)); + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + + if (ADJUST_CONTAINER_SIZE_WHEN_AGGREGATING) { + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), + a.sizeInBits())); + } + } + + /** + * Returns the cardinality of the result of a bitwise AND of the values + * of the current bitmap with some other bitmap. Avoids + * allocating an intermediate bitmap to hold the result of the OR. 
+ * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + */ + public int andCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + andToContainer(a, counter); + return counter.getCount(); + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values + * of the current bitmap with some other bitmap. The current bitmap + * is not modified. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap32 andNot(final EWAHCompressedBitmap32 a) { + int size = this.buffer.sizeInWords() > a.buffer.sizeInWords() ? this.buffer.sizeInWords() + : a.buffer.sizeInWords(); + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(size); + andNotToContainer(a, container); + return container; + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values + * of the current bitmap with some other bitmap. The current bitmap + * is not modified. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * The content of the container is overwritten. 
+ * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + */ + public void andNotToContainer(final EWAHCompressedBitmap32 a, + final BitmapStorage32 container) { + container.clear(); + final EWAHIterator32 i = getEWAHIterator(); + final EWAHIterator32 j = a.getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32( + i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32( + j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj + : rlwi; + if (((predator.getRunningBit()) && (i_is_prey)) + || ((!predator.getRunningBit()) && (!i_is_prey))) { + container.addStreamOfEmptyWords(false, + predator.getRunningLength()); + prey.discardFirstWords(predator + .getRunningLength()); + } else if (i_is_prey) { + final int index = prey.discharge(container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + } else { + final int index = prey.dischargeNegated( + container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(true, + predator.getRunningLength() + - index + ); + } + predator.discardRunningWords(); + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) + & (~rlwj.getLiteralWordAt(k))); + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord32 remaining = 
i_remains ? rlwi : rlwj; + if (i_remains) + remaining.discharge(container); + if (ADJUST_CONTAINER_SIZE_WHEN_AGGREGATING) + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), + a.sizeInBits())); + + } + + /** + * Returns the cardinality of the result of a bitwise AND NOT of the + * values of the current bitmap with some other bitmap. Avoids allocating + * an intermediate bitmap to hold the result of the OR. + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + */ + public int andNotCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + andNotToContainer(a, counter); + return counter.getCount(); + } + + /** + * reports the number of bits set to true. Running time is proportional + * to compressed size (as reported by sizeInBytes). + * + * @return the number of bits set to true + */ + public int cardinality() { + int counter = 0; + final EWAHIterator32 i = this.getEWAHIterator(); + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + counter += WORD_IN_BITS + * localrlw.getRunningLength(); + } + final int numberOfLiteralWords = localrlw.getNumberOfLiteralWords(); + final int literalWords = i.literalWords(); + for (int j = 0; j < numberOfLiteralWords; ++j) { + counter += Integer.bitCount(i.buffer().getWord(literalWords + j)); + } + } + return counter; + } + + /** + * Clear any set bits and set size in bits back to 0 + */ + @Override + public void clear() { + this.sizeInBits = 0; + this.buffer.clear(); + this.rlw.position = 0; + } + + /* + * @see java.lang.Object#clone() + */ + @Override + public EWAHCompressedBitmap32 clone() throws CloneNotSupportedException { + EWAHCompressedBitmap32 clone = new EWAHCompressedBitmap32(this.buffer.clone()); + clone.sizeInBits = this.sizeInBits; + clone.rlw = new RunningLengthWord32(clone.buffer, this.rlw.position); + return clone; + } + + /** + * Serialize. 
+ * + * The current bitmap is not modified. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void serialize(DataOutput out) throws IOException { + out.writeInt(this.sizeInBits); + final int siw = this.buffer.sizeInWords(); + out.writeInt(siw); + for(int i = 0; i < siw ; ++i) { + out.writeInt(this.buffer.getWord(i)); + } + out.writeInt(this.rlw.position); + } + + /** + * Deserialize. + * + * @param in the ObjectInput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void deserialize(DataInput in) throws IOException { + this.sizeInBits = in.readInt(); + int sizeInWords = in.readInt(); + this.buffer.clear(); //This creates a buffer with 1 word in it already! + this.buffer.removeLastWord(); + this.buffer.ensureCapacity(sizeInWords); + for(int i = 0; i < sizeInWords; ++i) { + this.buffer.push_back(in.readInt()); + } + this.rlw = new RunningLengthWord32(this.buffer, in.readInt()); + } + + /** + * Check to see whether the two compressed bitmaps contain the same set + * bits. + * + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object o) { + if (o instanceof EWAHCompressedBitmap32) { + try { + this.xorToContainer((EWAHCompressedBitmap32) o, + new NonEmptyVirtualStorage32()); + return true; + } catch (NonEmptyVirtualStorage32.NonEmptyException e) { + return false; + } + } return false; - } } - return false; - } - /** - * For experts: You want to add many zeroes or ones faster? - * - * This method does not update sizeinbits. 
- * - * @param v - * the boolean value - * @param number - * the number (must be greater than 0) - */ - private void fastaddStreamOfEmptyWords(final boolean v, int number) { - if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { - this.rlw.setRunningBit(v); - } else if ((this.rlw.getNumberOfLiteralWords() != 0) - || (this.rlw.getRunningBit() != v)) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); + /** + * For experts: You want to add many zeroes or ones faster? + * + * This method does not update sizeInBits. + * + * @param v the boolean value + * @param number the number (must be greater than 0) + */ + private void fastaddStreamOfEmptyWords(final boolean v, int number) { + if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { + this.rlw.setRunningBit(v); + } else if ((this.rlw.getNumberOfLiteralWords() != 0) + || (this.rlw.getRunningBit() != v)) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + if (v) + this.rlw.setRunningBit(true); + } + final int runLen = this.rlw.getRunningLength(); + final int whatWeCanAdd = number < RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT + - runLen ? 
number + : RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT + - runLen; + this.rlw.setRunningLength(runLen + whatWeCanAdd); + number -= whatWeCanAdd; + while (number >= RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + if (v) + this.rlw.setRunningBit(true); + this.rlw.setRunningLength(RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT); + number -= RunningLengthWord32.LARGEST_RUNNING_LENGTH_COUNT; + } + if (number > 0) { + this.buffer.push_back(0); + this.rlw.position = this.buffer.sizeInWords() - 1; + if (v) + this.rlw.setRunningBit(true); + this.rlw.setRunningLength(number); + } } - final int runlen = this.rlw.getRunningLength(); - final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount - - runlen ? number : RunningLengthWord32.largestrunninglengthcount - - runlen; - this.rlw.setRunningLength(runlen + whatwecanadd); - number -= whatwecanadd; - while (number >= RunningLengthWord32.largestrunninglengthcount) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(RunningLengthWord32.largestrunninglengthcount); - number -= RunningLengthWord32.largestrunninglengthcount; + + /** + * Gets an EWAHIterator32 over the data. This is a customized iterator + * which iterates over run length words. For experts only. + * + * The current bitmap is not modified. + * + * @return the EWAHIterator32 + */ + public EWAHIterator32 getEWAHIterator() { + return new EWAHIterator32(this.buffer); } - if (number > 0) { - push_back(0); - this.rlw.position = this.actualsizeinwords - 1; - if (v) - this.rlw.setRunningBit(v); - this.rlw.setRunningLength(number); + + /** + * Gets a ReverseEWAHIterator32 over the data. This is a customized iterator + * which iterates over run length words in reverse order. For experts only. + * + * The current bitmap is not modified. 
+ * + * @return the ReverseEWAHIterator32 + */ + private ReverseEWAHIterator32 getReverseEWAHIterator() { + return new ReverseEWAHIterator32(this.buffer); + } + + /** + * Gets an IteratingRLW to iterate over the data. For experts only. + * + * Note that iterator does not know about the size in bits of the + * bitmap: the size in bits is effectively rounded up to the nearest + * multiple of 32. However, if you materialize a bitmap from + * an iterator, you can set the desired size in bits using the + * setSizeInBitsWithinLastWord methods: + * + * + * EWAHCompressedBitmap32 n = IteratorUtil32.materialize(bitmap.getIteratingRLW())); + * n.setSizeInBitsWithinLastWord(bitmap.sizeInBits()); + * + + * + * The current bitmap is not modified. + * + * @return the IteratingRLW iterator corresponding to this bitmap + */ + public IteratingRLW32 getIteratingRLW() { + return new IteratingBufferedRunningLengthWord32(this); + } + + /** + * @return a list + * @deprecated use toList() instead. + */ + @Deprecated + public List getPositions() { + return toList(); + } + + /** + * Gets the locations of the true values as one list. (May use more + * memory than iterator().) + * + * The current bitmap is not modified. + * + * API change: prior to version 0.8.3, this method was called getPositions. 
+ * + * @return the positions + */ + public List toList() { + final ArrayList v = new ArrayList(); + final EWAHIterator32 i = this.getEWAHIterator(); + int pos = 0; + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + final int N = localrlw.getRunningLength(); + for (int j = 0; j < N; ++j) { + for (int c = 0; c < WORD_IN_BITS; ++c) + v.add(pos++); + } + } else { + pos += WORD_IN_BITS * localrlw.getRunningLength(); + } + final int nlw = localrlw.getNumberOfLiteralWords(); + for (int j = 0; j < nlw; ++j) { + int data = i.buffer().getWord(i.literalWords() + j); + while (data != 0) { + final int T = data & -data; + v.add(Integer.bitCount(T - 1) + + pos); + data ^= T; + } + pos += WORD_IN_BITS; + } + } + while ((v.size() > 0) + && (v.get(v.size() - 1) >= this.sizeInBits)) + v.remove(v.size() - 1); + return v; + } + + /** + * Returns a customized hash code (based on Karp-Rabin). Naturally, if + * the bitmaps are equal, they will hash to the same value. + * + * The current bitmap is not modified. + */ + @Override + public int hashCode() { + int karprabin = 0; + final int B = 0x9e3779b1; + final EWAHIterator32 i = this.getEWAHIterator(); + while (i.hasNext()) { + i.next(); + if (i.rlw.getRunningBit()) { + final int rl = i.rlw.getRunningLength(); + karprabin += B * rl; + } + final int nlw = i.rlw.getNumberOfLiteralWords(); + final int lw = i.literalWords(); + for (int k = 0; k < nlw; ++k) { + long W = this.buffer.getWord(lw + k); + karprabin += (int) (B * W); + } + } + return karprabin; + } + + /** + * Return true if the two EWAHCompressedBitmap have both at least one + * true bit in the same position. Equivalently, you could call "and" and + * check whether there is a set bit, but intersects will run faster if + * you don't need the result of the "and" operation. + * + * The current bitmap is not modified. 
+ * + * @param a the other bitmap (it will not be modified) + * @return whether they intersect + */ + public boolean intersects(final EWAHCompressedBitmap32 a) { + NonEmptyVirtualStorage32 nevs = new NonEmptyVirtualStorage32(); + try { + this.andToContainer(a, nevs); + } catch (NonEmptyVirtualStorage32.NonEmptyException nee) { + return true; + } + return false; + } + + /** + * Iterator over the set bits (this is what most people will want to use + * to browse the content if they want an iterator). The location of the + * set bits is returned, in increasing order. + * + * The current bitmap is not modified. + * + * @return the int iterator + */ + public IntIterator intIterator() { + return new IntIteratorImpl32(this.getEWAHIterator()); } - } - /** - * Gets an EWAHIterator over the data. This is a customized iterator which - * iterates over run length word. For experts only. - * - * @return the EWAHIterator - */ - public EWAHIterator32 getEWAHIterator() { - return new EWAHIterator32(this, this.actualsizeinwords); - } - - /** - * @return the IteratingRLW iterator corresponding to this bitmap - */ - public IteratingRLW32 getIteratingRLW() { - return new IteratingBufferedRunningLengthWord32(this); - } - - /** - * get the locations of the true values as one vector. 
(may use more memory - * than iterator()) - * - * @return the positions - */ - public List getPositions() { - final ArrayList v = new ArrayList(); - final EWAHIterator32 i = new EWAHIterator32(this, - this.actualsizeinwords); - int pos = 0; - while (i.hasNext()) { - RunningLengthWord32 localrlw = i.next(); - if (localrlw.getRunningBit()) { - for (int j = 0; j < localrlw.getRunningLength(); ++j) { - for (int c = 0; c < wordinbits; ++c) - v.add(new Integer(pos++)); - } - } else { - pos += wordinbits * localrlw.getRunningLength(); - } - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - int data = i.buffer()[i.literalWords() + j]; - while (data != 0) { - final int ntz = Integer.numberOfTrailingZeros(data); - data ^= (1 << ntz); - v.add(new Integer(ntz + pos)); - } - pos += wordinbits; - } - } - while ((v.size() > 0) - && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) - v.remove(v.size() - 1); - return v; - } - - /** - * Returns a customized hash code (based on Karp-Rabin). Naturally, if the - * bitmaps are equal, they will hash to the same value. - * - */ - @Override - public int hashCode() { - int karprabin = 0; - final int B = 31; - final EWAHIterator32 i = new EWAHIterator32(this, - this.actualsizeinwords); - while( i.hasNext() ) { - i.next(); - if (i.rlw.getRunningBit() == true) { - karprabin += B * karprabin + i.rlw.getRunningLength(); - } - for (int k = 0; k < i.rlw.getNumberOfLiteralWords(); ++k) { - karprabin += B * karprabin + this.buffer[k + i.literalWords()]; - } - } - return karprabin; - } - - /** - * Return true if the two EWAHCompressedBitmap have both at least one true bit - * in the same position. Equivalently, you could call "and" and check whether - * there is a set bit, but intersects will run faster if you don't need the - * result of the "and" operation. 
- * - * @param a - * the other bitmap - * @return whether they intersect - */ - public boolean intersects(final EWAHCompressedBitmap32 a) { - NonEmptyVirtualStorage32 nevs = new NonEmptyVirtualStorage32(); - try { - this.andToContainer(a, nevs); - } catch (NonEmptyVirtualStorage32.NonEmptyException nee) { - return true; - } - return false; - } - - /** - * Iterator over the set bits (this is what most people will want to use to - * browse the content if they want an iterator). The location of the set bits - * is returned, in increasing order. - * - * @return the int iterator - */ - public IntIterator intIterator() { - return new IntIteratorImpl32( - new EWAHIterator32(this, this.actualsizeinwords)); - } - - /** - * iterate over the positions of the true values. This is similar to - * intIterator(), but it uses Java generics. - * - * @return the iterator - */ - @Override -public Iterator iterator() { - return new Iterator() { - @Override - public boolean hasNext() { - return this.under.hasNext(); - } - - @Override - public Integer next() { - return new Integer(this.under.next()); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("bitsets do not support remove"); - } - - final private IntIterator under = intIterator(); - }; - } - - /** - * For internal use. 
- * - * @param data - * the array of words to be added - * @param start - * the starting point - * @param number - * the number of words to add - */ - private void negative_push_back(final int[] data, final int start, - final int number) { - while (this.actualsizeinwords + number >= this.buffer.length) { - final int oldbuffer[] = this.buffer; - if(this.actualsizeinwords + number < 32768) - this.buffer = new int[(this.actualsizeinwords + number) * 2]; - else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) - this.buffer = new int[Integer.MAX_VALUE]; - else - this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - } - for (int k = 0; k < number; ++k) - this.buffer[this.actualsizeinwords + k] = ~data[start + k]; - this.actualsizeinwords += number; - } - - /** - * Negate (bitwise) the current bitmap. To get a negated copy, do - * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not(); - * - * The running time is proportional to the compressed size (as reported by - * sizeInBytes()). - * - */ - @Override -public void not() { - final EWAHIterator32 i = new EWAHIterator32(this, - this.actualsizeinwords); - if (!i.hasNext()) - return; - while (true) { - final RunningLengthWord32 rlw1 = i.next(); - rlw1.setRunningBit(!rlw1.getRunningBit()); - for (int j = 0; j < rlw1.getNumberOfLiteralWords(); ++j) { - i.buffer()[i.literalWords() + j] = ~i.buffer()[i.literalWords() + j]; - } - if (!i.hasNext()) {// must potentially adjust the last literal word - final int usedbitsinlast = this.sizeinbits % wordinbits; - if (usedbitsinlast == 0) + /** + * Iterator over the set bits in reverse order. + * + * The current bitmap is not modified. 
+ * + * @return the int iterator + */ + public IntIterator reverseIntIterator() { + return new ReverseIntIterator32(this.getReverseEWAHIterator(), this.sizeInBits); + } + + /** + * Checks whether this bitmap is empty (has a cardinality of zero). + * + * @return true if no bit is set + */ + public boolean isEmpty() { + return getFirstSetBit() < 0; + } + + /** + * Iterator over the clear bits. The location of the clear bits is + * returned, in increasing order. + * + * The current bitmap is not modified. + * + * @return the int iterator + */ + public IntIterator clearIntIterator() { + return new ClearIntIterator32(this.getEWAHIterator(), this.sizeInBits); + } + + /** + * Iterator over the chunk of bits. + * + * The current bitmap is not modified. + * + * @return the chunk iterator + */ + public ChunkIterator chunkIterator() { + return new ChunkIteratorImpl32(this.getEWAHIterator(), this.sizeInBits); + } + + /** + * Iterates over the positions of the true values. This is similar to + * intIterator(), but it uses Java generics. + * + * The current bitmap is not modified. + * + * @return the iterator + */ + @Override + public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return this.under.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException( + "bitsets do not support remove"); + } + + private final IntIterator under = intIterator(); + }; + } + + /** + * Negate (bitwise) the current bitmap. To get a negated copy, do + * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); + * x.not(); + * + * The running time is proportional to the compressed size (as reported + * by sizeInBytes()). + * + * Because this method modifies the bitmap, it is not thread-safe. 
+ */ + @Override + public void not() { + final EWAHIterator32 i = this.getEWAHIterator(); + if (!i.hasNext()) return; + while (true) { + final RunningLengthWord32 rlw1 = i.next(); + rlw1.setRunningBit(!rlw1.getRunningBit()); + int nlw = rlw1.getNumberOfLiteralWords(); + for (int j = 0; j < nlw; ++j) { + i.buffer().negateWord(i.literalWords() + j); + } + if (!i.hasNext()) {// must potentially adjust the last + // literal word + final int usedbitsinlast = this.sizeInBits + % WORD_IN_BITS; + if (usedbitsinlast == 0) + return; + if (rlw1.getNumberOfLiteralWords() == 0) { + if ((rlw1.getRunningLength() > 0) + && (rlw1.getRunningBit())) { + if ((rlw1.getRunningLength() == 1) + && (rlw1.position > 0)) { + // we need to prune ending + final EWAHIterator32 j = this.getEWAHIterator(); + int newrlwpos = this.rlw.position; + while (j.hasNext()) { + RunningLengthWord32 r = j.next(); + if (r.position < rlw1.position) { + newrlwpos = r.position; + } else break; + } + this.rlw.position = newrlwpos; + this.buffer.removeLastWord(); + } else + rlw1.setRunningLength(rlw1.getRunningLength() - 1); + this.insertLiteralWord((~0) >>> (WORD_IN_BITS - usedbitsinlast)); + + } + return; + } + i.buffer().andWord(i.literalWords() + + rlw1.getNumberOfLiteralWords() - 1, (~0) >>> (WORD_IN_BITS - usedbitsinlast)); + return; + } + } + } + + /** + * Returns a new compressed bitmap containing the bitwise OR values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. 
+ * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32 a) { + int size = this.buffer.sizeInWords() + a.buffer.sizeInWords(); + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(size); + orToContainer(a, container); + return container; + } + + /** + * Computes the bitwise or between the current bitmap and the bitmap + * "a". Stores the result in the container. + * + * The current bitmap is not modified. + * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + */ + public void orToContainer(final EWAHCompressedBitmap32 a, + final BitmapStorage32 container) { + container.clear(); + final EWAHIterator32 i = a.getEWAHIterator(); + final EWAHIterator32 j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32( + i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32( + j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit()) { + container.addStreamOfEmptyWords(true, + predator.getRunningLength()); + prey.discardFirstWords(predator + .getRunningLength()); + } else { + final int index = prey.discharge(container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + } + predator.discardRunningWords(); + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) { + container.addWord(rlwi.getLiteralWordAt(k) + | rlwj.getLiteralWordAt(k)); + } + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + if ((rlwj.size() > 0) && (rlwi.size() > 0)) throw new RuntimeException("fds"); + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi + : rlwj; + remaining.discharge(container); + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values + * of the current bitmap with some other bitmap. Avoids allocating + * an intermediate bitmap to hold the result of the OR. + * + * The current bitmap is not modified. 
+ * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + */ + public int orCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + orToContainer(a, counter); + return counter.getCount(); + } + + /* + * @see java.io.Externalizable#readExternal(java.io.ObjectInput) + */ + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + deserialize(in); + } + + /* + * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) + */ + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } - if (rlw1.getNumberOfLiteralWords() == 0) { - if((rlw1.getRunningLength()>0) && (rlw1.getRunningBit())) { - rlw1.setRunningLength(rlw1.getRunningLength()-1); - this.addLiteralWord((~0) >>> (wordinbits - usedbitsinlast)); - } - return; - } - i.buffer()[i.literalWords() + rlw1.getNumberOfLiteralWords() - 1] &= ((~0) >>> (wordinbits - usedbitsinlast)); - return; - } - - } - } - - - /** - * Returns a new compressed bitmap containing the bitwise OR values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32 a) { - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - container.reserve(this.actualsizeinwords + a.actualsizeinwords); - orToContainer(a, container); - return container; - } - - /** - * Computes the bitwise or between the current bitmap and the bitmap "a". - * Stores the result in the container. 
- * - * @param a - * the other bitmap - * @param container - * where we store the result - */ - public void orToContainer(final EWAHCompressedBitmap32 a, final BitmapStorage32 container) { - final EWAHIterator32 i = a.getEWAHIterator(); - final EWAHIterator32 j = getEWAHIterator(); - final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); - final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi - : rlwj; - final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == true) { - container.addStreamOfEmptyWords(true, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); + /** + * Report the number of bytes required to serialize this bitmap. + * + * The current bitmap is not modified. + * + * @return the size in bytes + */ + public int serializedSizeInBytes() { + return this.sizeInBytes() + 3 * 4; + } + + /** + * Query the value of a single bit. Relying on this method when speed is + * needed is discouraged. The complexity is linear with the size of the + * bitmap. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) + * + * The current bitmap is not modified. 
+ * + * @param i the bit we are interested in + * @return whether the bit is set to true + */ + public boolean get(final int i) { + if ((i < 0) || (i >= this.sizeInBits)) + return false; + int wordChecked = 0; + final IteratingRLW32 j = getIteratingRLW(); + final int wordi = i / WORD_IN_BITS; + while (wordChecked <= wordi) { + wordChecked += j.getRunningLength(); + if (wordi < wordChecked) { + return j.getRunningBit(); + } + if (wordi < wordChecked + j.getNumberOfLiteralWords()) { + final int w = j.getLiteralWordAt(wordi + - wordChecked); + return (w & (1 << i)) != 0; + } + wordChecked += j.getNumberOfLiteralWords(); + j.next(); + } + return false; + } + + /** + * getFirstSetBit is a light-weight method that returns the + * location of the set bit (=1) or -1 if there is none. + * + * @return location of the first set bit or -1 + */ + public int getFirstSetBit() { + int nword = 0; + final int siw = this.buffer.sizeInWords(); + for(int pos = 0; pos < siw; ++pos) { + int rl = RunningLengthWord32.getRunningLength(this.buffer, pos); + boolean rb = RunningLengthWord32.getRunningBit(this.buffer, pos); + if((rl > 0) && rb) { + return nword * WORD_IN_BITS; + } + nword += rl; + int lw = RunningLengthWord32.getNumberOfLiteralWords(this.buffer, pos); + if(lw > 0) { + int word = this.buffer.getWord(pos + 1); + if(word != 0) { + int T = word & -word; + return nword * WORD_IN_BITS + Integer.bitCount(T - 1); + } + } + } + return -1; + } + + /** + * Set the bit at position i to false. + * + * Though you can clear the bits in any order (e.g., clear(100), clear(10), clear(1), + * you will typically get better performance if you clear the bits in increasing order (e.g., clear(1), clear(10), clear(100)). + * + * Clearing a bit that is larger than the biggest bit is a constant time operation. + * Clearing a bit that is smaller than the biggest bit can require time proportional + * to the compressed size of the bitmap, as the bitmap may need to be rewritten. 
+ * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param i the index + * @return true if the value was unset + * @throws IndexOutOfBoundsException if i is negative or greater than Integer.MAX_VALUE - 32 + */ + public boolean clear(final int i) { + return set(i, false); + } + + /** + * Set the bit at position i to true. + * + * Though you can set the bits in any order (e.g., set(100), set(10), set(1), + * you will typically get better performance if you set the bits in increasing order (e.g., set(1), set(10), set(100)). + * + * Setting a bit that is larger than any of the current set bit + * is a constant time operation. Setting a bit that is smaller than an + * already set bit can require time proportional to the compressed + * size of the bitmap, as the bitmap may need to be rewritten. + + * + * Since this modifies the bitmap, this method is not thread-safe. + * + * @param i the index + * @return true if the value was set + * @throws IndexOutOfBoundsException if i is negative or greater than Integer.MAX_VALUE - 32 + */ + public boolean set(final int i) { + return set(i, true); + } + + /** + * For internal use. 
+ * + * @param i the index + * @param value the value + */ + private boolean set(final int i, boolean value) { + if ((i > Integer.MAX_VALUE - WORD_IN_BITS) || (i < 0)) + throw new IndexOutOfBoundsException( + "Position should be between 0 and " + + (Integer.MAX_VALUE - WORD_IN_BITS) + ); + if (i < this.sizeInBits) { + locateAndSet(i, value); } else { - int index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) { - container.add(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)); - } - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; - remaining.discharge(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - } - /** - * Returns the cardinality of the result of a bitwise OR of the values of the - * current bitmap with some other bitmap. Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @param a - * the other bitmap - * @return the cardinality - */ - public int orCardinality(final EWAHCompressedBitmap32 a) { - final BitCounter32 counter = new BitCounter32(); - orToContainer(a, counter); - return counter.getCount(); - } - - /** - * For internal use. 
- * - * @param data - * the word to be added - */ - private void push_back(final int data) { - if (this.actualsizeinwords == this.buffer.length) { - final int oldbuffer[] = this.buffer; - if(oldbuffer.length < 32768) - this.buffer = new int[oldbuffer.length * 2]; - else if (oldbuffer.length * 3 / 2 < oldbuffer.length) - this.buffer = new int[Integer.MAX_VALUE]; - else - this.buffer = new int[oldbuffer.length * 3 / 2]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - } - this.buffer[this.actualsizeinwords++] = data; - } - - /** - * For internal use. - * - * @param data - * the array of words to be added - * @param start - * the starting point - * @param number - * the number of words to add - */ - private void push_back(final int[] data, final int start, final int number) { - if (this.actualsizeinwords + number >= this.buffer.length) { - final int oldbuffer[] = this.buffer; - if(this.actualsizeinwords + number < 32768) - this.buffer = new int[(this.actualsizeinwords + number) * 2]; - else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) //overflow - this.buffer = new int[Integer.MAX_VALUE]; - else - this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - } - System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); - this.actualsizeinwords += number; - } - - /* - * @see java.io.Externalizable#readExternal(java.io.ObjectInput) - */ - @Override -public void readExternal(ObjectInput in) throws IOException { - deserialize(in); - } - - /** - * For internal use (trading off memory for speed). - * - * @param size - * the number of words to allocate - * @return True if the operation was a success. 
- */ - private boolean reserve(final int size) { - if (size > this.buffer.length) { - final int oldbuffer[] = this.buffer; - this.buffer = new int[size]; - System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); - this.rlw.parent.buffer = this.buffer; - return true; - } - return false; - } - - /** - * Serialize. - * - * @param out - * the DataOutput stream - * @throws IOException - * Signals that an I/O exception has occurred. - */ - public void serialize(DataOutput out) throws IOException { - out.writeInt(this.sizeinbits); - out.writeInt(this.actualsizeinwords); - for (int k = 0; k < this.actualsizeinwords; ++k) - out.writeInt(this.buffer[k]); - out.writeInt(this.rlw.position); - } - - /** - * Report the size required to serialize this bitmap - * - * @return the size in bytes - */ - public int serializedSizeInBytes() { - return this.sizeInBytes() + 3 * 4; - } - - /** - * Query the value of a single bit. Relying on this method when speed is - * needed is discouraged. The complexity is linear with the size of the - * bitmap. - * - * (This implementation is based on zhenjl's Go version of JavaEWAH.) - * - * @param i - * the bit we are interested in - * @return whether the bit is set to true - */ - public boolean get(final int i) { - if ((i < 0) || (i >= this.sizeinbits)) - return false; - int WordChecked = 0; - final IteratingRLW32 j = getIteratingRLW(); - final int wordi = i / wordinbits; - while (WordChecked <= wordi) { - WordChecked += j.getRunningLength(); - if (wordi < WordChecked) { - return j.getRunningBit(); - } - if (wordi < WordChecked + j.getNumberOfLiteralWords()) { - final int w = j.getLiteralWordAt(wordi - - WordChecked); - return (w & (1 << i)) != 0; - } - WordChecked += j.getNumberOfLiteralWords(); - j.next(); + extendAndSet(i, value); + } + return true; + } + + /** + * For internal use. 
+ * + * @param i the index + * @param value the value + */ + private void extendAndSet(int i, boolean value) { + final int dist = distanceInWords(i); + this.sizeInBits = i + 1; + if(value) { + if (dist > 0) { + if (dist > 1) { + fastaddStreamOfEmptyWords(false, dist - 1); } - return false; + insertLiteralWord(1 << (i % WORD_IN_BITS)); + } + if (this.rlw.getNumberOfLiteralWords() == 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + insertLiteralWord(1 << (i % WORD_IN_BITS)); + } + this.buffer.orLastWord(1 << (i % WORD_IN_BITS)); + if (this.buffer.getLastWord() == ~0) { + this.buffer.removeLastWord(); + this.rlw.setNumberOfLiteralWords(this.rlw + .getNumberOfLiteralWords() - 1); + // next we add one clean word + insertEmptyWord(true); + } + } else { + if (dist > 0) { + fastaddStreamOfEmptyWords(false, dist); + } + } + } + + /** + * For internal use. + * + * @param i the index + * @param value the value + */ + private void locateAndSet(int i, boolean value) { + int nbits = 0; + final int siw = this.buffer.sizeInWords(); + for(int pos = 0; pos < siw; ) { + int rl = RunningLengthWord32.getRunningLength(this.buffer, pos); + boolean rb = RunningLengthWord32.getRunningBit(this.buffer, pos); + int lw = RunningLengthWord32.getNumberOfLiteralWords(this.buffer, pos); + int rbits = rl * WORD_IN_BITS; + if(i < nbits + rbits) { + setInRunningLength(value, i, nbits, pos, rl, rb, lw); + return; + } + nbits += rbits; + int lbits = lw * WORD_IN_BITS; + if(i < nbits + lbits) { + setInLiteralWords(value, i, nbits, pos, rl, rb, lw); + return; + } + nbits += lbits; + pos += lw + 1; } + } + + private void setInRunningLength(boolean value, int i, int nbits, int pos, int rl, boolean rb, int lw) { + if(value != rb) { + int wordPosition = (i - nbits) / WORD_IN_BITS + 1; + int addedWords = (wordPosition==rl) ? 1 : 2; + this.buffer.expand(pos+1, addedWords); + int mask = 1 << i % WORD_IN_BITS; + this.buffer.setWord(pos+1, value ? 
mask : ~mask); + if(this.rlw.position >= pos+1) { + this.rlw.position += addedWords; + } + if(addedWords==1) { + setRLWInfo(pos, rb, rl-1, lw+1); + } else { + setRLWInfo(pos, rb, wordPosition-1, 1); + setRLWInfo(pos+2, rb, rl-wordPosition, lw); + if(this.rlw.position == pos) { + this.rlw.position += 2; + } + } + } + } - /** - * Set the bit at position i to true, the bits must be set in (strictly) increasing - * order. For example, set(15) and then set(7) will fail. You must do set(7) - * and then set(15). - * - * @param i - * the index - * @return true if the value was set (always true when i is greater or equal to sizeInBits()). - * @throws IndexOutOfBoundsException - * if i is negative or greater than Integer.MAX_VALUE - 32 - */ - - public boolean set(final int i) { - if ((i > Integer.MAX_VALUE - wordinbits) || (i < 0)) - throw new IndexOutOfBoundsException("Set values should be between 0 and " - + (Integer.MAX_VALUE - wordinbits)); - if (i < this.sizeinbits) - return false; - // distance in words: - final int dist = (i + wordinbits) / wordinbits - - (this.sizeinbits + wordinbits - 1) / wordinbits; - this.sizeinbits = i + 1; - if (dist > 0) {// easy - if (dist > 1) - fastaddStreamOfEmptyWords(false, dist - 1); - addLiteralWord(1 << (i % wordinbits)); - return true; - } - if (this.rlw.getNumberOfLiteralWords() == 0) { - this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); - addLiteralWord(1 << (i % wordinbits)); - return true; - } - this.buffer[this.actualsizeinwords - 1] |= 1 << (i % wordinbits); - if (this.buffer[this.actualsizeinwords - 1] == ~0) { - this.buffer[this.actualsizeinwords - 1] = 0; - --this.actualsizeinwords; - this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); - // next we add one clean word - addEmptyWord(true); - } - return true; - } - - /** - * Set the size in bits. This does not change the compressed bitmap. 
- * - */ - @Override -public void setSizeInBits(final int size) { - if((size+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits) - throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean): "+size+" "+this.sizeinbits); - this.sizeinbits = size; - } - - /** - * Change the reported size in bits of the *uncompressed* bitmap represented - * by this compressed bitmap. It may change the underlying compressed bitmap. - * It is not possible to reduce the sizeInBits, but - * it can be extended. The new bits are set to false or true depending on the - * value of defaultvalue. - * - * @param size - * the size in bits - * @param defaultvalue - * the default boolean value - * @return true if the update was possible - */ - public boolean setSizeInBits(final int size, final boolean defaultvalue) { - if (size < this.sizeinbits) - return false; - if (defaultvalue == false) - extendEmptyBits(this, this.sizeinbits, size); - else { - // next bit could be optimized - while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) { - this.set(this.sizeinbits); - } - this.addStreamOfEmptyWords(defaultvalue, (size / wordinbits) - - this.sizeinbits / wordinbits); - // next bit could be optimized - while (this.sizeinbits < size) { - this.set(this.sizeinbits); - } - } - this.sizeinbits = size; - return true; - } - - /** - * Returns the size in bits of the *uncompressed* bitmap represented by this - * compressed bitmap. Initially, the sizeInBits is zero. It is extended - * automatically when you set bits to true. - * - * @return the size in bits - */ - @Override -public int sizeInBits() { - return this.sizeinbits; - } - - /** - * Report the *compressed* size of the bitmap (equivalent to memory usage, - * after accounting for some overhead). 
- * - * @return the size in bytes - */ - @Override -public int sizeInBytes() { - return this.actualsizeinwords * (wordinbits / 8); - } - - /** - * Populate an array of (sorted integers) corresponding to the location of the - * set bits. - * - * @return the array containing the location of the set bits - */ - public int[] toArray() { - int[] ans = new int[this.cardinality()]; - int inanspos = 0; - int pos = 0; - final EWAHIterator32 i = new EWAHIterator32(this, - this.actualsizeinwords); - while (i.hasNext()) { - RunningLengthWord32 localrlw = i.next(); - if (localrlw.getRunningBit()) { - for (int j = 0; j < localrlw.getRunningLength(); ++j) { - for (int c = 0; c < wordinbits; ++c) { - ans[inanspos++] = pos++; - } - } - } else { - pos += wordinbits * localrlw.getRunningLength(); - } - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - int data = i.buffer()[i.literalWords() + j]; - if (!usetrailingzeros) { - for (int c = 0; c < wordinbits; ++c) { - if ((data & (1 << c)) != 0) - ans[inanspos++] = c + pos; - } - pos += wordinbits; + private void setInLiteralWords(boolean value, int i, int nbits, int pos, int rl, boolean rb, int lw) { + int wordPosition = (i - nbits) / WORD_IN_BITS + 1; + int mask = 1 << i % WORD_IN_BITS; + if(value) { + this.buffer.orWord(pos + wordPosition, mask); } else { - while (data != 0) { - final int ntz = Integer.numberOfTrailingZeros(data); - data ^= (1l << ntz); - ans[inanspos++] = ntz + pos; - } - pos += wordinbits; - } - } - } - return ans; - - } - - /** - * A more detailed string describing the bitmap (useful for debugging). 
- * - * @return the string - */ - public String toDebugString() { - String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits - + " size in words = " + this.actualsizeinwords + "\n"; - final EWAHIterator32 i = new EWAHIterator32(this, - this.actualsizeinwords); - while (i.hasNext()) { - RunningLengthWord32 localrlw = i.next(); - if (localrlw.getRunningBit()) { - ans += localrlw.getRunningLength() + " 1x11\n"; - } else { - ans += localrlw.getRunningLength() + " 0x00\n"; - } - ans += localrlw.getNumberOfLiteralWords() + " dirties\n"; - for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { - int data = i.buffer()[i.literalWords() + j]; - ans += "\t" + data + "\n"; - } - } - return ans; - } - - /** - * A string describing the bitmap. - * - * @return the string - */ - @Override - public String toString() { - StringBuffer answer = new StringBuffer(); - IntIterator i = this.intIterator(); - answer.append("{"); - if (i.hasNext()) - answer.append(i.next()); - while (i.hasNext()) { - answer.append(","); - answer.append(i.next()); - } - answer.append("}"); - return answer.toString(); - } - /** - * swap the content of the bitmap with another. - * - * @param other - * bitmap to swap with - */ - public void swap(final EWAHCompressedBitmap32 other) { - int[] tmp = this.buffer; - this.buffer = other.buffer; - other.buffer = tmp; - - int tmp2 = this.rlw.position; - this.rlw.position = other.rlw.position; - other.rlw.position = tmp2; - - int tmp3 = this.actualsizeinwords; - this.actualsizeinwords = other.actualsizeinwords; - other.actualsizeinwords = tmp3; - - int tmp4 = this.sizeinbits; - this.sizeinbits = other.sizeinbits; - other.sizeinbits = tmp4; - } - /** - * Reduce the internal buffer to its minimal allowable size (given - * by this.actualsizeinwords). This can free memory. 
- */ - public void trim() { - this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords); - } - - /* - * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) - */ - @Override -public void writeExternal(ObjectOutput out) throws IOException { - serialize(out); - } - - /** - * Returns a new compressed bitmap containing the bitwise XOR values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param a - * the other bitmap - * @return the EWAH compressed bitmap - */ - @Override -public EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32 a) { - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - container.reserve(this.actualsizeinwords + a.actualsizeinwords); - xorToContainer(a, container); - return container; - } - - /** - * Computes a new compressed bitmap containing the bitwise XOR values of the - * current bitmap with some other bitmap. - * - * The running time is proportional to the sum of the compressed sizes (as - * reported by sizeInBytes()). - * - * @param a - * the other bitmap - * @param container - * where we store the result - */ - public void xorToContainer(final EWAHCompressedBitmap32 a, - final BitmapStorage32 container) { - final EWAHIterator32 i = a.getEWAHIterator(); - final EWAHIterator32 j = getEWAHIterator(); - final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); - final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); - while ((rlwi.size()>0) && (rlwj.size()>0)) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? 
rlwi : rlwj; - final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj - : rlwi; - if (predator.getRunningBit() == false) { - int index = prey.discharge(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); + this.buffer.andWord(pos + wordPosition, ~mask); + } + int emptyWord = value ? ~0 : 0; + if(this.buffer.getWord(pos + wordPosition) == emptyWord) { + boolean canMergeInCurrentRLW = mergeLiteralWordInCurrentRunningLength(value, rb, rl, wordPosition); + boolean canMergeInNextRLW = mergeLiteralWordInNextRunningLength(value, lw, pos, wordPosition); + if(canMergeInCurrentRLW && canMergeInNextRLW) { + int nextRl = RunningLengthWord32.getRunningLength(this.buffer, pos + 2); + int nextLw = RunningLengthWord32.getNumberOfLiteralWords(this.buffer, pos + 2); + this.buffer.collapse(pos, 2); + setRLWInfo(pos, value, rl + 1 + nextRl, nextLw); + if(this.rlw.position >= pos+2) { + this.rlw.position -= 2; + } + } else if(canMergeInCurrentRLW) { + this.buffer.collapse(pos + 1, 1); + setRLWInfo(pos, value, rl+1, lw-1); + if(this.rlw.position >= pos+2) { + this.rlw.position--; + } + } else if(canMergeInNextRLW) { + int nextRLWPos = pos + lw + 1; + int nextRl = RunningLengthWord32.getRunningLength(this.buffer, nextRLWPos); + int nextLw = RunningLengthWord32.getNumberOfLiteralWords(this.buffer, nextRLWPos); + this.buffer.collapse(pos+wordPosition, 1); + setRLWInfo(pos, rb, rl, lw-1); + setRLWInfo(pos+wordPosition, value, nextRl+1, nextLw); + if(this.rlw.position >= nextRLWPos) { + this.rlw.position -= lw + 1 - wordPosition; + } + } else { + setRLWInfo(pos, rb, rl, wordPosition-1); + setRLWInfo(pos+wordPosition, value, 1, lw-wordPosition); + if(this.rlw.position == pos) { + this.rlw.position += wordPosition; + } + } + } + } + + private boolean mergeLiteralWordInCurrentRunningLength(boolean value, boolean rb, int rl, int wordPosition) { + return 
(value==rb || rl==0) && wordPosition==1; + } + + private boolean mergeLiteralWordInNextRunningLength(boolean value, int lw, int pos, int wordPosition) { + int nextRLWPos = pos + lw + 1; + if(lw==wordPosition && nextRLWPos (this.sizeInBits + WORD_IN_BITS - 1) / WORD_IN_BITS) { + setSizeInBits(size,false); + return; + } + if ((size + WORD_IN_BITS - 1) / WORD_IN_BITS != (this.sizeInBits + WORD_IN_BITS - 1) / WORD_IN_BITS) + throw new RuntimeException( + "You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean): " + + size + " " + this.sizeInBits + ); + this.sizeInBits = size; + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + if (usedBitsInLast == 0) + return; + if (this.rlw.getNumberOfLiteralWords() == 0) { + if (this.rlw.getRunningLength() > 0) { + this.rlw.setRunningLength(this.rlw .getRunningLength() - 1); + final int word = this.rlw.getRunningBit() ? (~0) >>> (WORD_IN_BITS - usedBitsInLast) : 0; + this.insertLiteralWord(word); + } + return; + } + this.buffer.andLastWord((~0) >>> (WORD_IN_BITS - usedBitsInLast)); + } + + /** + * Change the reported size in bits of the *uncompressed* bitmap + * represented by this compressed bitmap. It may change the underlying + * compressed bitmap. It is not possible to reduce the sizeInBits, but + * it can be extended. The new bits are set to false or true depending + * on the value of defaultValue. + * + * This method is not thread-safe. 
+ * + * @param size the size in bits + * @param defaultValue the default boolean value + * @return true if the update was possible + */ + public boolean setSizeInBits(final int size, final boolean defaultValue) { + if (size <= this.sizeInBits) { + return false; + } + if ((this.sizeInBits % WORD_IN_BITS) != 0) { + if (!defaultValue) { + if (this.rlw.getNumberOfLiteralWords() > 0) { + final int bitsToAdd = size - this.sizeInBits; + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + final int freeBitsInLast = WORD_IN_BITS - usedBitsInLast; + if (this.buffer.getLastWord() == 0) { + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + this.buffer.removeLastWord(); + this.sizeInBits -= usedBitsInLast; + } else if (usedBitsInLast > 0) { + this.sizeInBits += Math.min(bitsToAdd, freeBitsInLast); + } + } + } else { + if (this.rlw.getNumberOfLiteralWords() == 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + insertLiteralWord(0); + } + final int maskWidth = Math.min(WORD_IN_BITS - this.sizeInBits % WORD_IN_BITS, + size - this.sizeInBits); + final int maskShift = this.sizeInBits % WORD_IN_BITS; + final int mask = ((~0) >>> (WORD_IN_BITS - maskWidth)) << maskShift; + this.buffer.orLastWord(mask); + if (this.buffer.getLastWord() == ~0) { + this.buffer.removeLastWord(); + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + insertEmptyWord(true); + } + this.sizeInBits += maskWidth; + } + } + this.addStreamOfEmptyWords(defaultValue, + (size / WORD_IN_BITS) - (this.sizeInBits / WORD_IN_BITS) + ); + if (this.sizeInBits < size) { + final int dist = distanceInWords(size - 1); + if (dist > 0) { + insertLiteralWord(0); + } + if (defaultValue) { + final int maskWidth = size - this.sizeInBits; + final int maskShift = this.sizeInBits % WORD_IN_BITS; + final int mask = ((~0) >>> (WORD_IN_BITS - maskWidth)) << maskShift; + this.buffer.orLastWord(mask); + } + this.sizeInBits = size; + } + return true; + } + + /** + * 
For internal use. + * + * @param i the index + */ + private int distanceInWords(final int i) { + return (i + WORD_IN_BITS) / WORD_IN_BITS + - (this.sizeInBits + WORD_IN_BITS - 1) / WORD_IN_BITS; + } + + /** + * Returns the size in bits of the *uncompressed* bitmap represented by + * this compressed bitmap. Initially, the sizeInBits is zero. It is + * extended automatically when you set bits to true. + * + * The current bitmap is not modified. + * + * @return the size in bits + */ + @Override + public int sizeInBits() { + return this.sizeInBits; + } + + /** + * Report the *compressed* size of the bitmap (equivalent to memory + * usage, after accounting for some overhead). + * + * @return the size in bytes + */ + @Override + public int sizeInBytes() { + return this.buffer.sizeInWords() * (WORD_IN_BITS / 8); + } + + + /** + * Compute a Boolean threshold function: bits are true where at least T + * bitmaps have a true bit. + * + * @param t the threshold + * @param bitmaps input data + * @return the aggregated bitmap + * @since 0.8.2 + */ + public static EWAHCompressedBitmap32 threshold(final int t, + final EWAHCompressedBitmap32... bitmaps) { + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + thresholdWithContainer(container, t, bitmaps); + return container; + } + + static int maxSizeInBits(final EWAHCompressedBitmap32... bitmaps) { + int maxSizeInBits = 0; + for(EWAHCompressedBitmap32 bitmap : bitmaps) { + maxSizeInBits = Math.max(maxSizeInBits, bitmap.sizeInBits()); + } + return maxSizeInBits; + } + + /** + * Compute a Boolean threshold function: bits are true where at least T + * bitmaps have a true bit. + * + * The content of the container is overwritten. + * + * @param t the threshold + * @param bitmaps input data + * @param container where we write the aggregated bitmap + * @since 0.8.2 + */ + public static void thresholdWithContainer( + final BitmapStorage32 container, final int t, + final EWAHCompressedBitmap32... 
bitmaps) { + (new RunningBitmapMerge32()).symmetric( + new ThresholdFuncBitmap32(t), container, bitmaps); + } + + + /** + * Populate an array of (sorted integers) corresponding to the location + * of the set bits. + * + * @return the array containing the location of the set bits + */ + public int[] toArray() { + int[] ans = new int[this.cardinality()]; + int inanspos = 0; + int pos = 0; + final EWAHIterator32 i = this.getEWAHIterator(); + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + final int runningLength = localrlw.getRunningLength(); + if (localrlw.getRunningBit()) { + for (int j = 0; j < runningLength; ++j) { + for (int c = 0; c < WORD_IN_BITS; ++c) { + ans[inanspos++] = pos++; + } + } + } else { + pos += WORD_IN_BITS * runningLength; + } + final int numberOfLiteralWords = localrlw.getNumberOfLiteralWords(); + final int literalWords = i.literalWords(); + for (int j = 0; j < numberOfLiteralWords; ++j) { + int data = i.buffer().getWord(literalWords + j); + while (data != 0) { + final int t = data & -data; + ans[inanspos++] = Integer.bitCount(t - 1) + pos; + data ^= t; + } + pos += WORD_IN_BITS; + } + } + return ans; + + } + + /** + * A more detailed string describing the bitmap (useful for debugging). 
+ * + * @return the string + */ + public String toDebugString() { + StringBuffer sb = new StringBuffer(" EWAHCompressedBitmap, size in bits = "); + sb.append(this.sizeInBits).append(" size in words = "); + sb.append(this.buffer.sizeInWords()).append("\n"); + final EWAHIterator32 i = this.getEWAHIterator(); + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + sb.append(localrlw.getRunningLength()).append(" 1x11\n"); + } else { + sb.append(localrlw.getRunningLength()).append(" 0x00\n"); + } + sb.append(localrlw.getNumberOfLiteralWords()).append(" dirties\n"); + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + int data = i.buffer().getWord(i.literalWords() + j); + sb.append("\t").append(data).append("\n"); + } + } + return sb.toString(); + } + + /** + * A string describing the bitmap. + * + * @return the string + */ + @Override + public String toString() { + StringBuilder answer = new StringBuilder(); + IntIterator i = this.intIterator(); + answer.append("{"); + if (i.hasNext()) + answer.append(i.next()); + while (i.hasNext()) { + answer.append(","); + answer.append(i.next()); + } + answer.append("}"); + return answer.toString(); + } + + /** + * swap the content of the bitmap with another. + * + * @param other bitmap to swap with + */ + public void swap(final EWAHCompressedBitmap32 other) { + this.buffer.swap(other.buffer); + + int tmp2 = this.rlw.position; + this.rlw.position = other.rlw.position; + other.rlw.position = tmp2; + + int tmp3 = this.sizeInBits; + this.sizeInBits = other.sizeInBits; + other.sizeInBits = tmp3; + } + + /** + * Reduce the internal buffer to its minimal allowable size (given by + * this.actualsizeinwords). This can free memory. + */ + public void trim() { + this.buffer.trim(); + } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of + * the current bitmap with some other bitmap. 
+ * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32 a) { + int size = this.buffer.sizeInWords() + a.buffer.sizeInWords(); + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(size); + xorToContainer(a, container); + return container; + } + + /** + * Computes a new compressed bitmap containing the bitwise XOR values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes + * (as reported by sizeInBytes()). + * + * The current bitmap is not modified. + * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + */ + public void xorToContainer(final EWAHCompressedBitmap32 a, + final BitmapStorage32 container) { + container.clear(); + final EWAHIterator32 i = a.getEWAHIterator(); + final EWAHIterator32 j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32( + i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32( + j); + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj + : rlwi; + final int index = (!predator.getRunningBit()) ? 
prey.discharge(container, + predator.getRunningLength()) : prey.dischargeNegated( + container, + predator.getRunningLength()); + container.addStreamOfEmptyWords(predator.getRunningBit(), + predator.getRunningLength() + - index + ); + predator.discardRunningWords(); + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) + ^ rlwj.getLiteralWordAt(k)); + rlwi.discardLiteralWords(nbre_literal); + rlwj.discardLiteralWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size() > 0; + final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi + : rlwj; + remaining.discharge(container); + container.setSizeInBitsWithinLastWord(Math.max(sizeInBits(), a.sizeInBits())); + } + + + + + /** + * Returns the cardinality of the result of a bitwise XOR of the values + * of the current bitmap with some other bitmap. Avoids allocating an + * intermediate bitmap to hold the result of the OR. + * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the cardinality + */ + public int xorCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + xorToContainer(a, counter); + return counter.getCount(); + } + + /** + * Returns a new compressed bitmap containing the composition of + * the current bitmap with some other bitmap. + * + * The composition A.compose(B) is defined as follows: we retain + * the ith set bit of A only if the ith bit of B is set. For example, + * if you have the following bitmap A = { 0, 1, 0, 1, 1, 0 } and want + * to keep only the second and third ones, you can call A.compose(B) + * with B = { 0, 1, 1 } and you will get C = { 0, 0, 0, 1, 1, 0 }. + * + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. 
+ * + * The current bitmap is not modified. + * + * @param a the other bitmap (it will not be modified) + * @return the EWAH compressed bitmap + */ + @Override + public EWAHCompressedBitmap32 compose(EWAHCompressedBitmap32 a) { + int size = this.buffer.sizeInWords(); + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(size); + composeToContainer(a, container); + return container; + } + + /** + * Computes a new compressed bitmap containing the composition of + * the current bitmap with some other bitmap. + * + * The composition A.compose(B) is defined as follows: we retain + * the ith set bit of A only if the ith bit of B is set. For example, + * if you have the following bitmap A = { 0, 1, 0, 1, 1, 0 } and want + * to keep only the second and third ones, you can call A.compose(B) + * with B = { 0, 1, 1 } and you will get C = { 0, 0, 0, 1, 1, 0 }. + * + * + * The current bitmap is not modified. + * + * The content of the container is overwritten. + * + * @param a the other bitmap (it will not be modified) + * @param container where we store the result + */ + public void composeToContainer(final EWAHCompressedBitmap32 a, + final EWAHCompressedBitmap32 container) { + container.clear(); + final ChunkIterator iterator = chunkIterator(); + final ChunkIterator aIterator = a.chunkIterator(); + int index = 0; + while(iterator.hasNext() && aIterator.hasNext()) { + if(!iterator.nextBit()) { + int length = iterator.nextLength(); + index += length; + container.setSizeInBits(index, false); + iterator.move(length); + } else { + int length = Math.min(iterator.nextLength(), aIterator.nextLength()); + index += length; + container.setSizeInBits(index, aIterator.nextBit()); + iterator.move(length); + aIterator.move(length); + } + } + container.setSizeInBits(sizeInBits, false); + } + + /** + * For internal use. Computes the bitwise and of the provided bitmaps + * and stores the result in the container. + * + * The content of the container is overwritten. 
+ * + * @param container where the result is stored + * @param bitmaps bitmaps to AND + */ + public static void andWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length == 1) + throw new IllegalArgumentException( + "Need at least one bitmap"); + if (bitmaps.length == 2) { + bitmaps[0].andToContainer(bitmaps[1], container); + return; + } + int initialSize = calculateInitialSize(bitmaps); + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(initialSize); + EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(initialSize); + bitmaps[0].andToContainer(bitmaps[1], answer); + for (int k = 2; k < bitmaps.length - 1; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + answer.andToContainer(bitmaps[bitmaps.length - 1], container); + } + + private static int calculateInitialSize(final EWAHCompressedBitmap32... bitmaps) { + int initialSize = 0; + for (EWAHCompressedBitmap32 bitmap : bitmaps) + initialSize = Math.max(bitmap.buffer.sizeInWords(), initialSize); + return initialSize; + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of + * the provided bitmaps. + * + * It may or may not be faster than doing the aggregation two-by-two + * (A.and(B).and(C)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param bitmaps bitmaps to AND together + * @return result of the AND + */ + public static EWAHCompressedBitmap32 and( + final EWAHCompressedBitmap32... 
bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0]; + if (bitmaps.length == 2) + return bitmaps[0].and(bitmaps[1]); + int initialSize = calculateInitialSize(bitmaps); + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(initialSize); + EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(initialSize); + bitmaps[0].andToContainer(bitmaps[1], answer); + for (int k = 2; k < bitmaps.length; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + return answer; + } + + /** + * Returns the cardinality of the result of a bitwise AND of the values + * of the provided bitmaps. Avoids allocating an intermediate + * bitmap to hold the result of the AND. + * + * @param bitmaps bitmaps to AND + * @return the cardinality + */ + public static int andCardinality( + final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0].cardinality(); + final BitCounter32 counter = new BitCounter32(); + andWithContainer(counter, bitmaps); + return counter.getCount(); + } + + /** + * Return a bitmap with the bit set to true at the given positions. The + * positions should be given in sorted order. + * + * (This is a convenience method.) + * + * @param setbits list of set bit positions + * @return the bitmap + * @since 0.4.5 + */ + public static EWAHCompressedBitmap32 bitmapOf(int... setbits) { + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + for (int k : setbits) + a.set(k); + return a; + } + + /** + * For internal use. Computes the bitwise or of the provided bitmaps and + * stores the result in the container. + * + * The content of the container is overwritten. + * + * @param container where store the result + * @param bitmaps to be aggregated + */ + public static void orWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "You should provide at least two bitmaps, provided " + + bitmaps.length + ); + FastAggregation32.orToContainer(container, bitmaps); + } + + /** + * For internal use. Computes the bitwise xor of the provided bitmaps + * and stores the result in the container. + * + * The content of the container is overwritten. + * + * @param container where store the result + * @param bitmaps to be aggregated + */ + public static void xorWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "You should provide at least two bitmaps, provided " + + bitmaps.length + ); + FastAggregation32.xorToContainer(container, bitmaps); + } + + /** + * Returns a new compressed bitmap containing the bitwise OR values of + * the provided bitmaps. This is typically faster than doing the + * aggregation two-by-two (A.or(B).or(C).or(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param bitmaps bitmaps to OR together + * @return result of the OR + */ + public static EWAHCompressedBitmap32 or( + final EWAHCompressedBitmap32... bitmaps) { + return FastAggregation32.or(bitmaps); + } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of + * the provided bitmaps. This is typically faster than doing the + * aggregation two-by-two (A.xor(B).xor(C).xor(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may + * call the trim() method to reduce memory usage. + * + * @param bitmaps bitmaps to XOR together + * @return result of the XOR + */ + public static EWAHCompressedBitmap32 xor( + final EWAHCompressedBitmap32... 
bitmaps) { + return FastAggregation32.xor(bitmaps); + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values + * of the provided bitmaps. Avoids allocating an intermediate + * bitmap to hold the result of the OR. + * + * @param bitmaps bitmaps to OR + * @return the cardinality + */ + public static int orCardinality(final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length == 1) + return bitmaps[0].cardinality(); + final BitCounter32 counter = new BitCounter32(); + orWithContainer(counter, bitmaps); + return counter.getCount(); + } + + /** + * Generates a new bitmap shifted by "b" bits. + * If b is positive, the position of all set bits is increased by + * b. The negative case is not supported. + * + * @param b number of bits + * @return new shifted bitmap + */ + public EWAHCompressedBitmap32 shift(final int b) { + if (b < 0) + throw new IllegalArgumentException( + "Negative shifts unsupported at the moment."); // TODO: add + // support + int sz = this.buffer.sizeInWords(); + int newsz = b > 0 ? 
sz + (b + (WORD_IN_BITS - 1)) / WORD_IN_BITS : sz; + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(newsz); + IteratingRLW32 i = this.getIteratingRLW(); + int fullwords = b / WORD_IN_BITS; + int shift = b % WORD_IN_BITS; + answer.addStreamOfEmptyWords(false, fullwords); + if (shift == 0) { + answer.buffer.push_back(this.buffer, 0, sz); } else { - int index = prey.dischargeNegated(container, predator.getRunningLength()); - container.addStreamOfEmptyWords(true, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - final boolean i_remains = rlwi.size()>0; - final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; - remaining.discharge(container); - container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); - } - - /** - * Returns the cardinality of the result of a bitwise XOR of the values of the - * current bitmap with some other bitmap. Avoids needing to allocate an - * intermediate bitmap to hold the result of the OR. - * - * @param a - * the other bitmap - * @return the cardinality - */ - public int xorCardinality(final EWAHCompressedBitmap32 a) { - final BitCounter32 counter = new BitCounter32(); - xorToContainer(a, counter); - return counter.getCount(); - } - - /** - * For internal use. Computes the bitwise and of the provided bitmaps and - * stores the result in the container. - * - * @param container - * where the result is stored - * @param bitmaps - * bitmaps to AND - */ - public static void andWithContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32... 
bitmaps) { - if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); - if(bitmaps.length == 2) { - bitmaps[0].andToContainer(bitmaps[1],container); - return; - } - EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); - bitmaps[0].andToContainer(bitmaps[1], answer); - for(int k = 2; k < bitmaps.length - 1; ++k) { - answer.andToContainer(bitmaps[k], tmp); - tmp.swap(answer); - tmp.clear(); - } - answer.andToContainer(bitmaps[bitmaps.length - 1], container); - } - - /** - * Returns a new compressed bitmap containing the bitwise AND values of the - * provided bitmaps. - * - * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). - * - * If only one bitmap is provided, it is returned as is. - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param bitmaps - * bitmaps to AND together - * @return result of the AND - */ - public static EWAHCompressedBitmap32 and( - final EWAHCompressedBitmap32... bitmaps) { - if(bitmaps.length == 1) return bitmaps[0]; - if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); - EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); - bitmaps[0].andToContainer(bitmaps[1], answer); - for(int k = 2; k < bitmaps.length; ++k) { - answer.andToContainer(bitmaps[k], tmp); - tmp.swap(answer); - tmp.clear(); - } - return answer; - } - - /** - * Returns the cardinality of the result of a bitwise AND of the values of the - * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold - * the result of the AND. - * - * @param bitmaps - * bitmaps to AND - * @return the cardinality - */ - public static int andCardinality(final EWAHCompressedBitmap32... 
bitmaps) { - if(bitmaps.length == 1) return bitmaps[0].cardinality(); - final BitCounter32 counter = new BitCounter32(); - andWithContainer(counter, bitmaps); - return counter.getCount(); - } - - - /** - * Return a bitmap with the bit set to true at the given - * positions. The positions should be given in sorted order. - * - * (This is a convenience method.) - * - * @since 0.4.5 - * @param setbits list of set bit positions - * @return the bitmap - */ - public static EWAHCompressedBitmap32 bitmapOf(int ... setbits) { - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - for (int k : setbits) - a.set(k); - return a; - } - - - - - /** - * For internal use. This simply adds a stream of words made of zeroes so that - * we pad to the desired size. - * - * @param storage - * bitmap to extend - * @param currentSize - * current size (in bits) - * @param newSize - * new desired size (in bits) - */ - private static void extendEmptyBits(final BitmapStorage32 storage, - final int currentSize, final int newSize) { - final int currentLeftover = currentSize % wordinbits; - final int finalLeftover = newSize % wordinbits; - storage.addStreamOfEmptyWords(false, (newSize / wordinbits) - currentSize - / wordinbits + (finalLeftover != 0 ? 1 : 0) - + (currentLeftover != 0 ? -1 : 0)); - } - - /** - * For internal use. Computes the bitwise or of the provided bitmaps and - * stores the result in the container. - * @param container where store the result - * @param bitmaps to be aggregated - */ - public static void orWithContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32... 
bitmaps) { - if (bitmaps.length < 2) - throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); - int size = 0; - int sinbits = 0; - for (EWAHCompressedBitmap32 b : bitmaps) { - size += b.sizeInBytes(); - if (sinbits < b.sizeInBits()) - sinbits = b.sizeInBits(); - } - if (size * 8 > sinbits) { - FastAggregation32.bufferedorWithContainer(container, 65536, bitmaps); - } else { - FastAggregation32.orToContainer(container, bitmaps); - } - } - - /** - * For internal use. Computes the bitwise xor of the provided bitmaps and - * stores the result in the container. - * @param container where store the result - * @param bitmaps to be aggregated - */ - public static void xorWithContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32... bitmaps) { - if (bitmaps.length < 2) - throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); - int size = 0; - int sinbits = 0; - for (EWAHCompressedBitmap32 b : bitmaps) { - size += b.sizeInBytes(); - if (sinbits < b.sizeInBits()) - sinbits = b.sizeInBits(); - } - if (size * 8 > sinbits) { - FastAggregation32.bufferedxorWithContainer(container, 65536, bitmaps); - } else { - FastAggregation32.xorToContainer(container, bitmaps); - } - } - - /** - * Returns a new compressed bitmap containing the bitwise OR values of the - * provided bitmaps. This is typically faster than doing the aggregation - * two-by-two (A.or(B).or(C).or(D)). - * - * If only one bitmap is provided, it is returned as is. - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param bitmaps - * bitmaps to OR together - * @return result of the OR - */ - public static EWAHCompressedBitmap32 or( - final EWAHCompressedBitmap32... 
bitmaps) { - if(bitmaps.length == 1) return bitmaps[0]; - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - int largestSize = 0; - for (EWAHCompressedBitmap32 bitmap : bitmaps) { - largestSize = Math.max(bitmap.actualsizeinwords, largestSize); - } - container.reserve((int) (largestSize * 1.5)); - orWithContainer(container, bitmaps); - return container; - } - - - /** - * Returns a new compressed bitmap containing the bitwise XOR values of the - * provided bitmaps. This is typically faster than doing the aggregation - * two-by-two (A.xor(B).xor(C).xor(D)). - * - * If only one bitmap is provided, it is returned as is. - * - * If you are not planning on adding to the resulting bitmap, you may call the trim() - * method to reduce memory usage. - * - * @param bitmaps - * bitmaps to XOR together - * @return result of the XOR - */ - public static EWAHCompressedBitmap32 xor( - final EWAHCompressedBitmap32... bitmaps) { - if(bitmaps.length == 1) return bitmaps[0]; - final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - int largestSize = 0; - for (EWAHCompressedBitmap32 bitmap : bitmaps) { - largestSize = Math.max(bitmap.actualsizeinwords, largestSize); - } - container.reserve((int) (largestSize * 1.5)); - xorWithContainer(container, bitmaps); - return container; - } - - /** - * Returns the cardinality of the result of a bitwise OR of the values of the - * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold - * the result of the OR. - * - * @param bitmaps - * bitmaps to OR - * @return the cardinality - */ - public static int orCardinality(final EWAHCompressedBitmap32... bitmaps) { - if(bitmaps.length == 1) return bitmaps[0].cardinality(); - final BitCounter32 counter = new BitCounter32(); - orWithContainer(counter, bitmaps); - return counter.getCount(); - } - - /** The actual size in words. 
*/ - int actualsizeinwords = 1; - - /** The buffer (array of 32-bit words) */ - int buffer[] = null; - - /** The current (last) running length word. */ - RunningLengthWord32 rlw = null; - - /** sizeinbits: number of bits in the (uncompressed) bitmap. */ - int sizeinbits = 0; - - /** - * The Constant defaultbuffersize: default memory allocation when the object - * is constructed. - */ - static final int defaultbuffersize = 4; - - /** optimization option **/ - public static final boolean usetrailingzeros = true; - - /** whether we adjust after some aggregation by adding in zeroes **/ - public static final boolean adjustContainerSizeWhenAggregating = true; - - /** The Constant wordinbits represents the number of bits in a int. */ - public static final int wordinbits = 32; + int w = 0; + while (true) { + int rl = i.getRunningLength(); + // whether the shift should justify a new word + final boolean shiftextension = ((this.sizeInBits + WORD_IN_BITS - 1) % WORD_IN_BITS) + shift >= WORD_IN_BITS; + if (rl > 0) { + if (i.getRunningBit()) { + int sw = w | (-1 << shift); + answer.addWord(sw); + w = -1 >>> (WORD_IN_BITS - shift); + } else { + answer.addWord(w); + w = 0; + } + if (rl > 1) { + answer.addStreamOfEmptyWords(i.getRunningBit(), rl - 1); + } + } + int x = i.getNumberOfLiteralWords(); + for (int k = 0; k < x; ++k) { + int neww = i.getLiteralWordAt(k); + int sw = w | (neww << shift); + answer.addWord(sw); + w = neww >>> (WORD_IN_BITS - shift); + } + if (!i.next()) { + if(shiftextension) answer.addWord(w); + break; + } + } + } + answer.sizeInBits = this.sizeInBits + b; + return answer; + } + /** + * The buffer + */ + final Buffer32 buffer; + + /** + * The current (last) running length word. + */ + private RunningLengthWord32 rlw = null; + + /** + * sizeInBits: number of bits in the (uncompressed) bitmap. 
+ */ + private int sizeInBits = 0; + + /** + * whether we adjust after some aggregation by adding in zeroes * + */ + public static final boolean ADJUST_CONTAINER_SIZE_WHEN_AGGREGATING = true; + + /** + * The Constant WORD_IN_BITS represents the number of bits in a int. + */ + public static final int WORD_IN_BITS = 32; + static final long serialVersionUID = 1L; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java 2019-11-08 21:55:59.000000000 +0000 @@ -2,97 +2,105 @@ /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** - * The class EWAHIterator represents a special type of - * efficient iterator iterating over (uncompressed) words of bits. + * The class EWAHIterator represents a special type of efficient iterator + * iterating over (uncompressed) words of bits. * * @author Daniel Lemire * @since 0.5.0 - * */ public final class EWAHIterator32 implements Cloneable { - - /** - * Instantiates a new eWAH iterator. - * - * @param a the array of words - * @param sizeinwords the number of words that are significant in the array of words - */ - public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) { - this.rlw = new RunningLengthWord32(a, 0); - this.size = sizeinwords; - this.pointer = 0; - } - - /** - * Allow expert developers to instantiate an EWAHIterator. 
- * - * @param bitmap we want to iterate over - * @return an iterator - */ - public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) { - return bitmap.getEWAHIterator(); - } - - /** - * Access to the array of words - * - * @return the int[] - */ - public int[] buffer() { - return this.rlw.parent.buffer; - } - - /** - * Position of the literal words represented by this running length word. - * - * @return the int - */ - public int literalWords() { - return this.pointer - this.rlw.getNumberOfLiteralWords(); - } - - /** - * Checks for next. - * - * @return true, if successful - */ - public boolean hasNext() { - return this.pointer < this.size; - } - - /** - * Next running length word. - * - * @return the running length word - */ - public RunningLengthWord32 next() { - this.rlw.position = this.pointer; - this.pointer += this.rlw.getNumberOfLiteralWords() + 1; - return this.rlw; - } - - @Override - public EWAHIterator32 clone() throws CloneNotSupportedException { - EWAHIterator32 ans = (EWAHIterator32) super.clone(); - ans.rlw = this.rlw.clone(); - ans.size = this.size; - ans.pointer = this.pointer; - return ans; - } - - /** The pointer represent the location of the current running length - * word in the array of words (embedded in the rlw attribute). */ - int pointer; - - /** The current running length word. */ - RunningLengthWord32 rlw; - /** The size in words. */ - int size; + /** + * Instantiates a new eWAH iterator. + * + * @param buffer the buffer + */ + public EWAHIterator32(final Buffer32 buffer) { + this.rlw = new RunningLengthWord32(buffer, 0); + this.size = buffer.sizeInWords(); + this.pointer = 0; + } + + private EWAHIterator32(int pointer, RunningLengthWord32 rlw, int size){ + this.pointer = pointer; + this.rlw = rlw; + this.size = size; + } + + /** + * Allow expert developers to instantiate an EWAHIterator. 
+ * + * @param bitmap we want to iterate over + * @return an iterator + */ + public static EWAHIterator32 getEWAHIterator( + EWAHCompressedBitmap32 bitmap) { + return bitmap.getEWAHIterator(); + } + + /** + * Access to the buffer + * + * @return the buffer + */ + public Buffer32 buffer() { + return this.rlw.buffer; + } + + /** + * Position of the literal words represented by this running length + * word. + * + * @return the int + */ + public int literalWords() { + return this.pointer - this.rlw.getNumberOfLiteralWords(); + } + + /** + * Checks for next. + * + * @return true, if successful + */ + public boolean hasNext() { + return this.pointer < this.size; + } + + /** + * Next running length word. + * + * @return the running length word + */ + public RunningLengthWord32 next() { + this.rlw.position = this.pointer; + this.pointer += this.rlw.getNumberOfLiteralWords() + 1; + return this.rlw; + } + + @Override + public EWAHIterator32 clone() throws CloneNotSupportedException { + return new EWAHIterator32(pointer,rlw.clone(),size); + } + + /** + * The pointer represent the location of the current running length word + * in the array of words (embedded in the rlw attribute). + */ + private int pointer; + + /** + * The current running length word. + */ + final RunningLengthWord32 rlw; + + /** + * The size in words. 
+ */ + private final int size; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/FastAggregation32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/FastAggregation32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/FastAggregation32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/FastAggregation32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,377 +1,395 @@ package com.googlecode.javaewah32; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.PriorityQueue; +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ /** * Fast algorithms to aggregate many bitmaps. These algorithms are just given as * reference. They may not be faster than the corresponding methods in the * EWAHCompressedBitmap class. - * + * * @author Daniel Lemire - * */ -public class FastAggregation32 { +public final class FastAggregation32 { + + /** Private constructor to prevent instantiation */ + private FastAggregation32() {} + + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * + * @param bitmaps the source bitmaps + * @param bufSize buffer size used during the computation in 64-bit + * words (per input bitmap) + * @return the or aggregate. + */ + public static EWAHCompressedBitmap32 bufferedand(final int bufSize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedandWithContainer(answer, bufSize, bitmaps); + return answer; + } + + /** + * Compute the and aggregate using a temporary uncompressed bitmap. 
+ * + * This function does not seek to match the "sizeinbits" attributes + * of the input bitmaps. + * + * @param container where the aggregate is written + * @param bufSize buffer size used during the computation in 64-bit + * words (per input bitmap) + * @param bitmaps the source bitmaps + */ + public static void bufferedandWithContainer( + final BitmapStorage32 container, final int bufSize, + final EWAHCompressedBitmap32... bitmaps) { + + java.util.LinkedList al = new java.util.LinkedList(); + for (EWAHCompressedBitmap32 bitmap : bitmaps) { + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufSize * bitmaps.length]; + + for (IteratingRLW32 i : al) + if (i.size() == 0) { + al.clear(); + break; + } + + while (!al.isEmpty()) { + Arrays.fill(hardbitmap, ~0); + int effective = Integer.MAX_VALUE; + for (IteratingRLW32 i : al) { + int eff = IteratorAggregation32.inplaceand( + hardbitmap, i); + if (eff < effective) + effective = eff; + } + for (int k = 0; k < effective; ++k) + container.addWord(hardbitmap[k]); + for (IteratingRLW32 i : al) + if (i.size() == 0) { + al.clear(); + break; + } + } + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param bitmaps the source bitmaps + * @param bufSize buffer size used during the computation in 64-bit + * words + * @return the or aggregate. + */ + public static EWAHCompressedBitmap32 bufferedor(final int bufSize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedorWithContainer(answer, bufSize, bitmaps); + return answer; + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. 
+ * + * @param container where the aggregate is written + * @param bufSize buffer size used during the computation in 64-bit + * words + * @param bitmaps the source bitmaps + */ + public static void bufferedorWithContainer( + final BitmapStorage32 container, final int bufSize, + final EWAHCompressedBitmap32... bitmaps) { + int range = 0; + EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, + EWAHCompressedBitmap32 b) { + return b.sizeInBits() - a.sizeInBits(); + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap32 bitmap : sbitmaps) { + if (bitmap.sizeInBits() > range) + range = bitmap.sizeInBits(); + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufSize]; + int maxr = al.size(); + while (maxr > 0) { + int effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation32 + .inplaceor(hardbitmap, + al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.addWord(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + + } + container.setSizeInBitsWithinLastWord(range); + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * @param bitmaps the source bitmaps + * @param bufSize buffer size used during the computation in 64-bit + * words + * @return the xor aggregate. + */ + public static EWAHCompressedBitmap32 bufferedxor(final int bufSize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedxorWithContainer(answer, bufSize, bitmaps); + return answer; + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. 
+ * + * @param container where the aggregate is written + * @param bufSize buffer size used during the computation in 64-bit + * words + * @param bitmaps the source bitmaps + */ + public static void bufferedxorWithContainer( + final BitmapStorage32 container, final int bufSize, + final EWAHCompressedBitmap32... bitmaps) { + int range = 0; + EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, + EWAHCompressedBitmap32 b) { + return b.sizeInBits() - a.sizeInBits(); + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap32 bitmap : sbitmaps) { + if (bitmap.sizeInBits() > range) + range = bitmap.sizeInBits(); + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufSize]; + int maxr = al.size(); + while (maxr > 0) { + int effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation32 + .inplacexor(hardbitmap, + al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.addWord(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + } + container.setSizeInBitsWithinLastWord(range); + } + + /** + * Uses a priority queue to compute the or aggregate. + * + * The content of the container is overwritten. + * + * This algorithm runs in linearithmic time (O(n log n)) with respect to the number of bitmaps. + * + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void orToContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue( + bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, + EWAHCompressedBitmap32 b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + while (pq.size() > 2) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.or(x2)); + } + pq.poll().orToContainer(pq.poll(), container); + } + + /** + * Simple algorithm that computes the OR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + if(pq.isEmpty()) return new EWAHCompressedBitmap32(); + while (pq.size() > 1) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.or(x2)); + } + return pq.poll(); + } + + /** + * Simple algorithm that computes the XOR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32... 
bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + if(pq.isEmpty()) return new EWAHCompressedBitmap32(); + while (pq.size() > 1) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + return pq.poll(); + } + + /** + * Simple algorithm that computes the OR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap32 or(final Iterator bitmaps) { + PriorityQueue pq = new PriorityQueue(32, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + while(bitmaps.hasNext()) + pq.add(bitmaps.next()); + if(pq.isEmpty()) return new EWAHCompressedBitmap32(); + while (pq.size() > 1) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.or(x2)); + } + return pq.poll(); + } + + /** + * Simple algorithm that computes the XOR aggregate. + * + * @param bitmaps input bitmaps + * @return new bitmap containing the aggregate + */ + public static EWAHCompressedBitmap32 xor(final Iterator bitmaps) { + PriorityQueue pq = new PriorityQueue(32, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + while(bitmaps.hasNext()) + pq.add(bitmaps.next()); + if(pq.isEmpty()) return new EWAHCompressedBitmap32(); + while (pq.size() > 1) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + return pq.poll(); + } + + + /** + * Uses a priority queue to compute the xor aggregate. + * + * The content of the container is overwritten. 
+ * + * This algorithm runs in linearithmic time (O(n log n)) with respect to the number of bitmaps. + * + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void xorToContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException( + "We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue( + bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap32 a, + EWAHCompressedBitmap32 b) { + return a.sizeInBytes() + - b.sizeInBytes(); + } + } + ); + Collections.addAll(pq, bitmaps); + while (pq.size() > 2) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + pq.poll().xorToContainer(pq.poll(), container); + } + - /** - * Compute the and aggregate using a temporary uncompressed bitmap. - * @param bitmaps the source bitmaps - * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) - * @return the or aggregate. - */ - public static EWAHCompressedBitmap32 bufferedand(final int bufsize, - final EWAHCompressedBitmap32... bitmaps) { - EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); - bufferedandWithContainer(answer,bufsize, bitmaps); - return answer; - } - /** - * Compute the and aggregate using a temporary uncompressed bitmap. - * - * @param container where the aggregate is written - * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) - * @param bitmaps the source bitmaps - */ - public static void bufferedandWithContainer(final BitmapStorage32 container,final int bufsize, - final EWAHCompressedBitmap32... 
bitmaps) { - - java.util.LinkedList al = new java.util.LinkedList(); - for (EWAHCompressedBitmap32 bitmap : bitmaps) { - al.add(new IteratingBufferedRunningLengthWord32(bitmap)); - } - int[] hardbitmap = new int[bufsize*bitmaps.length]; - - for(IteratingRLW32 i : al) - if (i.size() == 0) { - al.clear(); - break; - } - - while (!al.isEmpty()) { - Arrays.fill(hardbitmap, ~0); - int effective = Integer.MAX_VALUE; - for(IteratingRLW32 i : al) { - int eff = IteratorAggregation32.inplaceand(hardbitmap, i); - if (eff < effective) - effective = eff; - } - for (int k = 0; k < effective; ++k) - container.add(hardbitmap[k]); - for(IteratingRLW32 i : al) - if (i.size() == 0) { - al.clear(); - break; - } - } - } - - /** - * Compute the or aggregate using a temporary uncompressed bitmap. - * @param bitmaps the source bitmaps - * @param bufsize buffer size used during the computation in 64-bit words - * @return the or aggregate. - */ - public static EWAHCompressedBitmap32 bufferedor(final int bufsize, - final EWAHCompressedBitmap32... bitmaps) { - EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); - bufferedorWithContainer(answer, bufsize, bitmaps); - return answer; - } - - /** - * Compute the or aggregate using a temporary uncompressed bitmap. - * - * @param container where the aggregate is written - * @param bufsize buffer size used during the computation in 64-bit words - * @param bitmaps the source bitmaps - */ - public static void bufferedorWithContainer(final BitmapStorage32 container,final int bufsize, - final EWAHCompressedBitmap32... 
bitmaps) { - int range = 0; - EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); - Arrays.sort(sbitmaps, new Comparator() { - @Override - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return b.sizeinbits - a.sizeinbits; - } - }); - - java.util.ArrayList al = new java.util.ArrayList(); - for (EWAHCompressedBitmap32 bitmap : sbitmaps) { - if (bitmap.sizeinbits > range) - range = bitmap.sizeinbits; - al.add(new IteratingBufferedRunningLengthWord32(bitmap)); - } - int[] hardbitmap = new int[bufsize]; - int maxr = al.size(); - while (maxr > 0) { - int effective = 0; - for (int k = 0; k < maxr; ++k) { - if (al.get(k).size() > 0) { - int eff = IteratorAggregation32.inplaceor(hardbitmap, al.get(k)); - if (eff > effective) - effective = eff; - } else - maxr = k; - } - for (int k = 0; k < effective; ++k) - container.add(hardbitmap[k]); - Arrays.fill(hardbitmap, 0); - - } - container.setSizeInBits(range); - } - - /** - * Compute the xor aggregate using a temporary uncompressed bitmap. - * @param bitmaps the source bitmaps - * @param bufsize buffer size used during the computation in 64-bit words - * @return the xor aggregate. - */ - public static EWAHCompressedBitmap32 bufferedxor(final int bufsize, - final EWAHCompressedBitmap32... bitmaps) { - EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); - bufferedxorWithContainer(answer, bufsize, bitmaps); - return answer; - } - - - /** - * Compute the xor aggregate using a temporary uncompressed bitmap. - * - * @param container where the aggregate is written - * @param bufsize buffer size used during the computation in 64-bit words - * @param bitmaps the source bitmaps - */ - public static void bufferedxorWithContainer(final BitmapStorage32 container,final int bufsize, - final EWAHCompressedBitmap32... 
bitmaps) { - int range = 0; - EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); - Arrays.sort(sbitmaps, new Comparator() { - @Override - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return b.sizeinbits - a.sizeinbits; - } - }); - - java.util.ArrayList al = new java.util.ArrayList(); - for (EWAHCompressedBitmap32 bitmap : sbitmaps) { - if (bitmap.sizeinbits > range) - range = bitmap.sizeinbits; - al.add(new IteratingBufferedRunningLengthWord32(bitmap)); - } - int[] hardbitmap = new int[bufsize]; - int maxr = al.size(); - while (maxr > 0) { - int effective = 0; - for (int k = 0; k < maxr; ++k) { - if (al.get(k).size() > 0) { - int eff = IteratorAggregation32.inplacexor(hardbitmap, al.get(k)); - if (eff > effective) - effective = eff; - } else - maxr = k; - } - for (int k = 0; k < effective; ++k) - container.add(hardbitmap[k]); - Arrays.fill(hardbitmap, 0); - } - container.setSizeInBits(range); - } - - /** - * Uses a priority queue to compute the or aggregate. - * @param container where we write the result - * @param bitmaps to be aggregated - */ - public static void orToContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32 ... bitmaps) { - if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); - PriorityQueue pq = new PriorityQueue(bitmaps.length, - new Comparator() { - @Override - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (EWAHCompressedBitmap32 x : bitmaps) { - pq.add(x); - } - while (pq.size() > 2) { - EWAHCompressedBitmap32 x1 = pq.poll(); - EWAHCompressedBitmap32 x2 = pq.poll(); - pq.add(x1.or(x2)); - } - pq.poll().orToContainer(pq.poll(), container); - } - - - /** - * Uses a priority queue to compute the xor aggregate. 
- * @param container where we write the result - * @param bitmaps to be aggregated - */ - public static void xorToContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32 ... bitmaps) { - if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); - PriorityQueue pq = new PriorityQueue(bitmaps.length, - new Comparator() { - @Override - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return a.sizeInBytes() - b.sizeInBytes(); - } - }); - for (EWAHCompressedBitmap32 x : bitmaps) { - pq.add(x); - } - while (pq.size() > 2) { - EWAHCompressedBitmap32 x1 = pq.poll(); - EWAHCompressedBitmap32 x2 = pq.poll(); - pq.add(x1.xor(x2)); - } - pq.poll().xorToContainer(pq.poll(), container); - } - - /** - * For internal use. Computes the bitwise or of the provided bitmaps and - * stores the result in the container. (This used to be the default.) - * - * @deprecated use EWAHCompressedBitmap32.or instead - * @since 0.4.0 - * @param container where store the result - * @param bitmaps to be aggregated - */ - @Deprecated - public static void legacy_orWithContainer(final BitmapStorage32 container, - final EWAHCompressedBitmap32... bitmaps) { - if (bitmaps.length == 2) { - // should be more efficient - bitmaps[0].orToContainer(bitmaps[1], container); - return; - } - - // Sort the bitmaps in descending order by sizeinbits. We will exhaust the - // sorted bitmaps from right to left. - final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); - Arrays.sort(sortedBitmaps, new Comparator() { - @Override - public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { - return a.sizeinbits < b.sizeinbits ? 1 - : a.sizeinbits == b.sizeinbits ? 
0 : -1; - } - }); - - final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; - int maxAvailablePos = 0; - for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { - EWAHIterator32 iterator = bitmap.getEWAHIterator(); - if (iterator.hasNext()) { - rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( - iterator); - } - } - - if (maxAvailablePos == 0) { // this never happens... - container.setSizeInBits(0); - return; - } - - int maxSize = sortedBitmaps[0].sizeinbits; - - while (true) { - int maxOneRl = 0; - int minZeroRl = Integer.MAX_VALUE; - int minSize = Integer.MAX_VALUE; - int numEmptyRl = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - int size = rlw.size(); - if (size == 0) { - maxAvailablePos = i; - break; - } - minSize = Math.min(minSize, size); - - if (rlw.getRunningBit()) { - int rl = rlw.getRunningLength(); - maxOneRl = Math.max(maxOneRl, rl); - minZeroRl = 0; - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } else { - int rl = rlw.getRunningLength(); - minZeroRl = Math.min(minZeroRl, rl); - if (rl == 0 && size > 0) { - numEmptyRl++; - } - } - } - - if (maxAvailablePos == 0) { - break; - } else if (maxAvailablePos == 1) { - // only one bitmap is left so just write the rest of it out - rlws[0].discharge(container); - break; - } - - if (maxOneRl > 0) { - container.addStreamOfEmptyWords(true, maxOneRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(maxOneRl); - } - } else if (minZeroRl > 0) { - container.addStreamOfEmptyWords(false, minZeroRl); - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(minZeroRl); - } - } else { - int index = 0; - - if (numEmptyRl == 1) { - // if one rlw has literal words to process and the rest have a run of - // 0's we can write them out here - 
IteratingBufferedRunningLengthWord32 emptyRl = null; - int minNonEmptyRl = Integer.MAX_VALUE; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - int rl = rlw.getRunningLength(); - if (rl == 0) { - assert emptyRl == null; - emptyRl = rlw; - } else { - minNonEmptyRl = Math.min(minNonEmptyRl, rl); - } - } - int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; - if (emptyRl != null) - emptyRl.writeLiteralWords(wordsToWrite, container); - index += wordsToWrite; - } - - while (index < minSize) { - int word = 0; - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - if (rlw.getRunningLength() <= index) { - word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); - } - } - container.add(word); - index++; - } - for (int i = 0; i < maxAvailablePos; i++) { - IteratingBufferedRunningLengthWord32 rlw = rlws[i]; - rlw.discardFirstWords(minSize); - } - } - } - container.setSizeInBits(maxSize); - } - } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntArray.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntArray.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntArray.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntArray.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,237 @@ +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.Arrays; + +/** + * Int array wrapper. + * Users should not be concerned by this class. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +class IntArray implements Buffer32, Cloneable { + + /** + * Creates a buffer with default size + */ + public IntArray() { + this(DEFAULT_BUFFER_SIZE); + } + + /** + * Creates a buffer with explicit size + * @param bufferSize + */ + public IntArray(int bufferSize) { + if(bufferSize < 1) { + bufferSize = 1; + } + this.buffer = new int[bufferSize]; + } + + @Override + public int sizeInWords() { + return this.actualSizeInWords; + } + + @Override + public void ensureCapacity(int capacity) { + resizeBuffer(capacity - this.actualSizeInWords); + } + + @Override + public int getWord(int position) { + return this.buffer[position]; + } + + @Override + public int getLastWord() { + return getWord(this.actualSizeInWords - 1); + } + + @Override + public void clear() { + this.actualSizeInWords = 1; + this.buffer[0] = 0; + } + + @Override + public void trim() { + this.buffer = Arrays.copyOf(this.buffer, this.actualSizeInWords); + } + + @Override + public void setWord(int position, int word) { + this.buffer[position] = word; + } + + @Override + public void setLastWord(int word) { + setWord(this.actualSizeInWords - 1, word); + } + + @Override + public void push_back(int word) { + resizeBuffer(1); + this.buffer[this.actualSizeInWords++] = word; + } + + @Override + public void push_back(Buffer32 buffer, int start, int number) { + resizeBuffer(number); + if(buffer instanceof IntArray) { + int[] data = ((IntArray)buffer).buffer; + System.arraycopy(data, start, this.buffer, this.actualSizeInWords, number); + } else { + for(int i = 0; i < number; ++i) { + this.buffer[this.actualSizeInWords + i] = buffer.getWord(start + i); + } + } + this.actualSizeInWords += number; + } + + @Override + public void negative_push_back(Buffer32 buffer, int start, int number) { + resizeBuffer(number); + for (int i = 0; i < number; ++i) { + this.buffer[this.actualSizeInWords + i] = ~buffer.getWord(start + i); + } + this.actualSizeInWords += number; + } + + @Override + 
public void removeLastWord() { + setWord(--this.actualSizeInWords, 0); + } + + @Override + public void negateWord(int position) { + this.buffer[position] = ~this.buffer[position]; + } + + @Override + public void andWord(int position, int mask) { + this.buffer[position] &= mask; + } + + @Override + public void orWord(int position, int mask) { + this.buffer[position] |= mask; + } + + @Override + public void andLastWord(int mask) { + andWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void orLastWord(int mask) { + orWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void expand(int position, int length) { + resizeBuffer(length); + System.arraycopy(this.buffer, position, this.buffer, position + length, this.actualSizeInWords - position); + this.actualSizeInWords += length; + } + + @Override + public void collapse(int position, int length) { + System.arraycopy(this.buffer, position + length, this.buffer, position, this.actualSizeInWords - position - length); + for(int i = 0; i < length; ++i) { + removeLastWord(); + } + } + + @Override + public IntArray clone() { + IntArray clone = null; + try { + clone = (IntArray) super.clone(); + clone.buffer = this.buffer.clone(); + clone.actualSizeInWords = this.actualSizeInWords; + } catch (CloneNotSupportedException e) { + e.printStackTrace(); // cannot happen + } + return clone; + } + + + @Override + public void swap(final Buffer32 other) { + if(other instanceof IntArray) { + int[] tmp = this.buffer; + this.buffer = ((IntArray) other).buffer; + ((IntArray) other).buffer = tmp; + + int tmp2 = this.actualSizeInWords; + this.actualSizeInWords = ((IntArray) other).actualSizeInWords; + ((IntArray) other).actualSizeInWords = tmp2; + } else { + int[] tmp = new int[other.sizeInWords()]; + for(int i = 0; i < other.sizeInWords(); ++i) { + tmp[i] = other.getWord(i); + } + int tmp2 = other.sizeInWords(); + + other.clear(); + other.removeLastWord(); + other.push_back(this, 0, this.sizeInWords()); + + 
this.buffer = tmp; + this.actualSizeInWords = tmp2; + } + } + + /** + * Resizes the buffer if the number of words to add exceeds the buffer capacity. + * @param number the number of words to add + */ + private void resizeBuffer(int number) { + int size = newSizeInWords(number); + if (size >= this.buffer.length) { + int oldBuffer[] = this.buffer; + this.buffer = new int[size]; + System.arraycopy(oldBuffer, 0, this.buffer, 0, oldBuffer.length); + } + } + + /** + * Returns the resulting buffer size in words given the number of words to add. + * @param number the number of words to add + */ + private int newSizeInWords(int number) { + int size = this.actualSizeInWords + number; + if (size >= this.buffer.length) { + if (size < 32768) + size = size * 2; + else if (size * 3 / 2 < size) // overflow + size = Integer.MAX_VALUE; + else + size = size * 3 / 2; + } + return size; + } + + /** + * The actual size in words. + */ + private int actualSizeInWords = 1; + + /** + * The buffer (array of 32-bit words) + */ + private int buffer[] = null; + + /** + * The Constant DEFAULT_BUFFER_SIZE: default memory allocation when the + * object is constructed. + */ + private static final int DEFAULT_BUFFER_SIZE = 4; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntBufferWrapper.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntBufferWrapper.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntBufferWrapper.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntBufferWrapper.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,169 @@ +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import java.nio.IntBuffer; + + + +/** + * java.nio.IntBuffer wrapper. 
+ * Users should not be concerned by this class. + * + * @author Gregory Ssi-Yan-Kai + */ +final class IntBufferWrapper implements Buffer32, Cloneable { + + public IntBufferWrapper(IntBuffer buffer) { + this.buffer = buffer; + } + + public IntBufferWrapper(IntBuffer slice, int sizeInWords) { + this.buffer = slice; + this.actualSizeInWords = sizeInWords; + } + + + @Override + public int sizeInWords() { + return this.actualSizeInWords; + } + + @Override + public void ensureCapacity(int capacity) { + if(capacity > buffer.capacity()) { + throw new RuntimeException("Cannot increase buffer capacity. Current capacity: " + buffer.capacity() + ". New capacity: " + capacity); + } + } + + @Override + public int getWord(int position) { + return this.buffer.get(position); + } + + @Override + public int getLastWord() { + return getWord(this.actualSizeInWords - 1); + } + + @Override + public void clear() { + this.actualSizeInWords = 1; + setWord(0, 0); + } + + @Override + public void trim() { + } + + @Override + public void setWord(int position, int word) { + this.buffer.put(position, word); + } + + @Override + public void setLastWord(int word) { + setWord(this.actualSizeInWords - 1, word); + } + + @Override + public void push_back(int word) { + setWord(this.actualSizeInWords++, word); + } + + @Override + public void push_back(Buffer32 buffer, int start, int number) { + for(int i = 0; i < number; ++i) { + push_back(buffer.getWord(start + i)); + } + } + + @Override + public void negative_push_back(Buffer32 buffer, int start, int number) { + for(int i = 0; i < number; ++i) { + push_back(~buffer.getWord(start + i)); + } + } + + @Override + public void removeLastWord() { + setWord(--this.actualSizeInWords, 0); + } + + @Override + public void negateWord(int position) { + setWord(position, ~getWord(position)); + } + + @Override + public void andWord(int position, int mask) { + setWord(position, getWord(position) & mask); + } + + @Override + public void orWord(int position, int mask) { 
+ setWord(position, getWord(position) | mask); + } + + @Override + public void andLastWord(int mask) { + andWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void orLastWord(int mask) { + orWord(this.actualSizeInWords - 1, mask); + } + + @Override + public void expand(int position, int length) { + for(int i = this.actualSizeInWords - position - 1; i >= 0; --i) { + setWord(position + length + i, getWord(position + i)); + } + this.actualSizeInWords += length; + } + + @Override + public void collapse(int position, int length) { + for(int i = 0; i < this.actualSizeInWords - position - length; ++i) { + setWord(position + i, getWord(position + length + i)); + } + for(int i = 0; i < length; ++i) { + removeLastWord(); + } + } + + @Override + public IntBufferWrapper clone() throws CloneNotSupportedException { + return new IntBufferWrapper(this.buffer, this.actualSizeInWords); + } + + @Override + public void swap(final Buffer32 other) { + if (other instanceof IntBufferWrapper) {// optimized version + IntBufferWrapper o = (IntBufferWrapper) other; + IntBuffer tmp = this.buffer; + int tmp2 = this.actualSizeInWords; + this.actualSizeInWords = o.actualSizeInWords; + this.buffer = o.buffer; + o.actualSizeInWords = tmp2; + o.buffer = tmp; + } else { + other.swap(this); + } + } + + /** + * The actual size in words. + */ + private int actualSizeInWords = 1; + + /** + * The buffer + */ + private IntBuffer buffer; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java 2019-11-08 21:55:59.000000000 +0000 @@ -5,86 +5,87 @@ * Licensed under the Apache License, Version 2.0. 
*/ -import static com.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; - import com.googlecode.javaewah.IntIterator; +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; + /** * The IntIteratorImpl32 is the 32 bit implementation of the IntIterator * interface, which efficiently returns the stream of integers represented by an * EWAHIterator32. - * + * * @author Colby Ranger * @since 0.5.6 */ final class IntIteratorImpl32 implements IntIterator { - private final EWAHIterator32 ewahIter; - private final int[] ewahBuffer; - private int position; - private int runningLength; - private int word; - private int wordPosition; - private int wordLength; - private int literalPosition; - private boolean hasnext; - - IntIteratorImpl32(EWAHIterator32 ewahIter) { - this.ewahIter = ewahIter; - this.ewahBuffer = ewahIter.buffer(); - this.hasnext = this.moveToNext(); - } - - public final boolean moveToNext() { - while (!runningHasNext() && !literalHasNext()) { - if (!this.ewahIter.hasNext()) { - return false; - } - setRunningLengthWord(this.ewahIter.next()); - } - return true; - } - - @Override - public final boolean hasNext() { - return this.hasnext; - } - - @Override - public final int next() { - final int answer; - if (runningHasNext()) { - answer = this.position++; - } else { - final int bit = Long.numberOfTrailingZeros(this.word); - this.word ^= (1l << bit); - answer = this.literalPosition + bit; - } - this.hasnext = this.moveToNext(); - return answer; - } - - private final void setRunningLengthWord(RunningLengthWord32 rlw) { - this.runningLength = wordinbits * rlw.getRunningLength() - + this.position; - if (!rlw.getRunningBit()) { - this.position = this.runningLength; - } - - this.wordPosition = this.ewahIter.literalWords(); - this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); - } - - private final boolean runningHasNext() { - return this.position < this.runningLength; - } - - private final boolean literalHasNext() { - while 
(this.word == 0 && this.wordPosition < this.wordLength) { - this.word = this.ewahBuffer[this.wordPosition++]; - this.literalPosition = this.position; - this.position += wordinbits; - } - return this.word != 0; - } + private final EWAHIterator32 ewahIter; + private final Buffer32 buffer; + private int position; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + IntIteratorImpl32(EWAHIterator32 ewahIter) { + this.ewahIter = ewahIter; + this.buffer = ewahIter.buffer(); + this.hasnext = this.moveToNext(); + } + + public boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (!this.ewahIter.hasNext()) { + return false; + } + setRunningLengthWord(this.ewahIter.next()); + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasnext; + } + + @Override + public int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int t = this.word & -this.word; + answer = this.literalPosition + Integer.bitCount(t - 1); + this.word ^= t; + } + this.hasnext = this.moveToNext(); + return answer; + } + + private void setRunningLengthWord(RunningLengthWord32 rlw) { + this.runningLength = WORD_IN_BITS * rlw.getRunningLength() + + this.position; + if (!rlw.getRunningBit()) { + this.position = this.runningLength; + } + + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + + rlw.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.buffer.getWord(this.wordPosition++); + this.literalPosition = this.position; + this.position += WORD_IN_BITS; + } + return this.word != 0; + } } diff -Nru 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,91 +1,91 @@ package com.googlecode.javaewah32; -import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; - import com.googlecode.javaewah.IntIterator; +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; + /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** * Implementation of an IntIterator over an IteratingRLW. 
- * - * */ public class IntIteratorOverIteratingRLW32 implements IntIterator { - IteratingRLW32 parent; - private int position; - private int runningLength; - private int word; - private int wordPosition; - private int wordLength; - private int literalPosition; - private boolean hasnext; - - /** - * @param p iterator we wish to iterate over - */ - public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { - this.parent = p; - this.position = 0; + final IteratingRLW32 parent; + private int position; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasNext; + + /** + * @param p iterator we wish to iterate over + */ + public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { + this.parent = p; + this.position = 0; + setupForCurrentRunningLengthWord(); + this.hasNext = moveToNext(); + } + + /** + * @return whether we could find another set bit; don't move if there is + * an unprocessed value + */ + private boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (this.parent.next()) setupForCurrentRunningLengthWord(); - this.hasnext = moveToNext(); - } - - /** - * @return whether we could find another set bit; don't move if there is an unprocessed value - */ - private final boolean moveToNext() { - while (!runningHasNext() && !literalHasNext()) { - if (this.parent.next()) - setupForCurrentRunningLengthWord(); - else return false; - } - return true; - } - - @Override - public boolean hasNext() { - return this.hasnext; + else + return false; } + return true; + } - @Override - public final int next() { - final int answer; - if (runningHasNext()) { - answer = this.position++; - } else { - final int bit = Long.numberOfTrailingZeros(this.word); - this.word ^= (1l << bit); - answer = this.literalPosition + bit; - } - this.hasnext = this.moveToNext(); - return answer; - } - - private final void setupForCurrentRunningLengthWord() { - 
this.runningLength = wordinbits * this.parent.getRunningLength() - + this.position; - - if (!this.parent.getRunningBit()) { - this.position = this.runningLength; - } - this.wordPosition = 0; - this.wordLength = this.parent.getNumberOfLiteralWords(); - } - - private final boolean runningHasNext() { - return this.position < this.runningLength; - } - - private final boolean literalHasNext() { - while (this.word == 0 && this.wordPosition < this.wordLength) { - this.word = this.parent.getLiteralWordAt(this.wordPosition++); - this.literalPosition = this.position; - this.position += wordinbits; - } - return this.word != 0; + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int t = this.word & -this.word; + answer = this.literalPosition + Integer.bitCount(t - 1); + this.word ^= t; + } + this.hasNext = this.moveToNext(); + return answer; + } + + private void setupForCurrentRunningLengthWord() { + this.runningLength = WORD_IN_BITS + * this.parent.getRunningLength() + this.position; + + if (!this.parent.getRunningBit()) { + this.position = this.runningLength; + } + this.wordPosition = 0; + this.wordLength = this.parent.getNumberOfLiteralWords(); + } + + private boolean runningHasNext() { + return this.position < this.runningLength; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.parent.getLiteralWordAt(this.wordPosition++); + this.literalPosition = this.position; + this.position += WORD_IN_BITS; } + return this.word != 0; + } } - diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java --- 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,274 +1,322 @@ package com.googlecode.javaewah32; - - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** - * Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically - * advances to the next BufferedRunningLengthWord32 as words are discarded. + * Mostly for internal use. Similar to BufferedRunningLengthWord32, but + * automatically advances to the next BufferedRunningLengthWord32 as words are + * discarded. * - * @since 0.5.0 * @author Daniel Lemire and David McIntosh + * @since 0.5.0 */ -public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable { - /** - * Instantiates a new iterating buffered running length word. - * - * @param iterator iterator - */ - public IteratingBufferedRunningLengthWord32(final EWAHIterator32 iterator) { - this.iterator = iterator; - this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; - this.buffer = this.iterator.buffer(); - } - - - /** - * Instantiates a new iterating buffered running length word. 
- * @param bitmap over which we want to iterate - * - */ - public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) { - this(EWAHIterator32.getEWAHIterator(bitmap)); - } - +public final class IteratingBufferedRunningLengthWord32 implements + IteratingRLW32, Cloneable { + /** + * Instantiates a new iterating buffered running length word. + * + * @param iterator iterator + */ + public IteratingBufferedRunningLengthWord32( + final EWAHIterator32 iterator) { + this.iterator = iterator; + this.brlw = new BufferedRunningLengthWord32( + this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + + this.brlw.literalWordOffset; + this.buffer = this.iterator.buffer(); + } - /** - * Discard first words, iterating to the next running length word if needed. - * - * @param x the x - */ - @Override -public void discardFirstWords(int x) { + /** + * Instantiates a new iterating buffered running length word. + * + * @param bitmap over which we want to iterate + */ + public IteratingBufferedRunningLengthWord32( + final EWAHCompressedBitmap32 bitmap) { + this(EWAHIterator32.getEWAHIterator(bitmap)); + } + /** + * Discard first words, iterating to the next running length word if + * needed. + * + * @param x the x + */ + @Override + public void discardFirstWords(int x) { + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + int toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords + : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.iterator.hasNext()) { + break; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator + .literalWords(); + } + } + } - while (x > 0) { - if (this.brlw.RunningLength > x) { - this.brlw.RunningLength -= x; + @Override + public void discardLiteralWords(int x) { + this.literalWordStartPosition += x; + this.brlw.NumberOfLiteralWords -= x; + if (this.brlw.NumberOfLiteralWords == 0) { + if (!this.iterator.hasNext()) { return; } - x -= this.brlw.RunningLength; - this.brlw.RunningLength = 0; - int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; - - this.literalWordStartPosition += toDiscard; - this.brlw.NumberOfLiteralWords -= toDiscard; - x -= toDiscard; - if ((x > 0) || (this.brlw.size() == 0)) { + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); + } + } + + + @Override + public void discardRunningWords() { + this.brlw.RunningLength = 0; + if (this.brlw.getNumberOfLiteralWords() == 0) + this.next(); + } + + /** + * Write out up to max words, returns how many were written + * + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public int discharge(BitmapStorage32 container, int max) { + int index = 0; + while (true) { + if (index + getRunningLength() > max) { + final int offset = max - index; + container.addStreamOfEmptyWords(getRunningBit(), offset); + this.brlw.RunningLength -= offset; + return max; + } + container.addStreamOfEmptyWords(getRunningBit(), getRunningLength()); + index += getRunningLength(); + if (getNumberOfLiteralWords() + index > max) { + final int offset = max - index; + writeLiteralWords(offset, container); + this.brlw.RunningLength = 0; + 
this.brlw.NumberOfLiteralWords -= offset; + this.literalWordStartPosition += offset; + return max; + } + writeLiteralWords(getNumberOfLiteralWords(), container); + index += getNumberOfLiteralWords(); + if(!next()) break; + } + return index; + } + + + /** + * Write out up to max words (negated), returns how many were written + * + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public int dischargeNegated(BitmapStorage32 container, int max) { + int index = 0; + while ((index < max) && (size() > 0)) { + // first run + int pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.addStreamOfEmptyWords(!getRunningBit(), pl); + index += pl; + int pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = max - index; + } + writeNegatedLiteralWords(pd, container); + discardFirstWords(pl + pd); + index += pd; + } + return index; + } + + /** + * Move to the next RunningLengthWord + * + * @return whether the move was possible + */ + @Override + public boolean next() { if (!this.iterator.hasNext()) { - break; + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; } this.brlw.reset(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset == 0; - } + this.literalWordStartPosition = this.iterator.literalWords(); // + + // this.brlw.literalWordOffset + // ==0 + return true; } - } - /** - * Write out up to max words, returns how many were written - * @param container target for writes - * @param max maximal number of writes - * @return how many written - */ - public int discharge(BitmapStorage32 container, int max) { - int index = 0; - while ((index < max) && (size() > 0)) { - // first run - int pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - container.addStreamOfEmptyWords(getRunningBit(), pl); - index += pl; - int pd = getNumberOfLiteralWords(); - if (pd + 
index > max) { - pd = max - index; - } - writeLiteralWords(pd, container); - discardFirstWords(pl+pd); - index += pd; + + /** + * Write out the remain words, transforming them to zeroes. + * + * @param container target for writes + */ + public void dischargeAsEmpty(BitmapStorage32 container) { + while (size() > 0) { + container.addStreamOfEmptyWords(false, size()); + discardFirstWords(size()); + } } - return index; - } - /** - * Write out up to max words (negated), returns how many were written - * @param container target for writes - * @param max maximal number of writes - * @return how many written - */ - public int dischargeNegated(BitmapStorage32 container, int max) { - int index = 0; - while ((index < max) && (size() > 0)) { - // first run - int pl = getRunningLength(); - if (index + pl > max) { - pl = max - index; - } - container.addStreamOfEmptyWords(!getRunningBit(), pl); - index += pl; - int pd = getNumberOfLiteralWords(); - if (pd + index > max) { - pd = max - index; - } - writeNegatedLiteralWords(pd, container); - discardFirstWords(pl+pd); - index += pd; + /** + * Write out the remaining words + * + * @param container target for writes + */ + public void discharge(BitmapStorage32 container) { + // fix the offset + this.brlw.literalWordOffset = this.literalWordStartPosition + - this.iterator.literalWords(); + discharge(this.brlw, this.iterator, container); } - return index; - } - /** - * Move to the next RunningLengthWord - * @return whether the move was possible - */ - @Override -public boolean next() { - if (!this.iterator.hasNext()) { - this.brlw.NumberOfLiteralWords = 0; - this.brlw.RunningLength = 0; - return false; - } - this.brlw.reset(this.iterator.next()); - this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 - return true; - } + /** + * Get the nth literal word for the current running length word + * + * @param index zero based index + * @return the literal word + */ + @Override + public int 
getLiteralWordAt(int index) { + return this.buffer.getWord(this.literalWordStartPosition + index); + } - /** - * Write out the remain words, transforming them to zeroes. - * @param container target for writes - */ - public void dischargeAsEmpty(BitmapStorage32 container) { - while(size()>0) { - container.addStreamOfEmptyWords(false, size()); - discardFirstWords(size()); + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; } - } - - /** - * Write out the remaining words - * @param container target for writes - */ - public void discharge(BitmapStorage32 container) { - // fix the offset - this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); - discharge(this.brlw, this.iterator, container); - } - /** - * Get the nth literal word for the current running length word - * @param index zero based index - * @return the literal word - */ - @Override -public int getLiteralWordAt(int index) { - return this.buffer[this.literalWordStartPosition + index]; - } + /** + * Gets the running bit. + * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.brlw.RunningBit; + } - /** - * Gets the number of literal words for the current running length word. - * - * @return the number of literal words - */ - @Override -public int getNumberOfLiteralWords() { - return this.brlw.NumberOfLiteralWords; - } + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public int getRunningLength() { + return this.brlw.RunningLength; + } - /** - * Gets the running bit. - * - * @return the running bit - */ - @Override -public boolean getRunningBit() { - return this.brlw.RunningBit; - } - - /** - * Gets the running length. 
- * - * @return the running length - */ - @Override -public int getRunningLength() { - return this.brlw.RunningLength; - } - - /** - * Size in uncompressed words of the current running length word. - * - * @return the int - */ - @Override -public int size() { - return this.brlw.size(); - } - - /** - * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. - * @param numWords number of words to be written - * @param container where we write the data - */ - public void writeLiteralWords(int numWords, BitmapStorage32 container) { - container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); - } - + /** + * Size in uncompressed words of the current running length word. + * + * @return the int + */ + @Override + public int size() { + return this.brlw.size(); + } - /** - * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. - * @param numWords number of words to be written - * @param container where we write the data - */ - public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) { - container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); - } - + /** + * write the first N literal words to the target bitmap. Does not + * discard the words or perform iteration. + * + * @param numWords number of words to be written + * @param container where we write the data + */ + public void writeLiteralWords(int numWords, BitmapStorage32 container) { + container.addStreamOfLiteralWords(this.buffer, + this.literalWordStartPosition, numWords); + } - /** - * For internal use. (One could use the non-static discharge method instead, - * but we expect them to be slower.) 
- * - * @param initialWord - * the initial word - * @param iterator - * the iterator - * @param container - * the container - */ - protected static void discharge( - final BufferedRunningLengthWord32 initialWord, - final EWAHIterator32 iterator, final BitmapStorage32 container) { - BufferedRunningLengthWord32 runningLengthWord = initialWord; - for (;;) { - final int runningLength = runningLengthWord.getRunningLength(); - container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), - runningLength); - container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() - + runningLengthWord.literalwordoffset, - runningLengthWord.getNumberOfLiteralWords()); - if (!iterator.hasNext()) - break; - runningLengthWord = new BufferedRunningLengthWord32(iterator.next()); + /** + * write the first N literal words (negated) to the target bitmap. Does + * not discard the words or perform iteration. + * + * @param numWords number of words to be written + * @param container where we write the data + */ + public void writeNegatedLiteralWords(int numWords, + BitmapStorage32 container) { + container.addStreamOfNegatedLiteralWords(this.buffer, + this.literalWordStartPosition, numWords); } - } - + + /** + * For internal use. (One could use the non-static discharge method + * instead, but we expect them to be slower.) 
+ * + * @param initialWord the initial word + * @param iterator the iterator + * @param container the container + */ + protected static void discharge( + final BufferedRunningLengthWord32 initialWord, + final EWAHIterator32 iterator, final BitmapStorage32 container) { + BufferedRunningLengthWord32 runningLengthWord = initialWord; + for (; ; ) { + final int runningLength = runningLengthWord + .getRunningLength(); + container.addStreamOfEmptyWords( + runningLengthWord.getRunningBit(), + runningLength); + container.addStreamOfLiteralWords(iterator.buffer(), + iterator.literalWords() + + runningLengthWord.literalWordOffset, + runningLengthWord.getNumberOfLiteralWords() + ); + if (!iterator.hasNext()) + break; + runningLengthWord = new BufferedRunningLengthWord32( + iterator.next()); + } + } + + @Override + public IteratingBufferedRunningLengthWord32 clone() + throws CloneNotSupportedException { + IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super + .clone(); + answer.brlw = this.brlw.clone(); + answer.iterator = this.iterator.clone(); + return answer; + } + + private BufferedRunningLengthWord32 brlw; + private final Buffer32 buffer; + private int literalWordStartPosition; + private EWAHIterator32 iterator; - @Override -public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException { - IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone(); - answer.brlw = this.brlw.clone(); - answer.buffer = this.buffer; - answer.iterator = this.iterator.clone(); - answer.literalWordStartPosition = this.literalWordStartPosition; - return answer; - } - - private BufferedRunningLengthWord32 brlw; - private int[] buffer; - private int literalWordStartPosition; - private EWAHIterator32 iterator; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java --- 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,42 +1,65 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** * High-level iterator over a compressed bitmap. - * + * */ public interface IteratingRLW32 { - /** - * @return whether there is more - */ - public boolean next() ; - /** - * @param index where the literal word is - * @return the literal word at the given index. - */ - public int getLiteralWordAt(int index); - /** - * @return the number of literal (non-fill) words - */ - public int getNumberOfLiteralWords() ; - /** - * @return the bit used for the fill bits - */ - public boolean getRunningBit() ; - /** - * @return sum of getRunningLength() and getNumberOfLiteralWords() - */ - public int size() ; - /** - * @return length of the run of fill words - */ - public int getRunningLength() ; - /** - * @param x the number of words to discard - */ - public void discardFirstWords(int x); + /** + * @return whether there is more + */ + boolean next(); + + /** + * @param index where the literal word is + * @return the literal word at the given index. 
+ */ + int getLiteralWordAt(int index); + + /** + * @return the number of literal (non-fill) words + */ + int getNumberOfLiteralWords(); + + /** + * @return the bit used for the fill bits + */ + boolean getRunningBit(); + + /** + * @return sum of getRunningLength() and getNumberOfLiteralWords() + */ + int size(); + + /** + * @return length of the run of fill words + */ + int getRunningLength(); + + /** + * @param x the number of words to discard + */ + void discardFirstWords(int x); + + /** + * Discard all running words + */ + void discardRunningWords(); + + /** + * Discard x literal words (assumes that there is no running word) + * @param x the number of words to discard + */ + void discardLiteralWords(int x); + + /** + * @return a copy of the iterator + * @throws CloneNotSupportedException this should not be thrown in theory + */ + IteratingRLW32 clone() throws CloneNotSupportedException; } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,601 +1,675 @@ package com.googlecode.javaewah32; +import com.googlecode.javaewah.CloneableIterator; + import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; -import com.googlecode.javaewah.CloneableIterator; - - - /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. 
*/ /** * Set of helper functions to aggregate bitmaps. - * */ -public class IteratorAggregation32 { - /** - * @param x iterator to negate - * @return negated version of the iterator - */ - public static IteratingRLW32 not(final IteratingRLW32 x) { - return new IteratingRLW32() { - - @Override - public boolean next() { - return x.next(); - } +public final class IteratorAggregation32 { - @Override - public int getLiteralWordAt(int index) { - return ~x.getLiteralWordAt(index); - } - - @Override - public int getNumberOfLiteralWords() { - return x.getNumberOfLiteralWords(); - } + /** Private constructor to prevent instantiation */ + private IteratorAggregation32() {} - @Override - public boolean getRunningBit() { - return ! x.getRunningBit(); - } - - @Override - public int size() { - return x.size(); - } + /** + * @param x iterator to negate + * @return negated version of the iterator + */ + public static IteratingRLW32 not(final IteratingRLW32 x) { + return new IteratingRLW32() { + + @Override + public boolean next() { + return x.next(); + } + + @Override + public int getLiteralWordAt(int index) { + return ~x.getLiteralWordAt(index); + } + + @Override + public int getNumberOfLiteralWords() { + return x.getNumberOfLiteralWords(); + } + + @Override + public boolean getRunningBit() { + return !x.getRunningBit(); + } + + @Override + public int size() { + return x.size(); + } + + @Override + public int getRunningLength() { + return x.getRunningLength(); + } + + @Override + public void discardFirstWords(int y) { + x.discardFirstWords(y); + } + + @Override + public void discardRunningWords() { + x.discardRunningWords(); + } + + @Override + public IteratingRLW32 clone() + throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } @Override - public int getRunningLength() { - return x.getRunningLength(); + public void discardLiteralWords(int y) { + x.discardLiteralWords(y); } + }; + } - @Override - public void discardFirstWords(int y) { - 
x.discardFirstWords(y); - } - - }; - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @return and aggregate - */ - public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { - return bufferedand (DEFAULTMAXBUFSIZE,al); - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @param bufsize size of the internal buffer used by the iterator in 64-bit words - * @return and aggregate - */ - public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... al) { - if (al.length == 0) - throw new IllegalArgumentException("Need at least one iterator"); - if (al.length == 1) - return al[0]; - final LinkedList basell = new LinkedList(); - for (IteratingRLW32 i : al) - basell.add(i); - return new BufferedIterator32(new AndIt(basell,bufsize)); - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @return or aggregate - */ - public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { - return bufferedor(DEFAULTMAXBUFSIZE,al); - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @param bufsize size of the internal buffer used by the iterator in 64-bit words - * @return or aggregate - */ - public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... al) { - if (al.length == 0) - throw new IllegalArgumentException("Need at least one iterator"); - if (al.length == 1) - return al[0]; - - final LinkedList basell = new LinkedList(); - for (IteratingRLW32 i : al) - basell.add(i); - return new BufferedIterator32(new ORIt(basell,bufsize)); - } - - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @return xor aggregate - */ - public static IteratingRLW32 bufferedxor(final IteratingRLW32... 
al) { - return bufferedxor (DEFAULTMAXBUFSIZE,al); - } - /** - * Aggregate the iterators using a bitmap buffer. - * - * @param al iterators to aggregate - * @param bufsize size of the internal buffer used by the iterator in 64-bit words - * @return xor aggregate - */ - public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... al) { - if (al.length == 0) - throw new IllegalArgumentException("Need at least one iterator"); - if (al.length == 1) - return al[0]; - - final LinkedList basell = new LinkedList(); - for (IteratingRLW32 i : al) - basell.add(i); - return new BufferedIterator32(new XORIt(basell,bufsize)); - } - /** - * Write out the content of the iterator, but as if it were all zeros. - * - * @param container - * where we write - * @param i - * the iterator - */ - protected static void dischargeAsEmpty(final BitmapStorage32 container, - final IteratingRLW32 i) { - while (i.size() > 0) { - container.addStreamOfEmptyWords(false, i.size()); - i.next(); - - } - } - - /** - * Write out up to max words, returns how many were written - * @param container target for writes - * @param i source of data - * @param max maximal number of writes - * @return how many written - */ - protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) { - int counter = 0; - while (i.size() > 0 && counter < max) { - int L1 = i.getRunningLength(); - if (L1 > 0) { - if (L1 + counter > max) - L1 = max - counter; - container.addStreamOfEmptyWords(i.getRunningBit(), L1); - counter += L1; - } - int L = i.getNumberOfLiteralWords(); - if(L + counter > max) L = max - counter; - for (int k = 0; k < L; ++k) { - container.add(i.getLiteralWordAt(k)); - } - counter += L; - i.discardFirstWords(L+L1); - } - return counter; - } - - /** - * Write out up to max negated words, returns how many were written - * @param container target for writes - * @param i source of data - * @param max maximal number of writes - * @return how many written - */ - 
protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) { - int counter = 0; - while (i.size() > 0 && counter < max) { - int L1 = i.getRunningLength(); - if (L1 > 0) { - if (L1 + counter > max) - L1 = max - counter; - container.addStreamOfEmptyWords(i.getRunningBit(), L1); - counter += L1; - } - int L = i.getNumberOfLiteralWords(); - if(L + counter > max) L = max - counter; - for (int k = 0; k < L; ++k) { - container.add(i.getLiteralWordAt(k)); - } - counter += L; - i.discardFirstWords(L+L1); - } - return counter; - } - - static void andToContainer(final BitmapStorage32 container, - int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { - while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; - final IteratingRLW32 predator = i_is_prey ? 
rlwj - : rlwi; - if (predator.getRunningBit() == false) { - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else { - final int index = discharge(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - desiredrlwcount -= nbre_literal; - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - } - - static void andToContainer(final BitmapStorage32 container, - final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { - while ((rlwi.size()>0) && (rlwj.size()>0) ) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; - final IteratingRLW32 predator = i_is_prey ? 
rlwj - : rlwi; - if (predator.getRunningBit() == false) { - container.addStreamOfEmptyWords(false, predator.getRunningLength()); - prey.discardFirstWords(predator.getRunningLength()); - predator.discardFirstWords(predator.getRunningLength()); - } else { - final int index = discharge(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - } - - - /** - * Compute the first few words of the XOR aggregate between two iterators. - * - * @param container where to write - * @param desiredrlwcount number of words to be written (max) - * @param rlwi first iterator to aggregate - * @param rlwj second iterator to aggregate - */ - public static void xorToContainer(final BitmapStorage32 container, - int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { - while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { - while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { - final boolean i_is_prey = rlwi.getRunningLength() < rlwj - .getRunningLength(); - final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; - final IteratingRLW32 predator = i_is_prey ? 
rlwj - : rlwi; - if (predator.getRunningBit() == false) { - int index = discharge(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(false, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } else { - int index = dischargeNegated(container, prey, predator.getRunningLength()); - container.addStreamOfEmptyWords(true, predator.getRunningLength() - - index); - predator.discardFirstWords(predator.getRunningLength()); - } - } - final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), - rlwj.getNumberOfLiteralWords()); - if (nbre_literal > 0) { - desiredrlwcount -= nbre_literal; - for (int k = 0; k < nbre_literal; ++k) - container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); - rlwi.discardFirstWords(nbre_literal); - rlwj.discardFirstWords(nbre_literal); - } - } - } - - protected static int inplaceor(int[] bitmap, - IteratingRLW32 i) { - int pos = 0; - int s; - while ((s = i.size()) > 0) { - if (pos + s < bitmap.length) { - final int L = i.getRunningLength(); - if (i.getRunningBit()) - java.util.Arrays.fill(bitmap, pos, pos + L, ~0); - pos += L; - final int LR = i.getNumberOfLiteralWords(); - for (int k = 0; k < LR; ++k) - bitmap[pos++] |= i.getLiteralWordAt(k); - if (!i.next()) { - return pos; - } - } else { - int howmany = bitmap.length - pos; - int L = i.getRunningLength(); - if (pos + L > bitmap.length) { - if (i.getRunningBit()) { - java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0); - } - i.discardFirstWords(howmany); - return bitmap.length; - } - if (i.getRunningBit()) - java.util.Arrays.fill(bitmap, pos, pos + L, ~0); - pos += L; - for (int k = 0; pos < bitmap.length; ++k) - bitmap[pos++] |= i.getLiteralWordAt(k); - i.discardFirstWords(howmany); - return pos; - } - } - return pos; - } - - - protected static int inplacexor(int[] bitmap, - IteratingRLW32 i) { - int pos = 0; - int s; - while ((s = i.size()) > 0) { - if (pos + s < bitmap.length) { - final int L = 
i.getRunningLength(); - if (i.getRunningBit()) { - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = ~bitmap[k]; - } - pos += L; - final int LR = i.getNumberOfLiteralWords(); - for (int k = 0; k < LR; ++k) - bitmap[pos++] ^= i.getLiteralWordAt(k); - if (!i.next()) { - return pos; - } - } else { - int howmany = bitmap.length - pos; - int L = i.getRunningLength(); - if (pos + L > bitmap.length) { - if (i.getRunningBit()) { - for(int k = pos ; k < bitmap.length; ++k) - bitmap[k] = ~bitmap[k]; - } - i.discardFirstWords(howmany); - return bitmap.length; - } - if (i.getRunningBit()) - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = ~bitmap[k]; - pos += L; - for (int k = 0; pos < bitmap.length; ++k) - bitmap[pos++] ^= i.getLiteralWordAt(k); - i.discardFirstWords(howmany); - return pos; - } - } - return pos; - } - protected static int inplaceand(int[] bitmap, - IteratingRLW32 i) { - int pos = 0; - int s; - while ((s = i.size()) > 0) { - if (pos + s < bitmap.length) { - final int L = i.getRunningLength(); - if (!i.getRunningBit()) { - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = 0; - } - pos += L; - final int LR = i.getNumberOfLiteralWords(); - for (int k = 0; k < LR; ++k) - bitmap[pos++] &= i.getLiteralWordAt(k); - if (!i.next()) { - return pos; - } - } else { - int howmany = bitmap.length - pos; - int L = i.getRunningLength(); - if (pos + L > bitmap.length) { - if (!i.getRunningBit()) { - for(int k = pos ; k < bitmap.length; ++k) - bitmap[k] = 0; - } - i.discardFirstWords(howmany); - return bitmap.length; - } - if (!i.getRunningBit()) - for(int k = pos ; k < pos + L; ++k) - bitmap[k] = 0; - pos += L; - for (int k = 0; pos < bitmap.length; ++k) - bitmap[pos++] &= i.getLiteralWordAt(k); - i.discardFirstWords(howmany); - return pos; - } - } - return pos; - } - - /** - * An optimization option. Larger values may improve speed, but at - * the expense of memory. 
- */ - public final static int DEFAULTMAXBUFSIZE = 65536; + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return and aggregate + */ + public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { + return bufferedand(DEFAULT_MAX_BUF_SIZE, al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufSize size of the internal buffer used by the iterator in + * 64-bit words + * @return and aggregate + */ + public static IteratingRLW32 bufferedand(final int bufSize, + final IteratingRLW32... al) { + if (al.length == 0) + throw new IllegalArgumentException( + "Need at least one iterator"); + if (al.length == 1) + return al[0]; + final LinkedList basell = new LinkedList(); + Collections.addAll(basell, al); + return new BufferedIterator32(new AndIt(basell, bufSize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return or aggregate + */ + public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { + return bufferedor(DEFAULT_MAX_BUF_SIZE, al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufSize size of the internal buffer used by the iterator in + * 64-bit words + * @return or aggregate + */ + public static IteratingRLW32 bufferedor(final int bufSize, + final IteratingRLW32... al) { + if (al.length == 0) + throw new IllegalArgumentException( + "Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + Collections.addAll(basell, al); + return new BufferedIterator32(new ORIt(basell, bufSize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return xor aggregate + */ + public static IteratingRLW32 bufferedxor(final IteratingRLW32... 
al) { + return bufferedxor(DEFAULT_MAX_BUF_SIZE, al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufSize size of the internal buffer used by the iterator in + * 64-bit words + * @return xor aggregate + */ + public static IteratingRLW32 bufferedxor(final int bufSize, + final IteratingRLW32... al) { + if (al.length == 0) + throw new IllegalArgumentException( + "Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + Collections.addAll(basell, al); + return new BufferedIterator32(new XORIt(basell, bufSize)); + } + + /** + * Write out the content of the iterator, but as if it were all zeros. + * + * @param container where we write + * @param i the iterator + */ + protected static void dischargeAsEmpty(final BitmapStorage32 container, + final IteratingRLW32 i) { + while (i.size() > 0) { + container.addStreamOfEmptyWords(false, i.size()); + i.next(); + + } + } + + /** + * Write out up to max words, returns how many were written + * + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static int discharge(final BitmapStorage32 container, + IteratingRLW32 i, int max) { + int counter = 0; + while (i.size() > 0 && counter < max) { + int l1 = i.getRunningLength(); + if (l1 > 0) { + if (l1 + counter > max) + l1 = max - counter; + container.addStreamOfEmptyWords( + i.getRunningBit(), l1); + counter += l1; + } + int l = i.getNumberOfLiteralWords(); + if (l + counter > max) + l = max - counter; + for (int k = 0; k < l; ++k) { + container.addWord(i.getLiteralWordAt(k)); + } + counter += l; + i.discardFirstWords(l + l1); + } + return counter; + } + + /** + * Write out up to max negated words, returns how many were written + * + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many 
written + */ + protected static int dischargeNegated(final BitmapStorage32 container, + IteratingRLW32 i, int max) { + int counter = 0; + while (i.size() > 0 && counter < max) { + int l1 = i.getRunningLength(); + if (l1 > 0) { + if (l1 + counter > max) + l1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), l1); + counter += l1; + } + int l = i.getNumberOfLiteralWords(); + if (l + counter > max) + l = max - counter; + for (int k = 0; k < l; ++k) { + container.addWord(i.getLiteralWordAt(k)); + } + counter += l; + i.discardFirstWords(l + l1); + } + return counter; + } + + static void andToContainer(final BitmapStorage32 container, + int desiredrlwcount, final IteratingRLW32 rlwi, + IteratingRLW32 rlwj) { + while ((rlwi.size() > 0) && (rlwj.size() > 0) + && (desiredrlwcount-- > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingRLW32 predator = i_is_prey ? 
rlwj + : rlwi; + if (!predator.getRunningBit()) { + container.addStreamOfEmptyWords(false, + predator.getRunningLength()); + prey.discardFirstWords(predator + .getRunningLength()); + predator.discardFirstWords(predator + .getRunningLength()); + } else { + final int index = discharge(container, + prey, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + predator.discardFirstWords(predator + .getRunningLength()); + } + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) + & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + static void andToContainer(final BitmapStorage32 container, + final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size() > 0) && (rlwj.size() > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingRLW32 predator = i_is_prey ? 
rlwj + : rlwi; + if (!predator.getRunningBit()) { + container.addStreamOfEmptyWords(false, + predator.getRunningLength()); + prey.discardFirstWords(predator + .getRunningLength()); + predator.discardFirstWords(predator + .getRunningLength()); + } else { + final int index = discharge(container, + prey, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + predator.discardFirstWords(predator + .getRunningLength()); + } + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) + & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + /** + * Compute the first few words of the XOR aggregate between two + * iterators. + * + * @param container where to write + * @param desiredrlwcount number of words to be written (max) + * @param rlwi first iterator to aggregate + * @param rlwj second iterator to aggregate + */ + public static void xorToContainer(final BitmapStorage32 container, + int desiredrlwcount, final IteratingRLW32 rlwi, + IteratingRLW32 rlwj) { + while ((rlwi.size() > 0) && (rlwj.size() > 0) + && (desiredrlwcount-- > 0)) { + while ((rlwi.getRunningLength() > 0) + || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi + .getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingRLW32 predator = i_is_prey ? 
rlwj + : rlwi; + if (!predator.getRunningBit()) { + int index = discharge(container, prey, + predator.getRunningLength()); + container.addStreamOfEmptyWords(false, + predator.getRunningLength() + - index + ); + predator.discardFirstWords(predator + .getRunningLength()); + } else { + int index = dischargeNegated(container, + prey, + predator.getRunningLength()); + container.addStreamOfEmptyWords(true, + predator.getRunningLength() + - index + ); + predator.discardFirstWords(predator + .getRunningLength()); + } + } + final int nbre_literal = Math.min( + rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.addWord(rlwi.getLiteralWordAt(k) + ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + protected static int inplaceor(int[] bitmap, IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + + L, ~0); + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int l = i.getRunningLength(); + if (pos + l > bitmap.length) { + if (i.getRunningBit()) { + java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0); + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + java.util.Arrays.fill(bitmap, pos, pos + l, ~0); + pos += l; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + protected static int inplacexor(int[] bitmap, IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) 
{ + final int L = i.getRunningLength(); + if (i.getRunningBit()) { + for (int k = pos; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howMany = bitmap.length - pos; + int l = i.getRunningLength(); + if (pos + l > bitmap.length) { + if (i.getRunningBit()) { + for (int k = pos; k < bitmap.length; ++k) + bitmap[k] = ~bitmap[k]; + } + i.discardFirstWords(howMany); + return bitmap.length; + } + if (i.getRunningBit()) + for (int k = pos; k < pos + l; ++k) + bitmap[k] = ~bitmap[k]; + pos += l; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + i.discardFirstWords(howMany); + return pos; + } + } + return pos; + } + + protected static int inplaceand(int[] bitmap, IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if (!i.getRunningBit()) { + for (int k = pos; k < pos + L; ++k) + bitmap[k] = 0; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howMany = bitmap.length - pos; + int l = i.getRunningLength(); + if (pos + l > bitmap.length) { + if (!i.getRunningBit()) { + for (int k = pos; k < bitmap.length; ++k) + bitmap[k] = 0; + } + i.discardFirstWords(howMany); + return bitmap.length; + } + if (!i.getRunningBit()) + for (int k = pos; k < pos + l; ++k) + bitmap[k] = 0; + pos += l; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + i.discardFirstWords(howMany); + return pos; + } + } + return pos; + } + + /** + * An optimization option. Larger values may improve speed, but at the + * expense of memory. 
+ */ + public static final int DEFAULT_MAX_BUF_SIZE = 65536; - } - class ORIt implements CloneableIterator { - EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); - int[] hardbitmap; - LinkedList ll; - - ORIt(LinkedList basell, final int bufsize) { - this.ll = basell; - this.hardbitmap = new int[bufsize]; - } - - @Override - public XORIt clone() throws CloneNotSupportedException { - XORIt answer = (XORIt) super.clone(); - answer.buffer = this.buffer.clone(); - answer.hardbitmap = this.hardbitmap.clone(); - answer.ll = (LinkedList) this.ll.clone(); - return answer; - } - - @Override - public boolean hasNext() { - return !this.ll.isEmpty(); - } - - @Override - public EWAHIterator32 next() { - this.buffer.clear(); - int effective = 0; - Iterator i = this.ll.iterator(); - while (i.hasNext()) { - IteratingRLW32 rlw = i.next(); - if (rlw.size() > 0) { - int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw); - if (eff > effective) - effective = eff; - } else - i.remove(); - } - for (int k = 0; k < effective; ++k) - this.buffer.add(this.hardbitmap[k]); - Arrays.fill(this.hardbitmap, 0); - return this.buffer.getEWAHIterator(); - } + final EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + final int[] hardBitmap; + final LinkedList ll; + + ORIt(LinkedList basell, final int bufSize) { + this.ll = basell; + this.hardBitmap = new int[bufSize]; + } + + @Override + public XORIt clone() throws CloneNotSupportedException { + XORIt answer = (XORIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardBitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + int effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW32 rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation32.inplaceor( + 
this.hardBitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.addWord(this.hardBitmap[k]); + Arrays.fill(this.hardBitmap, 0); + return this.buffer.getEWAHIterator(); + } } class XORIt implements CloneableIterator { - EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); - int[] hardbitmap; + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + int[] hardbitmap; LinkedList ll; - - XORIt(LinkedList basell, final int bufsize) { - this.ll = basell; - this.hardbitmap = new int[bufsize]; - - } - - @Override - public XORIt clone() throws CloneNotSupportedException { - XORIt answer = (XORIt) super.clone(); - answer.buffer = this.buffer.clone(); - answer.hardbitmap = this.hardbitmap.clone(); - answer.ll = (LinkedList) this.ll.clone(); - return answer; - } - - @Override - public boolean hasNext() { - return !this.ll.isEmpty(); - } - - @Override - public EWAHIterator32 next() { - this.buffer.clear(); - int effective = 0; - Iterator i = this.ll.iterator(); - while (i.hasNext()) { - IteratingRLW32 rlw = i.next(); - if (rlw.size() > 0) { - int eff = IteratorAggregation32.inplacexor(this.hardbitmap, rlw); - if (eff > effective) - effective = eff; - } else - i.remove(); - } - for (int k = 0; k < effective; ++k) - this.buffer.add(this.hardbitmap[k]); - Arrays.fill(this.hardbitmap, 0); - return this.buffer.getEWAHIterator(); - } + + XORIt(LinkedList basell, final int bufSize) { + this.ll = basell; + this.hardbitmap = new int[bufSize]; + + } + + @Override + public XORIt clone() throws CloneNotSupportedException { + XORIt answer = (XORIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + int effective = 0; + Iterator i = 
this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW32 rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation32.inplacexor( + this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.addWord(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } } class AndIt implements CloneableIterator { - EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); - LinkedList ll; - int buffersize; - - public AndIt(LinkedList basell, final int bufsize) { - this.ll = basell; - this.buffersize = bufsize; - } - - @Override - public boolean hasNext() { - return !this.ll.isEmpty(); - } - - @Override - public AndIt clone() throws CloneNotSupportedException { - AndIt answer = (AndIt) super.clone(); - answer.buffer = this.buffer.clone(); - answer.ll = (LinkedList) this.ll.clone(); - return answer; - } - - @Override - public EWAHIterator32 next() { - this.buffer.clear(); - IteratorAggregation32.andToContainer(this.buffer, this.buffersize * this.ll.size(), - this.ll.get(0), this.ll.get(1)); - if (this.ll.size() > 2) { - Iterator i = this.ll.iterator(); - i.next(); - i.next(); - EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); - while (i.hasNext() && this.buffer.sizeInBytes() > 0) { - IteratorAggregation32.andToContainer(tmpbuffer, - this.buffer.getIteratingRLW(), i.next()); - this.buffer.swap(tmpbuffer); - tmpbuffer.clear(); - } - } - Iterator i = this.ll.iterator(); - while(i.hasNext()) { - if(i.next().size() == 0) { - this.ll.clear(); - break; - } - } - return this.buffer.getEWAHIterator(); - } + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + LinkedList ll; + final int bufferSize; -} \ No newline at end of file + public AndIt(LinkedList basell, final int bufSize) { + this.ll = basell; + this.bufferSize = bufSize; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + 
@Override + public AndIt clone() throws CloneNotSupportedException { + AndIt answer = (AndIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + IteratorAggregation32.andToContainer(this.buffer, + this.bufferSize * this.ll.size(), this.ll.get(0), + this.ll.get(1)); + if (this.ll.size() > 2) { + Iterator i = this.ll.iterator(); + i.next(); + i.next(); + EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); + while (i.hasNext() && this.buffer.sizeInBytes() > 0) { + IteratorAggregation32 + .andToContainer(tmpbuffer, + this.buffer.getIteratingRLW(), + i.next()); + this.buffer.swap(tmpbuffer); + tmpbuffer.clear(); + } + } + for (IteratingRLW32 aLl : this.ll) { + if (aLl.size() == 0) { + this.ll.clear(); + break; + } + } + return this.buffer.getEWAHIterator(); + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,135 +1,158 @@ package com.googlecode.javaewah32; -import java.util.Iterator; - import com.googlecode.javaewah.IntIterator; +import java.util.Iterator; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. 
*/ + /** * Convenience functions for working over iterators - * */ -public class IteratorUtil32 { - - /** - * @param i iterator we wish to iterate over - * @return an iterator over the set bits corresponding to the iterator - */ - public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { - return new IntIteratorOverIteratingRLW32(i); - } - - /** - * @param i iterator we wish to iterate over - * @return an iterator over the set bits corresponding to the iterator - */ - public static Iterator toSetBitsIterator(final IteratingRLW32 i) { - return new Iterator() { - @Override - public boolean hasNext() { - return this.under.hasNext(); - } - - @Override - public Integer next() { - return new Integer(this.under.next()); - } - - @Override - public void remove() { - } - - final private IntIterator under = toSetBitsIntIterator(i); - }; - - } - - /** - * Turn an iterator into a bitmap - * @param i iterator we wish to materialize - * @param c where we write - */ - public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) { - while (true) { - if (i.getRunningLength() > 0) { - c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); - } - for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) - c.add(i.getLiteralWordAt(k)); - if (!i.next()) - break; - } - } - - /** - * @param i iterator we wish to iterate over - * @return the cardinality (number of set bits) corresponding to the iterator - */ - public static int cardinality(final IteratingRLW32 i) { - int answer = 0; - while (true) { - if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits; - for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) - answer += Long.bitCount(i.getLiteralWordAt(k)); - if(!i.next()) break; - } - return answer; - } - - /** - * - * @param x set of bitmaps we wish to iterate over - * @return an array of iterators corresponding to the array of bitmaps - */ - public static IteratingRLW32[] toIterators(final 
EWAHCompressedBitmap32... x) { - IteratingRLW32[] X = new IteratingRLW32[x.length]; - for (int k = 0; k < X.length; ++k) { - X[k] = new IteratingBufferedRunningLengthWord32(x[k]); - } - return X; - } - /** - * Turn an iterator into a bitmap - * - * @param i iterator we wish to materialize - * @param c where we write - * @param Max maximum number of words to materialize - * @return how many words were actually materialized - */ - public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) { - final int origMax = Max; - while (true) { - if (i.getRunningLength() > 0) { - int L = i.getRunningLength(); - if(L > Max) L = Max; - c.addStreamOfEmptyWords(i.getRunningBit(), L); - Max -= L; - } - long L = i.getNumberOfLiteralWords(); - for (int k = 0; k < L; ++k) - c.add(i.getLiteralWordAt(k)); - if(Max>0) { - if (!i.next()) - break; - } - else break; - } - return origMax - Max; - } - /** - * Turn an iterator into a bitmap - * - * @param i iterator we wish to materialize - * @return materialized version of the iterator - */ - public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - materialize(i, ewah); - return ewah; - } +public final class IteratorUtil32 { + + /** Private constructor to prevent instantiation */ + private IteratorUtil32() {} + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { + return new IntIteratorOverIteratingRLW32(i); + } + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static Iterator toSetBitsIterator(final IteratingRLW32 i) { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return this.under.next(); + } + + 
@Override + public void remove() { + } + + private final IntIterator under = toSetBitsIntIterator(i); + }; + + } + + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @param c where we write + */ + public static void materialize(final IteratingRLW32 i, + final BitmapStorage32 c) { + while (true) { + if (i.getRunningLength() > 0) { + c.addStreamOfEmptyWords(i.getRunningBit(), + i.getRunningLength()); + } + int il = i.getNumberOfLiteralWords(); + for (int k = 0; k < il ; ++k) + c.addWord(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + } + + /** + * @param i iterator we wish to iterate over + * @return the cardinality (number of set bits) corresponding to the + * iterator + */ + public static int cardinality(final IteratingRLW32 i) { + int answer = 0; + while (true) { + if (i.getRunningBit()) + answer += i.getRunningLength() + * EWAHCompressedBitmap32.WORD_IN_BITS; + int il = i.getNumberOfLiteralWords(); + for (int k = 0; k < il; ++k) + answer += Integer.bitCount(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + return answer; + } + + /** + * @param x set of bitmaps we wish to iterate over + * @return an array of iterators corresponding to the array of bitmaps + */ + public static IteratingRLW32[] toIterators( + final EWAHCompressedBitmap32... 
x) { + IteratingRLW32[] X = new IteratingRLW32[x.length]; + for (int k = 0; k < X.length; ++k) { + X[k] = new IteratingBufferedRunningLengthWord32(x[k]); + } + return X; + } + + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @param c where we write + * @param max maximum number of words to materialize + * @return how many words were actually materialized + */ + public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int max) { + final int origMax = max; + while (true) { + if (i.getRunningLength() > 0) { + int l = i.getRunningLength(); + if (l > max) + l = max; + c.addStreamOfEmptyWords(i.getRunningBit(), l); + max -= l; + } + long L = i.getNumberOfLiteralWords(); + for (int k = 0; k < L; ++k) + c.addWord(i.getLiteralWordAt(k)); + if (max > 0) { + if (!i.next()) + break; + } else + break; + } + return origMax - max; + } + + /** + * Turn an iterator into a bitmap + * + * + * This can be used to effectively clone a bitmap in the following + * manner: + * + * + * EWAHCompressedBitmap32 n = IteratorUtil32.materialize(bitmap.getIteratingRLW())); + * n.setSizeInBitsWithinLastWord(bitmap.sizeInBits()); + * + * + * @param i iterator we wish to materialize + * @return materialized version of the iterator + */ + public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + materialize(i, ewah); + return ewah; + } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,87 +1,97 @@ package com.googlecode.javaewah32; - - /* - * 
Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** - * This is a BitmapStorage that can be used to determine quickly - * if the result of an operation is non-trivial... that is, whether - * there will be at least on set bit. - * - * @since 0.5.0 - * @author Daniel Lemire and Veronika Zenz + * This is a BitmapStorage that can be used to determine quickly if the result + * of an operation is non-trivial... that is, whether there will be at least on + * set bit. * + * @author Daniel Lemire and Veronika Zenz + * @since 0.5.0 */ public class NonEmptyVirtualStorage32 implements BitmapStorage32 { - static class NonEmptyException extends RuntimeException { - private static final long serialVersionUID = 1L; + private static final NonEmptyException nonEmptyException = new NonEmptyException(); + + /** + * If the word to be added is non-zero, a NonEmptyException exception is + * thrown. + */ + @Override + public void addWord(int newData) { + if (newData != 0) + throw nonEmptyException; + } /** - * Do not fill in the stack trace for this exception - * for performance reasons. + * If the word to be added is non-zero, a NonEmptyException exception is + * thrown. + */ + @Override + public void addLiteralWord(int newData) { + if (newData != 0) + throw nonEmptyException; + } + + /** + * throws a NonEmptyException exception when number is greater than 0 + */ + @Override + public void addStreamOfLiteralWords(Buffer32 buffer, int start, int number) { + for(int x = start; x < start + number ; ++x) + if(buffer.getWord(x)!=0) throw nonEmptyException; + } + + /** + * If the boolean value is true and number is greater than 0, then it + * throws a NonEmptyException exception, otherwise, nothing happens. 
+ */ + @Override + public void addStreamOfEmptyWords(boolean v, int number) { + if (v && (number > 0)) + throw nonEmptyException; + } + + /** + * throws a NonEmptyException exception when number is greater than 0 + */ + @Override + public void addStreamOfNegatedLiteralWords(Buffer32 buffer, int start, + int number) { + if (number > 0) { + throw nonEmptyException; + } + } + + @Override + public void clear() { + } + + /** + * Does nothing. * - * @return this instance - * @see java.lang.Throwable#fillInStackTrace() + * @see com.googlecode.javaewah.BitmapStorage#setSizeInBitsWithinLastWord(int) */ @Override - public synchronized Throwable fillInStackTrace() { - return this; + public void setSizeInBitsWithinLastWord(int bits) { } - } - - private static final NonEmptyException nonEmptyException = new NonEmptyException(); - - - /** - * If the word to be added is non-zero, a NonEmptyException exception is thrown. - */ - @Override -public void add(int newdata) { - if(newdata!=0) throw nonEmptyException; - } - - /** - * throws a NonEmptyException exception when number is greater than 0 - * - */ - @Override -public void addStreamOfLiteralWords(int[] data, int start, int number) { - if (number > 0){ - throw nonEmptyException; - } - } - - /** - * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, - * otherwise, nothing happens. - * - */ - @Override -public void addStreamOfEmptyWords(boolean v, int number) { - if(v && (number>0)) throw nonEmptyException; - } - - /** - * throws a NonEmptyException exception when number is greater than 0 - * - */ - @Override -public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { - if (number > 0){ - throw nonEmptyException; - } - } - - /** - * Does nothing. 
- * - * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) - */ - @Override -public void setSizeInBits(int bits) { - } + static class NonEmptyException extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** + * Do not fill in the stack trace for this exception for + * performance reasons. + * + * @return this instance + * @see java.lang.Throwable#fillInStackTrace() + */ + @Override + public synchronized Throwable fillInStackTrace() { + return this; + } + } } diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ReverseEWAHIterator32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ReverseEWAHIterator32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ReverseEWAHIterator32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ReverseEWAHIterator32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,90 @@ +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.Stack; + + +/** + * The class ReverseEWAHIterator32 represents a special type of efficient iterator + * iterating over (uncompressed) words of bits in reverse order. + * + * @author Gregory Ssi-Yan-Kai + */ +final class ReverseEWAHIterator32 { + + /** + * Instantiates a new reverse EWAH iterator. 
+ * + * @param buffer the buffer + */ + public ReverseEWAHIterator32(final Buffer32 buffer) { + this.pointer = 0; + this.rlw = new RunningLengthWord32(buffer, this.pointer); + this.positions = new Stack(); + this.positions.ensureCapacity(buffer.sizeInWords()); + while(this.pointer < buffer.sizeInWords()) { + this.positions.push(this.pointer); + this.rlw.position = this.pointer; + this.pointer += this.rlw.getNumberOfLiteralWords() + 1; + } + } + + /** + * Access to the buffer + * + * @return the buffer + */ + public Buffer32 buffer() { + return this.rlw.buffer; + } + + /** + * Position of the current running length word. + * + * @return the int + */ + public int position() { + return this.pointer; + } + + /** + * Checks for previous. + * + * @return true, if successful + */ + public boolean hasPrevious() { + return !this.positions.isEmpty(); + } + + /** + * Previous running length word. + * + * @return the running length word + */ + public RunningLengthWord32 previous() { + this.pointer = this.positions.pop(); + this.rlw.position = this.pointer; + return this.rlw; + } + + /** + * The positions of running length words (embedded in the rlw attribute). + */ + private Stack positions; + + /** + * The pointer representing the location of the current running length word + * in the array of words (embedded in the rlw attribute). + */ + private int pointer; + + /** + * The current running length word. 
+ */ + protected RunningLengthWord32 rlw; + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ReverseIntIterator32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ReverseIntIterator32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/ReverseIntIterator32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/ReverseIntIterator32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,103 @@ +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import com.googlecode.javaewah.IntIterator; + +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; + +/** + * The ReverseIntIterator32 is the 32 bit implementation of the IntIterator + * interface, which efficiently returns the stream of integers represented by a + * ReverseEWAHIterator32 in reverse order. 
+ * + * @author Gregory Ssi-Yan-Kai + */ +final class ReverseIntIterator32 implements IntIterator { + + private final ReverseEWAHIterator32 ewahIter; + private final int sizeInBits; + private final Buffer32 buffer; + private int position; + private boolean runningBit; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasNext; + + ReverseIntIterator32(ReverseEWAHIterator32 ewahIter, int sizeInBits) { + this.ewahIter = ewahIter; + this.sizeInBits = sizeInBits; + this.buffer = ewahIter.buffer(); + this.runningLength = sizeInBits - 1; + this.hasNext = this.moveToPreviousRLW(); + } + + @Override + public boolean hasNext() { + return this.hasNext; + } + + @Override + public int next() { + final int answer; + if (literalHasNext()) { + final int t = this.word & -this.word; + answer = this.literalPosition - Integer.bitCount(t - 1); + this.word ^= t; + } else { + answer = this.position--; + } + this.hasNext = this.moveToPreviousRLW(); + return answer; + } + + private boolean moveToPreviousRLW() { + while (!literalHasNext() && !runningHasNext()) { + if (!this.ewahIter.hasPrevious()) { + return false; + } + setRLW(this.ewahIter.previous()); + } + return true; + } + + private void setRLW(RunningLengthWord32 rlw) { + this.wordLength = rlw.getNumberOfLiteralWords(); + this.wordPosition = this.ewahIter.position(); + this.position = this.runningLength; + this.runningLength -= WORD_IN_BITS * (rlw.getRunningLength() + this.wordLength); + if (this.position == this.sizeInBits - 1) { + final int usedBitsInLast = this.sizeInBits % WORD_IN_BITS; + if(usedBitsInLast > 0) { + this.runningLength += WORD_IN_BITS - usedBitsInLast; + if (this.wordLength > 0) { + this.word = Integer.reverse(this.buffer.getWord(this.wordPosition + this.wordLength--)); + this.word = (this.word >>> (WORD_IN_BITS - usedBitsInLast)); + this.literalPosition = this.position; + this.position -= usedBitsInLast; + } + } + 
} + this.runningBit = rlw.getRunningBit(); + } + + private boolean runningHasNext() { + return this.runningBit && this.runningLength < this.position; + } + + private boolean literalHasNext() { + while (this.word == 0 && this.wordLength > 0) { + this.word = Integer.reverse(this.buffer.getWord(this.wordPosition + this.wordLength--)); + this.literalPosition = this.position; + this.position -= WORD_IN_BITS; + } + return this.word != 0; + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,152 +1,172 @@ package com.googlecode.javaewah32; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. - * - * @since 0.5.0 + * * @author Daniel Lemire + * @since 0.5.0 */ public final class RunningLengthWord32 implements Cloneable { - /** - * Instantiates a new running length word. - * - * @param a - * an array of 32-bit words - * @param p - * position in the array where the running length word is - * located. - */ - RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) { - this.parent = a; - this.position = p; - } - - /** - * Gets the number of literal words. - * - * @return the number of literal words - */ - public int getNumberOfLiteralWords() { - return (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); - } - - /** - * Gets the running bit. 
- * - * @return the running bit - */ - public boolean getRunningBit() { - return (this.parent.buffer[this.position] & 1) != 0; - } - - /** - * Gets the running length. - * - * @return the running length - */ - public int getRunningLength() { - return (this.parent.buffer[this.position] >>> 1) - & largestrunninglengthcount; - } - - /** - * Sets the number of literal words. - * - * @param number - * the new number of literal words - */ - public void setNumberOfLiteralWords(final int number) { - this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; - this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) - | runninglengthplusrunningbit; - } - - /** - * Sets the running bit. - * - * @param b - * the new running bit - */ - public void setRunningBit(final boolean b) { - if (b) - this.parent.buffer[this.position] |= 1; - else - this.parent.buffer[this.position] &= ~1; - } - - /** - * Sets the running length. - * - * @param number - * the new running length - */ - public void setRunningLength(final int number) { - this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; - this.parent.buffer[this.position] &= (number << 1) - | notshiftedlargestrunninglengthcount; - } - - /** - * Return the size in uncompressed words represented by this running - * length word. - * - * @return the int - */ - public int size() { - return getRunningLength() + getNumberOfLiteralWords(); - } - - /* - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "running bit = " + getRunningBit() - + " running length = " + getRunningLength() - + " number of lit. words " + getNumberOfLiteralWords(); - } - - @Override - public RunningLengthWord32 clone() throws CloneNotSupportedException { - RunningLengthWord32 answer; - answer = (RunningLengthWord32) super.clone(); - answer.parent = this.parent; - answer.position = this.position; - return answer; - } - - /** The array of words. 
*/ - public EWAHCompressedBitmap32 parent; - - /** The position in array. */ - public int position; - - /** - * number of bits dedicated to marking of the running length of clean - * words - */ - public static final int runninglengthbits = 16; - - private static final int literalbits = 32 - 1 - runninglengthbits; - - /** largest number of literal words in a run. */ - public static final int largestliteralcount = (1 << literalbits) - 1; - - /** largest number of clean words in a run */ - public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; - - private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; + /** + * Instantiates a new running length word. + * + * @param buffer the buffer + * @param p position in the array where the running length word is + * located. + */ + RunningLengthWord32(final Buffer32 buffer, final int p) { + this.buffer = buffer; + this.position = p; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return getNumberOfLiteralWords(this.buffer, this.position); + } + + static int getNumberOfLiteralWords(final Buffer32 buffer, final int position) { + return (buffer.getWord(position) >>> (1 + RUNNING_LENGTH_BITS)); + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return getRunningBit(this.buffer, this.position); + } + + static boolean getRunningBit(final Buffer32 buffer, final int position) { + return (buffer.getWord(position) & 1) != 0; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public int getRunningLength() { + return getRunningLength(this.buffer, this.position); + } + + static int getRunningLength(final Buffer32 buffer, final int position) { + return (buffer.getWord(position) >>> 1) & LARGEST_RUNNING_LENGTH_COUNT; + } + + /** + * Sets the number of literal words. 
+ * + * @param number the new number of literal words + */ + public void setNumberOfLiteralWords(final int number) { + setNumberOfLiteralWords(this.buffer, this.position, number); + } + + static void setNumberOfLiteralWords(final Buffer32 buffer, final int position, final int number) { + buffer.orWord(position, NOT_RUNNING_LENGTH_PLUS_RUNNING_BIT); + buffer.andWord(position, (number << (RUNNING_LENGTH_BITS + 1)) | RUNNING_LENGTH_PLUS_RUNNING_BIT); + } + + /** + * Sets the running bit. + * + * @param b the new running bit + */ + public void setRunningBit(final boolean b) { + setRunningBit(this.buffer, this.position, b); + } + + static void setRunningBit(final Buffer32 buffer, final int position, final boolean b) { + if (b) + buffer.orWord(position, 1); + else + buffer.andWord(position, ~1); + } + + /** + * Sets the running length. + * + * @param number the new running length + */ + public void setRunningLength(final int number) { + setRunningLength(this.buffer, this.position, number); + } + + static void setRunningLength(final Buffer32 buffer, final int position, final int number) { + buffer.orWord(position, SHIFTED_LARGEST_RUNNING_LENGTH_COUNT); + buffer.andWord(position, (number << 1) | NOT_SHIFTED_LARGEST_RUNNING_LENGTH_COUNT); + } + + /** + * Return the size in uncompressed words represented by this running + * length word. + * + * @return the int + */ + public int size() { + return getRunningLength() + getNumberOfLiteralWords(); + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public RunningLengthWord32 clone() throws CloneNotSupportedException { + return (RunningLengthWord32) super.clone(); + } + + /** + * The array of words. + */ + final Buffer32 buffer; + + /** + * The position in array. 
+ */ + int position; + + /** + * number of bits dedicated to marking of the running length of clean + * words + */ + public static final int RUNNING_LENGTH_BITS = 16; + + private static final int LITERAL_BITS = 32 - 1 - RUNNING_LENGTH_BITS; + + /** + * largest number of literal words in a run. + */ + public static final int LARGEST_LITERAL_COUNT = (1 << LITERAL_BITS) - 1; + + /** + * largest number of clean words in a run + */ + public static final int LARGEST_RUNNING_LENGTH_COUNT = (1 << RUNNING_LENGTH_BITS) - 1; + + private static final int RUNNING_LENGTH_PLUS_RUNNING_BIT = (1 << (RUNNING_LENGTH_BITS + 1)) - 1; - private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; + private static final int SHIFTED_LARGEST_RUNNING_LENGTH_COUNT = LARGEST_RUNNING_LENGTH_COUNT << 1; - private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; + private static final int NOT_RUNNING_LENGTH_PLUS_RUNNING_BIT = ~RUNNING_LENGTH_PLUS_RUNNING_BIT; - private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; + private static final int NOT_SHIFTED_LARGEST_RUNNING_LENGTH_COUNT = ~SHIFTED_LARGEST_RUNNING_LENGTH_COUNT; -} \ No newline at end of file +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/BitmapSymmetricAlgorithm32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/BitmapSymmetricAlgorithm32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/BitmapSymmetricAlgorithm32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/BitmapSymmetricAlgorithm32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,23 @@ +package com.googlecode.javaewah32.symmetric; + +import com.googlecode.javaewah32.BitmapStorage32; +import com.googlecode.javaewah32.EWAHCompressedBitmap32; + +/** + * Generic interface to compute symmetric Boolean 
functions. + * + * @author Daniel Lemire + * @see http://en.wikipedia.org/wiki/Symmetric_Boolean_function + * @since 0.8.2 + */ +public interface BitmapSymmetricAlgorithm32 { + /** + * Compute a Boolean symmetric query. + * + * @param f symmetric boolean function to be processed + * @param out the result of the query + * @param set the inputs + */ + void symmetric(UpdateableBitmapFunction32 f, BitmapStorage32 out, EWAHCompressedBitmap32... set); +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/EWAHPointer32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/EWAHPointer32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/EWAHPointer32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/EWAHPointer32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,120 @@ +package com.googlecode.javaewah32.symmetric; + +import com.googlecode.javaewah32.IteratingBufferedRunningLengthWord32; + +/** + * Wrapper around an IteratingBufferedRunningLengthWord used by the + * RunningBitmapMerge class. + * + * @author Daniel Lemire + * @since 0.8.2 + */ +public final class EWAHPointer32 implements Comparable { + private int endrun; + private final int pos; + private boolean isLiteral; + private boolean value; + private boolean dead = false; + + /** + * Underlying iterator + */ + public final IteratingBufferedRunningLengthWord32 iterator; + + /** + * Construct a pointer over an IteratingBufferedRunningLengthWord. 
+ * + * @param previousEndRun word where the previous run ended + * @param rw the iterator + * @param pos current position (in word) + */ + public EWAHPointer32(final int previousEndRun, + final IteratingBufferedRunningLengthWord32 rw, final int pos) { + this.pos = pos; + this.iterator = rw; + if (this.iterator.getRunningLength() > 0) { + this.endrun = previousEndRun + + this.iterator.getRunningLength(); + this.isLiteral = false; + this.value = this.iterator.getRunningBit(); + } else if (this.iterator.getNumberOfLiteralWords() > 0) { + this.isLiteral = true; + this.endrun = previousEndRun + + this.iterator.getNumberOfLiteralWords(); + } else { + this.endrun = previousEndRun; + this.dead = true; + } + } + + /** + * @return the end of the current run + */ + public int endOfRun() { + return this.endrun; + } + + /** + * @return the beginning of the current run + */ + public int beginOfRun() { + if (this.isLiteral) + return this.endrun + - this.iterator.getNumberOfLiteralWords(); + return (this.endrun - this.iterator.getRunningLength()); + } + + /** + * Process the next run + */ + public void parseNextRun() { + if ((this.isLiteral) + || (this.iterator.getNumberOfLiteralWords() == 0)) { + // no choice, must load next runs + this.iterator.discardFirstWords(this.iterator.size()); + if (this.iterator.getRunningLength() > 0) { + this.endrun += this.iterator + .getRunningLength(); + this.isLiteral = false; + this.value = this.iterator.getRunningBit(); + } else if (this.iterator.getNumberOfLiteralWords() > 0) { + this.isLiteral = true; + this.endrun += this.iterator + .getNumberOfLiteralWords(); + } else { + this.dead = true; + } + + } else { + this.isLiteral = true; + this.endrun += this.iterator.getNumberOfLiteralWords(); + } + + } + + /** + * @return true if there is no more data + */ + public boolean hasNoData() { + return this.dead; + } + + /** + * @param f call the function with the current information + */ + public void callbackUpdate(final UpdateableBitmapFunction32 f) 
{ + if (this.dead) + f.setZero(this.pos); + else if (this.isLiteral) + f.setLiteral(this.pos); + else if (this.value) + f.setOne(this.pos); + else + f.setZero(this.pos); + } + + @Override + public int compareTo(EWAHPointer32 other) { + return this.endrun - other.endrun; + } +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/RunningBitmapMerge32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/RunningBitmapMerge32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/RunningBitmapMerge32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/RunningBitmapMerge32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,78 @@ +package com.googlecode.javaewah32.symmetric; + +import com.googlecode.javaewah.datastructure.PriorityQ; +import com.googlecode.javaewah32.BitmapStorage32; +import com.googlecode.javaewah32.EWAHCompressedBitmap32; +import com.googlecode.javaewah32.IteratingBufferedRunningLengthWord32; + +import java.util.Comparator; + +/** + * This is an implementation of the RunningBitmapMerge algorithm running on top + * of JavaEWAH. It is well suited to computing symmetric Boolean queries. + * + * It is a revised version of an algorithm described in the following reference: + *
  • + * Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned + * bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. + *
+ * + * @author Daniel Lemire + * @since 0.8.2 + */ +public class RunningBitmapMerge32 implements BitmapSymmetricAlgorithm32 { + + @Override + public void symmetric(UpdateableBitmapFunction32 f, BitmapStorage32 out, + EWAHCompressedBitmap32... set) { + out.clear(); + final PriorityQ h = new PriorityQ( + set.length, new Comparator() { + @Override + public int compare(EWAHPointer32 arg0, + EWAHPointer32 arg1) { + return arg0.compareTo(arg1); + } + } + ); + f.resize(set.length); + + for (int k = 0; k < set.length; ++k) { + final EWAHPointer32 x = new EWAHPointer32(0, + new IteratingBufferedRunningLengthWord32(set[k]), + k); + if (x.hasNoData()) + continue; + f.rw[k] = x; + x.callbackUpdate(f); + h.toss(x); + } + h.buildHeap(); // just in case we use an insane number of inputs + + int lasta = 0; + if (h.isEmpty()) + return; + mainloop: + while (true) { // goes until no more active inputs + final int a = h.peek().endOfRun(); + // I suppose we have a run of length a - lasta here. + f.dispatch(out, lasta, a); + lasta = a; + + while (h.peek().endOfRun() == a) { + final EWAHPointer32 p = h.peek(); + p.parseNextRun(); + p.callbackUpdate(f); + if (p.hasNoData()) { + h.poll(); // we just remove it + if (h.isEmpty()) + break mainloop; + } else { + h.percolateDown(); // since we have + // increased the key + } + } + } + } + +} diff -Nru libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/ThresholdFuncBitmap32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/ThresholdFuncBitmap32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/ThresholdFuncBitmap32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/ThresholdFuncBitmap32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,146 @@ +package com.googlecode.javaewah32.symmetric; + +import com.googlecode.javaewah32.BitmapStorage32; + +import java.util.Arrays; + +/** + * A threshold 
Boolean function returns true if the number of true values exceed + * a threshold. It is a symmetric Boolean function. + * + * This class implements an algorithm described in the following paper: + * + * Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections + * http://arxiv.org/abs/1402.4466 + * + * It is not thread safe: you should use one object per thread. + * + * @author Daniel Lemire + * @see http://en.wikipedia.org/wiki/Symmetric_Boolean_function + * @since 0.8.2 + */ +public final class ThresholdFuncBitmap32 extends UpdateableBitmapFunction32 { + private final int min; + private int[] buffers; + private int bufferUsed; + private final int[] bufcounters = new int[64]; + private static final int[] zeroes64 = new int[64]; + + /** + * Construction a threshold function with a given threshold + * + * @param min threshold + */ + public ThresholdFuncBitmap32(final int min) { + super(); + this.min = min; + this.buffers = new int[16]; + this.bufferUsed = 0; + } + + @Override + public void dispatch(BitmapStorage32 out, int runBegin, int runend) { + final int runLength = runend - runBegin; + if (this.hammingWeight >= this.min) { + out.addStreamOfEmptyWords(true, runLength); + } else if (this.litWeight + this.hammingWeight < this.min) { + out.addStreamOfEmptyWords(false, runLength); + } else { + final int deficit = this.min - this.hammingWeight; + if (deficit == 1) { + orLiterals(out, runBegin, runLength); + return; + } + this.bufferUsed = this.getNumberOfLiterals(); + if (this.bufferUsed == deficit) { + andLiterals(out, runBegin, runLength); + } else { + generalLiterals(deficit, out, runBegin, + runLength); + } + } + } + + private int threshold2buf(final int t, final int[] buf, + final int bufUsed) { + int result = 0; + final int[] counters = this.bufcounters; + System.arraycopy(zeroes64, 0, counters, 0, 64); + for (int k = 0; k < bufUsed; ++k) { + int bitset = buf[k]; + while (bitset != 0) { + int t2 = bitset & -bitset; + 
counters[Integer.bitCount(t2 - 1)]++; + bitset ^= t2; + } + } + for (int pos = 0; pos < 64; ++pos) { + if (counters[pos] >= t) + result |= (1L << pos); + } + return result; + } + + private static int threshold3(final int t, final int[] buffers, final int bufUsed) { + if (buffers.length == 0) + return 0; + final int[] v = new int[t]; + v[0] = buffers[0]; + for (int k = 1; k < bufUsed; ++k) { + final int c = buffers[k]; + final int m = Math.min(t - 1, k); + for (int j = m; j >= 1; --j) { + v[j] |= (c & v[j - 1]); + } + v[0] |= c; + } + return v[t - 1]; + } + + private int threshold4(final int t, final int[] buf, final int bufUsed) { + if (t >= 128) + return threshold2buf(t, buf, bufUsed); + int b = 0; + for (int k = 0; k < bufUsed; ++k) + b += Integer.bitCount(buf[k]); + + if (2 * b >= bufUsed * t) + return threshold3(t, buf, bufUsed); + else + return threshold2buf(t, buf, bufUsed); + } + + private void orLiterals(final BitmapStorage32 out, final int runBegin, final int runLength) { + for (int i = 0; i < runLength; ++i) { + int w = 0; + for (EWAHPointer32 r : this.getLiterals()) { + w |= r.iterator.getLiteralWordAt(i + runBegin - r.beginOfRun()); + } + out.addWord(w); + } + } + + private void andLiterals(final BitmapStorage32 out, final int runBegin, final int runLength) { + for (int i = 0; i < runLength; ++i) { + int w = ~0; + for (EWAHPointer32 r : this.getLiterals()) { + w &= r.iterator.getLiteralWordAt(i + runBegin - r.beginOfRun()); + } + out.addWord(w); + } + } + + private void generalLiterals(final int deficit, final BitmapStorage32 out, + final int runBegin, final int runLength) { + if (this.bufferUsed > this.buffers.length) + this.buffers = Arrays.copyOf(this.buffers, 2 * this.bufferUsed); + for (int i = 0; i < runLength; ++i) { + int p = 0; + for (EWAHPointer32 r : this.getLiterals()) { + this.buffers[p++] = r.iterator.getLiteralWordAt(i + runBegin - r.beginOfRun()); + } + out.addWord(threshold4(deficit, this.buffers, this.bufferUsed)); + } + } +} diff -Nru 
libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/UpdateableBitmapFunction32.java libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/UpdateableBitmapFunction32.java --- libjavaewah-java-0.7.9/src/main/java/com/googlecode/javaewah32/symmetric/UpdateableBitmapFunction32.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/main/java/com/googlecode/javaewah32/symmetric/UpdateableBitmapFunction32.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,154 @@ +package com.googlecode.javaewah32.symmetric; + +import com.googlecode.javaewah.datastructure.BitSet; +import com.googlecode.javaewah32.BitmapStorage32; + +import java.util.Iterator; +import java.util.List; + +/** + * This is a Java specification for an "updatable" Boolean function meant to run + * over EWAH bitmaps. + * + * Reference: + * + * Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned + * bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. + * + * @author Daniel Lemire + * @since 0.8.2 + */ +public abstract class UpdateableBitmapFunction32 { + EWAHPointer32[] rw = new EWAHPointer32[0]; + int hammingWeight = 0; + int litWeight = 0; + boolean[] b = new boolean[0]; + final BitSet litwlist = new BitSet(0); + + UpdateableBitmapFunction32() { + } + + /** + * @return the current number of literal words + */ + public final int getNumberOfLiterals() { + return this.litwlist.cardinality(); + } + + /** + * Goes through the literals. 
+ * + * @return an iterator + */ + public final Iterable getLiterals() { + return new Iterable() { + + @Override + public Iterator iterator() { + return new Iterator() { + int k = UpdateableBitmapFunction32.this.litwlist + .nextSetBit(0); + + @Override + public boolean hasNext() { + return this.k >= 0; + } + + @Override + public EWAHPointer32 next() { + EWAHPointer32 answer = UpdateableBitmapFunction32.this.rw[this.k]; + this.k = UpdateableBitmapFunction32.this.litwlist + .nextSetBit(this.k + 1); + return answer; + } + + @Override + public void remove() { + throw new RuntimeException( + "N/A"); + } + }; + } + }; + } + + /** + * append to the list the literal words as EWAHPointer + * + * @param container where we write + */ + public final void fillWithLiterals(final List container) { + for (int k = this.litwlist.nextSetBit(0); k >= 0; k = this.litwlist + .nextSetBit(k + 1)) { + container.add(this.rw[k]); + } + } + + /** + * @param newsize the number of inputs + */ + public final void resize(final int newsize) { + this.rw = java.util.Arrays.copyOf(this.rw, newsize); + this.litwlist.resize(newsize); + this.b = java.util.Arrays.copyOf(this.b, newsize); + } + + /** + * @param pos position of a literal + */ + public void setLiteral(final int pos) { + if (!this.litwlist.get(pos)) { + this.litwlist.set(pos); + this.litWeight++; + if (this.b[pos]) { + this.b[pos] = false; + --this.hammingWeight; + } + } + } + + /** + * @param pos position where a literal was removed + */ + public void clearLiteral(final int pos) { + if (this.litwlist.get(pos)) { + // litwlist.unset(pos); + this.litwlist.set(pos, false); + this.litWeight--; + } + } + + /** + * @param pos position where a zero word was added + */ + public final void setZero(final int pos) { + if (this.b[pos]) { + this.b[pos] = false; + --this.hammingWeight; + } else { + clearLiteral(pos); + } + } + + /** + * @param pos position were a 11...1 word was added + */ + public final void setOne(final int pos) { + if (!this.b[pos]) { 
+ clearLiteral(pos); + this.b[pos] = true; + ++this.hammingWeight; + } + } + + /** + * Writes out the answer. + * + * @param out output buffer + * @param runBegin beginning of the run + * @param runend end of the run + */ + public abstract void dispatch(BitmapStorage32 out, int runBegin, + int runend); + +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/datastructure/BitSetTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,224 @@ +package com.googlecode.javaewah.datastructure; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +import junit.framework.Assert; + +import org.junit.Test; + +import com.googlecode.javaewah.IntIterator; + + +public class BitSetTest +{ + + + public static ImmutableBitSet toImmutableBitSet(BitSet b) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + b.serialize(new DataOutputStream(bos)); + ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray()); + ImmutableBitSet rmap = new ImmutableBitSet(bb.asLongBuffer()); + System.out.println("bitmap 1 (mapped) : " + rmap); + if (!rmap.equals(b)) + throw new RuntimeException("Will not happen"); + return rmap; + } + @Test + public void simpleImmuExample() throws IOException { + ImmutableBitSet Bitmap1 = toImmutableBitSet(BitSet.bitmapOf(0, 2, 55, 64, 512)); + ImmutableBitSet Bitmap2 = toImmutableBitSet(BitSet.bitmapOf(1, 
3, 64, 512)); + System.out.println("bitmap 1: " + Bitmap1); + System.out.println("bitmap 2: " + Bitmap2); + assertEquals(Bitmap1.cardinality(),5); + assertEquals(Bitmap2.cardinality(),4); + assertFalse(Bitmap1.hashCode()==Bitmap2.hashCode()); + IntIterator is = Bitmap1.intIterator(); + int c1 = 0; + while(is.hasNext()) { + c1++; + is.next(); + } + assertEquals(Bitmap1.cardinality(),c1); + + IntIterator iu = Bitmap1.unsetIntIterator(); + int c2 = 0; + while(iu.hasNext()) { + c2++; + iu.next(); + } + assertEquals(Bitmap1.getNumberOfWords() * 64 - Bitmap1.cardinality(),c2); + } + + @Test + public void simpleExample() throws IOException { + BitSet Bitmap1 = BitSet.bitmapOf(0, 2, 55, 64, 512); + BitSet Bitmap2 = BitSet.bitmapOf(1, 3, 64, 512); + Bitmap1.trim(); + Bitmap2.trim(); + assertTrue(Bitmap1.intersects(Bitmap2)); + assertFalse(Bitmap1.hashCode()==Bitmap2.hashCode()); + System.out.println("bitmap 1: " + Bitmap1); + System.out.println("bitmap 2: " + Bitmap2); + // or + BitSet orbitmap = Bitmap1.clone(); + int orcard = Bitmap1.orcardinality(Bitmap2); + orbitmap.or(Bitmap2); + assertEquals(orbitmap.cardinality(),orcard); + System.out.println("bitmap 1 OR bitmap 2: " + orbitmap); + // and + BitSet andbitmap = Bitmap1.clone(); + int andcard = Bitmap1.andcardinality(Bitmap2); + andbitmap.and(Bitmap2); + assertEquals(andbitmap.cardinality(),andcard); + System.out.println("bitmap 1 AND bitmap 2: " + andbitmap); + // xor + BitSet xorbitmap = Bitmap1.clone(); + int xorcard = Bitmap1.xorcardinality(Bitmap2); + xorbitmap.xor(Bitmap2); + assertEquals(xorbitmap.cardinality(),xorcard); + System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap); + BitSet andnotbitmap = Bitmap1.clone(); + int andnotcard = Bitmap1.andNotcardinality(Bitmap2); + andnotbitmap.andNot(Bitmap2); + assertEquals(andnotbitmap.cardinality(),andnotcard); + System.out.println("bitmap 1 ANDNOT bitmap 2:" + andnotbitmap); + + // serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: 
you could use a file output steam instead of ByteArrayOutputStream + Bitmap1.serialize(new DataOutputStream(bos)); + BitSet Bitmap1new = new BitSet(); + byte[] bout = bos.toByteArray(); + Bitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream(bout))); + System.out.println("bitmap 1 (recovered) : " + Bitmap1new); + if (!Bitmap1.equals(Bitmap1new)) + throw new RuntimeException("Will not happen"); + // + // we can use a ByteBuffer as backend for a bitmap + // which allows memory-mapped bitmaps + // + ByteBuffer bb = ByteBuffer.wrap(bout); + ImmutableBitSet rmap = new ImmutableBitSet(bb.asLongBuffer()); + System.out.println("bitmap 1 (mapped) : " + rmap); + + if (!rmap.equals(Bitmap1)) + throw new RuntimeException("Will not happen"); + IntIterator is = Bitmap1.intIterator(); + int c1 = 0; + while(is.hasNext()) { + c1++; + is.next(); + } + assertEquals(Bitmap1.cardinality(),c1); + + IntIterator iu = Bitmap1.unsetIntIterator(); + int c2 = 0; + while(iu.hasNext()) { + c2++; + iu.next(); + } + assertEquals(Bitmap1.getNumberOfWords() * 64 - Bitmap1.cardinality(),c2); + Bitmap1.clear(); + assertEquals(Bitmap1.cardinality(),0); + } + + @Test + public void testFlipRanges() throws IOException { + int N = 256; + for(int end = 1; end < N; ++end ) { + for(int start = 0; start< end; ++start) { + BitSet bs1 = new BitSet(N); + for(int k = start; k < end; ++k) { + bs1.flip(k); + } + BitSet bs2 = new BitSet(N); + bs2.flip(start, end); + Assert.assertEquals(bs2.cardinality(), end-start); + Assert.assertEquals(bs1, bs2); + } + } + } + + @Test + public void testSetRanges() throws IOException { + int N = 256; + for(int end = 1; end < N; ++end ) { + for(int start = 0; start< end; ++start) { + BitSet bs1 = new BitSet(N); + for(int k = start; k < end; ++k) { + bs1.set(k); + } + BitSet bs2 = new BitSet(N); + bs2.set(start, end); + Assert.assertEquals(bs1, bs2); + } + } + } + + + @Test + public void testClearRanges() throws IOException { + int N = 256; + for(int end = 1; end < N; 
++end ) { + for(int start = 0; start< end; ++start) { + BitSet bs1 = new BitSet(N); + bs1.set(0, N); + for(int k = start; k < end; ++k) { + bs1.clear(k); + } + BitSet bs2 = new BitSet(N); + bs2.set(0, N); + bs2.clear(start, end); + Assert.assertEquals(bs1, bs2); + } + } + } + + + @Test + public void serializationExample() throws IOException { + File tmpfile = File.createTempFile("javaewah", "bin"); + tmpfile.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(tmpfile); + BitSet Bitmap = BitSet.bitmapOf(0, 2, 55, 64, 512); + System.out.println("Created the bitmap " + Bitmap); + Bitmap.serialize(new DataOutputStream(fos)); + long totalcount = fos.getChannel().position(); + System.out.println("Serialized total count = " + totalcount + " bytes"); + fos.close(); + RandomAccessFile memoryMappedFile = new RandomAccessFile(tmpfile, "r"); + ByteBuffer bb = memoryMappedFile.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, totalcount); + ImmutableBitSet mapped = new ImmutableBitSet(bb.asLongBuffer()); + System.out.println("Mapped the bitmap " + mapped); + memoryMappedFile.close(); + if (!mapped.equals(Bitmap)) + throw new RuntimeException("Will not happen"); + assertEquals(mapped.size(),Bitmap.size()); + assertEquals(mapped.empty(),Bitmap.empty()); + for(int k = 0; k <= 512; ++k) + assertEquals(mapped.get(k),Bitmap.get(k)); + + assertTrue(mapped.asBitSet().equals(Bitmap)); + assertTrue(mapped.clone().asBitSet().equals(Bitmap)); + BitSet t = new BitSet(); + t.resize(mapped.size()); + for(int i : mapped) + t.set(i); + assertTrue(t.equals(Bitmap)); + + + } + +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java 2013-11-12 14:31:20.000000000 +0000 +++ 
libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,1450 +1,2823 @@ package com.googlecode.javaewah; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ +import org.junit.Assert; import org.junit.Test; -import java.util.*; + import java.io.*; +import java.nio.ByteBuffer; +import java.nio.LongBuffer; +import java.util.*; -import junit.framework.Assert; +import static com.googlecode.javaewah.EWAHCompressedBitmap.maxSizeInBits; +import static com.googlecode.javaewah.EWAHCompressedBitmap.WORD_IN_BITS; /** * This class is used for basic unit testing. */ @SuppressWarnings("javadoc") public class EWAHCompressedBitmapTest { - - @Test - public void testGet() { - for (int gap = 29; gap < 10000; gap *= 10) { - EWAHCompressedBitmap x = new EWAHCompressedBitmap(); - for (int k = 0; k < 100; ++k) - x.set(k * gap); - for (int k = 0; k < 100 * gap; ++k) - if (x.get(k)) { - if (k % gap != 0) - throw new RuntimeException( - "spotted an extra set bit at " - + k + " gap = " - + gap); - } else if (k % gap == 0) - throw new RuntimeException( - "missed a set bit " + k - + " gap = " + gap); - } - } - - @SuppressWarnings({ "deprecation", "boxing" }) - @Test - public void OKaserBugReportJuly2013() { - System.out.println("testing OKaserBugReportJuly2013"); - int[][] data = { {}, { 5, 6, 7, 8, 9 }, { 1 }, { 2 }, { 2, 5, 7 }, - { 1 }, { 2 }, { 1, 6, 9 }, { 1, 3, 4, 6, 8, 9 }, - { 1, 3, 4, 6, 8, 9 }, { 1, 3, 6, 8, 9 }, { 2, 5, 7 }, - { 2, 5, 7 }, { 1, 3, 9 }, { 3, 8, 9 } }; - - EWAHCompressedBitmap[] toBeOred = new EWAHCompressedBitmap[data.length]; - Set bruteForceAnswer = new HashSet(); - for (int i = 0; i < toBeOred.length; ++i) { - 
toBeOred[i] = new EWAHCompressedBitmap(); - for (int j : data[i]) { - toBeOred[i].set(j); - bruteForceAnswer.add(j); - } - toBeOred[i].setSizeInBits(1000,false); - } - long rightcard = bruteForceAnswer.size(); - EWAHCompressedBitmap e1 = FastAggregation.or(toBeOred); - Assert.assertEquals(rightcard, e1.cardinality()); - EWAHCompressedBitmap e2 = FastAggregation.bufferedor(65536, toBeOred); - Assert.assertEquals(rightcard, e2.cardinality()); - EWAHCompressedBitmap foo = new EWAHCompressedBitmap(); - FastAggregation.legacy_orWithContainer(foo, toBeOred); - Assert.assertEquals(rightcard, foo.cardinality()); - } - - @Test - public void testSizeInBitsWithAnd() { - System.out.println("testing SizeInBitsWithAnd"); - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10,false); - b.setSizeInBits(10,false); - - EWAHCompressedBitmap and = a.and(b); - Assert.assertEquals(10, and.sizeInBits()); - EWAHCompressedBitmap and2 = EWAHCompressedBitmap.and(a,b); - Assert.assertEquals(10, and2.sizeInBits()); - } - @Test - public void testSizeInBitsWithAndNot() { - System.out.println("testing SizeInBitsWithAndNot"); - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10,false); - b.setSizeInBits(10,false); - - EWAHCompressedBitmap and = a.andNot(b); - Assert.assertEquals(10, and.sizeInBits()); - } - @Test - public void testSizeInBitsWithOr() { - System.out.println("testing SizeInBitsWithOr"); - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10,false); - b.setSizeInBits(10,false); - - EWAHCompressedBitmap or = a.or(b); - 
Assert.assertEquals(10, or.sizeInBits()); - EWAHCompressedBitmap or2 = EWAHCompressedBitmap.or(a,b); - Assert.assertEquals(10, or2.sizeInBits()); + public void swaptest() { + EWAHCompressedBitmap x = EWAHCompressedBitmap.bitmapOf(1,2,3); + EWAHCompressedBitmap y = EWAHCompressedBitmap.bitmapOf(1,2,3,4); + x.swap(y); + Assert.assertEquals(x.cardinality(),4); + Assert.assertEquals(y.cardinality(),3); } - - @Test - public void testSizeInBitsWithXor() { - System.out.println("testing SizeInBitsWithXor"); - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10,false); - b.setSizeInBits(10,false); - - EWAHCompressedBitmap xor = a.xor(b); - Assert.assertEquals(10, xor.sizeInBits()); - EWAHCompressedBitmap xor2 = EWAHCompressedBitmap.xor(a,b); - Assert.assertEquals(10, xor2.sizeInBits()); - } - - - @Test - public void testDebugSetSizeInBitsTest() { - System.out.println("testing DebugSetSizeInBits"); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - - b.set(4); - - b.setSizeInBits(6, true); - - List positions = b.getPositions(); - - Assert.assertEquals(2, positions.size()); - Assert.assertEquals(Integer.valueOf(4), positions.get(0)); - Assert.assertEquals(Integer.valueOf(5), positions.get(1)); - - Iterator iterator = b.iterator(); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(4), iterator.next()); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(5), iterator.next()); - Assert.assertFalse(iterator.hasNext()); - - IntIterator intIterator = b.intIterator(); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(4, intIterator.next()); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(5, intIterator.next()); - Assert.assertFalse(intIterator.hasNext()); - - } - - /** - * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
- */ - @Test - public void EwahIteratorProblem() { - System.out.println("testing ArnonMoscona"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - for (int i = 9434560; i <= 9435159; i++) { - bitmap.set(i); - } - IntIterator iterator = bitmap.intIterator(); - List v = bitmap.getPositions(); - int[] array = bitmap.toArray(); - for (int k = 0; k < v.size(); ++k) { - Assert.assertTrue(array[k] == v.get(k).intValue()); - Assert.assertTrue(iterator.hasNext()); - final int ival = iterator.next(); - final int vval = v.get(k).intValue(); - Assert.assertTrue(ival == vval); - } - Assert.assertTrue(!iterator.hasNext()); - // - for (int k = 2; k <= 1024; k *= 2) { - int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); - EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); - for (int i : bitsToSet) { - ewah.set(i); - } - equal(ewah.iterator(), bitsToSet); - } - } - - /** - * Test submitted by Gregory Ssi-Yan-Kai - */ - @Test - public void SsiYanKaiTest() { - System.out.println("testing SsiYanKaiTest"); - EWAHCompressedBitmap a = EWAHCompressedBitmap.bitmapOf(39935, 39936, - 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, - 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, - 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, - 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, - 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, - 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, - 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, - 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, - 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017, - 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026, - 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035, - 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044, - 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, - 40054, 40055, 40056, 40057, 40058, 40059, 
40060, 40061, 40062, - 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, 40071, - 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, - 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, 40089, - 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, 40098, - 40099, 40100); - EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(39935, 39936, - 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, - 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, - 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, - 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, - 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, - 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, - 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, - 270000); - LinkedHashSet aPositions = new LinkedHashSet( - a.getPositions()); - int intersection = 0; - EWAHCompressedBitmap inter = new EWAHCompressedBitmap(); - LinkedHashSet bPositions = new LinkedHashSet( - b.getPositions()); - for (Integer integer : bPositions) { - if (aPositions.contains(integer)) { - inter.set(integer.intValue()); - ++intersection; - } - } - EWAHCompressedBitmap and2 = a.and(b); - if (!and2.equals(inter)) - throw new RuntimeException("intersections don't match"); - if (intersection != and2.cardinality()) - throw new RuntimeException("cardinalities don't match"); - } - /** - * Test inspired by William Habermaas. 
- */ - @Test - public void habermaasTest() { - System.out.println("testing habermaasTest"); - BitSet bitsetaa = new BitSet(); - EWAHCompressedBitmap aa = new EWAHCompressedBitmap(); - int[] val = { 55400, 1000000, 1000128 }; - for (int k = 0; k < val.length; ++k) { - aa.set(val[k]); - bitsetaa.set(val[k]); - } - equal(aa, bitsetaa); - BitSet bitsetab = new BitSet(); - EWAHCompressedBitmap ab = new EWAHCompressedBitmap(); - for (int i = 4096; i < (4096 + 5); i++) { - ab.set(i); - bitsetab.set(i); - } - ab.set(99000); - bitsetab.set(99000); - ab.set(1000130); - bitsetab.set(1000130); - equal(ab, bitsetab); - EWAHCompressedBitmap bb = aa.or(ab); - EWAHCompressedBitmap bbAnd = aa.and(ab); - try { - EWAHCompressedBitmap abnot = ab.clone(); - abnot.not(); - EWAHCompressedBitmap bbAnd2 = aa.andNot(abnot); - assertEquals(bbAnd2, bbAnd); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - BitSet bitsetbb = (BitSet) bitsetaa.clone(); - bitsetbb.or(bitsetab); - BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); - bitsetbbAnd.and(bitsetab); - equal(bbAnd, bitsetbbAnd); - equal(bb, bitsetbb); - } + @Test + public void shiftByWordSizeBits() { + int[] positions = { 10, 11, 12, 13 }; + EWAHCompressedBitmap bm1 = EWAHCompressedBitmap.bitmapOf(positions); + EWAHCompressedBitmap bm2 = bm1.shift(WORD_IN_BITS); + + EWAHCompressedBitmap bm3 = EWAHCompressedBitmap.bitmapOf(); + for (int pos : positions) { + bm3.set(pos + WORD_IN_BITS); + } + Assert.assertEquals(bm3, bm2); + } - @Test - public void testAndResultAppend() { - System.out.println("testing AndResultAppend"); - EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); - bitmap1.set(35); - EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); - bitmap2.set(35); - bitmap2.set(130); + @Test + public void shiftbug001() { + EWAHCompressedBitmap bm1 = EWAHCompressedBitmap.bitmapOf(10, 11, 12, 13); + EWAHCompressedBitmap bm2 = bm1.shift(1); + + EWAHCompressedBitmap bm3 = bm1.or(bm2); + EWAHCompressedBitmap bm4 = 
EWAHCompressedBitmap.bitmapOf(10,11,12,13,14); + Assert.assertEquals(bm3, bm4); + } + + @Test + public void shiftbug002() { + EWAHCompressedBitmap bm1 = EWAHCompressedBitmap.bitmapOf(10, 11, 12, 13, 63); + EWAHCompressedBitmap bm2 = bm1.shift(1); + + EWAHCompressedBitmap bm3 = bm1.or(bm2); + EWAHCompressedBitmap bm4 = EWAHCompressedBitmap.bitmapOf(10,11,12,13,14, 63, 64); + Assert.assertEquals(bm3, bm4); + } + + @Test + public void shiftbug003() { + EWAHCompressedBitmap bm1 = EWAHCompressedBitmap.bitmapOf(10, 11, 12, 13, 62); + EWAHCompressedBitmap bm2 = bm1.shift(1); + + EWAHCompressedBitmap bm3 = bm1.or(bm2); + EWAHCompressedBitmap bm4 = EWAHCompressedBitmap.bitmapOf(10,11,12,13,14, 62, 63); + Assert.assertEquals(bm3, bm4); + } + + @Test + public void shiftbug004() { + EWAHCompressedBitmap bm1 = EWAHCompressedBitmap.bitmapOf(10, 11, 12, 13, 64); + EWAHCompressedBitmap bm2 = bm1.shift(1); + + EWAHCompressedBitmap bm3 = bm1.or(bm2); + EWAHCompressedBitmap bm4 = EWAHCompressedBitmap.bitmapOf(10,11,12,13,14, 64, 65); + Assert.assertEquals(bm3, bm4); + } + + + @Test + public void example() throws Exception { + EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0, 2, 55, 64, 1 << 30); + EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1, 3, 64, + 1 << 30); + System.out.println("bitmap 1: " + ewahBitmap1); + System.out.println("bitmap 2: " + ewahBitmap2); + // or + EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2); + System.out.println("bitmap 1 OR bitmap 2: " + orbitmap); + System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes"); + // and + EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2); + System.out.println("bitmap 1 AND bitmap 2: " + andbitmap); + System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes"); + // xor + EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2); + System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap); + System.out.println("memory usage: " + 
xorbitmap.sizeInBytes() + " bytes"); + // fast aggregation over many bitmaps + EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(5, 55, + 1 << 30); + EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4, 66, + 1 << 30); + System.out.println("bitmap 3: " + ewahBitmap3); + System.out.println("bitmap 4: " + ewahBitmap4); + andbitmap = EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3, + ewahBitmap4); + System.out.println("b1 AND b2 AND b3 AND b4: " + andbitmap); + // serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: you could use a file output steam instead of ByteArrayOutputStream + ewahBitmap1.serialize(new DataOutputStream(bos)); + EWAHCompressedBitmap ewahBitmap1new = new EWAHCompressedBitmap(); + byte[] bout = bos.toByteArray(); + ewahBitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream(bout))); + System.out.println("bitmap 1 (recovered) : " + ewahBitmap1new); + if (!ewahBitmap1.equals(ewahBitmap1new)) throw new RuntimeException("Will not happen"); + // + // we can use a ByteBuffer as backend for a bitmap + // which allows memory-mapped bitmaps + // + ByteBuffer bb = ByteBuffer.wrap(bout); + EWAHCompressedBitmap rmap = new EWAHCompressedBitmap(bb); + System.out.println("bitmap 1 (mapped) : " + rmap); + + if (!rmap.equals(ewahBitmap1)) throw new RuntimeException("Will not happen"); + // + // support for threshold function (new as of version 0.8.0): + // mark as true a bit that occurs at least T times in the source + // bitmaps + // + EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2, + ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4); + System.out.println("threshold 2 : " + threshold2); + + } + + @Test + public void issue54() { + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + for (int i = 1500; i <1600; i ++) { + bm.set(i); + } + for (int i = 1500; i < 1535; i ++) { + bm.clear(i); + } + bm.clear(1535); + Assert.assertFalse(bm.isEmpty()); + } + + 
@Test + public void xorCardinality() { + EWAHCompressedBitmap b1 = EWAHCompressedBitmap.bitmapOf(0,1,2,3,5,8,13,21,34,55,89); + EWAHCompressedBitmap b2 = EWAHCompressedBitmap.bitmapOf(0,1,2,3,5,8,13,21,34,55,89,144,233,377,610); + Assert.assertEquals(4, b1.xorCardinality(b2)); + } + + @Test + public void andNotCardinality() { + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(0,1,2,3,5,8,13,21,34,55,89); + Assert.assertEquals(0, b.andNotCardinality(b)); + } + + @Test + public void getFirstSetBit() { + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(); + Assert.assertEquals(-1, b.getFirstSetBit()); + b.set(0); + Assert.assertEquals(0, b.getFirstSetBit()); + b.clear(); + b.setSizeInBits(WORD_IN_BITS, false); + b.setSizeInBits(2*WORD_IN_BITS, true); + Assert.assertEquals(WORD_IN_BITS, b.getFirstSetBit()); + } + + @Test + public void clearStressTest() { + System.out.println("clear stress test"); + int n = 10 * WORD_IN_BITS; + for (int k = 0; k < 100; ++k) { + List setPositions = new ArrayList(n); + List clearPositions = new ArrayList(n); + for (int i = 0; i < n; ++i) { + setPositions.add(i); + clearPositions.add(i); + } + Collections.shuffle(setPositions); + Collections.shuffle(clearPositions); + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + for (int i = 0; i < n; ++i) { + bitmap.set(setPositions.get(i)); + bitmap.clear(clearPositions.get(i)); + } + for (int i = 0; i < n; ++i) { + bitmap.clear(i); + } + Assert.assertEquals(0, bitmap.cardinality()); + Assert.assertEquals(WORD_IN_BITS / 8, bitmap.sizeInBytes()); + } + } - EWAHCompressedBitmap resultBitmap = bitmap1.and(bitmap2); - resultBitmap.set(131); + @Test + public void clear() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(0, 1, 3, 199, 666); + Assert.assertEquals(667, bitmap.sizeInBits()); + bitmap.clear(900); + Assert.assertEquals(901, bitmap.sizeInBits()); + for (int i = 667; i < 901; ++i) { + Assert.assertFalse(bitmap.get(i)); + } + 
Assert.assertTrue(bitmap.get(199)); + bitmap.clear(199); + Assert.assertFalse(bitmap.get(199)); + } + + @Test + public void equalToSelf() { + EWAHCompressedBitmap ewahBitmap = EWAHCompressedBitmap.bitmapOf(0, 2, 55, + 64, 1 << 30); + Assert.assertTrue(ewahBitmap.equals(ewahBitmap)); + } + + @Test + public void notEqualTo() { + EWAHCompressedBitmap b1 = EWAHCompressedBitmap.bitmapOf(0,1,2,3,5,8,13,21,34,55,89); + EWAHCompressedBitmap b2 = EWAHCompressedBitmap.bitmapOf(0,1,2,3,5,8,13,21,34,55,89,144,233,377,610); + Assert.assertFalse(b1.equals(b2)); + } + + @Test + public void safeSerialization() throws IOException { + EWAHCompressedBitmap ewahBitmap = EWAHCompressedBitmap.bitmapOf(0, 2, 55, + 64, 1 << 30); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: you could use a file output steam instead of ByteArrayOutputStream + ewahBitmap.serialize(new DataOutputStream(bos)); + EWAHCompressedBitmap ewahBitmapnew = new EWAHCompressedBitmap(); + byte[] bout = bos.toByteArray(); + ewahBitmapnew.deserialize(new DataInputStream(new ByteArrayInputStream(bout))); + assertEquals(ewahBitmapnew, ewahBitmap); + Assert.assertEquals(ewahBitmapnew.serializedSizeInBytes(), ewahBitmap.serializedSizeInBytes()); + } + + @Test + public void simpleTestWithLongBuffer() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(LongBuffer.wrap(new long[10])); + + int maxPosition = 666; + int[] positions = new int[] { 1, maxPosition, 99, 5 }; + for (int position : positions) { + bitmap.set(position); + } - bitmap1.set(131); - assertEquals(bitmap1, resultBitmap); - } + Assert.assertEquals(positions.length, bitmap.cardinality()); - /** - * Test cardinality. 
- */ - @Test - public void testCardinality() { - System.out.println("testing EWAH cardinality"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(Integer.MAX_VALUE - 64); - // System.out.format("Total Items %d\n", bitmap.cardinality()); - Assert.assertTrue(bitmap.cardinality() == 1); - } + int[] sortedPositions = positions.clone(); + Arrays.sort(sortedPositions); + Assert.assertArrayEquals(sortedPositions, bitmap.toArray()); - /** - * Test clear function - */ - @Test - public void testClear() { - System.out.println("testing Clear"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(5); - bitmap.clear(); - bitmap.set(7); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); - Assert.assertTrue(7 == bitmap.toArray()[0]); - bitmap.clear(); - bitmap.set(5000); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); - bitmap.set(5001); - bitmap.set(5005); - bitmap.set(5100); - bitmap.set(5500); - bitmap.clear(); - bitmap.set(5); - bitmap.set(7); - bitmap.set(1000); - bitmap.set(1001); - Assert.assertTrue(4 == bitmap.cardinality()); - List positions = bitmap.getPositions(); - Assert.assertTrue(4 == positions.size()); - Assert.assertTrue(5 == positions.get(0).intValue()); - Assert.assertTrue(7 == positions.get(1).intValue()); - Assert.assertTrue(1000 == positions.get(2).intValue()); - Assert.assertTrue(1001 == positions.get(3).intValue()); - } + bitmap.not(); + Assert.assertEquals(maxPosition+1-positions.length, bitmap.cardinality()); - /** - * Test ewah compressed bitmap. 
- */ - @Test - public void testEWAHCompressedBitmap() { - System.out.println("testing EWAH"); - long zero = 0; - long specialval = 1l | (1l << 4) | (1l << 63); - long notzero = ~zero; - EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(specialval); - myarray1.add(specialval); - myarray1.add(notzero); - myarray1.add(zero); - Assert.assertEquals(myarray1.getPositions().size(), 6 + 64); - EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap(); - myarray2.add(zero); - myarray2.add(specialval); - myarray2.add(specialval); - myarray2.add(notzero); - myarray2.add(zero); - myarray2.add(zero); - myarray2.add(zero); - Assert.assertEquals(myarray2.getPositions().size(), 6 + 64); - List data1 = myarray1.getPositions(); - List data2 = myarray2.getPositions(); - Vector logicalor = new Vector(); - { - HashSet tmp = new HashSet(); - tmp.addAll(data1); - tmp.addAll(data2); - logicalor.addAll(tmp); - } - Collections.sort(logicalor); - Vector logicaland = new Vector(); - logicaland.addAll(data1); - logicaland.retainAll(data2); - Collections.sort(logicaland); - EWAHCompressedBitmap arrayand = myarray1.and(myarray2); - Assert.assertTrue(arrayand.getPositions().equals(logicaland)); - EWAHCompressedBitmap arrayor = myarray1.or(myarray2); - Assert.assertTrue(arrayor.getPositions().equals(logicalor)); - EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1); - Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); - EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1); - Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); - EWAHCompressedBitmap x = new EWAHCompressedBitmap(); - for (Integer i : myarray1.getPositions()) { - x.set(i.intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap(); - for (Integer i : myarray2.getPositions()) { - x.set(i.intValue()); - } - 
Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - x = new EWAHCompressedBitmap(); - for (Iterator k = myarray1.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap(); - for (Iterator k = myarray2.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - } + for (int i = 0; i <= maxPosition; i++) { + bitmap.set(i); + } + Assert.assertEquals(maxPosition + 1, bitmap.cardinality()); - /** - * Test externalization. - * - * @throws IOException - * Signals that an I/O exception has occurred. - */ - @Test - public void testExternalization() throws IOException { - System.out.println("testing EWAH externalization"); - EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutputStream oo = new ObjectOutputStream(bos); - ewcb.writeExternal(oo); - oo.close(); - ewcb = null; - ewcb = new EWAHCompressedBitmap(); - ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - ewcb.readExternal(new ObjectInputStream(bis)); - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertTrue(result.get(k).intValue() == val[k]); - } - } + bitmap.clear(); + Assert.assertEquals(0, bitmap.cardinality()); - @Test - public void testExtremeRange() { - System.out.println("testing EWAH at its extreme range"); - int N = 1024; - EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); - for (int i = 0; i < N; ++i) { - myarray1.set(Integer.MAX_VALUE - 64 - N + i); - Assert.assertTrue(myarray1.cardinality() == i + 1); - int[] val = myarray1.toArray(); - Assert.assertTrue(val[0] == Integer.MAX_VALUE - 64 - N); 
- } - } + bitmap.swap(EWAHCompressedBitmap.bitmapOf(1)); + Assert.assertEquals(1, bitmap.cardinality()); + } + + @Test + public void andCompressedSize() { + EWAHCompressedBitmap b1 = EWAHCompressedBitmap.bitmapOf(); + EWAHCompressedBitmap b2 = EWAHCompressedBitmap.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + b2.set(1); + b2.set(WORD_IN_BITS+1); + + EWAHCompressedBitmap result = b1.and(b2); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void orCompressedSize() { + EWAHCompressedBitmap b1 = EWAHCompressedBitmap.bitmapOf(); + EWAHCompressedBitmap b2 = EWAHCompressedBitmap.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + b2.setSizeInBits(1, false); + b2.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap result = b1.or(b2); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void xorCompressedSize() { + EWAHCompressedBitmap b1 = EWAHCompressedBitmap.bitmapOf(); + EWAHCompressedBitmap b2 = EWAHCompressedBitmap.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + b2.setSizeInBits(1, false); + b2.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap result = b1.xor(b2); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void andNotCompressedSize() { + EWAHCompressedBitmap b1 = EWAHCompressedBitmap.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); - /** - * Test the intersects method - */ - @Test - public void testIntersectsMethod() { - System.out.println("testing Intersets Bug"); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(1); - EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); - bitmap2.set(1); - bitmap2.set(11); - bitmap2.set(111); - bitmap2.set(1111111); - bitmap2.set(11111111); - Assert.assertTrue(bitmap.intersects(bitmap2)); - Assert.assertTrue(bitmap2.intersects(bitmap)); - - EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); - bitmap3.set(101); - EWAHCompressedBitmap bitmap4 
= new EWAHCompressedBitmap(); - for (int i = 0; i < 100; i++) { - bitmap4.set(i); - } - Assert.assertFalse(bitmap3.intersects(bitmap4)); - Assert.assertFalse(bitmap4.intersects(bitmap3)); - - EWAHCompressedBitmap bitmap5 = new EWAHCompressedBitmap(); - bitmap5.set(0); - bitmap5.set(10); - bitmap5.set(20); - EWAHCompressedBitmap bitmap6 = new EWAHCompressedBitmap(); - bitmap6.set(1); - bitmap6.set(11); - bitmap6.set(21); - bitmap6.set(1111111); - bitmap6.set(11111111); - Assert.assertFalse(bitmap5.intersects(bitmap6)); - Assert.assertFalse(bitmap6.intersects(bitmap5)); - - bitmap5.set(21); - Assert.assertTrue(bitmap5.intersects(bitmap6)); - Assert.assertTrue(bitmap6.intersects(bitmap5)); - - EWAHCompressedBitmap bitmap7 = new EWAHCompressedBitmap(); - bitmap7.set(1); - bitmap7.set(10); - bitmap7.set(20); - bitmap7.set(1111111); - bitmap7.set(11111111); - EWAHCompressedBitmap bitmap8 = new EWAHCompressedBitmap(); - for (int i = 0; i < 1000; i++) { - if (i != 1 && i != 10 && i != 20) { - bitmap8.set(i); - } - } - Assert.assertFalse(bitmap7.intersects(bitmap8)); - Assert.assertFalse(bitmap8.intersects(bitmap7)); - } + EWAHCompressedBitmap result = b1.andNot(b1); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void testBug091() { + String v1 = "0000000000000000000000000000000000000000000000000000000000111101"; + String v2 = "0000000000000000001111011111111111111111111111111110001111000000"; + + EWAHCompressedBitmap bm1 = strToBitmap(v1); + EWAHCompressedBitmap bm2 = strToBitmap(v2); + + bm1 = bm1.and(bm2); // bm1 should now have no bit set + + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + bm.setSizeInBits(bm1.sizeInBits(), false); // Create a bitmap with no bit set + + Assert.assertEquals(0,bm1.cardinality()); + Assert.assertEquals(0,bm1.cardinality()); + Assert.assertEquals(bm.sizeInBits(),bm1.sizeInBits()); + Assert.assertTrue(bm.equals(bm1)); + } + + private EWAHCompressedBitmap strToBitmap(String str) { + 
EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + for (int i = 0; i < str.length(); i++) { + if (str.charAt(i)=='1') { + bm.set(i); + } + } + bm.setSizeInBits(str.length(), false); + return bm; + } + + @Test + public void testBug090() throws Exception { + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + bm.setSizeInBits(8, false); // Create a bitmap with no bit set + + EWAHCompressedBitmap bm1 = bm.clone(); + bm1.not(); // Create a bitmap with all bits set + bm1 = bm1.and(bm); // Clear all bits + + Assert.assertEquals(0,bm.cardinality()); + Assert.assertEquals(0,bm1.cardinality()); + Assert.assertEquals(bm.sizeInBits(),bm1.sizeInBits()); + Assert.assertTrue(bm.equals(bm1)); + } + + @Test + public void testBug090b() throws Exception { + EWAHCompressedBitmap bm1 = new EWAHCompressedBitmap(); + bm1.setSizeInBits(8, false); // Create a bitmap with no bit set + System.out.println(bm1.toDebugString()); + EWAHCompressedBitmap bm2 = new EWAHCompressedBitmap(); + bm2.setSizeInBits(64, false); // Create a bitmap with no bit set + EWAHCompressedBitmap bm3 = new EWAHCompressedBitmap(); + Assert.assertTrue(bm1.equals(bm2)); + Assert.assertTrue(bm2.equals(bm1)); + Assert.assertTrue(bm2.equals(bm3)); + Assert.assertTrue(bm3.equals(bm2)); + Assert.assertTrue(bm1.equals(bm3)); + Assert.assertTrue(bm3.equals(bm1)); + } + + + @Test + public void testBug090c() throws Exception { + EWAHCompressedBitmap bm1 = new EWAHCompressedBitmap(); + bm1.setSizeInBits(8, false); // Create a bitmap with no bit set + System.out.println(bm1.toDebugString()); + EWAHCompressedBitmap bm2 = new EWAHCompressedBitmap(); + bm2.setSizeInBits(64, false); // Create a bitmap with no bit set + EWAHCompressedBitmap bm3 = new EWAHCompressedBitmap(); + Assert.assertEquals(bm1.hashCode(), bm2.hashCode()); + Assert.assertEquals(bm3.hashCode(), bm2.hashCode()); + } + + + + @Test + public void jugovacTest() { + EWAHCompressedBitmap bm1 = new EWAHCompressedBitmap(1); + bm1.set(1); + EWAHCompressedBitmap bm2 
= new EWAHCompressedBitmap(0); + bm1.andCardinality(bm2); + } + + @Test + public void setOutOfOrderStressTest() { + System.out.println("out-of-order stress test"); + int n = 10 * WORD_IN_BITS; + for(int k = 0; k < 100; ++k) { + List positions = new ArrayList(n); + for (int i = 0; i < n; ++i) { + positions.add(i); + } + Collections.shuffle(positions); + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + for (int position : positions) { + bitmap.set(position); + } + IntIterator iterator = bitmap.intIterator(); + for (int i = 0; i < n; ++i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS / 8, bitmap.sizeInBytes()); + } + } - /** - * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound - * exception. - */ - @Test - public void testLargeEWAHCompressedBitmap() { - System.out.println("testing EWAH over a large array"); - EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); - int N = 11000000; - for (int i = 0; i < N; ++i) { - myarray1.set(i); - } - Assert.assertTrue(myarray1.sizeInBits() == N); - } + @Test + public void setOutOfOrder() { + int[][] positionsArray = new int[][]{ + new int[] { 111, 94, 17, 116, 100, 4, 72, 125, 112, 130, 8, 141, 45, 33, 171, 122, 128, 104, 102, 62, 115, 37, 96, 77, 165, 168, 52, 58, 47, 59, 49, 69, 185, 97, 151, 28, 29, 3, 61, 88, 135, 164, 178, 120, 144, 73, 155, 180, 140, 74, 20, 161, 143, 92, 85, 71, 63, 123, 147, 75, 15, 34, 105, 93, 158, 167, 86, 1, 127, 160, 133, 31, 53, 99, 129, 21, 44, 81, 27, 38, 11, 172, 66, 118, 57, 54, 36, 32, 159, 106, 12, 114, 132, 174, 9, 184, 121, 134, 181, 191, 190, 64, 65, 55, 156, 173, 109, 51, 170, 7, 146, 157, 182, 23, 10, 24, 2, 119, 25, 101, 84, 154, 179, 67, 46, 177, 40, 107, 14, 68, 79, 82, 22, 137, 124, 48, 0, 110, 148, 18, 188, 30, 169, 60, 145, 35, 89, 19, 90, 153, 163, 41, 70, 113, 108, 39, 152, 117, 175, 6, 43, 50, 80, 26, 95, 83, 126, 
103, 91, 183, 16, 150, 131, 78, 189, 136, 5, 149, 187, 87, 176, 142, 42, 138, 186, 139, 56, 166, 98, 76, 13, 162 }, + new int[] { 160, 146, 144, 19, 94, 135, 109, 150, 133, 158, 168, 8, 151, 115, 91, 167, 147, 54, 126, 110, 155, 163, 52, 80, 9, 38, 48, 66, 21, 174, 49, 77, 165, 114, 149, 71, 86, 41, 185, 101, 180, 57, 96, 112, 67, 68, 184, 69, 148, 100, 27, 164, 0, 2, 90, 111, 72, 159, 3, 127, 83, 122, 128, 75, 34, 182, 123, 179, 130, 172, 26, 58, 120, 42, 102, 87, 16, 97, 39, 121, 161, 169, 145, 171, 17, 103, 37, 25, 117, 46, 53, 84, 125, 23, 1, 141, 137, 186, 30, 50, 190, 131, 191, 56, 107, 178, 12, 82, 18, 44, 113, 116, 92, 51, 134, 156, 15, 153, 14, 188, 162, 106, 20, 10, 175, 157, 124, 13, 189, 88, 119, 136, 176, 139, 187, 170, 173, 65, 24, 40, 181, 61, 47, 35, 154, 132, 142, 62, 45, 183, 138, 95, 6, 152, 93, 4, 73, 5, 98, 28, 11, 166, 81, 99, 32, 22, 79, 60, 33, 85, 63, 76, 105, 143, 55, 36, 43, 59, 118, 7, 108, 64, 104, 74, 89, 129, 177, 29, 140, 78, 31, 70 }, + new int[] { 156, 104, 167, 70, 102, 151, 87, 186, 169, 172, 158, 73, 85, 89, 103, 79, 38, 81, 66, 42, 126, 61, 157, 185, 120, 149, 32, 114, 170, 155, 91, 127, 96, 30, 4, 165, 39, 0, 56, 129, 171, 82, 108, 26, 123, 55, 152, 184, 137, 118, 147, 134, 116, 14, 113, 177, 1, 58, 35, 63, 150, 41, 19, 101, 23, 43, 49, 180, 141, 176, 153, 37, 12, 122, 145, 112, 27, 97, 105, 20, 133, 109, 6, 50, 51, 106, 11, 125, 25, 139, 9, 52, 54, 138, 95, 166, 62, 189, 173, 124, 48, 100, 47, 128, 159, 31, 45, 44, 72, 17, 130, 88, 119, 67, 140, 76, 98, 13, 131, 78, 117, 16, 190, 28, 143, 187, 183, 5, 18, 164, 7, 24, 115, 121, 34, 160, 178, 99, 162, 111, 146, 174, 69, 77, 161, 53, 65, 64, 188, 181, 8, 29, 10, 80, 110, 90, 132, 40, 86, 135, 179, 3, 148, 92, 33, 2, 59, 107, 60, 93, 191, 74, 84, 182, 83, 15, 154, 175, 36, 46, 57, 136, 21, 163, 71, 142, 94, 144, 75, 22, 168, 68 }, + new int[] { 186, 71, 66, 157, 163, 135, 38, 160, 105, 28, 173, 106, 6, 177, 22, 73, 2, 11, 110, 108, 26, 139, 56, 137, 17, 129, 166, 24, 33, 12, 51, 
96, 44, 74, 87, 72, 99, 156, 9, 84, 172, 167, 150, 153, 134, 30, 115, 102, 158, 76, 170, 55, 65, 162, 54, 14, 53, 154, 70, 161, 75, 159, 176, 10, 111, 100, 133, 37, 93, 175, 67, 83, 86, 169, 147, 149, 138, 82, 103, 164, 97, 124, 1, 16, 155, 116, 118, 112, 143, 98, 91, 13, 101, 180, 19, 34, 35, 127, 39, 152, 42, 61, 68, 168, 104, 141, 184, 185, 15, 64, 128, 32, 21, 49, 25, 89, 171, 81, 183, 181, 40, 5, 125, 78, 189, 109, 4, 113, 178, 114, 121, 62, 63, 79, 0, 43, 142, 36, 119, 47, 122, 148, 41, 92, 187, 131, 48, 45, 132, 69, 182, 90, 59, 126, 60, 130, 29, 57, 18, 94, 120, 136, 27, 46, 151, 179, 190, 3, 107, 52, 88, 77, 174, 95, 165, 31, 145, 188, 23, 80, 8, 85, 117, 144, 50, 123, 146, 20, 58, 140, 7, 191 }, + new int[] { 187, 9, 174, 56, 26, 81, 132, 156, 103, 100, 79, 137, 117, 123, 157, 68, 61, 167, 98, 0, 77, 39, 65, 34, 48, 72, 74, 181, 146, 70, 5, 138, 80, 90, 86, 46, 37, 53, 89, 83, 45, 121, 166, 11, 171, 58, 125, 142, 64, 92, 108, 59, 71, 127, 135, 188, 14, 150, 173, 55, 158, 136, 99, 10, 112, 116, 155, 151, 145, 38, 54, 35, 101, 12, 3, 107, 180, 178, 22, 84, 183, 154, 102, 104, 190, 159, 170, 47, 115, 111, 88, 131, 140, 124, 149, 6, 168, 133, 28, 139, 82, 91, 160, 27, 126, 78, 130, 41, 134, 164, 163, 51, 19, 23, 17, 60, 189, 20, 42, 114, 13, 118, 97, 30, 147, 76, 24, 93, 110, 44, 50, 176, 7, 16, 87, 63, 67, 69, 113, 15, 185, 148, 62, 33, 95, 169, 25, 57, 161, 182, 120, 21, 36, 94, 1, 128, 75, 175, 66, 184, 31, 73, 153, 52, 129, 152, 85, 49, 119, 32, 4, 40, 2, 8, 177, 109, 96, 29, 43, 179, 18, 105, 141, 186, 106, 162, 165, 122, 143, 172, 144, 191 }, + new int[] { 219, 226, 72, 129, 131, 249, 140, 213, 245, 240, 28, 250, 212, 87, 42, 112, 69, 94, 125, 165, 215, 30, 197, 247, 39, 171, 16, 3, 101, 147, 54, 149, 89, 236, 15, 77, 141, 246, 36, 6, 104, 85, 248, 8, 66, 119, 23, 2, 123, 91, 229, 61, 68, 223, 124, 135, 158, 218, 177, 251, 71, 75, 26, 217, 120, 180, 188, 64, 80, 100, 252, 208, 45, 130, 52, 44, 31, 216, 167, 152, 84, 126, 142, 224, 65, 154, 127, 113, 92, 
170, 74, 108, 67, 57, 17, 201, 78, 32, 244, 194, 157, 121, 103, 122, 48, 232, 117, 34, 178, 46, 179, 231, 95, 211, 183, 110, 162, 7, 186, 196, 148, 187, 93, 173, 47, 88, 156, 172, 73, 204, 139, 41, 132, 58, 159, 90, 109, 4, 70, 5, 176, 99, 160, 184, 150, 18, 133, 106, 199, 168, 161, 118, 63, 145, 11, 20, 10, 144, 207, 174, 230, 102, 51, 253, 37, 225, 243, 22, 151, 128, 175, 242, 182, 220, 206, 136, 40, 190, 254, 235, 195, 27, 35, 19, 62, 21, 81, 1, 198, 56, 163, 193, 155, 53, 205, 203, 241, 214, 169, 134, 192, 233, 50, 210, 164, 97, 221, 185, 13, 255, 227, 83, 96, 209, 146, 114, 143, 237, 107, 105, 115, 166, 200, 222, 59, 76, 29, 153, 43, 14, 181, 79, 189, 24, 228, 38, 86, 0, 116, 238, 234, 55, 98, 137, 12, 202, 191, 111, 33, 49, 25, 9, 138, 60, 82, 239 }, + new int[] { 261, 182, 37, 161, 47, 240, 214, 124, 167, 233, 110, 83, 310, 209, 198, 206, 201, 219, 177, 82, 210, 107, 163, 16, 200, 53, 71, 20, 193, 158, 183, 106, 138, 290, 19, 55, 313, 197, 123, 125, 257, 92, 104, 60, 234, 139, 218, 223, 88, 276, 127, 259, 148, 297, 145, 38, 302, 260, 118, 282, 314, 100, 23, 153, 288, 121, 241, 316, 165, 168, 98, 24, 238, 244, 89, 278, 255, 237, 99, 277, 306, 61, 222, 27, 191, 215, 298, 43, 87, 51, 293, 129, 70, 25, 180, 190, 132, 133, 149, 94, 79, 21, 73, 181, 225, 131, 44, 249, 119, 95, 195, 69, 204, 315, 187, 54, 81, 134, 164, 284, 30, 232, 52, 160, 235, 64, 226, 171, 205, 262, 236, 300, 309, 304, 156, 263, 10, 286, 221, 96, 50, 289, 189, 212, 143, 254, 256, 49, 147, 75, 318, 85, 169, 185, 248, 1, 18, 6, 15, 295, 159, 162, 112, 301, 292, 36, 97, 247, 146, 59, 32, 155, 157, 178, 33, 22, 103, 128, 170, 108, 65, 220, 188, 203, 229, 130, 253, 117, 230, 243, 287, 273, 57, 68, 246, 109, 40, 56, 274, 46, 12, 285, 45, 242, 245, 126, 258, 41, 144, 17, 58, 213, 62, 252, 194, 217, 122, 102, 279, 72, 305, 266, 216, 303, 35, 283, 39, 137, 269, 272, 312, 4, 151, 2, 172, 13, 294, 296, 186, 114, 250, 101, 224, 3, 77, 141, 111, 67, 74, 184, 307, 115, 0, 271, 227, 311, 78, 28, 299, 63, 308, 
150, 208, 211, 317, 116, 5, 239, 202, 135, 84, 142, 86, 80, 192, 251, 42, 199, 34, 281, 9, 93, 8, 136, 264, 174, 231, 175, 275, 280, 207, 48, 228, 90, 268, 76, 113, 179, 140, 11, 173, 120, 166, 265, 152, 291, 176, 91, 196, 66, 154, 26, 270, 7, 267, 319, 31, 105, 14, 29 }, + new int[] { 306, 159, 36, 192, 263, 107, 119, 109, 140, 297, 275, 261, 259, 139, 283, 211, 148, 317, 262, 91, 11, 278, 301, 216, 232, 168, 12, 133, 116, 66, 88, 95, 154, 46, 312, 136, 229, 242, 218, 53, 38, 213, 127, 32, 247, 130, 84, 31, 137, 93, 251, 179, 238, 220, 106, 26, 298, 239, 18, 111, 44, 103, 45, 118, 292, 276, 59, 20, 308, 196, 141, 67, 78, 72, 172, 212, 255, 288, 160, 289, 69, 209, 47, 187, 303, 117, 181, 104, 43, 210, 79, 222, 113, 315, 296, 290, 285, 264, 17, 129, 99, 149, 2, 138, 175, 295, 55, 206, 16, 299, 71, 167, 62, 123, 50, 215, 246, 157, 164, 236, 266, 319, 144, 221, 7, 92, 75, 51, 152, 282, 200, 57, 49, 271, 134, 186, 56, 70, 170, 97, 199, 300, 98, 169, 314, 128, 195, 318, 267, 10, 22, 219, 272, 189, 258, 226, 42, 87, 76, 73, 153, 178, 183, 110, 9, 23, 155, 205, 286, 126, 241, 256, 214, 94, 250, 21, 142, 8, 80, 176, 102, 19, 161, 132, 163, 177, 194, 174, 120, 284, 52, 171, 124, 61, 150, 1, 166, 6, 231, 240, 307, 291, 101, 277, 162, 228, 89, 54, 207, 217, 85, 108, 245, 184, 74, 305, 237, 77, 235, 146, 65, 253, 281, 304, 27, 4, 294, 33, 203, 112, 40, 224, 29, 165, 249, 100, 293, 105, 243, 13, 197, 310, 63, 311, 135, 96, 173, 68, 257, 156, 114, 5, 35, 260, 90, 15, 145, 143, 122, 287, 248, 244, 24, 225, 0, 268, 14, 234, 188, 201, 279, 86, 60, 313, 230, 39, 227, 208, 28, 233, 25, 198, 302, 58, 191, 202, 309, 316, 48, 254, 37, 131, 252, 151, 81, 182, 204, 82, 185, 125, 115, 34, 269, 190, 158, 83, 147, 30, 121, 273, 280, 64, 180, 193, 274, 265, 3, 41, 223, 270 }, + new int[] { 633, 145, 267, 188, 75, 528, 160, 305, 459, 455, 530, 186, 359, 181, 437, 250, 180, 325, 147, 473, 87, 510, 465, 280, 166, 120, 453, 128, 566, 33, 608, 253, 350, 522, 430, 351, 360, 580, 45, 51, 544, 555, 
457, 597, 213, 400, 390, 513, 438, 313, 37, 616, 57, 311, 436, 100, 228, 108, 533, 1, 396, 462, 342, 378, 297, 148, 216, 211, 304, 146, 546, 46, 262, 290, 71, 639, 201, 624, 178, 303, 254, 487, 468, 344, 506, 451, 369, 420, 195, 444, 107, 50, 592, 12, 326, 259, 13, 227, 634, 270, 226, 276, 570, 524, 194, 190, 90, 394, 101, 606, 542, 229, 340, 581, 541, 578, 118, 301, 5, 16, 501, 14, 158, 466, 551, 636, 231, 320, 193, 222, 625, 152, 112, 134, 167, 287, 199, 189, 610, 440, 110, 554, 89, 408, 35, 365, 138, 419, 22, 483, 157, 122, 214, 514, 316, 247, 371, 109, 91, 500, 206, 237, 63, 170, 495, 163, 352, 523, 449, 384, 29, 418, 88, 536, 426, 432, 44, 635, 605, 347, 192, 489, 590, 310, 0, 271, 337, 185, 516, 234, 15, 150, 79, 210, 235, 613, 480, 161, 21, 355, 175, 56, 169, 38, 572, 637, 607, 65, 503, 467, 401, 261, 505, 539, 402, 255, 34, 171, 97, 98, 174, 59, 176, 383, 596, 593, 464, 431, 604, 92, 266, 476, 286, 472, 114, 260, 27, 84, 336, 332, 2, 386, 519, 525, 559, 232, 308, 124, 439, 353, 545, 416, 600, 212, 73, 575, 284, 299, 252, 385, 441, 69, 531, 275, 30, 427, 583, 269, 488, 187, 300, 263, 623, 411, 484, 52, 67, 405, 393, 595, 338, 611, 99, 333, 534, 617, 39, 8, 568, 362, 202, 17, 111, 615, 603, 567, 560, 397, 452, 279, 191, 507, 143, 508, 442, 47, 31, 535, 272, 322, 509, 258, 168, 309, 130, 149, 103, 184, 485, 571, 461, 526, 105, 155, 218, 448, 407, 285, 93, 95, 78, 446, 282, 215, 42, 406, 104, 348, 4, 72, 433, 323, 106, 377, 594, 20, 589, 74, 520, 458, 302, 293, 85, 470, 403, 577, 298, 60, 498, 217, 454, 409, 26, 208, 10, 629, 370, 291, 387, 225, 238, 140, 358, 131, 66, 321, 354, 179, 329, 380, 502, 435, 312, 242, 159, 584, 561, 547, 53, 24, 492, 64, 58, 249, 317, 241, 494, 497, 294, 562, 248, 621, 364, 288, 246, 314, 307, 256, 129, 239, 81, 388, 586, 557, 345, 587, 154, 251, 49, 173, 481, 40, 563, 550, 598, 511, 278, 389, 243, 162, 413, 517, 372, 32, 601, 632, 521, 612, 343, 477, 126, 392, 428, 41, 86, 61, 196, 376, 327, 295, 331, 478, 132, 638, 83, 77, 102, 3, 
62, 11, 373, 23, 113, 141, 200, 619, 335, 76, 264, 177, 349, 375, 283, 356, 198, 491, 151, 512, 203, 532, 346, 6, 482, 165, 334, 582, 543, 475, 127, 391, 423, 142, 548, 54, 172, 527, 306, 588, 490, 28, 289, 374, 553, 319, 183, 136, 115, 399, 585, 443, 315, 245, 123, 257, 412, 445, 627, 631, 43, 620, 236, 277, 339, 504, 220, 499, 153, 156, 424, 7, 55, 456, 296, 558, 515, 496, 367, 569, 469, 121, 240, 119, 538, 207, 363, 628, 125, 205, 460, 96, 133, 268, 265, 518, 18, 164, 281, 209, 556, 137, 9, 224, 630, 182, 94, 361, 48, 565, 471, 395, 341, 573, 463, 434, 429, 382, 139, 273, 116, 80, 410, 197, 415, 366, 537, 421, 564, 486, 599, 618, 274, 576, 493, 19, 422, 25, 529, 552, 135, 219, 36, 602, 379, 591, 626, 357, 447, 292, 223, 381, 479, 233, 368, 230, 425, 574, 70, 474, 244, 82, 324, 417, 450, 221, 404, 540, 549, 328, 414, 622, 614, 117, 398, 579, 609, 204, 68, 330, 144, 318 }, + }; + for(int[] positions : positionsArray) { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + for (int position : positions) { + bitmap.set(position); + Assert.assertTrue(bitmap.toList().contains(position)); + } + IntIterator iterator = bitmap.intIterator(); + for (int i = 0; i < positions.length; ++i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS / 8, bitmap.sizeInBytes()); + } + } - /** - * Test massive and. 
- */ - @Test - public void testMassiveAnd() { - System.out.println("testing massive logical and"); - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[1024]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap answer = ewah[0]; - for (int k = 1; k < ewah.length; ++k) - answer = answer.and(ewah[k]); - // result should be empty - if (answer.getPositions().size() != 0) - System.out.println(answer.toDebugString()); - Assert.assertTrue(answer.getPositions().size() == 0); - Assert.assertTrue(EWAHCompressedBitmap.and(ewah).getPositions().size() == 0); - } + @Test + public void setBitsInDecreasingOrder() { + int[] positions = new int[] { 0, 1, 2, 3, 5, 8, 13, 21 }; + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + for(int i=positions.length-1; i>=0; --i) { + Assert.assertTrue(bitmap.set(positions[i])); + } + IntIterator iterator = bitmap.intIterator(); + for(int position : positions) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(position, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - /** - * Test massive and not. 
- */ - @Test - public void testMassiveAndNot() { - System.out.println("testing massive and not"); - final int N = 1024; - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap answer = ewah[0]; - EWAHCompressedBitmap answer2 = ewah[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.andNot(ewah[k]); - EWAHCompressedBitmap copy = null; - try { - copy = ewah[k].clone(); - copy.not(); - answer2.and(copy); - assertEqualsPositions(answer, answer2); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - } - } + @Test + public void setBitsInDecreasingOrderWithWordPrefix() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.set(10); + bitmap.setSizeInBits(WORD_IN_BITS, false); + bitmap.set(WORD_IN_BITS + 10); + bitmap.set(WORD_IN_BITS + 5); + IntIterator iterator = bitmap.intIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(10, iterator.next()); + Assert.assertEquals(WORD_IN_BITS + 5, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS + 10, iterator.next()); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void setBitsInDecreasingOrderWithWordPrefixOfOnes() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, true); + bitmap.set(WORD_IN_BITS + 10); + bitmap.set(WORD_IN_BITS + 5); + IntIterator iterator = bitmap.intIterator(); + for(int i=0; i=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(positions[i], iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - @Test - public void testOrCardinality() { - System.out.println("testing Or Cardinality"); - for (int N = 0; N < 1024; ++N) { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - for (int i = 0; i 
< N; i++) { - bitmap.set(i); - } - bitmap.set(1025); - bitmap.set(1026); - Assert.assertEquals(N + 2, bitmap.cardinality()); - EWAHCompressedBitmap orbitmap = bitmap.or(bitmap); - assertEquals(orbitmap, bitmap); - Assert.assertEquals(N + 2, orbitmap.cardinality()); - - Assert.assertEquals(N + 2, - bitmap.orCardinality(new EWAHCompressedBitmap())); - } - } + @Test + public void reverseIntIteratorOverBitmapsOfOnes() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, true); + IntIterator iterator = bitmap.reverseIntIterator(); + for(int i=WORD_IN_BITS-1; i>=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - /** - * Test sets and gets. - */ - @Test - public void testSetGet() { - System.out.println("testing EWAH set/get"); - EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertEquals(result.get(k).intValue(), val[k]); - } - } + @Test + public void reverseIntIteratorOverBitmapsOfZeros() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, false); + IntIterator iterator = bitmap.reverseIntIterator(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverBitmapsOfOnesAndZeros() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS-10, true); + bitmap.setSizeInBits(WORD_IN_BITS, false); + IntIterator iterator = bitmap.reverseIntIterator(); + for(int i=WORD_IN_BITS-10; i>0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i-1, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - @Test - public void 
testHashCode() { - System.out.println("testing hashCode"); - EWAHCompressedBitmap ewcb = EWAHCompressedBitmap.bitmapOf(50, 70).and( - EWAHCompressedBitmap.bitmapOf(50, 1000)); - Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50), ewcb); - Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50).hashCode(), - ewcb.hashCode()); - } + @Test + public void reverseIntIteratorOverMultipleRLWs() { + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(1000, 100000, 100000 + WORD_IN_BITS); + IntIterator iterator = b.reverseIntIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(100000 + WORD_IN_BITS, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(100000, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(1000, iterator.next()); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverMixedRunningLengthWords() { + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + b.setSizeInBits(WORD_IN_BITS, true); + b.set(WORD_IN_BITS+5); + + IntIterator iterator = b.reverseIntIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS+5, iterator.next()); + for(int i=WORD_IN_BITS-1; i>=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - @Test - public void testSetSizeInBits() { - System.out.println("testing SetSizeInBits"); - testSetSizeInBits(130, 131); - testSetSizeInBits(63, 64); - testSetSizeInBits(64, 65); - testSetSizeInBits(64, 128); - testSetSizeInBits(35, 131); - testSetSizeInBits(130, 400); - testSetSizeInBits(130, 191); - testSetSizeInBits(130, 192); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(31); - bitmap.setSizeInBits(130, false); - bitmap.set(131); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(31); - jdkBitmap.set(131); - assertEquals(jdkBitmap, bitmap); - } + @Test + public void 
reverseIntIteratorOverConsecutiveLiteralsInSameRunningLengthWord() { + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + b.setSizeInBits(WORD_IN_BITS, true); + b.setSizeInBits(2*WORD_IN_BITS, false); + b.setSizeInBits(3*WORD_IN_BITS, true); + b.set(3*WORD_IN_BITS+5); + b.set(5*WORD_IN_BITS-1); + + IntIterator iterator = b.reverseIntIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(5*WORD_IN_BITS-1, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(3*WORD_IN_BITS+5, iterator.next()); + for(int i=3*WORD_IN_BITS-1; i>=2*WORD_IN_BITS; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + for(int i=WORD_IN_BITS-1; i>=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - /** - * Test with parameters. - * - * @throws IOException - * Signals that an I/O exception has occurred. - */ - @Test - public void testWithParameters() throws IOException { - System.out - .println("These tests can run for several minutes. 
Please be patient."); - for (int k = 2; k < 1 << 24; k *= 8) - shouldSetBits(k); - PolizziTest(64); - PolizziTest(128); - PolizziTest(256); - PolizziTest(2048); - System.out.println("Your code is probably ok."); - } + @Test + public void isEmpty() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(1000, false); + Assert.assertTrue(bitmap.isEmpty()); + bitmap.set(1001); + Assert.assertFalse(bitmap.isEmpty()); + } + + @Test + public void issue58() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(52344, 52344 + 9); + ChunkIterator iterator = bitmap.chunkIterator(); + + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(52344, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(1, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(8, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(1, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void issue59() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(243, 260, 1000); + ChunkIterator iter = bitmap.chunkIterator(); + iter.move(245); + Assert.assertEquals(15, iter.nextLength()); + } + + @Test + public void issue61() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(210696); + bitmap.set(210984); + bitmap.set(210985); + ChunkIterator iter = bitmap.chunkIterator(); + iter.move(210984); + Assert.assertEquals(2, iter.nextLength()); + + bitmap = new EWAHCompressedBitmap(); + bitmap.set(210696); + bitmap.set(210698); + bitmap.set(210699); + iter = 
bitmap.chunkIterator(); + iter.move(210698); + Assert.assertEquals(2, iter.nextLength()); + } + + @Test + public void chunkIterator() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(0, 1, 2, 3, 4, 7, 8, 9, 10); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(5, iterator.nextLength()); + iterator.move(2); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(3, iterator.nextLength()); + iterator.move(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(2, iterator.nextLength()); + iterator.move(5); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(1, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void chunkIteratorOverBitmapOfZeros() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, false); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void chunkIteratorOverBitmapOfZerosAndOnes() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS + 10, false); + bitmap.setSizeInBits(2 * WORD_IN_BITS, true); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); + iterator.move(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); + iterator.move(); + 
Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void chunkIteratorOverBitmapOfOnesAndZeros() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS - 10, true); + bitmap.setSizeInBits(2 * WORD_IN_BITS, false); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); + iterator.move(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void simpleCompose() { + EWAHCompressedBitmap bitmap1 = EWAHCompressedBitmap.bitmapOf(1, 3, 4); + bitmap1.setSizeInBits(5, false); + + EWAHCompressedBitmap bitmap2 = EWAHCompressedBitmap.bitmapOf(0, 2); + + EWAHCompressedBitmap result = bitmap1.compose(bitmap2); + + Assert.assertEquals(5, result.sizeInBits()); + Assert.assertEquals(2, result.cardinality()); + Assert.assertEquals(Integer.valueOf(1), result.toList().get(0)); + Assert.assertEquals(Integer.valueOf(4), result.toList().get(1)); + } + + @Test + public void composeBitmapOfOnesWithItself() { + EWAHCompressedBitmap bitmap = EWAHCompressedBitmap.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap result = bitmap.compose(bitmap); + + Assert.assertEquals(bitmap, result); + } + + @Test + public void composeBitmapOfZerosAndOnesWithBitmapOfOnes() { + EWAHCompressedBitmap bitmap1 = EWAHCompressedBitmap.bitmapOf(); + bitmap1.setSizeInBits(WORD_IN_BITS, false); + bitmap1.setSizeInBits(2 * WORD_IN_BITS, true); + + EWAHCompressedBitmap bitmap2 = EWAHCompressedBitmap.bitmapOf(); + bitmap2.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap result = bitmap1.compose(bitmap2); + + Assert.assertEquals(bitmap1, result); + } + + @Test + public void 
composeBitmapOfOnesWithBitmapOfZerosAndOnes() { + EWAHCompressedBitmap bitmap1 = EWAHCompressedBitmap.bitmapOf(); + bitmap1.setSizeInBits(2 * WORD_IN_BITS, true); + + EWAHCompressedBitmap bitmap2 = EWAHCompressedBitmap.bitmapOf(); + bitmap2.setSizeInBits(WORD_IN_BITS, false); + bitmap2.setSizeInBits(2 * WORD_IN_BITS, true); + + EWAHCompressedBitmap result = bitmap1.compose(bitmap2); + + Assert.assertEquals(bitmap2, result); + } + + @Test + public void composeBitmapWithBitmapOfZeros() { + EWAHCompressedBitmap bitmap1 = EWAHCompressedBitmap.bitmapOf(1, 3, 4, 9); + bitmap1.setSizeInBits(WORD_IN_BITS, false); + + EWAHCompressedBitmap bitmap2 = EWAHCompressedBitmap.bitmapOf(); + bitmap2.setSizeInBits(5, false); + + EWAHCompressedBitmap result = bitmap1.compose(bitmap2); + + Assert.assertEquals(0, result.cardinality()); + Assert.assertEquals(WORD_IN_BITS, result.sizeInBits()); + } + + @Test + public void testAstesana() throws Exception { + for(int k = 5; k < 256; ++k) { + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + bm.set(1); + bm.setSizeInBits(k, false); + EWAHCompressedBitmap bm1 = bm.clone(); + bm1.not(); + EWAHCompressedBitmap x = bm1.and(bm1); + Assert.assertEquals(x.cardinality(), k-1); + x = bm1.andNot(bm1); + Assert.assertEquals(x.cardinality(), 0); + x = bm1.xor(bm1); + Assert.assertEquals(x.cardinality(), 0); + x = bm1.or(bm1); + Assert.assertEquals(x.cardinality(), k-1); + } + } + @Test + public void testAstesana2() { + for (int k = 1; k < 256; ++k) { + // Create two equivalent bitmaps + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + bm.set(0); + bm.setSizeInBits(k, false); + EWAHCompressedBitmap bm3 = new EWAHCompressedBitmap(); + bm3.set(0); + bm3.setSizeInBits(k, false); + // Perform two negation -> + // should change nothing + bm.not(); + bm.not(); + // Verify it changes nothing + Assert.assertArrayEquals(bm.toArray(), bm3.toArray()); + Assert.assertEquals(bm.sizeInBits(), bm3.sizeInBits()); - /** - * Pseudo-non-deterministic test 
inspired by S.J.vanSchaik. (Yes, - * non-deterministic tests are bad, but the test is actually deterministic.) - */ - @Test - public void vanSchaikTest() { - System.out.println("testing vanSchaikTest (this takes some time)"); - final int totalNumBits = 32768; - final double odds = 0.9; - Random rand = new Random(323232323); - for (int t = 0; t < 100; t++) { - int numBitsSet = 0; - EWAHCompressedBitmap cBitMap = new EWAHCompressedBitmap(); - for (int i = 0; i < totalNumBits; i++) { - if (rand.nextDouble() < odds) { - cBitMap.set(i); - numBitsSet++; - } - } - Assert.assertEquals(cBitMap.cardinality(), numBitsSet); - } + Assert.assertTrue(bm.equals(bm3)); + } + } - } + @Test + public void clearIntIterator() { + EWAHCompressedBitmap x = EWAHCompressedBitmap.bitmapOf(1, 3, 7, 8, 10); + x.setSizeInBits(500, true); + x.setSizeInBits(501, false); + x.setSizeInBits(1000, true); + x.set(1001); + IntIterator iterator = x.clearIntIterator(); + for (int i : Arrays.asList(0, 2, 4, 5, 6, 9, 500, 1000)) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - /** - * Function used in a test inspired by Federico Fissore. 
- * - * @param size - * the number of set bits - * @param seed - * the random seed - * @return the pseudo-random array int[] - */ - public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { - Random random = new Random(seed); - // build raw int array - int[] bits = new int[size]; - for (int i = 0; i < bits.length; i++) { - bits[i] = random.nextInt(TEST_BS_SIZE); - } - // might generate duplicates - Arrays.sort(bits); - // first count how many distinct values - int counter = 0; - int oldx = -1; - for (int x : bits) { - if (x != oldx) - ++counter; - oldx = x; - } - // then construct new array - int[] answer = new int[counter]; - counter = 0; - oldx = -1; - for (int x : bits) { - if (x != oldx) { - answer[counter] = x; - ++counter; - } - oldx = x; - } - return answer; - } + @Test + public void clearIntIteratorOverBitmapOfZeros() { + EWAHCompressedBitmap x = EWAHCompressedBitmap.bitmapOf(); + x.setSizeInBits(WORD_IN_BITS, false); + IntIterator iterator = x.clearIntIterator(); + for (int i = 0; i < WORD_IN_BITS; ++i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } - /** - * Test inspired by Bilal Tayara - */ - @Test - public void TayaraTest() { - System.out.println("Tayara test"); - for (int offset = 64; offset < (1 << 30); offset *= 2) { - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b = new EWAHCompressedBitmap(); - for (int k = 0; k < 64; ++k) { - a.set(offset + k); - b.set(offset + k); - } - if (!a.and(b).equals(a)) - throw new RuntimeException("bug"); - if (!a.or(b).equals(a)) - throw new RuntimeException("bug"); - } - } + @Test + public void testGet() { + for (int gap = 29; gap < 10000; gap *= 10) { + EWAHCompressedBitmap x = new EWAHCompressedBitmap(); + for (int k = 0; k < 100; ++k) + x.set(k * gap); + for (int k = 0; k < 100 * gap; ++k) + if (x.get(k)) { + if (k % gap != 0) + throw new RuntimeException( + "spotted an extra set 
bit at " + + k + " gap = " + + gap + ); + } else if (k % gap == 0) + throw new RuntimeException( + "missed a set bit " + k + + " gap = " + gap + ); + } + } - @Test - public void TestCloneEwahCompressedBitArray() - throws CloneNotSupportedException { - System.out.println("testing EWAH clone"); - EWAHCompressedBitmap a = new EWAHCompressedBitmap(); - a.set(410018); - a.set(410019); - a.set(410020); - a.set(410021); - a.set(410022); - a.set(410023); + @SuppressWarnings({"deprecation", "boxing"}) + @Test + public void OKaserBugReportJuly2013() { + System.out.println("testing OKaserBugReportJuly2013"); + int[][] data = {{}, {5, 6, 7, 8, 9}, {1}, {2}, + {2, 5, 7}, {1}, {2}, {1, 6, 9}, + {1, 3, 4, 6, 8, 9}, {1, 3, 4, 6, 8, 9}, + {1, 3, 6, 8, 9}, {2, 5, 7}, {2, 5, 7}, + {1, 3, 9}, {3, 8, 9}}; + + EWAHCompressedBitmap[] toBeOred = new EWAHCompressedBitmap[data.length]; + Set bruteForceAnswer = new HashSet(); + for (int i = 0; i < toBeOred.length; ++i) { + toBeOred[i] = new EWAHCompressedBitmap(); + for (int j : data[i]) { + toBeOred[i].set(j); + bruteForceAnswer.add(j); + } + toBeOred[i].setSizeInBits(1000, false); + } + long rightcard = bruteForceAnswer.size(); + EWAHCompressedBitmap e1 = FastAggregation.or(toBeOred); + Assert.assertEquals(rightcard, e1.cardinality()); + EWAHCompressedBitmap e2 = FastAggregation.bufferedor(65536, + toBeOred); + Assert.assertEquals(rightcard, e2.cardinality()); + EWAHCompressedBitmap foo = new EWAHCompressedBitmap(); + FastAggregation.orToContainer(foo, toBeOred); + Assert.assertEquals(rightcard, foo.cardinality()); + } + + public static Iterator toIterator(final EWAHCompressedBitmap[] bitmaps) { + return new Iterator() { + int k = 0; + + @Override + public boolean hasNext() { + return k < bitmaps.length; + } + + @Override + public Object next() { + return bitmaps[k++]; + } + + @Override + public void remove() { + // nothing + } + }; + } + + @Test + public void fastand() { + int[][] data = { {5, 6, 7, 8, 9}, {1, 5}, {2, 5}}; - 
EWAHCompressedBitmap b; + EWAHCompressedBitmap[] bitmaps = new EWAHCompressedBitmap[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + EWAHCompressedBitmap and1 = FastAggregation.bufferedand(1024, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap and2 = new EWAHCompressedBitmap(); + FastAggregation.bufferedandWithContainer(and2, 32, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap and3 = EWAHCompressedBitmap.and(bitmaps[0],bitmaps[1],bitmaps[2]); + System.out.println(and1.sizeInBits()); + System.out.println(and2.sizeInBits()); + System.out.println(and3.sizeInBits()); + assertEqualsPositions(and1, and2); + assertEqualsPositions(and2, and3); + } + + + @Test + public void fastagg() { + int[][] data = {{}, {5, 6, 7, 8, 9}, {1}, {2}}; - b = a.clone(); + EWAHCompressedBitmap[] bitmaps = new EWAHCompressedBitmap[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + + EWAHCompressedBitmap or1 = FastAggregation.bufferedor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap or2 = FastAggregation.or(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap or3 = FastAggregation.bufferedor(1024, bitmaps); + EWAHCompressedBitmap or4 = FastAggregation.or(bitmaps); + EWAHCompressedBitmap or5 = FastAggregation.or(toIterator(bitmaps)); + EWAHCompressedBitmap or6 = new EWAHCompressedBitmap(); + FastAggregation.orToContainer(or6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(or1,or2); + assertEquals(or2,or3); + assertEquals(or3,or4); + assertEquals(or4,or5); + assertEquals(or5,or6); + + EWAHCompressedBitmap xor1 = FastAggregation.bufferedxor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap xor2 = 
FastAggregation.xor(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap xor3 = FastAggregation.bufferedxor(1024, bitmaps); + EWAHCompressedBitmap xor4 = FastAggregation.xor(bitmaps); + EWAHCompressedBitmap xor5 = FastAggregation.xor(toIterator(bitmaps)); + EWAHCompressedBitmap xor6 = new EWAHCompressedBitmap(); + FastAggregation.orToContainer(xor6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(xor1,xor2); + assertEquals(xor2,xor3); + assertEquals(xor3,xor4); + assertEquals(xor4,xor5); + assertEquals(xor5,xor6); + } + + @Test + public void testSizeInBitsWithAnd() { + System.out.println("testing SizeInBitsWithAnd"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10, false); + b.setSizeInBits(10, false); + + EWAHCompressedBitmap and = a.and(b); + Assert.assertEquals(10, and.sizeInBits()); + EWAHCompressedBitmap and2 = EWAHCompressedBitmap.and(a, b); + Assert.assertEquals(10, and2.sizeInBits()); + } + + @Test + public void testSizeInBitsWithAndNot() { + System.out.println("testing SizeInBitsWithAndNot"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10, false); + b.setSizeInBits(10, false); + + EWAHCompressedBitmap and = a.andNot(b); + Assert.assertEquals(10, and.sizeInBits()); + } + + @Test + public void testSizeInBitsWithOr() { + System.out.println("testing SizeInBitsWithOr"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10, false); + b.setSizeInBits(10, false); + + EWAHCompressedBitmap or = a.or(b); + Assert.assertEquals(10, or.sizeInBits()); + 
EWAHCompressedBitmap or2 = EWAHCompressedBitmap.or(a, b); + Assert.assertEquals(10, or2.sizeInBits()); + } + + @Test + public void testSizeInBitsWithXor() { + System.out.println("testing SizeInBitsWithXor"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBits(10, false); + b.setSizeInBits(10, false); + + EWAHCompressedBitmap xor = a.xor(b); + Assert.assertEquals(10, xor.sizeInBits()); + EWAHCompressedBitmap xor2 = EWAHCompressedBitmap.xor(a, b); + Assert.assertEquals(10, xor2.sizeInBits()); + } + + @Test + public void testDebugSetSizeInBitsTest() { + System.out.println("testing DebugSetSizeInBits"); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + + b.set(4); + + b.setSizeInBits(6, true); + + List positions = b.toList(); + + Assert.assertEquals(2, positions.size()); + Assert.assertEquals(Integer.valueOf(4), positions.get(0)); + Assert.assertEquals(Integer.valueOf(5), positions.get(1)); + + Iterator iterator = b.iterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(4), iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(5), iterator.next()); + Assert.assertFalse(iterator.hasNext()); + + IntIterator intIterator = b.intIterator(); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(4, intIterator.next()); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(5, intIterator.next()); + Assert.assertFalse(intIterator.hasNext()); + + } + + /** + * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
+ */ + @Test + public void EwahIteratorProblem() { + System.out.println("testing ArnonMoscona"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + for (int i = 9434560; i <= 9435159; i++) { + bitmap.set(i); + } + IntIterator iterator = bitmap.intIterator(); + List v = bitmap.toList(); + int[] array = bitmap.toArray(); + for (int k = 0; k < v.size(); ++k) { + Assert.assertTrue(array[k] == v.get(k)); + Assert.assertTrue(iterator.hasNext()); + final int ival = iterator.next(); + final int vval = v.get(k); + Assert.assertTrue(ival == vval); + } + Assert.assertTrue(!iterator.hasNext()); + // + for (int k = 2; k <= 1024; k *= 2) { + int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, + 434455 + 5 * k); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + for (int i : bitsToSet) { + ewah.set(i); + } + equal(ewah.iterator(), bitsToSet); + } + } - a.setSizeInBits(487123, false); - b.setSizeInBits(487123, false); + + @Test + public void shiftTest() { + System.out.println("testing shifts"); + for (int k = 2; k <= 4096; k *= 2) { + int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, + 434455 + 5 * k); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + for (int i : bitsToSet) { + ewah.set(i); + } + for(int b = 0; b < 128; ++b) { + EWAHCompressedBitmap ewahs = ewah.shift(b); + int[] sb = ewahs.toArray(); + for(int z = 0; z < sb.length; ++z) + if(sb[z] != bitsToSet[z] + b) throw new RuntimeException("bug"); + } + for(int z = 0; z < 256;++z) { + ewah.set(z); + } + bitsToSet = ewah.toArray(); + for(int b = 0; b < 128; ++b) { + EWAHCompressedBitmap ewahs = ewah.shift(b); + int[] sb = ewahs.toArray(); + for(int z = 0; z < sb.length; ++z) + if(sb[z] != bitsToSet[z] + b) throw new RuntimeException("bug"); + } + } + } + /** + * Test submitted by Gregory Ssi-Yan-Kai + */ + @Test + public void SsiYanKaiTest() { + System.out.println("testing SsiYanKaiTest"); + EWAHCompressedBitmap a = EWAHCompressedBitmap.bitmapOf(39935, + 39936, 39937, 39938, 39939, 39940, 
39941, 39942, 39943, + 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, + 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, + 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, + 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, + 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, + 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, + 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, + 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, + 40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, + 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, + 40024, 40025, 40026, 40027, 40028, 40029, 40030, 40031, + 40032, 40033, 40034, 40035, 40036, 40037, 40038, 40039, + 40040, 40041, 40042, 40043, 40044, 40045, 40046, 40047, + 40048, 40049, 40050, 40051, 40052, 40053, 40054, 40055, + 40056, 40057, 40058, 40059, 40060, 40061, 40062, 40063, + 40064, 40065, 40066, 40067, 40068, 40069, 40070, 40071, + 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, + 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, + 40088, 40089, 40090, 40091, 40092, 40093, 40094, 40095, + 40096, 40097, 40098, 40099, 40100); + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(39935, + 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, + 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, + 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, + 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, + 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, + 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, + 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, + 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, + 270000); + LinkedHashSet aPositions = new LinkedHashSet( + a.toList()); + int intersection = 0; + EWAHCompressedBitmap inter = new EWAHCompressedBitmap(); + LinkedHashSet bPositions = new LinkedHashSet( + b.toList()); + for (Integer integer : bPositions) { + if (aPositions.contains(integer)) { + 
inter.set(integer); + ++intersection; + } + } + inter.setSizeInBits(maxSizeInBits(a, b), false); + EWAHCompressedBitmap and2 = a.and(b); + if (!and2.equals(inter)) + throw new RuntimeException("intersections don't match"); + if (intersection != and2.cardinality()) + throw new RuntimeException("cardinalities don't match"); + } + + /** + * Test inspired by William Habermaas. + */ + @Test + public void habermaasTest() throws Exception { + System.out.println("testing habermaasTest"); + BitSet bitsetaa = new BitSet(); + EWAHCompressedBitmap aa = new EWAHCompressedBitmap(); + int[] val = {55400, 1000000, 1000128}; + for (int k = 0; k < val.length; ++k) { + aa.set(val[k]); + bitsetaa.set(val[k]); + } + equal(aa, bitsetaa); + BitSet bitsetab = new BitSet(); + EWAHCompressedBitmap ab = new EWAHCompressedBitmap(); + for (int i = 4096; i < (4096 + 5); i++) { + ab.set(i); + bitsetab.set(i); + } + ab.set(99000); + bitsetab.set(99000); + ab.set(1000130); + bitsetab.set(1000130); + equal(ab, bitsetab); + EWAHCompressedBitmap bb = aa.or(ab); + EWAHCompressedBitmap bbAnd = aa.and(ab); + EWAHCompressedBitmap abnot = ab.clone(); + abnot.not(); + EWAHCompressedBitmap bbAnd2 = aa.andNot(abnot); + assertEquals(bbAnd2, bbAnd); + BitSet bitsetbb = (BitSet) bitsetaa.clone(); + bitsetbb.or(bitsetab); + BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); + bitsetbbAnd.and(bitsetab); + equal(bbAnd, bitsetbbAnd); + equal(bb, bitsetbb); + } + + @Test + public void testAndResultAppend() { + System.out.println("testing AndResultAppend"); + EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); + bitmap1.set(35); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(35); + bitmap2.set(130); + + EWAHCompressedBitmap resultBitmap = bitmap1.and(bitmap2); + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + } + + /** + * Test cardinality. 
+ */ + @Test + public void testCardinality() { + System.out.println("testing EWAH cardinality"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(Integer.MAX_VALUE - 64); + // System.out.format("Total Items %d\n", bitmap.cardinality()); + Assert.assertTrue(bitmap.cardinality() == 1); + } + + /** + * Test clear function + */ + @Test + public void testClear() { + System.out.println("testing Clear"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(5); + bitmap.clear(); + bitmap.set(7); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.toList().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(7 == bitmap.toList().get(0)); + Assert.assertTrue(7 == bitmap.toArray()[0]); + bitmap.clear(); + bitmap.set(5000); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.toList().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(5000 == bitmap.toList().get(0)); + bitmap.set(5001); + bitmap.set(5005); + bitmap.set(5100); + bitmap.set(5500); + bitmap.clear(); + bitmap.set(5); + bitmap.set(7); + bitmap.set(1000); + bitmap.set(1001); + Assert.assertTrue(4 == bitmap.cardinality()); + List positions = bitmap.toList(); + Assert.assertTrue(4 == positions.size()); + Assert.assertTrue(5 == positions.get(0)); + Assert.assertTrue(7 == positions.get(1)); + Assert.assertTrue(1000 == positions.get(2)); + Assert.assertTrue(1001 == positions.get(3)); + } + + /** + * Test ewah compressed bitmap. 
+ */ + @Test + public void testEWAHCompressedBitmap() { + System.out.println("testing EWAH"); + long zero = 0; + long specialval = 1l | (1l << 4) | (1l << 63); + long notzero = ~zero; + EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); + myarray1.addWord(zero); + myarray1.addWord(zero); + myarray1.addWord(zero); + myarray1.addWord(specialval); + myarray1.addWord(specialval); + myarray1.addWord(notzero); + myarray1.addWord(zero); + Assert.assertEquals(myarray1.toList().size(), 6 + 64); + EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap(); + myarray2.addWord(zero); + myarray2.addWord(specialval); + myarray2.addWord(specialval); + myarray2.addWord(notzero); + myarray2.addWord(zero); + myarray2.addWord(zero); + myarray2.addWord(zero); + Assert.assertEquals(myarray2.toList().size(), 6 + 64); + List data1 = myarray1.toList(); + List data2 = myarray2.toList(); + Vector logicalor = new Vector(); + { + HashSet tmp = new HashSet(); + tmp.addAll(data1); + tmp.addAll(data2); + logicalor.addAll(tmp); + } + Collections.sort(logicalor); + Vector logicaland = new Vector(); + logicaland.addAll(data1); + logicaland.retainAll(data2); + Collections.sort(logicaland); + EWAHCompressedBitmap arrayand = myarray1.and(myarray2); + Assert.assertTrue(arrayand.toList().equals(logicaland)); + EWAHCompressedBitmap arrayor = myarray1.or(myarray2); + Assert.assertTrue(arrayor.toList().equals(logicalor)); + EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1); + Assert.assertTrue(arrayandbis.toList().equals(logicaland)); + EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1); + Assert.assertTrue(arrayorbis.toList().equals(logicalor)); + EWAHCompressedBitmap x = new EWAHCompressedBitmap(); + for (Integer i : myarray1.toList()) { + x.set(i); + } + Assert.assertTrue(x.toList().equals( + myarray1.toList())); + x = new EWAHCompressedBitmap(); + for (Integer i : myarray2.toList()) { + x.set(i); + } + Assert.assertTrue(x.toList().equals( + myarray2.toList())); + x = new 
EWAHCompressedBitmap(); + for (Iterator k = myarray1.iterator(); k.hasNext(); ) { + x.set(extracted(k)); + } + Assert.assertTrue(x.toList().equals( + myarray1.toList())); + x = new EWAHCompressedBitmap(); + for (Iterator k = myarray2.iterator(); k.hasNext(); ) { + x.set(extracted(k)); + } + Assert.assertTrue(x.toList().equals( + myarray2.toList())); + } + + /** + * Test externalization. + * + * @throws IOException Signals that an I/O exception has occurred. + */ + @Test + public void testExternalization() throws Exception { + System.out.println("testing EWAH externalization"); + EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); + int[] val = {5, 4400, 44600, 55400, 1000000}; + for (int k = 0; k < val.length; ++k) { + ewcb.set(val[k]); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oo = new ObjectOutputStream(bos); + ewcb.writeExternal(oo); + oo.close(); + ewcb = new EWAHCompressedBitmap(); + ByteArrayInputStream bis = new ByteArrayInputStream( + bos.toByteArray()); + ewcb.readExternal(new ObjectInputStream(bis)); + List result = ewcb.toList(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertTrue(result.get(k) == val[k]); + } + } - Assert.assertTrue(a.equals(b)); - } + @Test + public void testExtremeRange() { + System.out.println("testing EWAH at its extreme range"); + int N = 1024; + EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); + for (int i = 0; i < N; ++i) { + myarray1.set(Integer.MAX_VALUE - 64 - N + i); + Assert.assertTrue(myarray1.cardinality() == i + 1); + int[] val = myarray1.toArray(); + Assert.assertTrue(val[0] == Integer.MAX_VALUE - 64 - N); + } + } - /** - * a non-deterministic test proposed by Marc Polizzi. 
- * - * @param maxlength - * the maximum uncompressed size of the bitmap - */ - public static void PolizziTest(int maxlength) { - System.out.println("Polizzi test with max length = " + maxlength); - for (int k = 0; k < 10000; ++k) { - final Random rnd = new Random(); - final EWAHCompressedBitmap ewahBitmap1 = new EWAHCompressedBitmap(); - final BitSet jdkBitmap1 = new BitSet(); - final EWAHCompressedBitmap ewahBitmap2 = new EWAHCompressedBitmap(); - final BitSet jdkBitmap2 = new BitSet(); - final EWAHCompressedBitmap ewahBitmap3 = new EWAHCompressedBitmap(); - final BitSet jdkBitmap3 = new BitSet(); - final int len = rnd.nextInt(maxlength); - for (int pos = 0; pos < len; pos++) { // random *** number of bits - // set *** - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap1.set(pos); - jdkBitmap1.set(pos); - } - if (rnd.nextInt(11) == 0) { // random *** increasing *** values - ewahBitmap2.set(pos); - jdkBitmap2.set(pos); - } - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap3.set(pos); - jdkBitmap3.set(pos); - } - } - assertEquals(jdkBitmap1, ewahBitmap1); - assertEquals(jdkBitmap2, ewahBitmap2); - assertEquals(jdkBitmap3, ewahBitmap3); - // XOR - { - final EWAHCompressedBitmap xorEwahBitmap = ewahBitmap1 - .xor(ewahBitmap2); - final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); - xorJdkBitmap.xor(jdkBitmap2); - assertEquals(xorJdkBitmap, xorEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap andEwahBitmap = ewahBitmap1 - .and(ewahBitmap2); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap andEwahBitmap = ewahBitmap2 - .and(ewahBitmap1); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - assertEquals(andJdkBitmap, - EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2)); - } - // MULTI AND 
- { - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - andJdkBitmap.and(jdkBitmap3); - assertEquals(andJdkBitmap, EWAHCompressedBitmap.and( - ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(andJdkBitmap, EWAHCompressedBitmap.and( - ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(andJdkBitmap.cardinality(), - EWAHCompressedBitmap.andCardinality(ewahBitmap1, - ewahBitmap2, ewahBitmap3)); - } - // AND NOT - { - final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap1 - .andNot(ewahBitmap2); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); - andNotJdkBitmap.andNot(jdkBitmap2); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // AND NOT - { - final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap2 - .andNot(ewahBitmap1); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); - andNotJdkBitmap.andNot(jdkBitmap1); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // OR - { - final EWAHCompressedBitmap orEwahBitmap = ewahBitmap1 - .or(ewahBitmap2); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - assertEquals(orJdkBitmap, - EWAHCompressedBitmap.or(ewahBitmap1, ewahBitmap2)); - Assert.assertEquals(orEwahBitmap.cardinality(), - ewahBitmap1.orCardinality(ewahBitmap2)); - } - // OR - { - final EWAHCompressedBitmap orEwahBitmap = ewahBitmap2 - .or(ewahBitmap1); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - } - // MULTI OR - { - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - orJdkBitmap.or(jdkBitmap3); - assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap1, - ewahBitmap2, ewahBitmap3)); - assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap3, - ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(orJdkBitmap.cardinality(), - 
EWAHCompressedBitmap.orCardinality(ewahBitmap1, - ewahBitmap2, ewahBitmap3)); - } - } - } + /** + * Test the intersects method + */ + @Test + public void testIntersectsMethod() { + System.out.println("testing Intersets Bug"); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(1); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(1); + bitmap2.set(11); + bitmap2.set(111); + bitmap2.set(1111111); + bitmap2.set(11111111); + Assert.assertTrue(bitmap.intersects(bitmap2)); + Assert.assertTrue(bitmap2.intersects(bitmap)); + + EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(101); + EWAHCompressedBitmap bitmap4 = new EWAHCompressedBitmap(); + for (int i = 0; i < 100; i++) { + bitmap4.set(i); + } + Assert.assertFalse(bitmap3.intersects(bitmap4)); + Assert.assertFalse(bitmap4.intersects(bitmap3)); - /** - * Pseudo-non-deterministic test inspired by Federico Fissore. - * - * @param length - * the number of set bits in a bitmap - */ - public static void shouldSetBits(int length) { - System.out.println("testing shouldSetBits " + length); - int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); - EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); - System.out.println(" ... setting " + bitsToSet.length + " values"); - for (int i : bitsToSet) { - ewah.set(i); - } - System.out.println(" ... verifying " + bitsToSet.length + " values"); - equal(ewah.iterator(), bitsToSet); - System.out.println(" ... 
checking cardinality"); - Assert.assertEquals(bitsToSet.length, ewah.cardinality()); - } + EWAHCompressedBitmap bitmap5 = new EWAHCompressedBitmap(); + bitmap5.set(0); + bitmap5.set(10); + bitmap5.set(20); + EWAHCompressedBitmap bitmap6 = new EWAHCompressedBitmap(); + bitmap6.set(1); + bitmap6.set(11); + bitmap6.set(21); + bitmap6.set(1111111); + bitmap6.set(11111111); + Assert.assertFalse(bitmap5.intersects(bitmap6)); + Assert.assertFalse(bitmap6.intersects(bitmap5)); + + bitmap5.set(21); + Assert.assertTrue(bitmap5.intersects(bitmap6)); + Assert.assertTrue(bitmap6.intersects(bitmap5)); + + EWAHCompressedBitmap bitmap7 = new EWAHCompressedBitmap(); + bitmap7.set(1); + bitmap7.set(10); + bitmap7.set(20); + bitmap7.set(1111111); + bitmap7.set(11111111); + EWAHCompressedBitmap bitmap8 = new EWAHCompressedBitmap(); + for (int i = 0; i < 1000; i++) { + if (i != 1 && i != 10 && i != 20) { + bitmap8.set(i); + } + } + Assert.assertFalse(bitmap7.intersects(bitmap8)); + Assert.assertFalse(bitmap8.intersects(bitmap7)); + } + + /** + * as per renaud.delbru, Feb 12, 2009 this might throw an error out of + * bound exception. + */ + @Test + public void testLargeEWAHCompressedBitmap() { + System.out.println("testing EWAH over a large array"); + EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); + int N = 11000000; + for (int i = 0; i < N; ++i) { + myarray1.set(i); + } + Assert.assertTrue(myarray1.sizeInBits() == N); + } - @Test - public void testSizeInBits1() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.setSizeInBits(1, false); - bitmap.not(); - Assert.assertEquals(1, bitmap.cardinality()); - } + /** + * Test massive and. 
+ */ + @Test + public void testMassiveAnd() { + System.out.println("testing massive logical and"); + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[1024]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap answer = ewah[0]; + for (int k = 1; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + // result should be empty + if (answer.toList().size() != 0) + System.out.println(answer.toDebugString()); + Assert.assertTrue(answer.toList().size() == 0); + Assert.assertTrue(EWAHCompressedBitmap.and(ewah).toList() + .size() == 0); + } + + /** + * Test massive and not. + */ + @Test + public void testMassiveAndNot() throws Exception { + System.out.println("testing massive and not"); + final int N = 1024; + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap answer = ewah[0]; + EWAHCompressedBitmap answer2 = ewah[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.andNot(ewah[k]); + EWAHCompressedBitmap copy = ewah[k].clone(); + copy.not(); + answer2.and(copy); + assertEqualsPositions(answer, answer2); + } + } - @Test - public void testHasNextSafe() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertTrue(it.hasNext()); - Assert.assertEquals(0, it.next()); - } + /** + * Test massive or. 
+ */ + @Test + public void testMassiveOr() { + System.out + .println("testing massive logical or (can take a couple of minutes)"); + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + EWAHCompressedBitmap answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + EWAHCompressedBitmap tmp = answer.or(ewah[k]); + bitsetanswer.or(bset[k]); + answer = tmp; + assertEqualsPositions(bitsetanswer, answer); + } + assertEqualsPositions(bitsetanswer, answer); + assertEqualsPositions(bitsetanswer, + EWAHCompressedBitmap.or(ewah)); + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer + .toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + } - @Test - public void testHasNextSafe2() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertEquals(0, it.next()); - } + @Test + public void testsetSizeInBits() { + System.out.println("testing setSizeInBits"); + for (int k = 0; k < 4096; ++k) { + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + ewah.setSizeInBits(k, false); + Assert.assertEquals(ewah.sizeInBits(), k); + Assert.assertEquals(ewah.cardinality(), 0); + EWAHCompressedBitmap ewah2 = new EWAHCompressedBitmap(); + ewah2.setSizeInBits(k, false); + Assert.assertEquals(ewah2.sizeInBits(), k); + Assert.assertEquals(ewah2.cardinality(), 0); + EWAHCompressedBitmap ewah3 = new 
EWAHCompressedBitmap(); + for (int i = 0; i < k; ++i) { + ewah3.set(i); + } + Assert.assertEquals(ewah3.sizeInBits(), k); + Assert.assertEquals(ewah3.cardinality(), k); + EWAHCompressedBitmap ewah4 = new EWAHCompressedBitmap(); + ewah4.setSizeInBits(k, true); + Assert.assertEquals(ewah4.sizeInBits(), k); + Assert.assertEquals(ewah4.cardinality(), k); + } + } - @Test - public void testInfiniteLoop() { - System.out.println("Testing for an infinite loop"); - EWAHCompressedBitmap b1 = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b2 = new EWAHCompressedBitmap(); - EWAHCompressedBitmap b3 = new EWAHCompressedBitmap(); - b3.setSizeInBits(5,false); - b1.set(2); - b2.set(4); - EWAHCompressedBitmap.and(b1, b2, b3); - EWAHCompressedBitmap.or(b1, b2, b3); - } + /** + * Test massive xor. + */ + @Test + public void testMassiveXOR() { + System.out + .println("testing massive xor (can take a couple of minutes)"); + final int N = 16; + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.xor(ewah[k]); + bitsetanswer.xor(bset[k]); + assertEqualsPositions(bitsetanswer, answer); + } + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer.toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } - @Test - public void testSizeInBits2() { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.setSizeInBits(1, true); - bitmap.not(); - Assert.assertEquals(0, bitmap.cardinality()); - } + @Test + public void testMultiAnd() { + System.out.println("testing MultiAnd"); + // test bitmap3 has a literal 
word while bitmap1/2 have a run of + // 1 + EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); + bitmap1.addStreamOfEmptyWords(true, 1000); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.addStreamOfEmptyWords(true, 2000); + EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // equal + bitmap1 = new EWAHCompressedBitmap(); + bitmap1.set(35); + bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(35); + bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(35); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // same number of words for each + bitmap3.set(63); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // one word bigger + bitmap3.set(64); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // two words bigger + bitmap3.set(130); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // test that result can still be appended to + EWAHCompressedBitmap resultBitmap = EWAHCompressedBitmap.and( + bitmap1, bitmap2, bitmap3); + + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; + System.arraycopy(ewah, 0, shortewah, 0, k); + assertAndEquals(shortewah); + } + } + } - private static void assertAndEquals(EWAHCompressedBitmap... 
bitmaps) { - EWAHCompressedBitmap expected = bitmaps[0]; - for (int i = 1; i < bitmaps.length; i++) { - expected = expected.and(bitmaps[i]); - } - Assert.assertTrue(expected.equals(EWAHCompressedBitmap.and(bitmaps))); - } + @Test + public void testMultiOr() { + System.out.println("testing MultiOr"); + // test bitmap3 has a literal word while bitmap1/2 have a run of + // 0 + EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); + bitmap1.set(1000); + EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); + bitmap2.set(2000); + EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + EWAHCompressedBitmap expected = bitmap1.or(bitmap2).or(bitmap3); + + assertEquals(expected, + EWAHCompressedBitmap.or(bitmap1, bitmap2, bitmap3)); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; + System.arraycopy(ewah, 0, shortewah, 0, k); + assertOrEquals(shortewah); + } + } - private static void assertEquals(EWAHCompressedBitmap expected, - EWAHCompressedBitmap actual) { - Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); - assertEqualsPositions(expected, actual); - } + } - private static void assertOrEquals(EWAHCompressedBitmap... bitmaps) { - EWAHCompressedBitmap expected = bitmaps[0]; - for (int i = 1; i < bitmaps.length; i++) { - expected = expected.or(bitmaps[i]); - } - assertEquals(expected, EWAHCompressedBitmap.or(bitmaps)); - } + /** + * Test not. 
(Based on an idea by Ciaran Jessup) + */ + @Test + public void testNot() { + System.out.println("testing not"); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + for (int i = 0; i <= 184; ++i) { + ewah.set(i); + } + Assert.assertEquals(ewah.cardinality(), 185); + ewah.not(); + Assert.assertEquals(ewah.cardinality(), 0); + } + + @Test + public void testOrCardinality() { + System.out.println("testing Or Cardinality"); + for (int N = 0; N < 1024; ++N) { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + for (int i = 0; i < N; i++) { + bitmap.set(i); + } + bitmap.set(1025); + bitmap.set(1026); + Assert.assertEquals(N + 2, bitmap.cardinality()); + EWAHCompressedBitmap orbitmap = bitmap.or(bitmap); + assertEquals(orbitmap, bitmap); + Assert.assertEquals(N + 2, orbitmap.cardinality()); - /** - * Extracted. - * - * @param bits - * the bits - * @return the integer - */ - private static Integer extracted(final Iterator bits) { - return bits.next(); - } + Assert.assertEquals(N + 2, bitmap + .orCardinality(new EWAHCompressedBitmap())); + } + } - private static void testSetSizeInBits(int size, int nextBit) { - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - bitmap.setSizeInBits(size, false); - bitmap.set(nextBit); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(nextBit); - assertEquals(jdkBitmap, bitmap); - } + /** + * Test sets and gets. 
+ */ + @Test + public void testSetGet() { + System.out.println("testing EWAH set/get"); + EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); + int[] val = {5, 4400, 44600, 55400, 1000000}; + for (int k = 0; k < val.length; ++k) { + ewcb.set(val[k]); + } + List result = ewcb.toList(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertEquals(result.get(k).intValue(), val[k]); + } + } - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi - * - * @param jdkBitmap - * the uncompressed bitmap - * @param ewahBitmap - * the compressed bitmap - */ - static void assertCardinality(BitSet jdkBitmap, - EWAHCompressedBitmap ewahBitmap) { - final int c1 = jdkBitmap.cardinality(); - final int c2 = ewahBitmap.cardinality(); - Assert.assertEquals(c1, c2); - } + @Test + public void testHashCode() throws Exception { + System.out.println("testing hashCode"); + EWAHCompressedBitmap ewcb = EWAHCompressedBitmap.bitmapOf(50, + 70).and(EWAHCompressedBitmap.bitmapOf(50, 1000)); + EWAHCompressedBitmap expectedBitmap = EWAHCompressedBitmap.bitmapOf(50); + expectedBitmap.setSizeInBits(1000, false); + Assert.assertEquals(expectedBitmap, ewcb); + Assert.assertEquals(expectedBitmap.hashCode(), ewcb.hashCode()); + ewcb.addWord(~0l); + EWAHCompressedBitmap ewcb2 = ewcb.clone(); + ewcb2.addWord(0); + Assert.assertEquals(ewcb.hashCode(), ewcb2.hashCode()); + + } + + @Test + public void testSetSizeInBits() { + System.out.println("testing SetSizeInBits"); + testSetSizeInBits(130, 131); + testSetSizeInBits(63, 64); + testSetSizeInBits(64, 65); + testSetSizeInBits(64, 128); + testSetSizeInBits(35, 131); + testSetSizeInBits(130, 400); + testSetSizeInBits(130, 191); + testSetSizeInBits(130, 192); + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(31); + bitmap.setSizeInBits(130, false); + bitmap.set(131); + BitSet jdkBitmap = new BitSet(); + 
jdkBitmap.set(31); + jdkBitmap.set(131); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Test with parameters. + * + * @throws IOException Signals that an I/O exception has occurred. + */ + @Test + public void testWithParameters() throws IOException { + System.out + .println("These tests can run for several minutes. Please be patient."); + for (int k = 2; k < 1 << 24; k *= 8) + shouldSetBits(k); + PolizziTest(64); + PolizziTest(128); + PolizziTest(256); + System.out.println("Your code is probably ok."); + } + + /** + * Pseudo-non-deterministic test inspired by S.J.vanSchaik. (Yes, + * non-deterministic tests are bad, but the test is actually + * deterministic.) + */ + @Test + public void vanSchaikTest() { + System.out + .println("testing vanSchaikTest (this takes some time)"); + final int totalNumBits = 32768; + final double odds = 0.9; + Random rand = new Random(323232323); + for (int t = 0; t < 100; t++) { + int numBitsSet = 0; + EWAHCompressedBitmap cBitMap = new EWAHCompressedBitmap(); + for (int i = 0; i < totalNumBits; i++) { + if (rand.nextDouble() < odds) { + cBitMap.set(i); + numBitsSet++; + } + } + Assert.assertEquals(cBitMap.cardinality(), numBitsSet); + } - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi. - * - * @param jdkBitmap - * the uncompressed bitmap - * @param ewahBitmap - * the compressed bitmap - */ - static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { - assertEqualsIterator(jdkBitmap, ewahBitmap); - assertEqualsPositions(jdkBitmap, ewahBitmap); - assertCardinality(jdkBitmap, ewahBitmap); - } + } - static void assertEquals(int[] v, List p) { - assertEquals(p, v); - } + /** + * Function used in a test inspired by Federico Fissore. 
+ * + * @param size the number of set bits + * @param seed the random seed + * @return the pseudo-random array int[] + */ + public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { + Random random = new Random(seed); + // build raw int array + int[] bits = new int[size]; + for (int i = 0; i < bits.length; i++) { + bits[i] = random.nextInt(TEST_BS_SIZE); + } + // might generate duplicates + Arrays.sort(bits); + // first count how many distinct values + int counter = 0; + int oldx = -1; + for (int x : bits) { + if (x != oldx) + ++counter; + oldx = x; + } + // then construct new array + int[] answer = new int[counter]; + counter = 0; + oldx = -1; + for (int x : bits) { + if (x != oldx) { + answer[counter] = x; + ++counter; + } + oldx = x; + } + return answer; + } - static void assertEquals(List p, int[] v) { - if (v.length != p.size()) - throw new RuntimeException("Different lengths " + v.length + " " - + p.size()); - for (int k = 0; k < v.length; ++k) - if (v[k] != p.get(k).intValue()) - throw new RuntimeException("expected equal at " + k + " " - + v[k] + " " + p.get(k)); - } + /** + * Test inspired by Bilal Tayara + */ + @Test + public void TayaraTest() { + System.out.println("Tayara test"); + for (int offset = 64; offset < (1 << 30); offset *= 2) { + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b = new EWAHCompressedBitmap(); + for (int k = 0; k < 64; ++k) { + a.set(offset + k); + b.set(offset + k); + } + if (!a.and(b).equals(a)) + throw new RuntimeException("bug"); + if (!a.or(b).equals(a)) + throw new RuntimeException("bug"); + } + } - // - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi - * - * @param jdkBitmap - * the jdk bitmap - * @param ewahBitmap - * the ewah bitmap - */ - static void assertEqualsIterator(BitSet jdkBitmap, - EWAHCompressedBitmap ewahBitmap) { - final Vector positions = new Vector(); - final Iterator bits = 
ewahBitmap.iterator(); - while (bits.hasNext()) { - final int bit = extracted(bits).intValue(); - Assert.assertTrue(jdkBitmap.get(bit)); - positions.add(new Integer(bit)); - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException( - "iterator: bitset got different bits"); - } - } - } + @Test + public void TestCloneEwahCompressedBitArray() throws Exception { + System.out.println("testing EWAH clone"); + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + a.set(410018); + a.set(410019); + a.set(410020); + a.set(410021); + a.set(410022); + a.set(410023); + + EWAHCompressedBitmap b; + + b = a.clone(); + + a.setSizeInBits(487123, false); + b.setSizeInBits(487123, false); + + Assert.assertTrue(a.equals(b)); + } + + /** + * a non-deterministic test proposed by Marc Polizzi. + * + * @param maxlength the maximum uncompressed size of the bitmap + */ + public static void PolizziTest(int maxlength) { + System.out.println("Polizzi test with max length = " + + maxlength); + for (int k = 0; k < 10000; ++k) { + final Random rnd = new Random(); + final EWAHCompressedBitmap ewahBitmap1 = new EWAHCompressedBitmap(); + final BitSet jdkBitmap1 = new BitSet(); + final EWAHCompressedBitmap ewahBitmap2 = new EWAHCompressedBitmap(); + final BitSet jdkBitmap2 = new BitSet(); + final EWAHCompressedBitmap ewahBitmap3 = new EWAHCompressedBitmap(); + final BitSet jdkBitmap3 = new BitSet(); + final int len = rnd.nextInt(maxlength); + for (int pos = 0; pos < len; pos++) { // random *** + // number of bits + // set *** + if (rnd.nextInt(7) == 0) { // random *** + // increasing *** + // values + ewahBitmap1.set(pos); + jdkBitmap1.set(pos); + } + if (rnd.nextInt(11) == 0) { // random *** + // increasing *** + // values + ewahBitmap2.set(pos); + jdkBitmap2.set(pos); + } + if (rnd.nextInt(7) == 0) { // random *** + // increasing *** + // values + ewahBitmap3.set(pos); + 
jdkBitmap3.set(pos); + } + } + assertEquals(jdkBitmap1, ewahBitmap1); + assertEquals(jdkBitmap2, ewahBitmap2); + assertEquals(jdkBitmap3, ewahBitmap3); + // XOR + { + final EWAHCompressedBitmap xorEwahBitmap = ewahBitmap1 + .xor(ewahBitmap2); + final BitSet xorJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + xorJdkBitmap.xor(jdkBitmap2); + assertEquals(xorJdkBitmap, xorEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap andEwahBitmap = ewahBitmap1 + .and(ewahBitmap2); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap andEwahBitmap = ewahBitmap2 + .and(ewahBitmap1); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap.and(ewahBitmap1, + ewahBitmap2) + ); + } + // MULTI AND + { + final BitSet andJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andJdkBitmap.and(jdkBitmap2); + andJdkBitmap.and(jdkBitmap3); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap.and(ewahBitmap1, + ewahBitmap2, ewahBitmap3) + ); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap.and(ewahBitmap3, + ewahBitmap2, ewahBitmap1) + ); + Assert.assertEquals(andJdkBitmap.cardinality(), + EWAHCompressedBitmap.andCardinality( + ewahBitmap1, ewahBitmap2, + ewahBitmap3) + ); + } + // AND NOT + { + final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap1 + .andNot(ewahBitmap2); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andNotJdkBitmap.andNot(jdkBitmap2); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // AND NOT + { + final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap2 + .andNot(ewahBitmap1); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2 + .clone(); + andNotJdkBitmap.andNot(jdkBitmap1); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // OR + { + final EWAHCompressedBitmap 
orEwahBitmap = ewahBitmap1 + .or(ewahBitmap2); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap.or(ewahBitmap1, + ewahBitmap2) + ); + Assert.assertEquals(orEwahBitmap.cardinality(), + ewahBitmap1.orCardinality(ewahBitmap2)); + } + // OR + { + final EWAHCompressedBitmap orEwahBitmap = ewahBitmap2 + .or(ewahBitmap1); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + } + // MULTI OR + { + final BitSet orJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + orJdkBitmap.or(jdkBitmap2); + orJdkBitmap.or(jdkBitmap3); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap.or(ewahBitmap1, + ewahBitmap2, ewahBitmap3) + ); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap.or(ewahBitmap3, + ewahBitmap2, ewahBitmap1) + ); + Assert.assertEquals(orJdkBitmap.cardinality(), + EWAHCompressedBitmap.orCardinality( + ewahBitmap1, ewahBitmap2, + ewahBitmap3) + ); + } + } + } - // part of a test contributed by Marc Polizzi - /** - * Assert equals positions. 
- * - * @param jdkBitmap - * the jdk bitmap - * @param ewahBitmap - * the ewah bitmap - */ - static void assertEqualsPositions(BitSet jdkBitmap, - EWAHCompressedBitmap ewahBitmap) { - final List positions = ewahBitmap.getPositions(); - for (int position : positions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException( - "positions: bitset got different bits"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException( - "positions: bitset got different bits"); - } - } - // we check again - final int[] fastpositions = ewahBitmap.toArray(); - for (int position : fastpositions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - int index = Arrays.binarySearch(fastpositions, pos); - if (index < 0) - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - if (fastpositions[index] != pos) - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - } - } + /** + * Pseudo-non-deterministic test inspired by Federico Fissore. + * + * @param length the number of set bits in a bitmap + */ + public static void shouldSetBits(int length) { + System.out.println("testing shouldSetBits " + length); + int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, + 434222); + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + System.out.println(" ... setting " + bitsToSet.length + + " values"); + for (int i : bitsToSet) { + ewah.set(i); + } + System.out.println(" ... verifying " + bitsToSet.length + + " values"); + equal(ewah.iterator(), bitsToSet); + System.out.println(" ... 
checking cardinality"); + Assert.assertEquals(bitsToSet.length, ewah.cardinality()); + } + + @Test + public void testSizeInBits1() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.setSizeInBits(1, false); + bitmap.not(); + Assert.assertEquals(1, bitmap.cardinality()); + } + + @Test + public void testHasNextSafe() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertTrue(it.hasNext()); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testHasNextSafe2() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testInfiniteLoop() { + System.out.println("Testing for an infinite loop"); + EWAHCompressedBitmap b1 = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b2 = new EWAHCompressedBitmap(); + EWAHCompressedBitmap b3 = new EWAHCompressedBitmap(); + b3.setSizeInBits(5, false); + b1.set(2); + b2.set(4); + EWAHCompressedBitmap.and(b1, b2, b3); + EWAHCompressedBitmap.or(b1, b2, b3); + } + + @Test + public void testSizeInBits2() { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.setSizeInBits(1, true); + bitmap.not(); + Assert.assertEquals(0, bitmap.cardinality()); + } + + private static void assertAndEquals(EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.and(bitmaps[i]); + } + Assert.assertTrue(expected.equals(EWAHCompressedBitmap + .and(bitmaps))); + } + + private static void assertEquals(EWAHCompressedBitmap expected, + EWAHCompressedBitmap actual) { + Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); + assertEqualsPositions(expected, actual); + } + + private static void assertOrEquals(EWAHCompressedBitmap... 
bitmaps) { + EWAHCompressedBitmap expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.or(bitmaps[i]); + } + assertEquals(expected, EWAHCompressedBitmap.or(bitmaps)); + } - /** - * Assert equals positions. - * - * @param ewahBitmap1 - * the ewah bitmap1 - * @param ewahBitmap2 - * the ewah bitmap2 - */ - static void assertEqualsPositions(EWAHCompressedBitmap ewahBitmap1, - EWAHCompressedBitmap ewahBitmap2) { - final List positions1 = ewahBitmap1.getPositions(); - final List positions2 = ewahBitmap2.getPositions(); - if (!positions1.equals(positions2)) - throw new RuntimeException( - "positions: alternative got different bits (two bitmaps)"); - // - final int[] fastpositions1 = ewahBitmap1.toArray(); - assertEquals(fastpositions1, positions1); - final int[] fastpositions2 = ewahBitmap2.toArray(); - assertEquals(fastpositions2, positions2); - if (!Arrays.equals(fastpositions1, fastpositions2)) - throw new RuntimeException( - "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); - } + /** + * Extracted. 
+ * + * @param bits the bits + * @return the integer + */ + private static Integer extracted(final Iterator bits) { + return bits.next(); + } + + private static void testSetSizeInBits(int size, int nextBit) { + EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); + bitmap.setSizeInBits(size, false); + bitmap.set(nextBit); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(nextBit); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, + * part of a test contributed by Marc Polizzi + * + * @param jdkBitmap the uncompressed bitmap + * @param ewahBitmap the compressed bitmap + */ + static void assertCardinality(BitSet jdkBitmap, + EWAHCompressedBitmap ewahBitmap) { + final int c1 = jdkBitmap.cardinality(); + final int c2 = ewahBitmap.cardinality(); + Assert.assertEquals(c1, c2); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, + * part of a test contributed by Marc Polizzi. + * + * @param jdkBitmap the uncompressed bitmap + * @param ewahBitmap the compressed bitmap + */ + static void assertEquals(BitSet jdkBitmap, + EWAHCompressedBitmap ewahBitmap) { + assertEqualsIterator(jdkBitmap, ewahBitmap); + assertEqualsPositions(jdkBitmap, ewahBitmap); + assertCardinality(jdkBitmap, ewahBitmap); + } + + static void assertEquals(int[] v, List p) { + assertEquals(p, v); + } + + static void assertEquals(List p, int[] v) { + if (v.length != p.size()) + throw new RuntimeException("Different lengths " + + v.length + " " + p.size()); + for (int k = 0; k < v.length; ++k) + if (v[k] != p.get(k)) + throw new RuntimeException("expected equal at " + + k + " " + v[k] + " " + p.get(k)); + } + + // + + /** + * Assess equality between an uncompressed bitmap and a compressed one, + * part of a test contributed by Marc Polizzi + * + * @param jdkBitmap the jdk bitmap + * @param ewahBitmap the ewah bitmap + */ + static void assertEqualsIterator(BitSet jdkBitmap, + EWAHCompressedBitmap 
ewahBitmap) { + final Vector positions = new Vector(); + final Iterator bits = ewahBitmap.iterator(); + while (bits.hasNext()) { + final int bit = extracted(bits); + Assert.assertTrue(jdkBitmap.get(bit)); + positions.add(bit); + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "iterator: bitset got different bits"); + } + } + } - /** - * Convenience function to assess equality between a compressed bitset and - * an uncompressed bitset - * - * @param x - * the compressed bitset/bitmap - * @param y - * the uncompressed bitset/bitmap - */ - static void equal(EWAHCompressedBitmap x, BitSet y) { - Assert.assertEquals(x.cardinality(), y.cardinality()); - for (int i : x.getPositions()) - Assert.assertTrue(y.get(i)); - } + // part of a test contributed by Marc Polizzi - /** - * Convenience function to assess equality between an array and an iterator - * over Integers - * - * @param i - * the iterator - * @param array - * the array - */ - static void equal(Iterator i, int[] array) { - int cursor = 0; - while (i.hasNext()) { - int x = extracted(i).intValue(); - int y = array[cursor++]; - Assert.assertEquals(x, y); - } - } + /** + * Assert equals positions. 
+ * + * @param jdkBitmap the jdk bitmap + * @param ewahBitmap the ewah bitmap + */ + static void assertEqualsPositions(BitSet jdkBitmap, + EWAHCompressedBitmap ewahBitmap) { + final List positions = ewahBitmap.toList(); + for (int position : positions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + // we check again + final int[] fastpositions = ewahBitmap.toArray(); + for (int position : fastpositions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + int index = Arrays.binarySearch(fastpositions, pos); + if (index < 0) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + if (fastpositions[index] != pos) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } - /** The Constant MEGA: a large integer. */ - private static final int MEGA = 8 * 1024 * 1024; + /** + * Assert equals positions. 
+ * + * @param ewahBitmap1 the ewah bitmap1 + * @param ewahBitmap2 the ewah bitmap2 + */ + static void assertEqualsPositions(EWAHCompressedBitmap ewahBitmap1, + EWAHCompressedBitmap ewahBitmap2) { + final List positions1 = ewahBitmap1.toList(); + final List positions2 = ewahBitmap2.toList(); + if (!positions1.equals(positions2)) + throw new RuntimeException( + "positions: alternative got different bits (two bitmaps)"); + // + final int[] fastpositions1 = ewahBitmap1.toArray(); + assertEquals(fastpositions1, positions1); + final int[] fastpositions2 = ewahBitmap2.toArray(); + assertEquals(fastpositions2, positions2); + if (!Arrays.equals(fastpositions1, fastpositions2)) + throw new RuntimeException( + "positions: alternative got different bits with toArray but not with toList (two bitmaps)"); + } + + /** + * Convenience function to assess equality between a compressed bitset + * and an uncompressed bitset + * + * @param x the compressed bitset/bitmap + * @param y the uncompressed bitset/bitmap + */ + static void equal(EWAHCompressedBitmap x, BitSet y) { + Assert.assertEquals(x.cardinality(), y.cardinality()); + for (int i : x.toList()) + Assert.assertTrue(y.get(i)); + } + + + @Test + public void insertTest() { + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + for(int k = 0; k < 1<<20; ++k) + ewah.addLiteralWord(0xF0); + Assert.assertEquals(ewah.cardinality(), 4 * (1<<20)); + } + + /** + * Convenience function to assess equality between an array and an + * iterator over Integers + * + * @param i the iterator + * @param array the array + */ + static void equal(Iterator i, int[] array) { + int cursor = 0; + while (i.hasNext()) { + int x = extracted(i); + int y = array[cursor++]; + Assert.assertEquals(x, y); + } + } - /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. */ - private static final int TEST_BS_SIZE = 8 * MEGA; + /** + * The Constant MEGA: a large integer. 
+ */ + private static final int MEGA = 8 * 1024 * 1024; + + /** + * The Constant TEST_BS_SIZE: used to represent the size of a large + * bitmap. + */ + private static final int TEST_BS_SIZE = 8 * MEGA; } diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,116 +1,171 @@ package com.googlecode.javaewah; -import static org.junit.Assert.*; import org.junit.Test; + +import static org.junit.Assert.*; /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + +import java.util.Iterator; + /** - * Tests for utility class. Sketchy for now. - * + * Tests for utility class. 
*/ @SuppressWarnings("javadoc") public class IntIteratorOverIteratingRLWTest { + + @Test + public void iteratorAggregation() { + EWAHCompressedBitmap e1 = EWAHCompressedBitmap.bitmapOf(0, 2, 1000, 10001); + EWAHCompressedBitmap e2 = new EWAHCompressedBitmap(); + for (int k = 64; k < 450; ++k) + e2.set(k); + EWAHCompressedBitmap e3 = new EWAHCompressedBitmap(); + for (int k = 64; k < 450; ++k) + e2.set(400 * k); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedand(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedxor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedxor(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedand(500, e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedor(500, e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil.materialize( + IteratorAggregation.bufferedxor(500, e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation.bufferedxor(1024, e1, e2, e3)); + } + + @Test + // had problems with bitmaps beginning with two consecutive clean runs + public void testConsecClean() { + System.out + .println("testing int iteration, 2 consec clean runs starting with zeros"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 64; i < 128; ++i) + e.set(i); + + IntIterator ii = IteratorUtil.toSetBitsIntIterator(e.getIteratingRLW()); + 
assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(64, ctr); + Iterator iii = IteratorUtil.toSetBitsIterator(e.getIteratingRLW()); + assertTrue(iii.hasNext()); + ctr = 0; + while (iii.hasNext()) { + ++ctr; + iii.next(); + } + assertEquals(64, ctr); - @Test - // had problems with bitmaps beginning with two consecutive clean runs - public void testConsecClean() { - System.out - .println("testing int iteration, 2 consec clean runs starting with zeros"); - EWAHCompressedBitmap e = new EWAHCompressedBitmap(); - for (int i = 64; i < 128; ++i) - e.set(i); - IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( - e.getIteratingRLW()); - assertTrue(ii.hasNext()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - assertEquals(64, ctr); - } - - @Test - public void testConsecCleanStartOnes() { - System.out - .println("testing int iteration, 2 consec clean runs starting with ones"); - EWAHCompressedBitmap e = new EWAHCompressedBitmap(); - for (int i = 0; i < 2 * 64; ++i) - e.set(i); - for (int i = 4 * 64; i < 5 * 64; ++i) - e.set(i); - - IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( - e.getIteratingRLW()); - assertTrue(ii.hasNext()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - assertEquals(3 * 64, ctr); - } - - @Test - public void testStartDirty() { - System.out.println("testing int iteration, no initial runs"); - EWAHCompressedBitmap e = new EWAHCompressedBitmap(); - for (int i = 1; i < 2 * 64; ++i) - e.set(i); - for (int i = 4 * 64; i < 5 * 64; ++i) - e.set(i); - - IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( - e.getIteratingRLW()); - assertTrue(ii.hasNext()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - assertEquals(3 * 64 - 1, ctr); - } - - @Test - public void testEmpty() { - System.out.println("testing int iteration over empty bitmap"); - EWAHCompressedBitmap e = new EWAHCompressedBitmap(); - - 
IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( - e.getIteratingRLW()); - assertFalse(ii.hasNext()); - } - - @Test - public void testRandomish() { - EWAHCompressedBitmap e = new EWAHCompressedBitmap(); - - int upperlimit = 100000; - for (int i = 0; i < upperlimit; ++i) { - double probabilityOfOne = i / (double) (upperlimit / 2); - if (probabilityOfOne > 1.0) - probabilityOfOne = 1.0; - if (Math.random() < probabilityOfOne) { - e.set(i); - } - } - - IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( - e.getIteratingRLW()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - - assertEquals(e.cardinality(), ctr); - System.out - .println("checking int iteration over a var density bitset of size " - + e.cardinality()); + } + + @Test + public void testMaterialize() { + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 64; i < 128; ++i) + e.set(333 * i); + assertEquals(e.cardinality(), IteratorUtil.cardinality(e.getIteratingRLW())); + EWAHCompressedBitmap newe = new EWAHCompressedBitmap(); + IteratorUtil.materialize(e.getIteratingRLW(), newe); + assertEquals(e,newe); + newe.clear(); + IteratorUtil.materialize(e.getIteratingRLW(), newe,4096); + assertEquals(e,newe); + } + + @Test + public void testConsecCleanStartOnes() { + System.out + .println("testing int iteration, 2 consec clean runs starting with ones"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 0; i < 2 * 64; ++i) + e.set(i); + for (int i = 4 * 64; i < 5 * 64; ++i) + e.set(i); + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 64, ctr); + } + + @Test + public void testStartDirty() { + System.out.println("testing int iteration, no initial runs"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + for (int i = 1; i < 2 * 64; ++i) + e.set(i); + for (int i = 4 * 64; i 
< 5 * 64; ++i) + e.set(i); + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 64 - 1, ctr); + } + @Test + public void testEmpty() { + System.out.println("testing int iteration over empty bitmap"); + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + assertFalse(ii.hasNext()); + } + + @Test + public void testRandomish() { + EWAHCompressedBitmap e = new EWAHCompressedBitmap(); + + int upperlimit = 100000; + for (int i = 0; i < upperlimit; ++i) { + double probabilityOfOne = i / (double) (upperlimit / 2); + if (probabilityOfOne > 1.0) + probabilityOfOne = 1.0; + if (Math.random() < probabilityOfOne) { + e.set(i); + } } + IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( + e.getIteratingRLW()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + + assertEquals(e.cardinality(), ctr); + System.out + .println("checking int iteration over a var density bitset of size " + + e.cardinality()); + + } + } diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -1,144 +1,194 @@ package com.googlecode.javaewah; -import static org.junit.Assert.*; +import static com.googlecode.javaewah.EWAHCompressedBitmap.maxSizeInBits; +import static org.junit.Assert.assertTrue; + import java.util.Iterator; + import org.junit.Test; -import com.googlecode.javaewah.benchmark.ClusteredDataGenerator; + +import 
com.googlecode.javaewah.synth.ClusteredDataGenerator; + /* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves * Licensed under the Apache License, Version 2.0. */ + /** * Tests specifically for iterators. - * */ public class IteratorAggregationTest { - /** - * @param N Number of bitmaps to generate in each set - * @param nbr parameter determining the size of the arrays (in a log scale) - * @return an iterator over sets of bitmaps - */ - public static Iterator getCollections(final int N, final int nbr) { - final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); - return new Iterator() { - int sparsity = 1; - - @Override - public boolean hasNext() { - return this.sparsity < 5; - } - - @Override - public EWAHCompressedBitmap[] next() { - int[][] data = new int[N][]; - int Max = (1 << (nbr + this.sparsity)); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - this.sparsity += 3; - return ewah; - } - - @Override - public void remove() { - // unimplemented - } - - }; - - } - - /** - * - */ - @Test - public void testAnd() { - for (int N = 1; N < 10; ++N) { - System.out.println("testAnd N = " + N); - Iterator i = getCollections(N,3); - while (i.hasNext()) { - EWAHCompressedBitmap[] x = i.next(); - EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.and(x); - EWAHCompressedBitmap x1 = IteratorUtil - .materialize(IteratorAggregation.bufferedand(IteratorUtil - .toIterators(x))); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - - } - - /** - * - */ - @Test - public void 
testOr() { - for (int N = 1; N < 10; ++N) { - System.out.println("testOr N = " + N); - Iterator i = getCollections(N,3); - while (i.hasNext()) { - EWAHCompressedBitmap[] x = i.next(); - EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); - EWAHCompressedBitmap x1 = IteratorUtil - .materialize(IteratorAggregation.bufferedor(IteratorUtil - .toIterators(x))); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - } - - /** - * - */ - @SuppressWarnings("deprecation") - @Test - public void testWideOr() { - for (int nbr = 3; nbr <= 24; nbr += 3) { - for (int N = 100; N < 1000; N += 100) { - System.out.println("testWideOr N = " + N); - Iterator i = getCollections(N, 3); - while (i.hasNext()) { - EWAHCompressedBitmap[] x = i.next(); - EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); - EWAHCompressedBitmap container = new EWAHCompressedBitmap(); - FastAggregation.legacy_orWithContainer(container, x); - assertTrue(container.equals(tanswer)); - EWAHCompressedBitmap x1 = IteratorUtil - .materialize(IteratorAggregation - .bufferedor(IteratorUtil.toIterators(x))); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - } - } - - /** - * - */ - @Test - public void testXor() { - System.out.println("testXor "); - Iterator i = getCollections(2,3); - while (i.hasNext()) { - EWAHCompressedBitmap[] x = i.next(); - EWAHCompressedBitmap tanswer = x[0].xor(x[1]); - EWAHCompressedBitmap x1 = IteratorUtil - .materialize(IteratorAggregation.bufferedxor( - x[0].getIteratingRLW(), x[1].getIteratingRLW())); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } + /** + * @param N Number of bitmaps to generate in each set + * @param nbr parameter determining the size of the arrays (in a log + * scale) + * @return an iterator over sets of bitmaps + */ + public static Iterator getCollections( + final int N, final int nbr) { + final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); + return new Iterator() { + int sparsity = 1; + + @Override + public boolean 
hasNext() { + return this.sparsity < 5; + } + + @Override + public EWAHCompressedBitmap[] next() { + int[][] data = new int[N][]; + int Max = (1 << (nbr + this.sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered( + 1 << nbr, Max); + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + this.sparsity += 3; + return ewah; + } + + @Override + public void remove() { + // unimplemented + } + + }; + + } + + /** + * + */ + @Test + public void testAnd() { + for (int N = 1; N < 10; ++N) { + System.out.println("testAnd N = " + N); + Iterator i = getCollections(N, + 3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.and(x); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation + .bufferedand(IteratorUtil + .toIterators(x))); + x1.setSizeInBits(maxSizeInBits(x), false); + x1.setSizeInBitsWithinLastWord(maxSizeInBits(x)); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + + } + + /** + * + */ + @Test + public void testOr() { + for (int N = 1; N < 10; ++N) { + System.out.println("testOr N = " + N); + Iterator i = getCollections(N, + 3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation + .bufferedor(IteratorUtil + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + + /** + * + */ + @SuppressWarnings("deprecation") + @Test + public void testWideOr() { + for (int nbr = 3; nbr <= 24; nbr += 3) { + for (int N = 100; N < 1000; N += 100) { + System.out.println("testWideOr N = " + N); + Iterator i = getCollections( + N, 3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap 
tanswer = EWAHCompressedBitmap + .or(x); + EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + FastAggregation.orToContainer(container, x); + assertTrue(container.equals(tanswer)); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation + .bufferedor(IteratorUtil + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + } + + /** + * + */ + @Test + public void testXor() { + System.out.println("testXor "); + Iterator i = getCollections(2, 3); + while (i.hasNext()) { + EWAHCompressedBitmap[] x = i.next(); + EWAHCompressedBitmap tanswer = x[0].xor(x[1]); + EWAHCompressedBitmap x1 = IteratorUtil + .materialize(IteratorAggregation.bufferedxor( + x[0].getIteratingRLW(), + x[1].getIteratingRLW())); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + + /** + * + */ + @Test + public void testMat() throws Exception { + System.out.println("testMat "); + EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(0,3); + EWAHCompressedBitmap n = IteratorUtil.materialize(b.getIteratingRLW()); + assertTrue(n.sizeInBits() == 64); + n.setSizeInBitsWithinLastWord(b.sizeInBits()); + assertTrue(n.sizeInBits() == b.sizeInBits()); + assertTrue(n.equals(b)); + EWAHCompressedBitmap neg = IteratorUtil.materialize(IteratorAggregation.not(b.getIteratingRLW())); + neg.setSizeInBitsWithinLastWord(b.sizeInBits()); + EWAHCompressedBitmap x= b.clone(); + x.not(); + assertTrue(x.equals(neg)); + for(int k = 145; k<1024; ++k) + b.set(k); + n = IteratorUtil.materialize(b.getIteratingRLW()); + assertTrue(n.sizeInBits()/64 * 64 == n.sizeInBits()); + n.setSizeInBitsWithinLastWord(b.sizeInBits()); + assertTrue(n.sizeInBits() == b.sizeInBits()); + assertTrue(n.equals(b)); + neg = IteratorUtil.materialize(IteratorAggregation.not(b.getIteratingRLW())); + neg.setSizeInBitsWithinLastWord(b.sizeInBits()); + x= b.clone(); + x.not(); + assertTrue(x.equals(neg)); + } + } diff -Nru 
libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/MemoryMapTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/MemoryMapTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/MemoryMapTest.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/MemoryMapTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,47 @@ +package com.googlecode.javaewah; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +import org.junit.Assert; +import org.junit.Test; + +public class MemoryMapTest +{ + + + @Test + public void basicTest() throws IOException,CloneNotSupportedException { + EWAHCompressedBitmap ewahBitmap = EWAHCompressedBitmap.bitmapOf(0, 2, 55, + 64, 1 << 30); + EWAHCompressedBitmap newewahBitmap = ewahBitmap.clone(); + Assert.assertEquals(newewahBitmap, ewahBitmap); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ewahBitmap.serialize(new DataOutputStream(bos)); + ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray()); + EWAHCompressedBitmap mapped = new EWAHCompressedBitmap(bb); + Assert.assertEquals(mapped, ewahBitmap); + EWAHCompressedBitmap newmapped; + newmapped = mapped.clone(); + Assert.assertEquals(newmapped, ewahBitmap); + } + + @Test + public void basicFileTest() throws IOException { + File tmpfile = File.createTempFile("javaewah", "bin"); + tmpfile.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(tmpfile); + EWAHCompressedBitmap ewahBitmap = EWAHCompressedBitmap.bitmapOf(0, 2, 55, + 64, 1 << 30); + ewahBitmap.serialize(new DataOutputStream(fos)); + long totalcount = fos.getChannel().position(); + fos.close(); + RandomAccessFile memoryMappedFile = new RandomAccessFile(tmpfile, "r"); + ByteBuffer bb = memoryMappedFile.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, totalcount); + EWAHCompressedBitmap mapped = new EWAHCompressedBitmap(bb); + memoryMappedFile.close(); + 
Assert.assertEquals(mapped, ewahBitmap); + } +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/synth/ClusteredDataGenerator.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/synth/ClusteredDataGenerator.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/synth/ClusteredDataGenerator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/synth/ClusteredDataGenerator.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,78 @@ +package com.googlecode.javaewah.synth; + + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +/** + * This class will generate lists of random integers with a "clustered" + * distribution. Reference: Anh VN, Moffat A. Index compression using 64-bit + * words. Software: Practice and Experience 2010; 40(2):131-147. + * + * @author Daniel Lemire + */ +public class ClusteredDataGenerator { + + /** + * + */ + public ClusteredDataGenerator() { + this.unidg = new UniformDataGenerator(); + } + + /** + * @param seed random seed + */ + public ClusteredDataGenerator(final int seed) { + this.unidg = new UniformDataGenerator(seed); + } + + /** + * generates randomly N distinct integers from 0 to Max. + * + * @param N number of integers + * @param Max maximum integer value + * @return a randomly generated array + */ + public int[] generateClustered(int N, int Max) { + int[] array = new int[N]; + fillClustered(array, 0, N, 0, Max); + return array; + } + + void fillClustered(int[] array, int offset, int length, int Min, int Max) { + final int range = Max - Min; + if ((range == length) || (length <= 10)) { + fillUniform(array, offset, length, Min, Max); + return; + } + final int cut = length + / 2 + + ((range - length - 1 > 0) ? 
this.unidg.rand + .nextInt(range - length - 1) : 0); + final double p = this.unidg.rand.nextDouble(); + if (p < 0.25) { + fillUniform(array, offset, length / 2, Min, Min + cut); + fillClustered(array, offset + length / 2, length + - length / 2, Min + cut, Max); + } else if (p < 0.5) { + fillClustered(array, offset, length / 2, Min, Min + cut); + fillUniform(array, offset + length / 2, length - length + / 2, Min + cut, Max); + } else { + fillClustered(array, offset, length / 2, Min, Min + cut); + fillClustered(array, offset + length / 2, length + - length / 2, Min + cut, Max); + } + } + + void fillUniform(int[] array, int offset, int length, int Min, int Max) { + int[] v = this.unidg.generateUniform(length, Max - Min); + for (int k = 0; k < v.length; ++k) + array[k + offset] = Min + v[k]; + } + + private final UniformDataGenerator unidg; +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/synth/UniformDataGenerator.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/synth/UniformDataGenerator.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/synth/UniformDataGenerator.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/synth/UniformDataGenerator.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,110 @@ +package com.googlecode.javaewah.synth; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.*; + +/** + * This class will generate "uniform" lists of random integers. This class will + * generate "uniform" lists of random integers. + * + * @author Daniel Lemire + */ +public class UniformDataGenerator { + /** + * construct generator of random arrays. 
+ */ + public UniformDataGenerator() { + this.rand = new Random(); + } + + /** + * @param seed random seed + */ + public UniformDataGenerator(final int seed) { + this.rand = new Random(seed); + } + + /** + * generates randomly N distinct integers from 0 to Max. + */ + int[] generateUniformHash(int N, int Max) { + if (N > Max) + throw new RuntimeException("not possible"); + int[] ans = new int[N]; + HashSet s = new HashSet(); + while (s.size() < N) + s.add(this.rand.nextInt(Max)); + Iterator i = s.iterator(); + for (int k = 0; k < N; ++k) + ans[k] = i.next(); + Arrays.sort(ans); + return ans; + } + + /** + * output all integers from the range [0,Max) that are not in the array + */ + static int[] negate(int[] x, int Max) { + int[] ans = new int[Max - x.length]; + int i = 0; + int c = 0; + for (int v : x) { + for (; i < v; ++i) + ans[c++] = i; + ++i; + } + while (c < ans.length) + ans[c++] = i++; + return ans; + } + + /** + * generates randomly N distinct integers from 0 to Max. + * + * @param N Number of integers to generate + * @param Max Maximum value of the integers + * @return array containing random integers + */ + public int[] generateUniform(int N, int Max) { + if (N * 2 > Max) { + return negate(generateUniform(Max - N, Max), Max); + } + if (2048 * N > Max) + return generateUniformBitmap(N, Max); + return generateUniformHash(N, Max); + } + + /** + * generates randomly N distinct integers from 0 to Max using a bitmap. 
+ * + * @param N Number of integers to generate + * @param Max Maximum value of the integers + * @return array containing random integers + */ + int[] generateUniformBitmap(int N, int Max) { + if (N > Max) + throw new RuntimeException("not possible"); + int[] ans = new int[N]; + BitSet bs = new BitSet(Max); + int cardinality = 0; + while (cardinality < N) { + int v = this.rand.nextInt(Max); + if (!bs.get(v)) { + bs.set(v); + cardinality++; + } + } + int pos = 0; + for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { + ans[pos++] = i; + } + return ans; + } + + Random rand = new Random(); + +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/ThresholdFuncBitmapTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/ThresholdFuncBitmapTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah/ThresholdFuncBitmapTest.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah/ThresholdFuncBitmapTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,64 @@ +package com.googlecode.javaewah; + +import static com.googlecode.javaewah.EWAHCompressedBitmap.maxSizeInBits; + +import org.junit.Assert; +import org.junit.Test; + +@SuppressWarnings("javadoc") +/** + * @since 0.8.0 + * @author Daniel Lemire + */ +public class ThresholdFuncBitmapTest { + @Test + public void basictest() { + System.out.println("Testing ThresholdFuncBitmap"); + EWAHCompressedBitmap ewah1 = EWAHCompressedBitmap.bitmapOf(1, + 53, 110, 1000, 1201, 50000); + EWAHCompressedBitmap ewah2 = EWAHCompressedBitmap.bitmapOf(1, + 100, 1000, 1100, 1200, 31416, 50001); + EWAHCompressedBitmap ewah3 = EWAHCompressedBitmap.bitmapOf(1, + 110, 1000, 1101, 1200, 1201, 31416, 31417); + + Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah1) + .equals(ewah1)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah2) + .equals(ewah2)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah3) + 
.equals(ewah3)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah1, + ewah1).equals(ewah1)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah2, + ewah2).equals(ewah2)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah3, + ewah3).equals(ewah3)); + + EWAHCompressedBitmap zero = new EWAHCompressedBitmap(); + Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah1).equals(zero)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah2).equals(zero)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah3).equals(zero)); + Assert.assertTrue(EWAHCompressedBitmap.threshold(4, ewah1,ewah2, ewah3).equals(zero)); + + EWAHCompressedBitmap ewahorth = EWAHCompressedBitmap.threshold( + 1, ewah1, ewah2, ewah3); + EWAHCompressedBitmap ewahtrueor = EWAHCompressedBitmap.or( + ewah1, ewah2, ewah3); + Assert.assertTrue(ewahorth.equals(ewahtrueor)); + + EWAHCompressedBitmap ewahandth = EWAHCompressedBitmap + .threshold(3, ewah1, ewah2, ewah3); + ewahandth.setSizeInBitsWithinLastWord(maxSizeInBits(ewah1, ewah2, ewah3)); + EWAHCompressedBitmap ewahtrueand = EWAHCompressedBitmap.and( + ewah1, ewah2, ewah3); + Assert.assertTrue(ewahandth.equals(ewahtrueand)); + + EWAHCompressedBitmap ewahmajth = EWAHCompressedBitmap + .threshold(2, ewah1, ewah2, ewah3); + ewahmajth.setSizeInBitsWithinLastWord(maxSizeInBits(ewah1, ewah2, ewah3)); + EWAHCompressedBitmap ewahtruemaj = EWAHCompressedBitmap.or( + ewah1.and(ewah2), ewah1.and(ewah3), ewah2.and(ewah3)); + Assert.assertTrue(ewahmajth.equals(ewahtruemaj)); + } + +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java 2019-11-08 
21:55:59.000000000 +0000 @@ -1,1464 +1,2757 @@ -package com.googlecode.javaewah32; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ - -import org.junit.Test; - -import com.googlecode.javaewah.FastAggregation; -import com.googlecode.javaewah.IntIterator; -import java.util.*; -import java.io.*; -import junit.framework.Assert; - -/** - * This class is used for basic unit testing. - */ -@SuppressWarnings("javadoc") -public class EWAHCompressedBitmap32Test { - - - @Test - public void testGet() { - for (int gap = 29; gap < 10000; gap *= 10) { - EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); - for (int k = 0; k < 100; ++k) - x.set(k * gap); - for (int k = 0; k < 100 * gap; ++k) - if (x.get(k)) { - if (k % gap != 0) - throw new RuntimeException( - "spotted an extra set bit at " - + k + " gap = " - + gap); - } else if (k % gap == 0) - throw new RuntimeException( - "missed a set bit " + k - + " gap = " + gap); - } - } - - @SuppressWarnings({ "deprecation", "boxing" }) - @Test - public void OKaserBugReportJuly2013() { - System.out.println("testing OKaserBugReportJuly2013"); - int[][] data = { {}, { 5, 6, 7, 8, 9 }, { 1 }, { 2 }, { 2, 5, 7 }, - { 1 }, { 2 }, { 1, 6, 9 }, { 1, 3, 4, 6, 8, 9 }, - { 1, 3, 4, 6, 8, 9 }, { 1, 3, 6, 8, 9 }, { 2, 5, 7 }, - { 2, 5, 7 }, { 1, 3, 9 }, { 3, 8, 9 } }; - - EWAHCompressedBitmap32[] toBeOred = new EWAHCompressedBitmap32[data.length]; - Set bruteForceAnswer = new HashSet(); - for (int i = 0; i < toBeOred.length; ++i) { - toBeOred[i] = new EWAHCompressedBitmap32(); - for (int j : data[i]) { - toBeOred[i].set(j); - bruteForceAnswer.add(j); - } - toBeOred[i].setSizeInBits(1000,false); - } - - long rightcard = bruteForceAnswer.size(); - EWAHCompressedBitmap32 foo = new EWAHCompressedBitmap32(); - FastAggregation32.legacy_orWithContainer(foo, toBeOred); - Assert.assertEquals(rightcard, 
foo.cardinality()); - EWAHCompressedBitmap32 e1 = FastAggregation.or(toBeOred); - Assert.assertEquals(rightcard, e1.cardinality()); - EWAHCompressedBitmap32 e2 = FastAggregation32.bufferedor(65536, - toBeOred); - Assert.assertEquals(rightcard, e2.cardinality()); - } - - @Test - public void testSizeInBitsWithAnd() { - System.out.println("testing SizeInBitsWithAnd"); - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10); - b.setSizeInBits(10); - - EWAHCompressedBitmap32 and = a.and(b); - Assert.assertEquals(10, and.sizeInBits()); - EWAHCompressedBitmap32 and2 = EWAHCompressedBitmap32.and(a,b); - Assert.assertEquals(10, and2.sizeInBits()); - } - @Test - public void testSizeInBitsWithAndNot() { - System.out.println("testing SizeInBitsWithAndNot"); - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10); - b.setSizeInBits(10); - - EWAHCompressedBitmap32 and = a.andNot(b); - Assert.assertEquals(10, and.sizeInBits()); - } - - @Test - public void testSizeInBitsWithOr() { - System.out.println("testing SizeInBitsWithOr"); - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10); - b.setSizeInBits(10); - - EWAHCompressedBitmap32 or = a.or(b); - Assert.assertEquals(10, or.sizeInBits()); - EWAHCompressedBitmap32 or2 = EWAHCompressedBitmap32.or(a,b); - Assert.assertEquals(10, or2.sizeInBits()); - } - - - @Test - public void testSizeInBitsWithXor() { - System.out.println("testing SizeInBitsWithXor"); - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b = new 
EWAHCompressedBitmap32(); - - a.set(1); - a.set(2); - a.set(3); - - b.set(3); - b.set(4); - b.set(5); - - a.setSizeInBits(10); - b.setSizeInBits(10); - - EWAHCompressedBitmap32 xor = a.xor(b); - Assert.assertEquals(10, xor.sizeInBits()); - EWAHCompressedBitmap32 xor2 = EWAHCompressedBitmap32.xor(a,b); - Assert.assertEquals(10, xor2.sizeInBits()); - } - - - @Test - public void testDebugSetSizeInBitsTest() { - System.out.println("testing DebugSetSizeInBits"); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - - b.set(4); - - b.setSizeInBits(6, true); - - List positions = b.getPositions(); - - Assert.assertEquals(2, positions.size()); - Assert.assertEquals(Integer.valueOf(4), positions.get(0)); - Assert.assertEquals(Integer.valueOf(5), positions.get(1)); - - Iterator iterator = b.iterator(); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(4), iterator.next()); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(Integer.valueOf(5), iterator.next()); - Assert.assertFalse(iterator.hasNext()); - - IntIterator intIterator = b.intIterator(); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(4, intIterator.next()); - Assert.assertTrue(intIterator.hasNext()); - Assert.assertEquals(5, intIterator.next()); - Assert.assertFalse(intIterator.hasNext()); - - } - - /** - * Created: 2/4/11 6:03 PM By: Arnon Moscona. 
- */ - @Test - public void EwahIteratorProblem() { - System.out.println("testing ArnonMoscona"); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - for (int i = 9434560; i <= 9435159; i++) { - bitmap.set(i); - } - IntIterator iterator = bitmap.intIterator(); - List v = bitmap.getPositions(); - int[] array = bitmap.toArray(); - for (int k = 0; k < v.size(); ++k) { - Assert.assertTrue(array[k] == v.get(k).intValue()); - Assert.assertTrue(iterator.hasNext()); - final int ival = iterator.next(); - final int vval = v.get(k).intValue(); - Assert.assertTrue(ival == vval); - } - Assert.assertTrue(!iterator.hasNext()); - // - for (int k = 2; k <= 1024; k *= 2) { - int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - for (int i : bitsToSet) { - ewah.set(i); - } - equal(ewah.iterator(), bitsToSet); - } - } - - /** - * Test submitted by Gregory Ssi-Yan-Kai - */ - @Test - public void SsiYanKaiTest() { - System.out.println("testing SsiYanKaiTest"); - EWAHCompressedBitmap32 a = EWAHCompressedBitmap32.bitmapOf(39935, - 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, - 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, - 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, - 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, - 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, - 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, - 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, - 39999, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, - 40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, - 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, - 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, - 40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, - 40044, 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, - 40053, 40054, 40055, 40056, 40057, 
40058, 40059, 40060, 40061, - 40062, 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, - 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, - 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, - 40089, 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, - 40098, 40099, 40100); - EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(39935, - 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, - 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, - 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, - 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, - 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, - 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, - 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, - 39999, 270000); - LinkedHashSet aPositions = new LinkedHashSet( - a.getPositions()); - int intersection = 0; - EWAHCompressedBitmap32 inter = new EWAHCompressedBitmap32(); - LinkedHashSet bPositions = new LinkedHashSet( - b.getPositions()); - for (Integer integer : bPositions) { - if (aPositions.contains(integer)) { - inter.set(integer.intValue()); - ++intersection; - } - } - EWAHCompressedBitmap32 and2 = a.and(b); - if (!and2.equals(inter)) - throw new RuntimeException("intersections don't match"); - if (intersection != and2.cardinality()) - throw new RuntimeException("cardinalities don't match"); - } - - /** - * Test inspired by William Habermaas. 
- */ - @Test - public void habermaasTest() { - System.out.println("testing habermaasTest"); - BitSet bitsetaa = new BitSet(); - EWAHCompressedBitmap32 aa = new EWAHCompressedBitmap32(); - int[] val = { 55400, 1000000, 1000128 }; - for (int k = 0; k < val.length; ++k) { - aa.set(val[k]); - bitsetaa.set(val[k]); - } - equal(aa, bitsetaa); - BitSet bitsetab = new BitSet(); - EWAHCompressedBitmap32 ab = new EWAHCompressedBitmap32(); - for (int i = 4096; i < (4096 + 5); i++) { - ab.set(i); - bitsetab.set(i); - } - ab.set(99000); - bitsetab.set(99000); - ab.set(1000130); - bitsetab.set(1000130); - equal(ab, bitsetab); - EWAHCompressedBitmap32 bb = aa.or(ab); - EWAHCompressedBitmap32 bbAnd = aa.and(ab); - try { - EWAHCompressedBitmap32 abnot = ab.clone(); - abnot.not(); - EWAHCompressedBitmap32 bbAnd2 = aa.andNot(abnot); - assertEquals(bbAnd2, bbAnd); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - BitSet bitsetbb = (BitSet) bitsetaa.clone(); - bitsetbb.or(bitsetab); - BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); - bitsetbbAnd.and(bitsetab); - equal(bbAnd, bitsetbbAnd); - equal(bb, bitsetbb); - } - - @Test - public void testAndResultAppend() { - System.out.println("testing AndResultAppend"); - EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.set(35); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(35); - bitmap2.set(130); - - EWAHCompressedBitmap32 resultBitmap = bitmap1.and(bitmap2); - resultBitmap.set(131); - - bitmap1.set(131); - assertEquals(bitmap1, resultBitmap); - } - - /** - * Test cardinality. 
- */ - @Test - public void testCardinality() { - System.out.println("testing EWAH cardinality"); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(Integer.MAX_VALUE - 32); - // System.out.format("Total Items %d\n", bitmap.cardinality()); - Assert.assertTrue(bitmap.cardinality() == 1); - } - - /** - * Test clear function - */ - @Test - public void testClear() { - System.out.println("testing Clear"); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(5); - bitmap.clear(); - bitmap.set(7); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); - Assert.assertTrue(7 == bitmap.toArray()[0]); - bitmap.clear(); - bitmap.set(5000); - Assert.assertTrue(1 == bitmap.cardinality()); - Assert.assertTrue(1 == bitmap.getPositions().size()); - Assert.assertTrue(1 == bitmap.toArray().length); - Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); - bitmap.set(5001); - bitmap.set(5005); - bitmap.set(5100); - bitmap.set(5500); - bitmap.clear(); - bitmap.set(5); - bitmap.set(7); - bitmap.set(1000); - bitmap.set(1001); - Assert.assertTrue(4 == bitmap.cardinality()); - List positions = bitmap.getPositions(); - Assert.assertTrue(4 == positions.size()); - Assert.assertTrue(5 == positions.get(0).intValue()); - Assert.assertTrue(7 == positions.get(1).intValue()); - Assert.assertTrue(1000 == positions.get(2).intValue()); - Assert.assertTrue(1001 == positions.get(3).intValue()); - } - - /** - * Test ewah compressed bitmap. 
- */ - @Test - public void testEWAHCompressedBitmap() { - System.out.println("testing EWAH"); - int zero = 0; - int specialval = 1 | (1 << 4) | (1 << 31); - int notzero = ~zero; - EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(zero); - myarray1.add(specialval); - myarray1.add(specialval); - myarray1.add(notzero); - myarray1.add(zero); - Assert.assertEquals(myarray1.getPositions().size(), 6 + 32); - EWAHCompressedBitmap32 myarray2 = new EWAHCompressedBitmap32(); - myarray2.add(zero); - myarray2.add(specialval); - myarray2.add(specialval); - myarray2.add(notzero); - myarray2.add(zero); - myarray2.add(zero); - myarray2.add(zero); - Assert.assertEquals(myarray2.getPositions().size(), 6 + 32); - List data1 = myarray1.getPositions(); - List data2 = myarray2.getPositions(); - Vector logicalor = new Vector(); - { - HashSet tmp = new HashSet(); - tmp.addAll(data1); - tmp.addAll(data2); - logicalor.addAll(tmp); - } - Collections.sort(logicalor); - Vector logicaland = new Vector(); - logicaland.addAll(data1); - logicaland.retainAll(data2); - Collections.sort(logicaland); - EWAHCompressedBitmap32 arrayand = myarray1.and(myarray2); - Assert.assertTrue(arrayand.getPositions().equals(logicaland)); - EWAHCompressedBitmap32 arrayor = myarray1.or(myarray2); - Assert.assertTrue(arrayor.getPositions().equals(logicalor)); - EWAHCompressedBitmap32 arrayandbis = myarray2.and(myarray1); - Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); - EWAHCompressedBitmap32 arrayorbis = myarray2.or(myarray1); - Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); - EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); - for (Integer i : myarray1.getPositions()) { - x.set(i.intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap32(); - for (Integer i : myarray2.getPositions()) { - x.set(i.intValue()); - } - 
Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - x = new EWAHCompressedBitmap32(); - for (Iterator k = myarray1.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); - x = new EWAHCompressedBitmap32(); - for (Iterator k = myarray2.iterator(); k.hasNext();) { - x.set(extracted(k).intValue()); - } - Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); - } - - /** - * Test externalization. - * - * @throws IOException - * Signals that an I/O exception has occurred. - */ - @Test - public void testExternalization() throws IOException { - System.out.println("testing EWAH externalization"); - EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutputStream oo = new ObjectOutputStream(bos); - ewcb.writeExternal(oo); - oo.close(); - ewcb = null; - ewcb = new EWAHCompressedBitmap32(); - ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - ewcb.readExternal(new ObjectInputStream(bis)); - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertTrue(result.get(k).intValue() == val[k]); - } - } - - @Test - public void testExtremeRange() { - System.out.println("testing EWAH at its extreme range"); - EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); - int N = 1024; - for (int i = 0; i < N; ++i) { - myarray1.set(Integer.MAX_VALUE - 32 - N + i); - Assert.assertTrue(myarray1.cardinality() == i + 1); - int[] val = myarray1.toArray(); - Assert.assertTrue(val[0] == Integer.MAX_VALUE - 32 - N); - } - } - - /** - * Test the intersects method - */ - @Test - public void testIntersectsMethod() { - System.out.println("testing Intersets Bug"); - EWAHCompressedBitmap32 
bitmap = new EWAHCompressedBitmap32(); - bitmap.set(1); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(1); - bitmap2.set(11); - bitmap2.set(111); - bitmap2.set(1111111); - bitmap2.set(11111111); - Assert.assertTrue(bitmap.intersects(bitmap2)); - Assert.assertTrue(bitmap2.intersects(bitmap)); - - EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(101); - EWAHCompressedBitmap32 bitmap4 = new EWAHCompressedBitmap32(); - for (int i = 0; i < 100; i++) { - bitmap4.set(i); - } - Assert.assertFalse(bitmap3.intersects(bitmap4)); - Assert.assertFalse(bitmap4.intersects(bitmap3)); - - EWAHCompressedBitmap32 bitmap5 = new EWAHCompressedBitmap32(); - bitmap5.set(0); - bitmap5.set(10); - bitmap5.set(20); - EWAHCompressedBitmap32 bitmap6 = new EWAHCompressedBitmap32(); - bitmap6.set(1); - bitmap6.set(11); - bitmap6.set(21); - bitmap6.set(1111111); - bitmap6.set(11111111); - Assert.assertFalse(bitmap5.intersects(bitmap6)); - Assert.assertFalse(bitmap6.intersects(bitmap5)); - - bitmap5.set(21); - Assert.assertTrue(bitmap5.intersects(bitmap6)); - Assert.assertTrue(bitmap6.intersects(bitmap5)); - - EWAHCompressedBitmap32 bitmap7 = new EWAHCompressedBitmap32(); - bitmap7.set(1); - bitmap7.set(10); - bitmap7.set(20); - bitmap7.set(1111111); - bitmap7.set(11111111); - EWAHCompressedBitmap32 bitmap8 = new EWAHCompressedBitmap32(); - for (int i = 0; i < 1000; i++) { - if (i != 1 && i != 10 && i != 20) { - bitmap8.set(i); - } - } - Assert.assertFalse(bitmap7.intersects(bitmap8)); - Assert.assertFalse(bitmap8.intersects(bitmap7)); - } - - /** - * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound - * exception. 
- */ - @Test - public void testLargeEWAHCompressedBitmap() { - System.out.println("testing EWAH over a large array"); - EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); - int N = 11000000; - for (int i = 0; i < N; ++i) { - myarray1.set(i); - } - Assert.assertTrue(myarray1.sizeInBits() == N); - } - - /** - * Test massive and. - */ - @Test - public void testMassiveAnd() { - System.out.println("testing massive logical and"); - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[1024]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap32 answer = ewah[0]; - for (int k = 1; k < ewah.length; ++k) - answer = answer.and(ewah[k]); - // result should be empty - if (answer.getPositions().size() != 0) - System.out.println(answer.toDebugString()); - Assert.assertTrue(answer.getPositions().size() == 0); - Assert.assertTrue(EWAHCompressedBitmap32.and(ewah).getPositions() - .size() == 0); - } - - /** - * Test massive and not. 
- */ - @Test - public void testMassiveAndNot() { - System.out.println("testing massive and not"); - final int N = 1024; - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap32 answer = ewah[0]; - EWAHCompressedBitmap32 answer2 = ewah[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.andNot(ewah[k]); - EWAHCompressedBitmap32 copy = null; - try { - copy = ewah[k].clone(); - copy.not(); - answer2.and(copy); - assertEqualsPositions(answer, answer2); - } catch (CloneNotSupportedException e) { - e.printStackTrace(); - } - } - } - - @Test - public void testsetSizeInBits() { - System.out.println("testing setSizeInBits"); - for (int k = 0; k < 4096; ++k) { - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - ewah.setSizeInBits(k,false); - Assert.assertEquals(ewah.sizeinbits, k); - Assert.assertEquals(ewah.cardinality(), 0); - EWAHCompressedBitmap32 ewah2 = new EWAHCompressedBitmap32(); - ewah2.setSizeInBits(k, false); - Assert.assertEquals(ewah2.sizeinbits, k); - Assert.assertEquals(ewah2.cardinality(), 0); - EWAHCompressedBitmap32 ewah3 = new EWAHCompressedBitmap32(); - for (int i = 0; i < k; ++i) { - ewah3.set(i); - } - Assert.assertEquals(ewah3.sizeinbits, k); - Assert.assertEquals(ewah3.cardinality(), k); - EWAHCompressedBitmap32 ewah4 = new EWAHCompressedBitmap32(); - ewah4.setSizeInBits(k, true); - Assert.assertEquals(ewah4.sizeinbits, k); - Assert.assertEquals(ewah4.cardinality(), k); - } - } - - /** - * Test massive or. 
- */ - @Test - public void testMassiveOr() { - System.out - .println("testing massive logical or (can take a couple of minutes)"); - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - BitSet[] bset = new BitSet[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < bset.length; ++k) - bset[k] = new BitSet(); - for (int k = 0; k < N; ++k) - assertEqualsPositions(bset[k], ewah[k]); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - bset[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 0; k < N; ++k) - assertEqualsPositions(bset[k], ewah[k]); - EWAHCompressedBitmap32 answer = ewah[0]; - BitSet bitsetanswer = bset[0]; - for (int k = 1; k < ewah.length; ++k) { - EWAHCompressedBitmap32 tmp = answer.or(ewah[k]); - bitsetanswer.or(bset[k]); - answer = tmp; - assertEqualsPositions(bitsetanswer, answer); - } - assertEqualsPositions(bitsetanswer, answer); - assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap32.or(ewah)); - int k = 0; - for (int j : answer) { - if (k != j) - System.out.println(answer.toDebugString()); - Assert.assertEquals(k, j); - k += 1; - } - } - } - - /** - * Test massive xor. 
- */ - @Test - public void testMassiveXOR() { - System.out - .println("testing massive xor (can take a couple of minutes)"); - final int N = 16; - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - BitSet[] bset = new BitSet[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < bset.length; ++k) - bset[k] = new BitSet(); - for (int k = 0; k < 30000; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - bset[(k + 2 * k * k) % ewah.length].set(k); - } - EWAHCompressedBitmap32 answer = ewah[0]; - BitSet bitsetanswer = bset[0]; - for (int k = 1; k < ewah.length; ++k) { - answer = answer.xor(ewah[k]); - bitsetanswer.xor(bset[k]); - assertEqualsPositions(bitsetanswer, answer); - } - int k = 0; - for (int j : answer) { - if (k != j) - System.out.println(answer.toDebugString()); - Assert.assertEquals(k, j); - k += 1; - } - } - - @Test - public void testMultiAnd() { - System.out.println("testing MultiAnd"); - // test bitmap3 has a literal word while bitmap1/2 have a run of 1 - EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.addStreamOfEmptyWords(true, 1000); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.addStreamOfEmptyWords(true, 2000); - EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(500); - bitmap3.set(502); - bitmap3.set(504); - - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // equal - bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.set(35); - bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(35); - bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(35); - - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // same number of words for each - bitmap3.set(63); - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // one word bigger - bitmap3.set(64); - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // two words bigger - bitmap3.set(130); - assertAndEquals(bitmap1, bitmap2, bitmap3); - - // test that result can 
still be appended to - EWAHCompressedBitmap32 resultBitmap = EWAHCompressedBitmap32.and( - bitmap1, bitmap2, bitmap3); - resultBitmap.set(131); - - bitmap1.set(131); - assertEquals(bitmap1, resultBitmap); - - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 1; k <= ewah.length; ++k) { - EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; - for (int i = 0; i < k; ++i) - shortewah[i] = ewah[i]; - assertAndEquals(shortewah); - } - } - } - - @Test - public void testMultiOr() { - System.out.println("testing MultiOr"); - // test bitmap3 has a literal word while bitmap1/2 have a run of 0 - EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); - bitmap1.set(1000); - EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); - bitmap2.set(2000); - EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); - bitmap3.set(500); - bitmap3.set(502); - bitmap3.set(504); - - EWAHCompressedBitmap32 expected = bitmap1.or(bitmap2).or(bitmap3); - - assertEquals(expected, - EWAHCompressedBitmap32.or(bitmap1, bitmap2, bitmap3)); - - final int N = 128; - for (int howmany = 512; howmany <= 10000; howmany *= 2) { - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < ewah.length; ++k) - ewah[k] = new EWAHCompressedBitmap32(); - for (int k = 0; k < howmany; ++k) { - ewah[(k + 2 * k * k) % ewah.length].set(k); - } - for (int k = 1; k <= ewah.length; ++k) { - EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; - for (int i = 0; i < k; ++i) - shortewah[i] = ewah[i]; - assertOrEquals(shortewah); - } - } - - } - - /** - * Test not. 
(Based on an idea by Ciaran Jessup) - */ - @Test - public void testNot() { - System.out.println("testing not"); - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - for (int i = 0; i <= 184; ++i) { - ewah.set(i); - } - Assert.assertEquals(ewah.cardinality(), 185); - ewah.not(); - Assert.assertEquals(ewah.cardinality(), 0); - } - - @Test - public void testOrCardinality() { - System.out.println("testing Or Cardinality"); - for (int N = 0; N < 1024; ++N) { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - for (int i = 0; i < N; i++) { - bitmap.set(i); - } - bitmap.set(1025); - bitmap.set(1026); - Assert.assertEquals(N + 2, bitmap.cardinality()); - EWAHCompressedBitmap32 orbitmap = bitmap.or(bitmap); - assertEquals(orbitmap, bitmap); - Assert.assertEquals(N + 2, orbitmap.cardinality()); - if (N + 2 != bitmap.orCardinality(new EWAHCompressedBitmap32())) { - System.out.println("N = " + N); - System.out.println(bitmap.toDebugString()); - System.out.println("cardinality = " + bitmap.cardinality()); - System.out.println("orCardinality = " - + bitmap.orCardinality(new EWAHCompressedBitmap32())); - } - - Assert.assertEquals(N + 2, - bitmap.orCardinality(new EWAHCompressedBitmap32())); - } - } - - - /** - * Test sets and gets. 
- */ - @Test - public void testSetGet() { - System.out.println("testing EWAH set/get"); - EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); - int[] val = { 5, 4400, 44600, 55400, 1000000 }; - for (int k = 0; k < val.length; ++k) { - ewcb.set(val[k]); - } - List result = ewcb.getPositions(); - Assert.assertTrue(val.length == result.size()); - for (int k = 0; k < val.length; ++k) { - Assert.assertEquals(result.get(k).intValue(), val[k]); - } - } - - @Test - public void testHashCode() { - System.out.println("testing hashCode"); - EWAHCompressedBitmap32 ewcb = EWAHCompressedBitmap32.bitmapOf(50, 70) - .and(EWAHCompressedBitmap32.bitmapOf(50, 1000)); - Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50), ewcb); - Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50).hashCode(), - ewcb.hashCode()); - } - - @Test - public void testSetSizeInBits() { - System.out.println("testing SetSizeInBits"); - testSetSizeInBits(130, 131); - testSetSizeInBits(63, 64); - testSetSizeInBits(64, 65); - testSetSizeInBits(64, 128); - testSetSizeInBits(35, 131); - testSetSizeInBits(130, 400); - testSetSizeInBits(130, 191); - testSetSizeInBits(130, 192); - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(31); - bitmap.setSizeInBits(130, false); - bitmap.set(131); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(31); - jdkBitmap.set(131); - assertEquals(jdkBitmap, bitmap); - } - - /** - * Test with parameters. - * - * @throws IOException - * Signals that an I/O exception has occurred. - */ - @Test - public void testWithParameters() throws IOException { - System.out - .println("These tests can run for several minutes. Please be patient."); - for (int k = 2; k < 1 << 24; k *= 8) - shouldSetBits(k); - PolizziTest(64); - PolizziTest(128); - PolizziTest(256); - PolizziTest(2048); - System.out.println("Your code is probably ok."); - } - - /** - * Pseudo-non-deterministic test inspired by S.J.vanSchaik. 
(Yes, - * non-deterministic tests are bad, but the test is actually deterministic.) - */ - @Test - public void vanSchaikTest() { - System.out.println("testing vanSchaikTest (this takes some time)"); - final int totalNumBits = 32768; - final double odds = 0.9; - Random rand = new Random(323232323); - for (int t = 0; t < 100; t++) { - int numBitsSet = 0; - EWAHCompressedBitmap32 cBitMap = new EWAHCompressedBitmap32(); - for (int i = 0; i < totalNumBits; i++) { - if (rand.nextDouble() < odds) { - cBitMap.set(i); - numBitsSet++; - } - } - Assert.assertEquals(cBitMap.cardinality(), numBitsSet); - } - - } - - /** - * Function used in a test inspired by Federico Fissore. - * - * @param size - * the number of set bits - * @param seed - * the random seed - * @return the pseudo-random array int[] - */ - public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { - Random random = new Random(seed); - // build raw int array - int[] bits = new int[size]; - for (int i = 0; i < bits.length; i++) { - bits[i] = random.nextInt(TEST_BS_SIZE); - } - // might generate duplicates - Arrays.sort(bits); - // first count how many distinct values - int counter = 0; - int oldx = -1; - for (int x : bits) { - if (x != oldx) - ++counter; - oldx = x; - } - // then construct new array - int[] answer = new int[counter]; - counter = 0; - oldx = -1; - for (int x : bits) { - if (x != oldx) { - answer[counter] = x; - ++counter; - } - oldx = x; - } - return answer; - } - - /** - * Test inspired by Bilal Tayara - */ - @Test - public void TayaraTest() { - System.out.println("Tayara test"); - for (int offset = 64; offset < (1 << 30); offset *= 2) { - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); - for (int k = 0; k < 64; ++k) { - a.set(offset + k); - b.set(offset + k); - } - if (!a.and(b).equals(a)) - throw new RuntimeException("bug"); - if (!a.or(b).equals(a)) - throw new RuntimeException("bug"); - } - } - - @Test - public 
void TestCloneEwahCompressedBitArray() - throws CloneNotSupportedException { - System.out.println("testing EWAH clone"); - EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); - a.set(410018); - a.set(410019); - a.set(410020); - a.set(410021); - a.set(410022); - a.set(410023); - - EWAHCompressedBitmap32 b; - - b = a.clone(); - - a.setSizeInBits(487123, false); - b.setSizeInBits(487123, false); - - Assert.assertTrue(a.equals(b)); - } - - /** - * a non-deterministic test proposed by Marc Polizzi. - * - * @param maxlength - * the maximum uncompressed size of the bitmap - */ - public static void PolizziTest(int maxlength) { - System.out.println("Polizzi test with max length = " + maxlength); - for (int k = 0; k < 10000; ++k) { - final Random rnd = new Random(); - final EWAHCompressedBitmap32 ewahBitmap1 = new EWAHCompressedBitmap32(); - final BitSet jdkBitmap1 = new BitSet(); - final EWAHCompressedBitmap32 ewahBitmap2 = new EWAHCompressedBitmap32(); - final BitSet jdkBitmap2 = new BitSet(); - final EWAHCompressedBitmap32 ewahBitmap3 = new EWAHCompressedBitmap32(); - final BitSet jdkBitmap3 = new BitSet(); - final int len = rnd.nextInt(maxlength); - for (int pos = 0; pos < len; pos++) { // random *** number of bits - // set *** - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap1.set(pos); - jdkBitmap1.set(pos); - } - if (rnd.nextInt(11) == 0) { // random *** increasing *** values - ewahBitmap2.set(pos); - jdkBitmap2.set(pos); - } - if (rnd.nextInt(7) == 0) { // random *** increasing *** values - ewahBitmap3.set(pos); - jdkBitmap3.set(pos); - } - } - assertEquals(jdkBitmap1, ewahBitmap1); - assertEquals(jdkBitmap2, ewahBitmap2); - assertEquals(jdkBitmap3, ewahBitmap3); - // XOR - { - final EWAHCompressedBitmap32 xorEwahBitmap = ewahBitmap1 - .xor(ewahBitmap2); - final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); - xorJdkBitmap.xor(jdkBitmap2); - assertEquals(xorJdkBitmap, xorEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap32 
andEwahBitmap = ewahBitmap1 - .and(ewahBitmap2); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - } - // AND - { - final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap2 - .and(ewahBitmap1); - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - assertEquals(andJdkBitmap, andEwahBitmap); - assertEquals(andJdkBitmap, - EWAHCompressedBitmap32.and(ewahBitmap1, ewahBitmap2)); - } - // MULTI AND - { - final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); - andJdkBitmap.and(jdkBitmap2); - andJdkBitmap.and(jdkBitmap3); - assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and( - ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and( - ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(andJdkBitmap.cardinality(), - EWAHCompressedBitmap32.andCardinality(ewahBitmap1, - ewahBitmap2, ewahBitmap3)); - } - // AND NOT - { - final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap1 - .andNot(ewahBitmap2); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); - andNotJdkBitmap.andNot(jdkBitmap2); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // AND NOT - { - final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap2 - .andNot(ewahBitmap1); - final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); - andNotJdkBitmap.andNot(jdkBitmap1); - assertEquals(andNotJdkBitmap, andNotEwahBitmap); - } - // OR - { - final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap1 - .or(ewahBitmap2); - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - assertEquals(orJdkBitmap, - EWAHCompressedBitmap32.or(ewahBitmap1, ewahBitmap2)); - Assert.assertEquals(orEwahBitmap.cardinality(), - ewahBitmap1.orCardinality(ewahBitmap2)); - } - // OR - { - final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap2 - .or(ewahBitmap1); - 
final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - assertEquals(orJdkBitmap, orEwahBitmap); - } - // MULTI OR - { - final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); - orJdkBitmap.or(jdkBitmap2); - orJdkBitmap.or(jdkBitmap3); - assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or( - ewahBitmap1, ewahBitmap2, ewahBitmap3)); - assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or( - ewahBitmap3, ewahBitmap2, ewahBitmap1)); - Assert.assertEquals(orJdkBitmap.cardinality(), - EWAHCompressedBitmap32.orCardinality(ewahBitmap1, - ewahBitmap2, ewahBitmap3)); - } - } - } - - /** - * Pseudo-non-deterministic test inspired by Federico Fissore. - * - * @param length - * the number of set bits in a bitmap - */ - public static void shouldSetBits(int length) { - System.out.println("testing shouldSetBits " + length); - int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); - EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); - System.out.println(" ... setting " + bitsToSet.length + " values"); - for (int i : bitsToSet) { - ewah.set(i); - } - System.out.println(" ... verifying " + bitsToSet.length + " values"); - equal(ewah.iterator(), bitsToSet); - System.out.println(" ... 
checking cardinality"); - Assert.assertEquals(bitsToSet.length, ewah.cardinality()); - } - - @Test - public void testSizeInBits1() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.setSizeInBits(1, false); - bitmap.not(); - Assert.assertEquals(1, bitmap.cardinality()); - } - - @Test - public void testHasNextSafe() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertTrue(it.hasNext()); - Assert.assertEquals(0, it.next()); - } - - @Test - public void testHasNextSafe2() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.set(0); - IntIterator it = bitmap.intIterator(); - Assert.assertEquals(0, it.next()); - } - - @Test - public void testInfiniteLoop() { - System.out.println("Testing for an infinite loop"); - EWAHCompressedBitmap32 b1 = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b2 = new EWAHCompressedBitmap32(); - EWAHCompressedBitmap32 b3 = new EWAHCompressedBitmap32(); - b3.setSizeInBits(5,false); - b1.set(2); - b2.set(4); - EWAHCompressedBitmap32.and(b1, b2, b3); - EWAHCompressedBitmap32.or(b1, b2, b3); - - } - - @Test - public void testSizeInBits2() { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.setSizeInBits(1, true); - bitmap.not(); - Assert.assertEquals(0, bitmap.cardinality()); - } - - private static void assertAndEquals(EWAHCompressedBitmap32... bitmaps) { - EWAHCompressedBitmap32 expected = bitmaps[0]; - for (int i = 1; i < bitmaps.length; i++) { - expected = expected.and(bitmaps[i]); - } - Assert.assertTrue(expected.equals(EWAHCompressedBitmap32.and(bitmaps))); - } - - private static void assertEquals(EWAHCompressedBitmap32 expected, - EWAHCompressedBitmap32 actual) { - Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); - assertEqualsPositions(expected, actual); - } - - private static void assertOrEquals(EWAHCompressedBitmap32... 
bitmaps) { - EWAHCompressedBitmap32 expected = bitmaps[0]; - for (int i = 1; i < bitmaps.length; i++) { - expected = expected.or(bitmaps[i]); - } - assertEquals(expected, EWAHCompressedBitmap32.or(bitmaps)); - } - - /** - * Extracted. - * - * @param bits - * the bits - * @return the integer - */ - private static Integer extracted(final Iterator bits) { - return bits.next(); - } - - private static void testSetSizeInBits(int size, int nextBit) { - EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); - bitmap.setSizeInBits(size, false); - bitmap.set(nextBit); - BitSet jdkBitmap = new BitSet(); - jdkBitmap.set(nextBit); - assertEquals(jdkBitmap, bitmap); - } - - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi - * - * @param jdkBitmap - * the uncompressed bitmap - * @param ewahBitmap - * the compressed bitmap - */ - static void assertCardinality(BitSet jdkBitmap, - EWAHCompressedBitmap32 ewahBitmap) { - final int c1 = jdkBitmap.cardinality(); - final int c2 = ewahBitmap.cardinality(); - Assert.assertEquals(c1, c2); - } - - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi. 
- * - * @param jdkBitmap - * the uncompressed bitmap - * @param ewahBitmap - * the compressed bitmap - */ - static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { - assertEqualsIterator(jdkBitmap, ewahBitmap); - assertEqualsPositions(jdkBitmap, ewahBitmap); - assertCardinality(jdkBitmap, ewahBitmap); - } - - static void assertEquals(int[] v, List p) { - assertEquals(p, v); - } - - static void assertEquals(List p, int[] v) { - if (v.length != p.size()) - throw new RuntimeException("Different lengths " + v.length + " " - + p.size()); - for (int k = 0; k < v.length; ++k) - if (v[k] != p.get(k).intValue()) - throw new RuntimeException("expected equal at " + k + " " - + v[k] + " " + p.get(k)); - } - - // - /** - * Assess equality between an uncompressed bitmap and a compressed one, part - * of a test contributed by Marc Polizzi - * - * @param jdkBitmap - * the jdk bitmap - * @param ewahBitmap - * the ewah bitmap - */ - static void assertEqualsIterator(BitSet jdkBitmap, - EWAHCompressedBitmap32 ewahBitmap) { - final Vector positions = new Vector(); - final Iterator bits = ewahBitmap.iterator(); - while (bits.hasNext()) { - final int bit = extracted(bits).intValue(); - Assert.assertTrue(jdkBitmap.get(bit)); - positions.add(new Integer(bit)); - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException( - "iterator: bitset got different bits"); - } - } - } - - // part of a test contributed by Marc Polizzi - /** - * Assert equals positions. 
- * - * @param jdkBitmap - * the jdk bitmap - * @param ewahBitmap - * the ewah bitmap - */ - static void assertEqualsPositions(BitSet jdkBitmap, - EWAHCompressedBitmap32 ewahBitmap) { - final List positions = ewahBitmap.getPositions(); - for (int position : positions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException( - "positions: bitset got different bits"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - if (!positions.contains(new Integer(pos))) { - throw new RuntimeException( - "positions: bitset got different bits"); - } - } - // we check again - final int[] fastpositions = ewahBitmap.toArray(); - for (int position : fastpositions) { - if (!jdkBitmap.get(position)) { - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - } - } - for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap - .nextSetBit(pos + 1)) { - int index = Arrays.binarySearch(fastpositions, pos); - if (index < 0) - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - if (fastpositions[index] != pos) - throw new RuntimeException( - "positions: bitset got different bits with toArray"); - } - } - - /** - * Assert equals positions. 
- * - * @param ewahBitmap1 - * the ewah bitmap1 - * @param ewahBitmap2 - * the ewah bitmap2 - */ - static void assertEqualsPositions(EWAHCompressedBitmap32 ewahBitmap1, - EWAHCompressedBitmap32 ewahBitmap2) { - final List positions1 = ewahBitmap1.getPositions(); - final List positions2 = ewahBitmap2.getPositions(); - if (!positions1.equals(positions2)) - throw new RuntimeException( - "positions: alternative got different bits (two bitmaps)"); - // - final int[] fastpositions1 = ewahBitmap1.toArray(); - assertEquals(fastpositions1, positions1); - final int[] fastpositions2 = ewahBitmap2.toArray(); - assertEquals(fastpositions2, positions2); - if (!Arrays.equals(fastpositions1, fastpositions2)) - throw new RuntimeException( - "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); - } - - /** - * Convenience function to assess equality between a compressed bitset and - * an uncompressed bitset - * - * @param x - * the compressed bitset/bitmap - * @param y - * the uncompressed bitset/bitmap - */ - static void equal(EWAHCompressedBitmap32 x, BitSet y) { - Assert.assertEquals(x.cardinality(), y.cardinality()); - for (int i : x.getPositions()) - Assert.assertTrue(y.get(i)); - } - - /** - * Convenience function to assess equality between an array and an iterator - * over Integers - * - * @param i - * the iterator - * @param array - * the array - */ - static void equal(Iterator i, int[] array) { - int cursor = 0; - while (i.hasNext()) { - int x = extracted(i).intValue(); - int y = array[cursor++]; - Assert.assertEquals(x, y); - } - } - - /** The Constant MEGA: a large integer. */ - private static final int MEGA = 8 * 1024 * 1024; - - /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. 
*/ - private static final int TEST_BS_SIZE = 8 * MEGA; -} +package com.googlecode.javaewah32; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +import com.googlecode.javaewah.ChunkIterator; +import com.googlecode.javaewah.FastAggregation; +import com.googlecode.javaewah.IntIterator; +import org.junit.Assert; +import org.junit.Test; + +import java.io.*; +import java.nio.IntBuffer; +import java.util.*; + +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.maxSizeInBits; +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.WORD_IN_BITS; + +/** + * This class is used for basic unit testing. + */ +@SuppressWarnings("javadoc") +public class EWAHCompressedBitmap32Test { + + @Test + public void swaptest() { + EWAHCompressedBitmap32 x = EWAHCompressedBitmap32.bitmapOf(1,2,3); + EWAHCompressedBitmap32 y = EWAHCompressedBitmap32.bitmapOf(1,2,3,4); + x.swap(y); + Assert.assertEquals(x.cardinality(),4); + Assert.assertEquals(y.cardinality(),3); + } + + @Test + public void shiftByWordSizeBits() { + int[] positions = { 10, 11, 12, 13 }; + EWAHCompressedBitmap32 bm1 = EWAHCompressedBitmap32.bitmapOf(positions); + EWAHCompressedBitmap32 bm2 = bm1.shift(WORD_IN_BITS); + + EWAHCompressedBitmap32 bm3 = EWAHCompressedBitmap32.bitmapOf(); + for (int pos : positions) { + bm3.set(pos + WORD_IN_BITS); + } + Assert.assertEquals(bm3, bm2); + } + + @Test + public void shiftbug001() { + EWAHCompressedBitmap32 bm1 = EWAHCompressedBitmap32.bitmapOf(10, 11, 12, 13); + EWAHCompressedBitmap32 bm2 = bm1.shift(1); + + EWAHCompressedBitmap32 bm3 = bm1.or(bm2); + EWAHCompressedBitmap32 bm4 = EWAHCompressedBitmap32.bitmapOf(10,11,12,13,14); + Assert.assertEquals(bm3, bm4); + } + + + @Test + public void shiftbug002() { + EWAHCompressedBitmap32 bm1 = EWAHCompressedBitmap32.bitmapOf(10, 11, 12, 13, 31); + 
EWAHCompressedBitmap32 bm2 = bm1.shift(1); + + EWAHCompressedBitmap32 bm3 = bm1.or(bm2); + EWAHCompressedBitmap32 bm4 = EWAHCompressedBitmap32.bitmapOf(10,11,12,13,14, 31, 32); + Assert.assertEquals(bm3, bm4); + } + + + @Test + public void shiftbug003() { + EWAHCompressedBitmap32 bm1 = EWAHCompressedBitmap32.bitmapOf(10, 11, 12, 13, 30); + EWAHCompressedBitmap32 bm2 = bm1.shift(1); + + EWAHCompressedBitmap32 bm3 = bm1.or(bm2); + EWAHCompressedBitmap32 bm4 = EWAHCompressedBitmap32.bitmapOf(10,11,12,13,14, 30, 31); + Assert.assertEquals(bm3, bm4); + } + + @Test + public void shiftbug004() { + EWAHCompressedBitmap32 bm1 = EWAHCompressedBitmap32.bitmapOf(10, 11, 12, 13, 32); + EWAHCompressedBitmap32 bm2 = bm1.shift(1); + + EWAHCompressedBitmap32 bm3 = bm1.or(bm2); + EWAHCompressedBitmap32 bm4 = EWAHCompressedBitmap32.bitmapOf(10,11,12,13,14, 32, 33); + Assert.assertEquals(bm3, bm4); + } + + + @Test + public void issue54() { + EWAHCompressedBitmap32 bm = new EWAHCompressedBitmap32(); + for (int i = 1500; i <1600; i ++) { + bm.set(i); + } + for (int i = 1500; i < 1535; i ++) { + bm.clear(i); + } + bm.clear(1535); + Assert.assertFalse(bm.isEmpty()); + } + + @Test + public void xorCardinality() { + EWAHCompressedBitmap32 b1 = EWAHCompressedBitmap32.bitmapOf(0,1,2,3,5,8,13,21,34,55,89); + EWAHCompressedBitmap32 b2 = EWAHCompressedBitmap32.bitmapOf(0,1,2,3,5,8,13,21,34,55,89,144,233,377,610); + Assert.assertEquals(4, b1.xorCardinality(b2)); + } + + @Test + public void andNotCardinality() { + EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(0,1,2,3,5,8,13,21,34,55,89); + Assert.assertEquals(0, b.andNotCardinality(b)); + } + + @Test + public void getFirstSetBit() { + EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(); + Assert.assertEquals(-1, b.getFirstSetBit()); + b.set(0); + Assert.assertEquals(0, b.getFirstSetBit()); + b.clear(); + b.setSizeInBits(WORD_IN_BITS, false); + b.setSizeInBits(2*WORD_IN_BITS, true); + Assert.assertEquals(WORD_IN_BITS, 
b.getFirstSetBit()); + } + + @Test + public void clearStressTest() { + System.out.println("clear stress test"); + int n = 10 * WORD_IN_BITS; + for (int k = 0; k < 100; ++k) { + List setPositions = new ArrayList(n); + List clearPositions = new ArrayList(n); + for (int i = 0; i < n; ++i) { + setPositions.add(i); + clearPositions.add(i); + } + Collections.shuffle(setPositions); + Collections.shuffle(clearPositions); + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + for (int i = 0; i < n; ++i) { + bitmap.set(setPositions.get(i)); + bitmap.clear(clearPositions.get(i)); + } + for (int i = 0; i < n; ++i) { + bitmap.clear(i); + } + Assert.assertEquals(0, bitmap.cardinality()); + Assert.assertEquals(WORD_IN_BITS / 8, bitmap.sizeInBytes()); + } + } + + @Test + public void clear() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(0, 1, 3, 199, 666); + Assert.assertEquals(667, bitmap.sizeInBits()); + bitmap.clear(900); + Assert.assertEquals(901, bitmap.sizeInBits()); + for (int i = 667; i < 901; ++i) { + Assert.assertFalse(bitmap.get(i)); + } + Assert.assertTrue(bitmap.get(199)); + bitmap.clear(199); + Assert.assertFalse(bitmap.get(199)); + } + + @Test + public void equalToSelf() { + EWAHCompressedBitmap32 ewahBitmap = EWAHCompressedBitmap32.bitmapOf(0, 2, 55, + 64, 1 << 30); + Assert.assertTrue(ewahBitmap.equals(ewahBitmap)); + } + + @Test + public void notEqualTo() { + EWAHCompressedBitmap32 b1 = EWAHCompressedBitmap32.bitmapOf(0,1,2,3,5,8,13,21,34,55,89); + EWAHCompressedBitmap32 b2 = EWAHCompressedBitmap32.bitmapOf(0,1,2,3,5,8,13,21,34,55,89,144,233,377,610); + Assert.assertFalse(b1.equals(b2)); + } + + @Test + public void safeSerialization() throws IOException { + EWAHCompressedBitmap32 ewahBitmap = EWAHCompressedBitmap32.bitmapOf(0, 2, 55, + 64, 1 << 30); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // Note: you could use a file output steam instead of ByteArrayOutputStream + ewahBitmap.serialize(new 
DataOutputStream(bos)); + EWAHCompressedBitmap32 ewahBitmapnew = new EWAHCompressedBitmap32(); + byte[] bout = bos.toByteArray(); + ewahBitmapnew.deserialize(new DataInputStream(new ByteArrayInputStream(bout))); + assertEquals(ewahBitmapnew, ewahBitmap); + Assert.assertEquals(ewahBitmapnew.serializedSizeInBytes(), ewahBitmap.serializedSizeInBytes()); + } + + @Test + public void simpleTestWithIntBuffer() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(IntBuffer.wrap(new int[10])); + + int maxPosition = 666; + int[] positions = new int[] { 1, maxPosition, 99, 5 }; + for (int position : positions) { + bitmap.set(position); + } + + Assert.assertEquals(positions.length, bitmap.cardinality()); + + int[] sortedPositions = positions.clone(); + Arrays.sort(sortedPositions); + Assert.assertArrayEquals(sortedPositions, bitmap.toArray()); + + bitmap.not(); + Assert.assertEquals(maxPosition+1-positions.length, bitmap.cardinality()); + + for (int i = 0; i <= maxPosition; i++) { + bitmap.set(i); + } + Assert.assertEquals(maxPosition + 1, bitmap.cardinality()); + + bitmap.clear(); + Assert.assertEquals(0, bitmap.cardinality()); + + bitmap.swap(EWAHCompressedBitmap32.bitmapOf(1)); + Assert.assertEquals(1, bitmap.cardinality()); + } + + @Test + public void andCompressedSize() { + EWAHCompressedBitmap32 b1 = EWAHCompressedBitmap32.bitmapOf(); + EWAHCompressedBitmap32 b2 = EWAHCompressedBitmap32.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + b2.set(1); + b2.set(WORD_IN_BITS+1); + + EWAHCompressedBitmap32 result = b1.and(b2); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void orCompressedSize() { + EWAHCompressedBitmap32 b1 = EWAHCompressedBitmap32.bitmapOf(); + EWAHCompressedBitmap32 b2 = EWAHCompressedBitmap32.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + b2.setSizeInBits(1, false); + b2.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap32 result = b1.or(b2); + Assert.assertEquals(2 * WORD_IN_BITS / 8, 
result.sizeInBytes()); + } + + @Test + public void xorCompressedSize() { + EWAHCompressedBitmap32 b1 = EWAHCompressedBitmap32.bitmapOf(); + EWAHCompressedBitmap32 b2 = EWAHCompressedBitmap32.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + b2.setSizeInBits(1, false); + b2.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap32 result = b1.xor(b2); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void andNotCompressedSize() { + EWAHCompressedBitmap32 b1 = EWAHCompressedBitmap32.bitmapOf(); + + b1.set(0); + b1.set(WORD_IN_BITS); + + EWAHCompressedBitmap32 result = b1.andNot(b1); + Assert.assertEquals(2 * WORD_IN_BITS / 8, result.sizeInBytes()); + } + + @Test + public void testBug091() { + String v1 = "0000000000000000000000000000000000000000000000000000000000111101"; + String v2 = "0000000000000000001111011111111111111111111111111110001111000000"; + + EWAHCompressedBitmap32 bm1 = strToBitmap(v1); + EWAHCompressedBitmap32 bm2 = strToBitmap(v2); + + bm1 = bm1.and(bm2); // bm1 should now have no bit set + + EWAHCompressedBitmap32 bm = new EWAHCompressedBitmap32(); + bm.setSizeInBits(bm1.sizeInBits(), false); // Create a bitmap with no bit set + + Assert.assertEquals(0,bm1.cardinality()); + Assert.assertEquals(0,bm1.cardinality()); + Assert.assertEquals(bm.sizeInBits(),bm1.sizeInBits()); + Assert.assertTrue(bm.equals(bm1)); + } + + private EWAHCompressedBitmap32 strToBitmap(String str) { + EWAHCompressedBitmap32 bm = new EWAHCompressedBitmap32(); + for (int i = 0; i < str.length(); i++) { + if (str.charAt(i)=='1') { + bm.set(i); + } + } + bm.setSizeInBits(str.length(), false); + return bm; + } + + @Test + public void testBug090() throws Exception { + EWAHCompressedBitmap32 bm = new EWAHCompressedBitmap32(); + bm.setSizeInBits(8, false); // Create a bitmap with no bit set + + EWAHCompressedBitmap32 bm1 = bm.clone(); + bm1.not(); // Create a bitmap with all bits set + bm1 = bm1.and(bm); // Clear all bits + + 
Assert.assertEquals(0,bm.cardinality()); + Assert.assertEquals(0,bm1.cardinality()); + Assert.assertEquals(bm.sizeInBits(),bm1.sizeInBits()); + Assert.assertTrue(bm.equals(bm1)); + } + + @Test + public void testBug090b() throws Exception { + EWAHCompressedBitmap32 bm1 = new EWAHCompressedBitmap32(); + bm1.setSizeInBits(8, false); // Create a bitmap with no bit set + System.out.println(bm1.toDebugString()); + EWAHCompressedBitmap32 bm2 = new EWAHCompressedBitmap32(); + bm2.setSizeInBits(32, false); // Create a bitmap with no bit set + EWAHCompressedBitmap32 bm3 = new EWAHCompressedBitmap32(); + Assert.assertTrue(bm1.equals(bm2)); + Assert.assertTrue(bm2.equals(bm1)); + Assert.assertTrue(bm2.equals(bm3)); + Assert.assertTrue(bm3.equals(bm2)); + Assert.assertTrue(bm1.equals(bm3)); + Assert.assertTrue(bm3.equals(bm1)); + } + + @Test + public void shiftTest() { + System.out.println("testing shifts"); + for (int k = 2; k <= 4096; k *= 2) { + int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, + 434455 + 5 * k); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + for (int i : bitsToSet) { + ewah.set(i); + } + for(int b = 0; b < 128; ++b) { + EWAHCompressedBitmap32 ewahs = ewah.shift(b); + int[] sb = ewahs.toArray(); + for(int z = 0; z < sb.length; ++z) + if(sb[z] != bitsToSet[z] + b) throw new RuntimeException("bug"); + } + for(int z = 0; z < 256;++z) { + ewah.set(z); + } + bitsToSet = ewah.toArray(); + for(int b = 0; b < 128; ++b) { + EWAHCompressedBitmap32 ewahs = ewah.shift(b); + int[] sb = ewahs.toArray(); + for(int z = 0; z < sb.length; ++z) + if(sb[z] != bitsToSet[z] + b) throw new RuntimeException("bug"); + } + } + } + + @Test + public void testBug090c() throws Exception { + EWAHCompressedBitmap32 bm1 = new EWAHCompressedBitmap32(); + bm1.setSizeInBits(8, false); // Create a bitmap with no bit set + System.out.println(bm1.toDebugString()); + EWAHCompressedBitmap32 bm2 = new EWAHCompressedBitmap32(); + bm2.setSizeInBits(64, false); // Create a bitmap 
with no bit set + EWAHCompressedBitmap32 bm3 = new EWAHCompressedBitmap32(); + Assert.assertEquals(bm1.hashCode(), bm2.hashCode()); + Assert.assertEquals(bm3.hashCode(), bm2.hashCode()); + } + + + @Test + public void jugovacTest() { + EWAHCompressedBitmap32 bm1 = new EWAHCompressedBitmap32(1); + bm1.set(1); + EWAHCompressedBitmap32 bm2 = new EWAHCompressedBitmap32(0); + bm1.andCardinality(bm2); + } + + @Test + public void setBitsInDecreasingOrder() { + int[] positions = new int[] { 0, 1, 2, 3, 5, 8, 13, 21 }; + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + for(int i=positions.length-1; i>=0; --i) { + Assert.assertTrue(bitmap.set(positions[i])); + } + IntIterator iterator = bitmap.intIterator(); + for(int position : positions) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(position, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void setOutOfOrderStressTest() { + System.out.println("out-of-order stress test"); + int n = 10 * WORD_IN_BITS; + for(int k = 0; k < 100; ++k) { + List positions = new ArrayList(n); + for (int i = 0; i < n; ++i) { + positions.add(i); + } + Collections.shuffle(positions); + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + for (int position : positions) { + bitmap.set(position); + } + IntIterator iterator = bitmap.intIterator(); + for (int i = 0; i < n; ++i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS / 8, bitmap.sizeInBytes()); + } + } + + @Test + public void setBitsInDecreasingOrderWithWordPrefix() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.set(10); + bitmap.setSizeInBits(WORD_IN_BITS, false); + bitmap.set(WORD_IN_BITS + 10); + bitmap.set(WORD_IN_BITS + 5); + IntIterator iterator = bitmap.intIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(10, 
iterator.next()); + Assert.assertEquals(WORD_IN_BITS + 5, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS + 10, iterator.next()); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void setBitsInDecreasingOrderWithWordPrefixOfOnes() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, true); + bitmap.set(WORD_IN_BITS + 10); + bitmap.set(WORD_IN_BITS + 5); + IntIterator iterator = bitmap.intIterator(); + for(int i=0; i=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(positions[i], iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverBitmapsOfOnes() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, true); + IntIterator iterator = bitmap.reverseIntIterator(); + for(int i= WORD_IN_BITS-1; i>=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverBitmapsOfZeros() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, false); + IntIterator iterator = bitmap.reverseIntIterator(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverBitmapsOfOnesAndZeros() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS-10, true); + bitmap.setSizeInBits(WORD_IN_BITS, false); + IntIterator iterator = bitmap.reverseIntIterator(); + for(int i= WORD_IN_BITS-10; i>0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i-1, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverMultipleRLWs() { + EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(1000, 100000, 100000 + 
WORD_IN_BITS); + IntIterator iterator = b.reverseIntIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(100000 + WORD_IN_BITS, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(100000, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(1000, iterator.next()); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverMixedRunningLengthWords() { + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + b.setSizeInBits(WORD_IN_BITS, true); + b.set(WORD_IN_BITS + 5); + + IntIterator iterator = b.reverseIntIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(WORD_IN_BITS+5, iterator.next()); + for(int i= WORD_IN_BITS-1; i>=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void reverseIntIteratorOverConsecutiveLiteralsInSameRunningLengthWord() { + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + b.setSizeInBits(WORD_IN_BITS, true); + b.setSizeInBits(2*WORD_IN_BITS, false); + b.setSizeInBits(3*WORD_IN_BITS, true); + b.set(3*WORD_IN_BITS+5); + b.set(5*WORD_IN_BITS-1); + + IntIterator iterator = b.reverseIntIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(5*WORD_IN_BITS - 1, iterator.next()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(3*WORD_IN_BITS+5, iterator.next()); + for(int i=3*WORD_IN_BITS-1; i>=2*WORD_IN_BITS; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + for(int i= WORD_IN_BITS-1; i>=0; --i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void isEmpty() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(1000, false); + Assert.assertTrue(bitmap.isEmpty()); + 
bitmap.set(1001); + Assert.assertFalse(bitmap.isEmpty()); + } + + @Test + public void issue58() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(52344, 52344 + 9); + ChunkIterator iterator = bitmap.chunkIterator(); + + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(52344, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(1, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(8, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(1, iterator.nextLength()); + iterator.move(iterator.nextLength()); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void issue59() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(243, 260, 1000); + ChunkIterator iter = bitmap.chunkIterator(); + iter.move(245); + Assert.assertEquals(15, iter.nextLength()); + } + + @Test + public void issue61() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(210696); + bitmap.set(210984); + bitmap.set(210985); + ChunkIterator iter = bitmap.chunkIterator(); + iter.move(210984); + Assert.assertEquals(2, iter.nextLength()); + + bitmap = new EWAHCompressedBitmap32(); + bitmap.set(210696); + bitmap.set(210698); + bitmap.set(210699); + iter = bitmap.chunkIterator(); + iter.move(210698); + Assert.assertEquals(2, iter.nextLength()); + } + + @Test + public void chunkIterator() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(0, 1, 2, 3, 4, 7, 8, 9, 10); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(5, 
iterator.nextLength()); + iterator.move(2); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(3, iterator.nextLength()); + iterator.move(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(2, iterator.nextLength()); + iterator.move(5); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(1, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void chunkIteratorOverBitmapOfZeros() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, false); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void chunkIteratorOverBitmapOfZerosAndOnes() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS + 10, false); + bitmap.setSizeInBits(2 * WORD_IN_BITS, true); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); + iterator.move(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void chunkIteratorOverBitmapOfOnesAndZeros() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS - 10, true); + bitmap.setSizeInBits(2 * WORD_IN_BITS, false); + + ChunkIterator iterator = bitmap.chunkIterator(); + Assert.assertTrue(iterator.hasNext()); + 
Assert.assertTrue(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS - 10, iterator.nextLength()); + iterator.move(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertFalse(iterator.nextBit()); + Assert.assertEquals(WORD_IN_BITS + 10, iterator.nextLength()); + iterator.move(); + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void simpleCompose() { + EWAHCompressedBitmap32 bitmap1 = EWAHCompressedBitmap32.bitmapOf(1, 3, 4); + bitmap1.setSizeInBits(5, false); + + EWAHCompressedBitmap32 bitmap2 = EWAHCompressedBitmap32.bitmapOf(0, 2); + + EWAHCompressedBitmap32 result = bitmap1.compose(bitmap2); + + Assert.assertEquals(5, result.sizeInBits()); + Assert.assertEquals(2, result.cardinality()); + Assert.assertEquals(Integer.valueOf(1), result.toList().get(0)); + Assert.assertEquals(Integer.valueOf(4), result.toList().get(1)); + } + + @Test + public void composeBitmapOfOnesWithItself() { + EWAHCompressedBitmap32 bitmap = EWAHCompressedBitmap32.bitmapOf(); + bitmap.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap32 result = bitmap.compose(bitmap); + + Assert.assertEquals(bitmap, result); + } + + @Test + public void composeBitmapOfZerosAndOnesWithBitmapOfOnes() { + EWAHCompressedBitmap32 bitmap1 = EWAHCompressedBitmap32.bitmapOf(); + bitmap1.setSizeInBits(WORD_IN_BITS, false); + bitmap1.setSizeInBits(2 * WORD_IN_BITS, true); + + EWAHCompressedBitmap32 bitmap2 = EWAHCompressedBitmap32.bitmapOf(); + bitmap2.setSizeInBits(WORD_IN_BITS, true); + + EWAHCompressedBitmap32 result = bitmap1.compose(bitmap2); + + Assert.assertEquals(bitmap1, result); + } + + @Test + public void composeBitmapOfOnesWithBitmapOfZerosAndOnes() { + EWAHCompressedBitmap32 bitmap1 = EWAHCompressedBitmap32.bitmapOf(); + bitmap1.setSizeInBits(2 * WORD_IN_BITS, true); + + EWAHCompressedBitmap32 bitmap2 = EWAHCompressedBitmap32.bitmapOf(); + bitmap2.setSizeInBits(WORD_IN_BITS, false); + bitmap2.setSizeInBits(2 * WORD_IN_BITS, true); + + EWAHCompressedBitmap32 result = 
bitmap1.compose(bitmap2); + + Assert.assertEquals(bitmap2, result); + } + + @Test + public void composeBitmapWithBitmapOfZeros() { + EWAHCompressedBitmap32 bitmap1 = EWAHCompressedBitmap32.bitmapOf(1, 3, 4, 9); + bitmap1.setSizeInBits(WORD_IN_BITS, false); + + EWAHCompressedBitmap32 bitmap2 = EWAHCompressedBitmap32.bitmapOf(); + bitmap2.setSizeInBits(5, false); + + EWAHCompressedBitmap32 result = bitmap1.compose(bitmap2); + + Assert.assertEquals(0, result.cardinality()); + Assert.assertEquals(WORD_IN_BITS, result.sizeInBits()); + } + + @Test + public void testAstesana() throws Exception { + for (int k = 5; k < 256; ++k) { + EWAHCompressedBitmap32 bm = new EWAHCompressedBitmap32(); + bm.set(1); + bm.setSizeInBits(k, false); + EWAHCompressedBitmap32 bm1 = bm.clone(); + bm1.not(); + EWAHCompressedBitmap32 x = bm1.and(bm1); + Assert.assertEquals(x.cardinality(),k-1); + x = bm1.andNot(bm1); + Assert.assertEquals(x.cardinality(),0); + x = bm1.xor(bm1); + Assert.assertEquals(x.cardinality(),0); + x = bm1.or(bm1); + Assert.assertEquals(x.cardinality(),k-1); + } + } + + @Test + public void testAstesana2() { + for (int k = 1; k < 256; ++k) { + + // Create two equivalent bitmaps + EWAHCompressedBitmap32 bm = new EWAHCompressedBitmap32(); + bm.set(0); + bm.setSizeInBits(k, false); + EWAHCompressedBitmap32 bm3 = new EWAHCompressedBitmap32(); + bm3.set(0); + bm3.setSizeInBits(k, false); + + // Perform two negation + // -> should change nothing + bm.not(); + bm.not(); + + // Verify it changes nothing + + Assert.assertArrayEquals(bm.toArray(), bm3.toArray()); + Assert.assertEquals(bm.sizeInBits(), bm3.sizeInBits()); + Assert.assertTrue(bm.equals(bm3)); + } + } + + @Test + public void clearIntIterator() { + EWAHCompressedBitmap32 x = EWAHCompressedBitmap32.bitmapOf(1, 3, 7, 8, 10); + x.setSizeInBits(500, true); + x.setSizeInBits(501, false); + x.setSizeInBits(1000, true); + x.set(1001); + IntIterator iterator = x.clearIntIterator(); + for (int i : Arrays.asList(0, 2, 4, 5, 6, 9, 
500, 1000)) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void clearIntIteratorOverBitmapOfZeros() { + EWAHCompressedBitmap32 x = EWAHCompressedBitmap32.bitmapOf(); + x.setSizeInBits(WORD_IN_BITS, false); + IntIterator iterator = x.clearIntIterator(); + for (int i = 0; i < WORD_IN_BITS; ++i) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(i, iterator.next()); + } + Assert.assertFalse(iterator.hasNext()); + } + + @Test + public void testGet() { + for (int gap = 29; gap < 10000; gap *= 10) { + EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); + for (int k = 0; k < 100; ++k) + x.set(k * gap); + for (int k = 0; k < 100 * gap; ++k) + if (x.get(k)) { + if (k % gap != 0) + throw new RuntimeException( + "spotted an extra set bit at " + + k + " gap = " + + gap + ); + } else if (k % gap == 0) + throw new RuntimeException( + "missed a set bit " + k + + " gap = " + gap + ); + } + } + + public static Iterator toIterator(final EWAHCompressedBitmap32[] bitmaps) { + return new Iterator() { + int k = 0; + + @Override + public boolean hasNext() { + return k < bitmaps.length; + } + + @Override + public Object next() { + return bitmaps[k++]; + } + + @Override + public void remove() { + // nothing + } + }; + } + + @Test + public void fastand() { + int[][] data = { {5, 6, 7, 8, 9}, {1, 5}, {2, 5}}; + + EWAHCompressedBitmap32[] bitmaps = new EWAHCompressedBitmap32[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap32(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + EWAHCompressedBitmap32 and1 = FastAggregation32.bufferedand(1024, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap32 and2 = new EWAHCompressedBitmap32(); + FastAggregation32.bufferedandWithContainer(and2, 32, bitmaps[0],bitmaps[1],bitmaps[2]); + EWAHCompressedBitmap32 and3 = 
EWAHCompressedBitmap32.and(bitmaps[0],bitmaps[1],bitmaps[2]); + System.out.println(and1.sizeInBits()); + System.out.println(and2.sizeInBits()); + System.out.println(and3.sizeInBits()); + assertEqualsPositions(and1, and2); + assertEqualsPositions(and2, and3); + } + + + @Test + public void fastagg() { + int[][] data = {{}, {5, 6, 7, 8, 9}, {1}, {2}}; + + EWAHCompressedBitmap32[] bitmaps = new EWAHCompressedBitmap32[data.length]; + + for (int i = 0; i < bitmaps.length; ++i) { + bitmaps[i] = new EWAHCompressedBitmap32(); + for (int j : data[i]) { + bitmaps[i].set(j); + } + bitmaps[i].setSizeInBits(1000, false); + } + EWAHCompressedBitmap32 or1 = FastAggregation32.bufferedor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 or2 = FastAggregation32.or(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 or3 = FastAggregation32.bufferedor(1024, bitmaps); + EWAHCompressedBitmap32 or4 = FastAggregation32.or(bitmaps); + EWAHCompressedBitmap32 or5 = FastAggregation32.or(toIterator(bitmaps)); + EWAHCompressedBitmap32 or6 = new EWAHCompressedBitmap32(); + FastAggregation32.orToContainer(or6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(or1,or2); + assertEquals(or2,or3); + assertEquals(or3,or4); + assertEquals(or4,or5); + assertEquals(or5,or6); + + EWAHCompressedBitmap32 xor1 = FastAggregation32.bufferedxor(1024, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 xor2 = FastAggregation32.xor(bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + EWAHCompressedBitmap32 xor3 = FastAggregation32.bufferedxor(1024, bitmaps); + EWAHCompressedBitmap32 xor4 = FastAggregation32.xor(bitmaps); + EWAHCompressedBitmap32 xor5 = FastAggregation32.xor(toIterator(bitmaps)); + EWAHCompressedBitmap32 xor6 = new EWAHCompressedBitmap32(); + FastAggregation32.xorToContainer(xor6, bitmaps[0],bitmaps[1],bitmaps[2],bitmaps[3]); + + assertEquals(xor1,xor2); + assertEquals(xor2,xor3); + assertEquals(xor3,xor4); + 
assertEquals(xor4,xor5); + assertEquals(xor5,xor6); + } + + + @SuppressWarnings({"deprecation", "boxing"}) + @Test + public void OKaserBugReportJuly2013() { + System.out.println("testing OKaserBugReportJuly2013"); + int[][] data = {{}, {5, 6, 7, 8, 9}, {1}, {2}, + {2, 5, 7}, {1}, {2}, {1, 6, 9}, + {1, 3, 4, 6, 8, 9}, {1, 3, 4, 6, 8, 9}, + {1, 3, 6, 8, 9}, {2, 5, 7}, {2, 5, 7}, + {1, 3, 9}, {3, 8, 9}}; + + EWAHCompressedBitmap32[] toBeOred = new EWAHCompressedBitmap32[data.length]; + Set bruteForceAnswer = new HashSet(); + for (int i = 0; i < toBeOred.length; ++i) { + toBeOred[i] = new EWAHCompressedBitmap32(); + for (int j : data[i]) { + toBeOred[i].set(j); + bruteForceAnswer.add(j); + } + toBeOred[i].setSizeInBits(1000, false); + } + + long rightcard = bruteForceAnswer.size(); + EWAHCompressedBitmap32 foo = new EWAHCompressedBitmap32(); + FastAggregation32.orToContainer(foo, toBeOred); + Assert.assertEquals(rightcard, foo.cardinality()); + EWAHCompressedBitmap32 e1 = FastAggregation.or(toBeOred); + Assert.assertEquals(rightcard, e1.cardinality()); + EWAHCompressedBitmap32 e2 = FastAggregation32.bufferedor(65536, + toBeOred); + Assert.assertEquals(rightcard, e2.cardinality()); + } + + @Test + public void testSizeInBitsWithAnd() { + System.out.println("testing SizeInBitsWithAnd"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBitsWithinLastWord(10); + b.setSizeInBitsWithinLastWord(10); + + EWAHCompressedBitmap32 and = a.and(b); + Assert.assertEquals(10, and.sizeInBits()); + EWAHCompressedBitmap32 and2 = EWAHCompressedBitmap32.and(a, b); + Assert.assertEquals(10, and2.sizeInBits()); + } + + @Test + public void testSizeInBitsWithAndNot() { + System.out.println("testing SizeInBitsWithAndNot"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); 
+ + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBitsWithinLastWord(10); + b.setSizeInBitsWithinLastWord(10); + + EWAHCompressedBitmap32 and = a.andNot(b); + Assert.assertEquals(10, and.sizeInBits()); + } + + @Test + public void testSizeInBitsWithOr() { + System.out.println("testing SizeInBitsWithOr"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBitsWithinLastWord(10); + b.setSizeInBitsWithinLastWord(10); + + EWAHCompressedBitmap32 or = a.or(b); + Assert.assertEquals(10, or.sizeInBits()); + EWAHCompressedBitmap32 or2 = EWAHCompressedBitmap32.or(a, b); + Assert.assertEquals(10, or2.sizeInBits()); + } + + @Test + public void testSizeInBitsWithXor() { + System.out.println("testing SizeInBitsWithXor"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + a.set(1); + a.set(2); + a.set(3); + + b.set(3); + b.set(4); + b.set(5); + + a.setSizeInBitsWithinLastWord(10); + b.setSizeInBitsWithinLastWord(10); + + EWAHCompressedBitmap32 xor = a.xor(b); + Assert.assertEquals(10, xor.sizeInBits()); + EWAHCompressedBitmap32 xor2 = EWAHCompressedBitmap32.xor(a, b); + Assert.assertEquals(10, xor2.sizeInBits()); + } + + @Test + public void testDebugSetSizeInBitsTest() { + System.out.println("testing DebugSetSizeInBits"); + EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); + + b.set(4); + + b.setSizeInBits(6, true); + + List positions = b.toList(); + + Assert.assertEquals(2, positions.size()); + Assert.assertEquals(Integer.valueOf(4), positions.get(0)); + Assert.assertEquals(Integer.valueOf(5), positions.get(1)); + + Iterator iterator = b.iterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(Integer.valueOf(4), iterator.next()); + Assert.assertTrue(iterator.hasNext()); + 
Assert.assertEquals(Integer.valueOf(5), iterator.next()); + Assert.assertFalse(iterator.hasNext()); + + IntIterator intIterator = b.intIterator(); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(4, intIterator.next()); + Assert.assertTrue(intIterator.hasNext()); + Assert.assertEquals(5, intIterator.next()); + Assert.assertFalse(intIterator.hasNext()); + + } + + /** + * Created: 2/4/11 6:03 PM By: Arnon Moscona. + */ + @Test + public void EwahIteratorProblem() { + System.out.println("testing ArnonMoscona"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + for (int i = 9434560; i <= 9435159; i++) { + bitmap.set(i); + } + IntIterator iterator = bitmap.intIterator(); + List v = bitmap.toList(); + int[] array = bitmap.toArray(); + for (int k = 0; k < v.size(); ++k) { + Assert.assertTrue(array[k] == v.get(k)); + Assert.assertTrue(iterator.hasNext()); + final int ival = iterator.next(); + final int vval = v.get(k); + Assert.assertTrue(ival == vval); + } + Assert.assertTrue(!iterator.hasNext()); + // + for (int k = 2; k <= 1024; k *= 2) { + int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, + 434455 + 5 * k); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + for (int i : bitsToSet) { + ewah.set(i); + } + equal(ewah.iterator(), bitsToSet); + } + } + + /** + * Test submitted by Gregory Ssi-Yan-Kai + */ + @Test + public void SsiYanKaiTest() { + System.out.println("testing SsiYanKaiTest"); + EWAHCompressedBitmap32 a = EWAHCompressedBitmap32.bitmapOf( + 39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, + 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, + 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, + 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, + 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, + 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, + 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, + 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, + 39999, 40000, 
40001, 40002, 40003, 40004, 40005, 40006, + 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014, + 40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, + 40023, 40024, 40025, 40026, 40027, 40028, 40029, 40030, + 40031, 40032, 40033, 40034, 40035, 40036, 40037, 40038, + 40039, 40040, 40041, 40042, 40043, 40044, 40045, 40046, + 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054, + 40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062, + 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, + 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, + 40079, 40080, 40081, 40082, 40083, 40084, 40085, 40086, + 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094, + 40095, 40096, 40097, 40098, 40099, 40100); + EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf( + 39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, + 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, + 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, + 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, + 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, + 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, + 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, + 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, + 39999, 270000); + LinkedHashSet aPositions = new LinkedHashSet( + a.toList()); + int intersection = 0; + EWAHCompressedBitmap32 inter = new EWAHCompressedBitmap32(); + LinkedHashSet bPositions = new LinkedHashSet( + b.toList()); + for (Integer integer : bPositions) { + if (aPositions.contains(integer)) { + inter.set(integer); + ++intersection; + } + } + inter.setSizeInBits(maxSizeInBits(a, b), false); + EWAHCompressedBitmap32 and2 = a.and(b); + if (!and2.equals(inter)) + throw new RuntimeException("intersections don't match"); + if (intersection != and2.cardinality()) + throw new RuntimeException("cardinalities don't match"); + } + + /** + * Test inspired by William Habermaas. 
+ */ + @Test + public void habermaasTest() throws Exception { + System.out.println("testing habermaasTest"); + BitSet bitsetaa = new BitSet(); + EWAHCompressedBitmap32 aa = new EWAHCompressedBitmap32(); + int[] val = {55400, 1000000, 1000128}; + for (int aVal : val) { + aa.set(aVal); + bitsetaa.set(aVal); + } + equal(aa, bitsetaa); + BitSet bitsetab = new BitSet(); + EWAHCompressedBitmap32 ab = new EWAHCompressedBitmap32(); + for (int i = 4096; i < (4096 + 5); i++) { + ab.set(i); + bitsetab.set(i); + } + ab.set(99000); + bitsetab.set(99000); + ab.set(1000130); + bitsetab.set(1000130); + equal(ab, bitsetab); + EWAHCompressedBitmap32 bb = aa.or(ab); + EWAHCompressedBitmap32 bbAnd = aa.and(ab); + EWAHCompressedBitmap32 abnot = ab.clone(); + abnot.not(); + EWAHCompressedBitmap32 bbAnd2 = aa.andNot(abnot); + assertEquals(bbAnd2, bbAnd); + BitSet bitsetbb = (BitSet) bitsetaa.clone(); + bitsetbb.or(bitsetab); + BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); + bitsetbbAnd.and(bitsetab); + equal(bbAnd, bitsetbbAnd); + equal(bb, bitsetbb); + } + + @Test + public void testAndResultAppend() { + System.out.println("testing AndResultAppend"); + EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.set(35); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(35); + bitmap2.set(130); + + EWAHCompressedBitmap32 resultBitmap = bitmap1.and(bitmap2); + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + } + + /** + * Test cardinality. 
+ */ + @Test + public void testCardinality() { + System.out.println("testing EWAH cardinality"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(Integer.MAX_VALUE - 32); + // System.out.format("Total Items %d\n", bitmap.cardinality()); + Assert.assertTrue(bitmap.cardinality() == 1); + } + + /** + * Test clear function + */ + @Test + public void testClear() { + System.out.println("testing Clear"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(5); + bitmap.clear(); + bitmap.set(7); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.toList().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(7 == bitmap.toList().get(0)); + Assert.assertTrue(7 == bitmap.toArray()[0]); + bitmap.clear(); + bitmap.set(5000); + Assert.assertTrue(1 == bitmap.cardinality()); + Assert.assertTrue(1 == bitmap.toList().size()); + Assert.assertTrue(1 == bitmap.toArray().length); + Assert.assertTrue(5000 == bitmap.toList().get(0)); + bitmap.set(5001); + bitmap.set(5005); + bitmap.set(5100); + bitmap.set(5500); + bitmap.clear(); + bitmap.set(5); + bitmap.set(7); + bitmap.set(1000); + bitmap.set(1001); + Assert.assertTrue(4 == bitmap.cardinality()); + List positions = bitmap.toList(); + Assert.assertTrue(4 == positions.size()); + Assert.assertTrue(5 == positions.get(0)); + Assert.assertTrue(7 == positions.get(1)); + Assert.assertTrue(1000 == positions.get(2)); + Assert.assertTrue(1001 == positions.get(3)); + } + + /** + * Test ewah compressed bitmap. 
+ */ + @Test + public void testEWAHCompressedBitmap() { + System.out.println("testing EWAH"); + int zero = 0; + int specialval = 1 | (1 << 4) | (1 << 31); + int notzero = ~zero; + EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); + myarray1.addWord(zero); + myarray1.addWord(zero); + myarray1.addWord(zero); + myarray1.addWord(specialval); + myarray1.addWord(specialval); + myarray1.addWord(notzero); + myarray1.addWord(zero); + Assert.assertEquals(myarray1.toList().size(), 6 + 32); + EWAHCompressedBitmap32 myarray2 = new EWAHCompressedBitmap32(); + myarray2.addWord(zero); + myarray2.addWord(specialval); + myarray2.addWord(specialval); + myarray2.addWord(notzero); + myarray2.addWord(zero); + myarray2.addWord(zero); + myarray2.addWord(zero); + Assert.assertEquals(myarray2.toList().size(), 6 + 32); + List data1 = myarray1.toList(); + List data2 = myarray2.toList(); + ArrayList logicalor = new ArrayList(); + { + HashSet tmp = new HashSet(); + tmp.addAll(data1); + tmp.addAll(data2); + logicalor.addAll(tmp); + } + Collections.sort(logicalor); + ArrayList logicaland = new ArrayList(); + logicaland.addAll(data1); + logicaland.retainAll(data2); + Collections.sort(logicaland); + EWAHCompressedBitmap32 arrayand = myarray1.and(myarray2); + Assert.assertTrue(arrayand.toList().equals(logicaland)); + EWAHCompressedBitmap32 arrayor = myarray1.or(myarray2); + Assert.assertTrue(arrayor.toList().equals(logicalor)); + EWAHCompressedBitmap32 arrayandbis = myarray2.and(myarray1); + Assert.assertTrue(arrayandbis.toList().equals(logicaland)); + EWAHCompressedBitmap32 arrayorbis = myarray2.or(myarray1); + Assert.assertTrue(arrayorbis.toList().equals(logicalor)); + EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); + for (Integer i : myarray1.toList()) { + x.set(i); + } + Assert.assertTrue(x.toList().equals( + myarray1.toList())); + x = new EWAHCompressedBitmap32(); + for (Integer i : myarray2.toList()) { + x.set(i); + } + Assert.assertTrue(x.toList().equals( + 
myarray2.toList())); + x = new EWAHCompressedBitmap32(); + for (Iterator k = myarray1.iterator(); k.hasNext(); ) { + x.set(extracted(k)); + } + Assert.assertTrue(x.toList().equals( + myarray1.toList())); + x = new EWAHCompressedBitmap32(); + for (Iterator k = myarray2.iterator(); k.hasNext(); ) { + x.set(extracted(k)); + } + Assert.assertTrue(x.toList().equals( + myarray2.toList())); + } + + /** + * Test externalization. + * + * @throws IOException Signals that an I/O exception has occurred. + */ + @Test + public void testExternalization() throws Exception { + System.out.println("testing EWAH externalization"); + EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); + int[] val = {5, 4400, 44600, 55400, 1000000}; + for (int aVal : val) { + ewcb.set(aVal); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oo = new ObjectOutputStream(bos); + ewcb.writeExternal(oo); + oo.close(); + ewcb = new EWAHCompressedBitmap32(); + ByteArrayInputStream bis = new ByteArrayInputStream( + bos.toByteArray()); + ewcb.readExternal(new ObjectInputStream(bis)); + List result = ewcb.toList(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertTrue(result.get(k) == val[k]); + } + } + + @Test + public void testExtremeRange() { + System.out.println("testing EWAH at its extreme range"); + EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); + int N = 1024; + for (int i = 0; i < N; ++i) { + myarray1.set(Integer.MAX_VALUE - 32 - N + i); + Assert.assertTrue(myarray1.cardinality() == i + 1); + int[] val = myarray1.toArray(); + Assert.assertTrue(val[0] == Integer.MAX_VALUE - 32 - N); + } + } + + /** + * Test the intersects method + */ + @Test + public void testIntersectsMethod() { + System.out.println("testing Intersets Bug"); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(1); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(1); + 
bitmap2.set(11); + bitmap2.set(111); + bitmap2.set(1111111); + bitmap2.set(11111111); + Assert.assertTrue(bitmap.intersects(bitmap2)); + Assert.assertTrue(bitmap2.intersects(bitmap)); + + EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(101); + EWAHCompressedBitmap32 bitmap4 = new EWAHCompressedBitmap32(); + for (int i = 0; i < 100; i++) { + bitmap4.set(i); + } + Assert.assertFalse(bitmap3.intersects(bitmap4)); + Assert.assertFalse(bitmap4.intersects(bitmap3)); + + EWAHCompressedBitmap32 bitmap5 = new EWAHCompressedBitmap32(); + bitmap5.set(0); + bitmap5.set(10); + bitmap5.set(20); + EWAHCompressedBitmap32 bitmap6 = new EWAHCompressedBitmap32(); + bitmap6.set(1); + bitmap6.set(11); + bitmap6.set(21); + bitmap6.set(1111111); + bitmap6.set(11111111); + Assert.assertFalse(bitmap5.intersects(bitmap6)); + Assert.assertFalse(bitmap6.intersects(bitmap5)); + + bitmap5.set(21); + Assert.assertTrue(bitmap5.intersects(bitmap6)); + Assert.assertTrue(bitmap6.intersects(bitmap5)); + + EWAHCompressedBitmap32 bitmap7 = new EWAHCompressedBitmap32(); + bitmap7.set(1); + bitmap7.set(10); + bitmap7.set(20); + bitmap7.set(1111111); + bitmap7.set(11111111); + EWAHCompressedBitmap32 bitmap8 = new EWAHCompressedBitmap32(); + for (int i = 0; i < 1000; i++) { + if (i != 1 && i != 10 && i != 20) { + bitmap8.set(i); + } + } + Assert.assertFalse(bitmap7.intersects(bitmap8)); + Assert.assertFalse(bitmap8.intersects(bitmap7)); + } + + /** + * as per renaud.delbru, Feb 12, 2009 this might throw an error out of + * bound exception. + */ + @Test + public void testLargeEWAHCompressedBitmap() { + System.out.println("testing EWAH over a large array"); + EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32(); + int N = 11000000; + for (int i = 0; i < N; ++i) { + myarray1.set(i); + } + Assert.assertTrue(myarray1.sizeInBits() == N); + } + + /** + * Test massive and. 
+ */ + @Test + public void testMassiveAnd() { + System.out.println("testing massive logical and"); + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[1024]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap32 answer = ewah[0]; + for (int k = 1; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + // result should be empty + if (answer.toList().size() != 0) + System.out.println(answer.toDebugString()); + Assert.assertTrue(answer.toList().size() == 0); + Assert.assertTrue(EWAHCompressedBitmap32.and(ewah) + .toList().size() == 0); + } + + /** + * Test massive and not. + */ + @Test + public void testMassiveAndNot() throws Exception { + System.out.println("testing massive and not"); + final int N = 1024; + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap32 answer = ewah[0]; + EWAHCompressedBitmap32 answer2 = ewah[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.andNot(ewah[k]); + EWAHCompressedBitmap32 copy = ewah[k].clone(); + copy.not(); + answer2.and(copy); + assertEqualsPositions(answer, answer2); + } + } + + @Test + public void testsetSizeInBits() { + System.out.println("testing setSizeInBits"); + for (int k = 0; k < 4096; ++k) { + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + ewah.setSizeInBits(k, false); + Assert.assertEquals(ewah.sizeInBits(), k); + Assert.assertEquals(ewah.cardinality(), 0); + EWAHCompressedBitmap32 ewah2 = new EWAHCompressedBitmap32(); + ewah2.setSizeInBits(k, false); + Assert.assertEquals(ewah2.sizeInBits(), k); + Assert.assertEquals(ewah2.cardinality(), 0); + EWAHCompressedBitmap32 ewah3 = new EWAHCompressedBitmap32(); + for (int i = 0; i < k; ++i) { + 
ewah3.set(i); + } + Assert.assertEquals(ewah3.sizeInBits(), k); + Assert.assertEquals(ewah3.cardinality(), k); + EWAHCompressedBitmap32 ewah4 = new EWAHCompressedBitmap32(); + ewah4.setSizeInBits(k, true); + Assert.assertEquals(ewah4.sizeInBits(), k); + Assert.assertEquals(ewah4.cardinality(), k); + } + } + + /** + * Test massive or. + */ + @Test + public void testMassiveOr() { + System.out + .println("testing massive logical or (can take a couple of minutes)"); + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 0; k < N; ++k) + assertEqualsPositions(bset[k], ewah[k]); + EWAHCompressedBitmap32 answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + EWAHCompressedBitmap32 tmp = answer.or(ewah[k]); + bitsetanswer.or(bset[k]); + answer = tmp; + assertEqualsPositions(bitsetanswer, answer); + } + assertEqualsPositions(bitsetanswer, answer); + assertEqualsPositions(bitsetanswer, + EWAHCompressedBitmap32.or(ewah)); + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer + .toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + } + + /** + * Test massive xor. 
+ */ + @Test + public void testMassiveXOR() { + System.out + .println("testing massive xor (can take a couple of minutes)"); + final int N = 16; + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + BitSet[] bset = new BitSet[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < bset.length; ++k) + bset[k] = new BitSet(); + for (int k = 0; k < 30000; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + bset[(k + 2 * k * k) % ewah.length].set(k); + } + EWAHCompressedBitmap32 answer = ewah[0]; + BitSet bitsetanswer = bset[0]; + for (int k = 1; k < ewah.length; ++k) { + answer = answer.xor(ewah[k]); + bitsetanswer.xor(bset[k]); + assertEqualsPositions(bitsetanswer, answer); + } + int k = 0; + for (int j : answer) { + if (k != j) + System.out.println(answer.toDebugString()); + Assert.assertEquals(k, j); + k += 1; + } + } + + @Test + public void testMultiAnd() { + System.out.println("testing MultiAnd"); + // test bitmap3 has a literal word while bitmap1/2 have a run of + // 1 + EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.addStreamOfEmptyWords(true, 1000); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.addStreamOfEmptyWords(true, 2000); + EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // equal + bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.set(35); + bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(35); + bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(35); + + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // same number of words for each + bitmap3.set(63); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // one word bigger + bitmap3.set(64); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // two words bigger + bitmap3.set(130); + assertAndEquals(bitmap1, bitmap2, bitmap3); + + // test that result can 
still be appended to + EWAHCompressedBitmap32 resultBitmap = EWAHCompressedBitmap32 + .and(bitmap1, bitmap2, bitmap3); + resultBitmap.set(131); + + bitmap1.set(131); + assertEquals(bitmap1, resultBitmap); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; + System.arraycopy(ewah, 0, shortewah, 0, k); + assertAndEquals(shortewah); + } + } + } + + @Test + public void testMultiOr() { + System.out.println("testing MultiOr"); + // test bitmap3 has a literal word while bitmap1/2 have a run of + // 0 + EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); + bitmap1.set(1000); + EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); + bitmap2.set(2000); + EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); + bitmap3.set(500); + bitmap3.set(502); + bitmap3.set(504); + + EWAHCompressedBitmap32 expected = bitmap1.or(bitmap2).or( + bitmap3); + + assertEquals(expected, + EWAHCompressedBitmap32.or(bitmap1, bitmap2, bitmap3)); + + final int N = 128; + for (int howmany = 512; howmany <= 10000; howmany *= 2) { + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < ewah.length; ++k) + ewah[k] = new EWAHCompressedBitmap32(); + for (int k = 0; k < howmany; ++k) { + ewah[(k + 2 * k * k) % ewah.length].set(k); + } + for (int k = 1; k <= ewah.length; ++k) { + EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; + System.arraycopy(ewah, 0, shortewah, 0, k); + assertOrEquals(shortewah); + } + } + + } + + /** + * Test not. 
(Based on an idea by Ciaran Jessup) + */ + @Test + public void testNot() { + System.out.println("testing not"); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + for (int i = 0; i <= 184; ++i) { + ewah.set(i); + } + Assert.assertEquals(ewah.cardinality(), 185); + ewah.not(); + Assert.assertEquals(ewah.cardinality(), 0); + } + + @Test + public void testOrCardinality() { + System.out.println("testing Or Cardinality"); + for (int N = 0; N < 1024; ++N) { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + for (int i = 0; i < N; i++) { + bitmap.set(i); + } + bitmap.set(1025); + bitmap.set(1026); + Assert.assertEquals(N + 2, bitmap.cardinality()); + EWAHCompressedBitmap32 orbitmap = bitmap.or(bitmap); + assertEquals(orbitmap, bitmap); + Assert.assertEquals(N + 2, orbitmap.cardinality()); + if (N + 2 != bitmap + .orCardinality(new EWAHCompressedBitmap32())) { + System.out.println("N = " + N); + System.out.println(bitmap.toDebugString()); + System.out.println("cardinality = " + + bitmap.cardinality()); + System.out + .println("orCardinality = " + + bitmap.orCardinality(new EWAHCompressedBitmap32())); + } + + Assert.assertEquals(N + 2, bitmap + .orCardinality(new EWAHCompressedBitmap32())); + } + } + + /** + * Test sets and gets. 
+ */ + @Test + public void testSetGet() { + System.out.println("testing EWAH set/get"); + EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32(); + int[] val = {5, 4400, 44600, 55400, 1000000}; + for (int aVal : val) { + ewcb.set(aVal); + } + List result = ewcb.toList(); + Assert.assertTrue(val.length == result.size()); + for (int k = 0; k < val.length; ++k) { + Assert.assertEquals(result.get(k).intValue(), val[k]); + } + } + + @Test + public void testHashCode() throws Exception { + System.out.println("testing hashCode"); + EWAHCompressedBitmap32 ewcb = EWAHCompressedBitmap32.bitmapOf( + 50, 70).and(EWAHCompressedBitmap32.bitmapOf(50, 1000)); + EWAHCompressedBitmap32 expected = EWAHCompressedBitmap32.bitmapOf(50); + expected.setSizeInBits(1000, false); + Assert.assertEquals(expected, ewcb); + Assert.assertEquals(expected + .hashCode(), ewcb.hashCode()); + ewcb.addWord(~0); + EWAHCompressedBitmap32 ewcb2 = ewcb.clone(); + ewcb2.addWord(0); + Assert.assertEquals(ewcb + .hashCode(), ewcb2.hashCode()); + + } + + @Test + public void testSetSizeInBits() { + System.out.println("testing SetSizeInBits"); + testSetSizeInBits(130, 131); + testSetSizeInBits(63, 64); + testSetSizeInBits(64, 65); + testSetSizeInBits(64, 128); + testSetSizeInBits(35, 131); + testSetSizeInBits(130, 400); + testSetSizeInBits(130, 191); + testSetSizeInBits(130, 192); + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(31); + bitmap.setSizeInBits(130, false); + bitmap.set(131); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(31); + jdkBitmap.set(131); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Test with parameters. + * + * @throws IOException Signals that an I/O exception has occurred. + */ + @Test + public void testWithParameters() throws IOException { + System.out + .println("These tests can run for several minutes. 
Please be patient."); + for (int k = 2; k < 1 << 24; k *= 8) + shouldSetBits(k); + PolizziTest(64); + PolizziTest(128); + PolizziTest(256); + System.out.println("Your code is probably ok."); + } + + /** + * Pseudo-non-deterministic test inspired by S.J.vanSchaik. (Yes, + * non-deterministic tests are bad, but the test is actually + * deterministic.) + */ + @Test + public void vanSchaikTest() { + System.out + .println("testing vanSchaikTest (this takes some time)"); + final int totalNumBits = 32768; + final double odds = 0.9; + Random rand = new Random(323232323); + for (int t = 0; t < 100; t++) { + int numBitsSet = 0; + EWAHCompressedBitmap32 cBitMap = new EWAHCompressedBitmap32(); + for (int i = 0; i < totalNumBits; i++) { + if (rand.nextDouble() < odds) { + cBitMap.set(i); + numBitsSet++; + } + } + Assert.assertEquals(cBitMap.cardinality(), numBitsSet); + } + + } + + /** + * Function used in a test inspired by Federico Fissore. + * + * @param size the number of set bits + * @param seed the random seed + * @return the pseudo-random array int[] + */ + public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { + Random random = new Random(seed); + // build raw int array + int[] bits = new int[size]; + for (int i = 0; i < bits.length; i++) { + bits[i] = random.nextInt(TEST_BS_SIZE); + } + // might generate duplicates + Arrays.sort(bits); + // first count how many distinct values + int counter = 0; + int oldx = -1; + for (int x : bits) { + if (x != oldx) + ++counter; + oldx = x; + } + // then construct new array + int[] answer = new int[counter]; + counter = 0; + oldx = -1; + for (int x : bits) { + if (x != oldx) { + answer[counter] = x; + ++counter; + } + oldx = x; + } + return answer; + } + + /** + * Test inspired by Bilal Tayara + */ + @Test + public void TayaraTest() { + System.out.println("Tayara test"); + for (int offset = 64; offset < (1 << 30); offset *= 2) { + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b = 
new EWAHCompressedBitmap32(); + for (int k = 0; k < 64; ++k) { + a.set(offset + k); + b.set(offset + k); + } + if (!a.and(b).equals(a)) + throw new RuntimeException("bug"); + if (!a.or(b).equals(a)) + throw new RuntimeException("bug"); + } + } + + @Test + public void TestCloneEwahCompressedBitArray() throws Exception { + System.out.println("testing EWAH clone"); + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + a.set(410018); + a.set(410019); + a.set(410020); + a.set(410021); + a.set(410022); + a.set(410023); + + EWAHCompressedBitmap32 b; + + b = a.clone(); + + a.setSizeInBits(487123, false); + b.setSizeInBits(487123, false); + + Assert.assertTrue(a.equals(b)); + } + + /** + * a non-deterministic test proposed by Marc Polizzi. + * + * @param maxlength the maximum uncompressed size of the bitmap + */ + public static void PolizziTest(int maxlength) { + System.out.println("Polizzi test with max length = " + + maxlength); + for (int k = 0; k < 10000; ++k) { + final Random rnd = new Random(); + final EWAHCompressedBitmap32 ewahBitmap1 = new EWAHCompressedBitmap32(); + final BitSet jdkBitmap1 = new BitSet(); + final EWAHCompressedBitmap32 ewahBitmap2 = new EWAHCompressedBitmap32(); + final BitSet jdkBitmap2 = new BitSet(); + final EWAHCompressedBitmap32 ewahBitmap3 = new EWAHCompressedBitmap32(); + final BitSet jdkBitmap3 = new BitSet(); + final int len = rnd.nextInt(maxlength); + for (int pos = 0; pos < len; pos++) { // random *** + // number of bits + // set *** + if (rnd.nextInt(7) == 0) { // random *** + // increasing *** + // values + ewahBitmap1.set(pos); + jdkBitmap1.set(pos); + } + if (rnd.nextInt(11) == 0) { // random *** + // increasing *** + // values + ewahBitmap2.set(pos); + jdkBitmap2.set(pos); + } + if (rnd.nextInt(7) == 0) { // random *** + // increasing *** + // values + ewahBitmap3.set(pos); + jdkBitmap3.set(pos); + } + } + assertEquals(jdkBitmap1, ewahBitmap1); + assertEquals(jdkBitmap2, ewahBitmap2); + assertEquals(jdkBitmap3, ewahBitmap3); 
+ // XOR + { + final EWAHCompressedBitmap32 xorEwahBitmap = ewahBitmap1 + .xor(ewahBitmap2); + final BitSet xorJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + xorJdkBitmap.xor(jdkBitmap2); + assertEquals(xorJdkBitmap, xorEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap1 + .and(ewahBitmap2); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + } + // AND + { + final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap2 + .and(ewahBitmap1); + final BitSet andJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andJdkBitmap.and(jdkBitmap2); + assertEquals(andJdkBitmap, andEwahBitmap); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap32.and(ewahBitmap1, + ewahBitmap2) + ); + } + // MULTI AND + { + final BitSet andJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andJdkBitmap.and(jdkBitmap2); + andJdkBitmap.and(jdkBitmap3); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap32.and(ewahBitmap1, + ewahBitmap2, ewahBitmap3) + ); + assertEquals(andJdkBitmap, + EWAHCompressedBitmap32.and(ewahBitmap3, + ewahBitmap2, ewahBitmap1) + ); + Assert.assertEquals(andJdkBitmap.cardinality(), + EWAHCompressedBitmap32.andCardinality( + ewahBitmap1, ewahBitmap2, + ewahBitmap3) + ); + } + // AND NOT + { + final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap1 + .andNot(ewahBitmap2); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + andNotJdkBitmap.andNot(jdkBitmap2); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // AND NOT + { + final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap2 + .andNot(ewahBitmap1); + final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2 + .clone(); + andNotJdkBitmap.andNot(jdkBitmap1); + assertEquals(andNotJdkBitmap, andNotEwahBitmap); + } + // OR + { + final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap1 + .or(ewahBitmap2); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + orJdkBitmap.or(jdkBitmap2); + 
assertEquals(orJdkBitmap, orEwahBitmap); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap32.or(ewahBitmap1, + ewahBitmap2) + ); + Assert.assertEquals(orEwahBitmap.cardinality(), + ewahBitmap1.orCardinality(ewahBitmap2)); + } + // OR + { + final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap2 + .or(ewahBitmap1); + final BitSet orJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + orJdkBitmap.or(jdkBitmap2); + assertEquals(orJdkBitmap, orEwahBitmap); + } + // MULTI OR + { + final BitSet orJdkBitmap = (BitSet) jdkBitmap1 + .clone(); + orJdkBitmap.or(jdkBitmap2); + orJdkBitmap.or(jdkBitmap3); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap32.or(ewahBitmap1, + ewahBitmap2, ewahBitmap3) + ); + assertEquals(orJdkBitmap, + EWAHCompressedBitmap32.or(ewahBitmap3, + ewahBitmap2, ewahBitmap1) + ); + Assert.assertEquals(orJdkBitmap.cardinality(), + EWAHCompressedBitmap32.orCardinality( + ewahBitmap1, ewahBitmap2, + ewahBitmap3) + ); + } + } + } + + /** + * Pseudo-non-deterministic test inspired by Federico Fissore. + * + * @param length the number of set bits in a bitmap + */ + public static void shouldSetBits(int length) { + System.out.println("testing shouldSetBits " + length); + int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, + 434222); + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + System.out.println(" ... setting " + bitsToSet.length + + " values"); + for (int i : bitsToSet) { + ewah.set(i); + } + System.out.println(" ... verifying " + bitsToSet.length + + " values"); + equal(ewah.iterator(), bitsToSet); + System.out.println(" ... 
checking cardinality"); + Assert.assertEquals(bitsToSet.length, ewah.cardinality()); + } + + @Test + public void testSizeInBits1() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.setSizeInBits(1, false); + bitmap.not(); + Assert.assertEquals(1, bitmap.cardinality()); + } + + @Test + public void testHasNextSafe() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertTrue(it.hasNext()); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testHasNextSafe2() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.set(0); + IntIterator it = bitmap.intIterator(); + Assert.assertEquals(0, it.next()); + } + + @Test + public void testInfiniteLoop() { + System.out.println("Testing for an infinite loop"); + EWAHCompressedBitmap32 b1 = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b2 = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 b3 = new EWAHCompressedBitmap32(); + b3.setSizeInBits(5, false); + b1.set(2); + b2.set(4); + EWAHCompressedBitmap32.and(b1, b2, b3); + EWAHCompressedBitmap32.or(b1, b2, b3); + + } + + @Test + public void testSizeInBits2() { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.setSizeInBits(1, true); + bitmap.not(); + Assert.assertEquals(0, bitmap.cardinality()); + } + + private static void assertAndEquals(EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.and(bitmaps[i]); + } + Assert.assertTrue(expected.equals(EWAHCompressedBitmap32 + .and(bitmaps))); + } + + private static void assertEquals(EWAHCompressedBitmap32 expected, + EWAHCompressedBitmap32 actual) { + Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); + assertEqualsPositions(expected, actual); + } + + private static void assertOrEquals(EWAHCompressedBitmap32... 
bitmaps) { + EWAHCompressedBitmap32 expected = bitmaps[0]; + for (int i = 1; i < bitmaps.length; i++) { + expected = expected.or(bitmaps[i]); + } + assertEquals(expected, EWAHCompressedBitmap32.or(bitmaps)); + } + + /** + * Extracted. + * + * @param bits the bits + * @return the integer + */ + private static Integer extracted(final Iterator bits) { + return bits.next(); + } + + private static void testSetSizeInBits(int size, int nextBit) { + EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); + bitmap.setSizeInBits(size, false); + bitmap.set(nextBit); + BitSet jdkBitmap = new BitSet(); + jdkBitmap.set(nextBit); + assertEquals(jdkBitmap, bitmap); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, + * part of a test contributed by Marc Polizzi + * + * @param jdkBitmap the uncompressed bitmap + * @param ewahBitmap the compressed bitmap + */ + static void assertCardinality(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + final int c1 = jdkBitmap.cardinality(); + final int c2 = ewahBitmap.cardinality(); + Assert.assertEquals(c1, c2); + } + + /** + * Assess equality between an uncompressed bitmap and a compressed one, + * part of a test contributed by Marc Polizzi. 
+ * + * @param jdkBitmap the uncompressed bitmap + * @param ewahBitmap the compressed bitmap + */ + static void assertEquals(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + assertEqualsIterator(jdkBitmap, ewahBitmap); + assertEqualsPositions(jdkBitmap, ewahBitmap); + assertCardinality(jdkBitmap, ewahBitmap); + } + + static void assertEquals(int[] v, List p) { + assertEquals(p, v); + } + + static void assertEquals(List p, int[] v) { + if (v.length != p.size()) + throw new RuntimeException("Different lengths " + + v.length + " " + p.size()); + for (int k = 0; k < v.length; ++k) + if (v[k] != p.get(k)) + throw new RuntimeException("expected equal at " + + k + " " + v[k] + " " + p.get(k)); + } + + // + + /** + * Assess equality between an uncompressed bitmap and a compressed one, + * part of a test contributed by Marc Polizzi + * + * @param jdkBitmap the jdk bitmap + * @param ewahBitmap the ewah bitmap + */ + static void assertEqualsIterator(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + final ArrayList positions = new ArrayList(); + final Iterator bits = ewahBitmap.iterator(); + while (bits.hasNext()) { + final int bit = extracted(bits); + Assert.assertTrue(jdkBitmap.get(bit)); + positions.add(bit); + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "iterator: bitset got different bits"); + } + } + } + + // part of a test contributed by Marc Polizzi + + /** + * Assert equals positions. 
+ * + * @param jdkBitmap the jdk bitmap + * @param ewahBitmap the ewah bitmap + */ + static void assertEqualsPositions(BitSet jdkBitmap, + EWAHCompressedBitmap32 ewahBitmap) { + final List positions = ewahBitmap.toList(); + for (int position : positions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + if (!positions.contains(new Integer(pos))) { + throw new RuntimeException( + "positions: bitset got different bits"); + } + } + // we check again + final int[] fastpositions = ewahBitmap.toArray(); + for (int position : fastpositions) { + if (!jdkBitmap.get(position)) { + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap + .nextSetBit(pos + 1)) { + int index = Arrays.binarySearch(fastpositions, pos); + if (index < 0) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + if (fastpositions[index] != pos) + throw new RuntimeException( + "positions: bitset got different bits with toArray"); + } + } + + /** + * Assert equals positions. 
+ * + * @param ewahBitmap1 the ewah bitmap1 + * @param ewahBitmap2 the ewah bitmap2 + */ + static void assertEqualsPositions(EWAHCompressedBitmap32 ewahBitmap1, + EWAHCompressedBitmap32 ewahBitmap2) { + final List positions1 = ewahBitmap1.toList(); + final List positions2 = ewahBitmap2.toList(); + if (!positions1.equals(positions2)) + throw new RuntimeException( + "positions: alternative got different bits (two bitmaps)"); + // + final int[] fastpositions1 = ewahBitmap1.toArray(); + assertEquals(fastpositions1, positions1); + final int[] fastpositions2 = ewahBitmap2.toArray(); + assertEquals(fastpositions2, positions2); + if (!Arrays.equals(fastpositions1, fastpositions2)) + throw new RuntimeException( + "positions: alternative got different bits with toArray but not with toList (two bitmaps)"); + } + + /** + * Convenience function to assess equality between a compressed bitset + * and an uncompressed bitset + * + * @param x the compressed bitset/bitmap + * @param y the uncompressed bitset/bitmap + */ + static void equal(EWAHCompressedBitmap32 x, BitSet y) { + Assert.assertEquals(x.cardinality(), y.cardinality()); + for (int i : x.toList()) + Assert.assertTrue(y.get(i)); + } + + + @Test + public void insertTest() { + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + for(int k = 0; k < 1<<20; ++k) + ewah.addLiteralWord(0xF0); + Assert.assertEquals(ewah.cardinality(), 4 * (1<<20)); + } + + + /** + * Convenience function to assess equality between an array and an + * iterator over Integers + * + * @param i the iterator + * @param array the array + */ + static void equal(Iterator i, int[] array) { + int cursor = 0; + while (i.hasNext()) { + int x = extracted(i); + int y = array[cursor++]; + Assert.assertEquals(x, y); + } + } + + /** + * The Constant MEGA: a large integer. + */ + private static final int MEGA = 8 * 1024 * 1024; + + /** + * The Constant TEST_BS_SIZE: used to represent the size of a large + * bitmap. 
+ */ + private static final int TEST_BS_SIZE = 8 * MEGA; +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32Test.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,161 @@ +package com.googlecode.javaewah32; + +import org.junit.Test; + +import com.googlecode.javaewah.IntIterator; + +import static org.junit.Assert.*; + +import java.util.Iterator; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Tests for utility class. 
+ */ +@SuppressWarnings("javadoc") +public class IntIteratorOverIteratingRLW32Test { + @Test + public void iteratorAggregation() { + EWAHCompressedBitmap32 e1 = EWAHCompressedBitmap32.bitmapOf(0, 2, 1000, 10001); + EWAHCompressedBitmap32 e2 = new EWAHCompressedBitmap32(); + for (int k = 64; k < 450; ++k) + e2.set(k); + EWAHCompressedBitmap32 e3 = new EWAHCompressedBitmap32(); + for (int k = 64; k < 450; ++k) + e2.set(400 * k); + assertEquals(IteratorUtil32.materialize( + IteratorAggregation32.bufferedand(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation32.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize( + IteratorAggregation32.bufferedor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation32.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize( + IteratorAggregation32.bufferedxor(e1.getIteratingRLW(), e2.getIteratingRLW(), e3.getIteratingRLW())), + FastAggregation32.bufferedxor(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize(IteratorAggregation32.bufferedand(500, e1.getIteratingRLW(), + e2.getIteratingRLW(), e3.getIteratingRLW())), FastAggregation32.bufferedand(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize(IteratorAggregation32.bufferedor(500, e1.getIteratingRLW(), + e2.getIteratingRLW(), e3.getIteratingRLW())), FastAggregation32.bufferedor(1024, e1, e2, e3)); + assertEquals(IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(500, e1.getIteratingRLW(), + e2.getIteratingRLW(), e3.getIteratingRLW())), FastAggregation32.bufferedxor(1024, e1, e2, e3)); + } + @Test + // had problems with bitmaps beginning with two consecutive clean runs + public void testConsecClean() { + System.out.println("testing int iteration, 2 consec clean runs starting with zeros"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 32; i < 64; ++i) + e.set(i); + IntIterator ii = 
IteratorUtil32.toSetBitsIntIterator(e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(32, ctr); + Iterator iii = IteratorUtil32.toSetBitsIterator(e.getIteratingRLW()); + assertTrue(iii.hasNext()); + ctr = 0; + while (iii.hasNext()) { + ++ctr; + iii.next(); + } + assertEquals(32, ctr); + + } + + + @Test + public void testMaterialize() { + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 64; i < 128; ++i) + e.set(333 * i); + assertEquals(e.cardinality(), IteratorUtil32.cardinality(e.getIteratingRLW())); + EWAHCompressedBitmap32 newe = new EWAHCompressedBitmap32(); + IteratorUtil32.materialize(e.getIteratingRLW(), newe); + assertEquals(e,newe); + newe.clear(); + IteratorUtil32.materialize(e.getIteratingRLW(), newe,4096); + assertEquals(e,newe); + } + + @Test + public void testConsecCleanStartOnes() { + System.out + .println("testing int iteration, 2 consec clean runs starting with ones"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 0; i < 2 * 32; ++i) + e.set(i); + for (int i = 4 * 32; i < 5 * 32; ++i) + e.set(i); + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32(e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 32, ctr); + } + + @Test + public void testStartDirty() { + System.out.println("testing int iteration, no initial runs"); + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + for (int i = 1; i < 2 * 32; ++i) + e.set(i); + for (int i = 4 * 32; i < 5 * 32; ++i) + e.set(i); + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32(e.getIteratingRLW()); + assertTrue(ii.hasNext()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + assertEquals(3 * 32 - 1, ctr); + } + + @Test + public void testEmpty() { + System.out.println("testing int iteration over empty bitmap"); + 
EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32(e.getIteratingRLW()); + assertFalse(ii.hasNext()); + } + + @Test + public void testRandomish() { + EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); + + int upperLimit = 100000; + for (int i = 0; i < upperLimit; ++i) { + double probabilityOfOne = i / (double) (upperLimit / 2); + if (probabilityOfOne > 1.0) + probabilityOfOne = 1.0; + if (Math.random() < probabilityOfOne) { + e.set(i); + } + } + + IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32(e.getIteratingRLW()); + int ctr = 0; + while (ii.hasNext()) { + ++ctr; + ii.next(); + } + + assertEquals(e.cardinality(), ctr); + System.out.println("checking int iteration over a var density bitset of size " + e.cardinality()); + + } + +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLWTest32.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,119 +0,0 @@ -package com.googlecode.javaewah32; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import org.junit.Test; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * Tests for utility class. Sketchy for now. 
- * - */ -@SuppressWarnings("javadoc") -public class IntIteratorOverIteratingRLWTest32 { - - @Test - // had problems with bitmaps beginning with two consecutive clean runs - public void testConsecClean() { - System.out - .println("testing int iteration, 2 consec clean runs starting with zeros"); - EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); - for (int i = 64; i < 128; ++i) - e.set(i); - IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( - e.getIteratingRLW()); - assertTrue(ii.hasNext()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - assertEquals(64, ctr); - } - - @Test - public void testConsecCleanStartOnes() { - System.out - .println("testing int iteration, 2 consec clean runs starting with ones"); - EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); - for (int i = 0; i < 2 * 64; ++i) - e.set(i); - for (int i = 4 * 64; i < 5 * 64; ++i) - e.set(i); - - IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( - e.getIteratingRLW()); - assertTrue(ii.hasNext()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - assertEquals(3 * 64, ctr); - } - - @Test - public void testStartDirty() { - System.out.println("testing int iteration, no initial runs"); - EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); - for (int i = 1; i < 2 * 64; ++i) - e.set(i); - for (int i = 4 * 64; i < 5 * 64; ++i) - e.set(i); - - IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( - e.getIteratingRLW()); - assertTrue(ii.hasNext()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - assertEquals(3 * 64 - 1, ctr); - } - - @Test - public void testEmpty() { - System.out.println("testing int iteration over empty bitmap"); - EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); - - IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( - e.getIteratingRLW()); - assertFalse(ii.hasNext()); - } - - @Test - public void testRandomish() { - EWAHCompressedBitmap32 
e = new EWAHCompressedBitmap32(); - - int upperlimit = 100000; - for (int i = 0; i < upperlimit; ++i) { - double probabilityOfOne = i / (double) (upperlimit / 2); - if (probabilityOfOne > 1.0) - probabilityOfOne = 1.0; - if (Math.random() < probabilityOfOne) { - e.set(i); - } - } - - IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( - e.getIteratingRLW()); - int ctr = 0; - while (ii.hasNext()) { - ++ctr; - ii.next(); - } - - assertEquals(e.cardinality(), ctr); - System.out - .println("checking int iteration over a var density bitset of size " - + e.cardinality()); - - } - -} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IteratorAggregation32Test.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,198 @@ +package com.googlecode.javaewah32; + +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.maxSizeInBits; +import static org.junit.Assert.assertTrue; + +import java.util.Iterator; + +import org.junit.Test; + +import com.googlecode.javaewah.synth.ClusteredDataGenerator; + +/* + * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Tests specifically for iterators. 
+ */ +public class IteratorAggregation32Test { + + /** + * @param N number of bitmaps to generate in each set + * @param nbr parameter determining the size of the arrays (in a log + * scale) + * @return an iterator over sets of bitmaps + */ + public static Iterator getCollections( + final int N, final int nbr) { + final ClusteredDataGenerator cdg = new ClusteredDataGenerator( + 123); + return new Iterator() { + int sparsity = 1; + + @Override + public boolean hasNext() { + return this.sparsity < 5; + } + + @Override + public EWAHCompressedBitmap32[] next() { + int[][] data = new int[N][]; + int Max = (1 << (nbr + this.sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered( + 1 << nbr, Max); + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + this.sparsity += 3; + return ewah; + } + + @Override + public void remove() { + // unimplemented + } + + }; + + } + + /** + * + */ + @Test + public void testAnd() { + for (int N = 1; N < 10; ++N) { + System.out.println("testAnd N = " + N); + Iterator i = getCollections( + N, 3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32 + .and(x); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32 + .bufferedand(IteratorUtil32 + .toIterators(x))); + x1.setSizeInBits(maxSizeInBits(x), false); + x1.setSizeInBitsWithinLastWord(maxSizeInBits(x)); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + + } + + /** + * + */ + @Test + public void testOr() { + for (int N = 1; N < 10; ++N) { + System.out.println("testOr N = " + N); + Iterator i = getCollections( + N, 3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32 + .or(x); + EWAHCompressedBitmap32 x1 = 
IteratorUtil32 + .materialize(IteratorAggregation32 + .bufferedor(IteratorUtil32 + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + + /** + * + */ + @SuppressWarnings("deprecation") + @Test + public void testWideOr() { + for (int nbr = 3; nbr <= 24; nbr += 3) { + for (int N = 100; N < 1000; N += 100) { + System.out.println("testWideOr N = " + N); + Iterator i = getCollections( + N, 3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32 + .or(x); + EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + FastAggregation32 + .orToContainer( + container, x); + assertTrue(container.equals(tanswer)); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32 + .bufferedor(IteratorUtil32 + .toIterators(x))); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + } + } + + /** + * + */ + @Test + public void testXor() { + System.out.println("testXor "); + Iterator i = getCollections(2, 3); + while (i.hasNext()) { + EWAHCompressedBitmap32[] x = i.next(); + EWAHCompressedBitmap32 tanswer = x[0].xor(x[1]); + EWAHCompressedBitmap32 x1 = IteratorUtil32 + .materialize(IteratorAggregation32.bufferedxor( + x[0].getIteratingRLW(), + x[1].getIteratingRLW())); + assertTrue(x1.equals(tanswer)); + } + System.gc(); + } + + /** + * + */ + @Test + public void testMat() throws Exception { + System.out.println("testMat "); + EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(0,3); + EWAHCompressedBitmap32 n = IteratorUtil32.materialize(b.getIteratingRLW()); + assertTrue(n.sizeInBits() == 32); + n.setSizeInBitsWithinLastWord(b.sizeInBits()); + assertTrue(n.sizeInBits() == b.sizeInBits()); + assertTrue(n.equals(b)); + EWAHCompressedBitmap32 neg = IteratorUtil32.materialize(IteratorAggregation32.not(b.getIteratingRLW())); + neg.setSizeInBitsWithinLastWord(b.sizeInBits()); + EWAHCompressedBitmap32 x= b.clone(); + x.not(); + 
assertTrue(x.equals(neg)); + for(int k = 145; k<1024; ++k) + b.set(k); + n = IteratorUtil32.materialize(b.getIteratingRLW()); + assertTrue(n.sizeInBits()/64 * 64 == n.sizeInBits()); + n.setSizeInBitsWithinLastWord(b.sizeInBits()); + assertTrue(n.sizeInBits() == b.sizeInBits()); + assertTrue(n.equals(b)); + neg = IteratorUtil32.materialize(IteratorAggregation32.not(b.getIteratingRLW())); + neg.setSizeInBitsWithinLastWord(b.sizeInBits()); + x= b.clone(); + x.not(); + assertTrue(x.equals(neg)); + } + +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,145 +0,0 @@ -package com.googlecode.javaewah32; - -import static org.junit.Assert.*; -import java.util.Iterator; -import org.junit.Test; -import com.googlecode.javaewah.benchmark.ClusteredDataGenerator; - -/* - * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser - * Licensed under the Apache License, Version 2.0. - */ -/** - * Tests specifically for iterators. 
- * - */ -public class IteratorAggregationTest32 { - - /** - * @param N number of bitmaps to generate in each set - * @param nbr parameter determining the size of the arrays (in a log scale) - * @return an iterator over sets of bitmaps - */ - public static Iterator getCollections(final int N, final int nbr) { - final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); - return new Iterator() { - int sparsity = 1; - - @Override - public boolean hasNext() { - return this.sparsity < 5; - } - - @Override - public EWAHCompressedBitmap32[] next() { - int[][] data = new int[N][]; - int Max = (1 << (nbr + this.sparsity)); - for (int k = 0; k < N; ++k) - data[k] = cdg.generateClustered(1 << nbr, Max); - EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; - for (int k = 0; k < N; ++k) { - ewah[k] = new EWAHCompressedBitmap32(); - for (int x = 0; x < data[k].length; ++x) { - ewah[k].set(data[k][x]); - } - data[k] = null; - } - this.sparsity += 3; - return ewah; - } - - @Override - public void remove() { - // unimplemented - } - - }; - - } - - /** - * - */ - @Test - public void testAnd() { - for (int N = 1; N < 10; ++N) { - System.out.println("testAnd N = " + N); - Iterator i = getCollections(N,3); - while (i.hasNext()) { - EWAHCompressedBitmap32[] x = i.next(); - EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.and(x); - EWAHCompressedBitmap32 x1 = IteratorUtil32 - .materialize(IteratorAggregation32.bufferedand(IteratorUtil32 - .toIterators(x))); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - - } - - /** - * - */ - @Test - public void testOr() { - for (int N = 1; N < 10; ++N) { - System.out.println("testOr N = " + N); - Iterator i = getCollections(N,3); - while (i.hasNext()) { - EWAHCompressedBitmap32[] x = i.next(); - EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.or(x); - EWAHCompressedBitmap32 x1 = IteratorUtil32 - .materialize(IteratorAggregation32.bufferedor(IteratorUtil32 - .toIterators(x))); - 
assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - } - - - /** - * - */ - @SuppressWarnings("deprecation") - @Test - public void testWideOr() { - for (int nbr = 3; nbr <= 24; nbr += 3) { - for (int N = 100; N < 1000; N += 100) { - System.out.println("testWideOr N = " + N); - Iterator i = getCollections(N, 3); - while (i.hasNext()) { - EWAHCompressedBitmap32[] x = i.next(); - EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.or(x); - EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); - FastAggregation32.legacy_orWithContainer(container, x); - assertTrue(container.equals(tanswer)); - EWAHCompressedBitmap32 x1 = IteratorUtil32 - .materialize(IteratorAggregation32 - .bufferedor(IteratorUtil32.toIterators(x))); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - } - } - /** - * - */ - @Test - public void testXor() { - System.out.println("testXor "); - Iterator i = getCollections(2,3); - while (i.hasNext()) { - EWAHCompressedBitmap32[] x = i.next(); - EWAHCompressedBitmap32 tanswer = x[0].xor(x[1]); - EWAHCompressedBitmap32 x1 = IteratorUtil32 - .materialize(IteratorAggregation32.bufferedxor( - x[0].getIteratingRLW(), x[1].getIteratingRLW())); - assertTrue(x1.equals(tanswer)); - } - System.gc(); - } - -} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/MemoryMapTest.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/MemoryMapTest.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/MemoryMapTest.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/MemoryMapTest.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,47 @@ +package com.googlecode.javaewah32; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +import org.junit.Assert; +import org.junit.Test; + + +public class MemoryMapTest +{ + + @Test + public void basicTest() throws IOException, CloneNotSupportedException { + 
EWAHCompressedBitmap32 ewahBitmap = EWAHCompressedBitmap32.bitmapOf(0, 2, 55, + 64, 1 << 30); + EWAHCompressedBitmap32 newewahBitmap = ewahBitmap.clone(); + Assert.assertEquals(newewahBitmap, ewahBitmap); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ewahBitmap.serialize(new DataOutputStream(bos)); + ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray()); + EWAHCompressedBitmap32 mapped = new EWAHCompressedBitmap32(bb); + Assert.assertEquals(mapped, ewahBitmap); + EWAHCompressedBitmap32 newmapped; + newmapped = mapped.clone(); + Assert.assertEquals(newmapped, ewahBitmap); + } + + @Test + public void basicFileTest() throws IOException { + File tmpfile = File.createTempFile("roaring", "bin"); + tmpfile.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(tmpfile); + EWAHCompressedBitmap32 ewahBitmap = EWAHCompressedBitmap32.bitmapOf(0, 2, 55, + 64, 1 << 30); + ewahBitmap.serialize(new DataOutputStream(fos)); + long totalcount = fos.getChannel().position(); + fos.close(); + RandomAccessFile memoryMappedFile = new RandomAccessFile(tmpfile, "r"); + ByteBuffer bb = memoryMappedFile.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, totalcount); + EWAHCompressedBitmap32 mapped = new EWAHCompressedBitmap32(bb); + memoryMappedFile.close(); + Assert.assertEquals(mapped, ewahBitmap); + } +} diff -Nru libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/ThresholdFuncBitmap32Test.java libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/ThresholdFuncBitmap32Test.java --- libjavaewah-java-0.7.9/src/test/java/com/googlecode/javaewah32/ThresholdFuncBitmap32Test.java 1970-01-01 00:00:00.000000000 +0000 +++ libjavaewah-java-1.1.7/src/test/java/com/googlecode/javaewah32/ThresholdFuncBitmap32Test.java 2019-11-08 21:55:59.000000000 +0000 @@ -0,0 +1,68 @@ +package com.googlecode.javaewah32; + +import static com.googlecode.javaewah32.EWAHCompressedBitmap32.maxSizeInBits; + +import org.junit.Assert; +import org.junit.Test; + 
+@SuppressWarnings("javadoc") +/** + * @since 0.8.2 + * @author Daniel Lemire + */ +public class ThresholdFuncBitmap32Test { + @Test + public void basictest() { + System.out.println("Testing ThresholdFuncBitmap"); + EWAHCompressedBitmap32 ewah1 = EWAHCompressedBitmap32.bitmapOf(1, + 53, 110, 1000, 1201, 50000); + EWAHCompressedBitmap32 ewah2 = EWAHCompressedBitmap32.bitmapOf(1, + 100, 1000, 1100, 1200, 31416, 50001); + EWAHCompressedBitmap32 ewah3 = EWAHCompressedBitmap32.bitmapOf(1, + 110, 1000, 1101, 1200, 1201, 31416, 31417); + + Assert.assertTrue(EWAHCompressedBitmap32.threshold(1, ewah1) + .equals(ewah1)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(1, ewah2) + .equals(ewah2)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(1, ewah3) + .equals(ewah3)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(2, ewah1, + ewah1).equals(ewah1)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(2, ewah2, + ewah2).equals(ewah2)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(2, ewah3, + ewah3).equals(ewah3)); + + EWAHCompressedBitmap32 zero = new EWAHCompressedBitmap32(); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(2, ewah1) + .equals(zero)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(2, ewah2) + .equals(zero)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(2, ewah3) + .equals(zero)); + Assert.assertTrue(EWAHCompressedBitmap32.threshold(4, ewah1, + ewah2, ewah3).equals(zero)); + + EWAHCompressedBitmap32 ewahorth = EWAHCompressedBitmap32.threshold( + 1, ewah1, ewah2, ewah3); + EWAHCompressedBitmap32 ewahtrueor = EWAHCompressedBitmap32.or( + ewah1, ewah2, ewah3); + Assert.assertTrue(ewahorth.equals(ewahtrueor)); + + EWAHCompressedBitmap32 ewahandth = EWAHCompressedBitmap32 + .threshold(3, ewah1, ewah2, ewah3); + ewahandth.setSizeInBitsWithinLastWord(maxSizeInBits(ewah1, ewah2, ewah3)); + EWAHCompressedBitmap32 ewahtrueand = EWAHCompressedBitmap32.and( + ewah1, ewah2, ewah3); + 
Assert.assertTrue(ewahandth.equals(ewahtrueand)); + + EWAHCompressedBitmap32 ewahmajth = EWAHCompressedBitmap32 + .threshold(2, ewah1, ewah2, ewah3); + ewahmajth.setSizeInBitsWithinLastWord(maxSizeInBits(ewah1, ewah2, ewah3)); + EWAHCompressedBitmap32 ewahtruemaj = EWAHCompressedBitmap32.or( + ewah1.and(ewah2), ewah1.and(ewah3), ewah2.and(ewah3)); + Assert.assertTrue(ewahmajth.equals(ewahtruemaj)); + } + +} diff -Nru libjavaewah-java-0.7.9/TODO libjavaewah-java-1.1.7/TODO --- libjavaewah-java-0.7.9/TODO 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/TODO 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -TODO - - - - diff -Nru libjavaewah-java-0.7.9/.travis.yml libjavaewah-java-1.1.7/.travis.yml --- libjavaewah-java-0.7.9/.travis.yml 2013-11-12 14:31:20.000000000 +0000 +++ libjavaewah-java-1.1.7/.travis.yml 2019-11-08 21:55:59.000000000 +0000 @@ -1,10 +1,15 @@ language: java +sudo: false jdk: - - oraclejdk7 - - openjdk7 - - openjdk6 + - oraclejdk12 + - openjdk8 install: true -script: mvn test +branches: + only: + - master + +script: mvn clean test +