From cc11892672de320e42e6a3d2ef6173963babecfa Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Tue, 6 May 2025 17:44:53 -0700
Subject: [PATCH 01/25] Add "final" to method parameters where they were
missing.
---
pom.xml | 14 +++++++-------
.../quantilescommon/DoublesSortedView.java | 4 ++--
.../quantilescommon/FloatsSortedView.java | 4 ++--
.../quantilescommon/LongsSortedView.java | 4 ++--
.../quantilescommon/PartitioningFeature.java | 4 ++--
.../quantilescommon/QuantilesDoublesAPI.java | 12 ++++++------
.../quantilescommon/QuantilesFloatsAPI.java | 12 ++++++------
.../quantilescommon/QuantilesGenericAPI.java | 12 ++++++------
.../quantilescommon/QuantilesLongsAPI.java | 12 ++++++------
.../theta/ConcurrentSharedThetaSketch.java | 2 +-
10 files changed, 40 insertions(+), 40 deletions(-)
diff --git a/pom.xml b/pom.xml
index 84e032947..a9858526a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,11 +94,11 @@ under the License.
3.6.3
- 21
- --enable-preview
+ 22
+
${java.version}
${java.version}
- -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-ffm-flag}
+ -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-arguments}
UTF-8
${charset.encoding}
${charset.encoding}
@@ -164,7 +164,7 @@ under the License.
${maven-compiler-plugin.version}
- ${jvm-ffm-flag}
+ ${jvm-arguments}
@@ -190,7 +190,7 @@ under the License.
- [21,)
+ [22,)
[${maven.version},4.0.0)
@@ -239,7 +239,7 @@ under the License.
public
all,-missing
- ${jvm-ffm-flag}
+ ${jvm-arguments}
@@ -285,7 +285,7 @@ under the License.
maven-surefire-plugin
${maven-surefire-failsafe-plugins.version}
- ${jvm-ffm-flag}
+ ${jvm-arguments}
false
false
true
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
index 1427f6279..47bad1c67 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
@@ -60,7 +60,7 @@ public interface DoublesSortedView extends SortedView {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
@@ -129,7 +129,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit)
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
index eec699d94..0667a6748 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
@@ -60,7 +60,7 @@ public interface FloatsSortedView extends SortedView {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
QuantilesUtil.checkFloatsSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
@@ -129,7 +129,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit)
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(float[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
index e7e3521c7..eaeceeb92 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
@@ -60,7 +60,7 @@ public interface LongsSortedView extends SortedView {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getCDF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) {
QuantilesUtil.checkLongsSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
@@ -129,7 +129,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getPMF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
index 5672c2a02..82b293b3a 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
@@ -53,7 +53,7 @@ public interface PartitioningFeature {
*
* @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}.
*/
- default GenericPartitionBoundaries getPartitionBoundariesFromNumParts(int numEquallySizedParts) {
+ default GenericPartitionBoundaries getPartitionBoundariesFromNumParts(final int numEquallySizedParts) {
return getPartitionBoundariesFromNumParts(numEquallySizedParts, INCLUSIVE);
}
@@ -106,7 +106,7 @@ GenericPartitionBoundaries getPartitionBoundariesFromNumParts(
*
* @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}.
*/
- default GenericPartitionBoundaries getPartitionBoundariesFromPartSize(long nominalPartSizeItems) {
+ default GenericPartitionBoundaries getPartitionBoundariesFromPartSize(final long nominalPartSizeItems) {
return getPartitionBoundariesFromPartSize(nominalPartSizeItems, INCLUSIVE);
}
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java
index 8c4f6620f..09cfd6071 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java
@@ -35,7 +35,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(double[] splitPoints) {
+ default double[] getCDF(final double[] splitPoints) {
return getCDF(splitPoints, INCLUSIVE);
}
@@ -98,7 +98,7 @@ default double[] getCDF(double[] splitPoints) {
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(double[] splitPoints) {
+ default double[] getPMF(final double[] splitPoints) {
return getPMF(splitPoints, INCLUSIVE);
}
@@ -150,7 +150,7 @@ default double[] getPMF(double[] splitPoints) {
* @return the approximate quantile given the normalized rank.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double getQuantile(double rank) {
+ default double getQuantile(final double rank) {
return getQuantile(rank, INCLUSIVE);
}
@@ -207,7 +207,7 @@ default double getQuantile(double rank) {
* @return an array of quantiles corresponding to the given array of normalized ranks.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getQuantiles(double[] ranks) {
+ default double[] getQuantiles(final double[] ranks) {
return getQuantiles(ranks, INCLUSIVE);
}
@@ -230,7 +230,7 @@ default double[] getQuantiles(double[] ranks) {
* @return the normalized rank corresponding to the given quantile
* @throws IllegalArgumentException if sketch is empty.
*/
- default double getRank(double quantile) {
+ default double getRank(final double quantile) {
return getRank(quantile, INCLUSIVE);
}
@@ -251,7 +251,7 @@ default double getRank(double quantile) {
* @return an array of normalized ranks corresponding to the given array of quantiles.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getRanks(double[] quantiles) {
+ default double[] getRanks(final double[] quantiles) {
return getRanks(quantiles, INCLUSIVE);
}
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
index 8b8a91bdd..2713e1b02 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
@@ -34,7 +34,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(float[] splitPoints) {
+ default double[] getCDF(final float[] splitPoints) {
return getCDF(splitPoints, INCLUSIVE);
}
@@ -97,7 +97,7 @@ default double[] getCDF(float[] splitPoints) {
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(float[] splitPoints) {
+ default double[] getPMF(final float[] splitPoints) {
return getPMF(splitPoints, INCLUSIVE);
}
@@ -149,7 +149,7 @@ default double[] getPMF(float[] splitPoints) {
* @return the approximate quantile given the normalized rank.
* @throws IllegalArgumentException if sketch is empty.
*/
- default float getQuantile(double rank) {
+ default float getQuantile(final double rank) {
return getQuantile(rank, INCLUSIVE);
}
@@ -206,7 +206,7 @@ default float getQuantile(double rank) {
* @return an array of quantiles corresponding to the given array of normalized ranks.
* @throws IllegalArgumentException if sketch is empty.
*/
- default float[] getQuantiles(double[] ranks) {
+ default float[] getQuantiles(final double[] ranks) {
return getQuantiles(ranks, INCLUSIVE);
}
@@ -229,7 +229,7 @@ default float[] getQuantiles(double[] ranks) {
* @return the normalized rank corresponding to the given quantile.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double getRank(float quantile) {
+ default double getRank(final float quantile) {
return getRank(quantile, INCLUSIVE);
}
@@ -250,7 +250,7 @@ default double getRank(float quantile) {
* @return an array of normalized ranks corresponding to the given array of quantiles.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getRanks(float[] quantiles) {
+ default double[] getRanks(final float[] quantiles) {
return getRanks(quantiles, INCLUSIVE);
}
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
index c6a05ffa1..d1592e244 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
@@ -38,7 +38,7 @@ public interface QuantilesGenericAPI extends QuantilesAPI, PartitioningFeatur
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(T[] splitPoints) {
+ default double[] getCDF(final T[] splitPoints) {
return getCDF(splitPoints, INCLUSIVE);
}
@@ -118,7 +118,7 @@ default int getMaxPartitions() {
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(T[] splitPoints) {
+ default double[] getPMF(final T[] splitPoints) {
return getPMF(splitPoints, INCLUSIVE);
}
@@ -170,7 +170,7 @@ default double[] getPMF(T[] splitPoints) {
* @return the approximate quantile given the normalized rank.
* @throws IllegalArgumentException if sketch is empty.
*/
- default T getQuantile(double rank) {
+ default T getQuantile(final double rank) {
return getQuantile(rank, INCLUSIVE);
}
@@ -227,7 +227,7 @@ default T getQuantile(double rank) {
* @return an array of quantiles corresponding to the given array of normalized ranks.
* @throws IllegalArgumentException if sketch is empty.
*/
- default T[] getQuantiles(double[] ranks) {
+ default T[] getQuantiles(final double[] ranks) {
return getQuantiles(ranks, INCLUSIVE);
}
@@ -250,7 +250,7 @@ default T[] getQuantiles(double[] ranks) {
* @return the normalized rank corresponding to the given quantile.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double getRank(T quantile) {
+ default double getRank(final T quantile) {
return getRank(quantile, INCLUSIVE);
}
@@ -271,7 +271,7 @@ default double getRank(T quantile) {
* @return an array of normalized ranks corresponding to the given array of quantiles.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getRanks(T[] quantiles) {
+ default double[] getRanks(final T[] quantiles) {
return getRanks(quantiles, INCLUSIVE);
}
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java
index fb1ca5817..51802df71 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java
@@ -35,7 +35,7 @@ public interface QuantilesLongsAPI extends QuantilesAPI {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(long[] splitPoints) {
+ default double[] getCDF(final long[] splitPoints) {
return getCDF(splitPoints, INCLUSIVE);
}
@@ -98,7 +98,7 @@ default double[] getCDF(long[] splitPoints) {
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(long[] splitPoints) {
+ default double[] getPMF(final long[] splitPoints) {
return getPMF(splitPoints, INCLUSIVE);
}
@@ -150,7 +150,7 @@ default double[] getPMF(long[] splitPoints) {
* @return the approximate quantile given the normalized rank.
* @throws IllegalArgumentException if sketch is empty.
*/
- default long getQuantile(double rank) {
+ default long getQuantile(final double rank) {
return getQuantile(rank, INCLUSIVE);
}
@@ -207,7 +207,7 @@ default long getQuantile(double rank) {
* @return an array of quantiles corresponding to the given array of normalized ranks.
* @throws IllegalArgumentException if sketch is empty.
*/
- default long[] getQuantiles(double[] ranks) {
+ default long[] getQuantiles(final double[] ranks) {
return getQuantiles(ranks, INCLUSIVE);
}
@@ -230,7 +230,7 @@ default long[] getQuantiles(double[] ranks) {
* @return the normalized rank corresponding to the given quantile
* @throws IllegalArgumentException if sketch is empty.
*/
- default double getRank(long quantile) {
+ default double getRank(final long quantile) {
return getRank(quantile, INCLUSIVE);
}
@@ -251,7 +251,7 @@ default double getRank(long quantile) {
* @return an array of normalized ranks corresponding to the given array of quantiles.
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getRanks(long[] quantiles) {
+ default double[] getRanks(final long[] quantiles) {
return getRanks(quantiles, INCLUSIVE);
}
diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java
index cdc843f8b..1bbdcc12a 100644
--- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java
@@ -36,7 +36,7 @@ interface ConcurrentSharedThetaSketch extends MemoryStatus {
long NOT_SINGLE_HASH = -1L;
double MIN_ERROR = 0.0000001;
- static long computeExactLimit(long k, double error) {
+ static long computeExactLimit(final long k, final double error) {
return 2 * Math.min(k, (long) Math.ceil(1.0 / Math.pow(Math.max(error,MIN_ERROR), 2.0)));
}
From b9a626194d19effefbf00ea9aedbc6698050ef4e Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Wed, 7 May 2025 15:52:41 -0700
Subject: [PATCH 02/25] revert to jdk21 for the time being.
---
pom.xml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/pom.xml b/pom.xml
index a9858526a..5ea3a7ac4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,8 +94,8 @@ under the License.
3.6.3
- 22
-
+ 21
+ --enable-preview
${java.version}
${java.version}
-Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-arguments}
@@ -190,7 +190,7 @@ under the License.
- [22,)
+ [21,)
[${maven.version},4.0.0)
From 127b667f5658b12f920ff2c51809de67199534d2 Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Fri, 9 May 2025 14:57:04 -0700
Subject: [PATCH 03/25] Set pom to java 24.
---
pom.xml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/pom.xml b/pom.xml
index 5ea3a7ac4..bcc76b53b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,8 +94,8 @@ under the License.
3.6.3
- 21
- --enable-preview
+ 24
+
${java.version}
${java.version}
-Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-arguments}
@@ -190,7 +190,7 @@ under the License.
- [21,)
+ [22,)
[${maven.version},4.0.0)
From b0addfe7036316f19a6772fc2bc60eee7636206d Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Fri, 9 May 2025 15:51:01 -0700
Subject: [PATCH 04/25] The XxHash function was removed from
datasketches-memory and moved here.
Currently the only usage of this is in filters.bloomfilter.
The original dependency on net.openhft.hashing.LongHashFunction has been
removed as it is obsolete and uses sun.misc.Unsafe.
---
.../org/apache/datasketches/hash/XxHash.java | 178 ++-
.../hash/XxHash64LoopingTest.java | 1082 +++++++++++++++++
.../datasketches/hash/XxHash64Test.java | 177 +++
.../apache/datasketches/hash/XxHashTest.java | 44 -
4 files changed, 1420 insertions(+), 61 deletions(-)
create mode 100644 src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java
create mode 100644 src/test/java/org/apache/datasketches/hash/XxHash64Test.java
delete mode 100644 src/test/java/org/apache/datasketches/hash/XxHashTest.java
diff --git a/src/main/java/org/apache/datasketches/hash/XxHash.java b/src/main/java/org/apache/datasketches/hash/XxHash.java
index a93d4d348..e0d6947d5 100644
--- a/src/main/java/org/apache/datasketches/hash/XxHash.java
+++ b/src/main/java/org/apache/datasketches/hash/XxHash.java
@@ -19,41 +19,185 @@
package org.apache.datasketches.hash;
-import org.apache.datasketches.memory.Memory;
+import static org.apache.datasketches.memory.internal.XxHash64.hash;
+import static org.apache.datasketches.memory.internal.XxHash64.hashBytes;
+import static org.apache.datasketches.memory.internal.XxHash64.hashChars;
+import static org.apache.datasketches.memory.internal.XxHash64.hashDoubles;
+import static org.apache.datasketches.memory.internal.XxHash64.hashFloats;
+import static org.apache.datasketches.memory.internal.XxHash64.hashInts;
+import static org.apache.datasketches.memory.internal.XxHash64.hashLongs;
+import static org.apache.datasketches.memory.internal.XxHash64.hashShorts;
/**
* The XxHash is a fast, non-cryptographic, 64-bit hash function that has
* excellent avalanche and 2-way bit independence properties.
+ * This Java version was adapted from the C++ version and from the OpenHFT/Zero-Allocation-Hashing
+ * implementation, both of which are referenced below.
*
- * This class wraps the
- * Memory Component XxHash
- * implementation.
+ *
The C++ source repository:
+ *
+ * https://github.com/Cyan4973/xxHash. It has a BSD 2-Clause License:
+ *
+ * http://www.opensource.org/licenses/bsd-license.php. See LICENSE.
+ *
+ *
Portions of this code were adapted from
+ *
+ * OpenHFT/Zero-Allocation-Hashing, which has an Apache 2 license as does this site. See LICENSE.
*
* @author Lee Rhodes
*/
-public class XxHash {
+public final class XxHash {
+
+ private XxHash() { /* static utility class; not instantiable */ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetBytes starting at this offset
+ * @param lengthBytes continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashByteArr(
+ final byte[] arr,
+ final int offsetBytes,
+ final int lengthBytes,
+ final long seed) {
+ return hashBytes(arr, offsetBytes, lengthBytes, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetShorts starting at this offset
+ * @param lengthShorts continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashShortArr(
+ final short[] arr,
+ final int offsetShorts,
+ final int lengthShorts,
+ final long seed) {
+ return hashShorts(arr, offsetShorts, lengthShorts, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetChars starting at this offset
+ * @param lengthChars continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashCharArr(
+ final char[] arr,
+ final int offsetChars,
+ final int lengthChars,
+ final long seed) {
+ return hashChars(arr, offsetChars, lengthChars, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetInts starting at this offset
+ * @param lengthInts continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashIntArr(
+ final int[] arr,
+ final int offsetInts,
+ final int lengthInts,
+ final long seed) {
+ return hashInts(arr, offsetInts, lengthInts, seed);
+ }
/**
- * Compute the hash of the given Memory object.
- * @param mem The given Memory object
- * @param offsetBytes Starting at this offset in bytes
- * @param lengthBytes Continuing for this number of bytes
- * @param seed use this seed for the hash function
- * @return return the resulting 64-bit hash value.
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetLongs starting at this offset
+ * @param lengthLongs continuing for this length
+ * @param seed the given seed
+ * @return the hash
*/
- public static long hash(final Memory mem, final long offsetBytes, final long lengthBytes,
+ public static long hashLongArr(
+ final long[] arr,
+ final int offsetLongs,
+ final int lengthLongs,
final long seed) {
- return mem.xxHash64(offsetBytes, lengthBytes, seed);
+ return hashLongs(arr, offsetLongs, lengthLongs, seed);
}
/**
- * Returns a 64-bit hash.
- * @param in a long
+ * Returns a 64-bit hash from a single long. This method has been optimized for speed when only
+ * a single hash of a long is required.
+ * @param in A long.
* @param seed A long valued seed.
+ * @return the hash.
+ */
+ public static long hashLong(
+ final long in,
+ final long seed) {
+ return hash(in, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetFloats starting at this offset
+ * @param lengthFloats continuing for this length
+ * @param seed the given seed
* @return the hash
*/
- public static long hash(final long in, final long seed) {
- return org.apache.datasketches.memory.XxHash.hashLong(in, seed);
+ public static long hashFloatArr(
+ final float[] arr,
+ final int offsetFloats,
+ final int lengthFloats,
+ final long seed) {
+ return hashFloats(arr, offsetFloats, lengthFloats, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetDoubles starting at this offset
+ * @param lengthDoubles continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashDoubleArr(
+ final double[] arr,
+ final int offsetDoubles,
+ final int lengthDoubles,
+ final long seed) {
+ return hashDoubles(arr, offsetDoubles, lengthDoubles, seed);
+ }
+
+ /**
+ * Hash the given string starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param str the given string
+ * @param offsetChars starting at this offset
+ * @param lengthChars continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashString(
+ final String str,
+ final int offsetChars,
+ final int lengthChars,
+ final long seed) {
+ return org.apache.datasketches.memory.internal.XxHash64.hashString(str, offsetChars, lengthChars, seed);
}
}
+
diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java b/src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java
new file mode 100644
index 000000000..4ae2b4956
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java
@@ -0,0 +1,1082 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.hash;
+
+import static org.testng.Assert.assertEquals;
+
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class XxHash64LoopingTest {
+
+ /*
+ * This test is adapted from
+ *
+ * OpenHFT/Zero-Allocation-Hashing to test hash compatibility with that implementation.
+ * See LICENSE.
+ */
+ @Test
+ public void testWithSeed() {
+ long seed = 42L;
+ for (int i = 0; i < 1025; i++) {
+ byte[] byteArr = new byte[i];
+ for (int j = 0; j < byteArr.length; j++) { byteArr[j] = (byte) j; }
+ WritableMemory wmem = WritableMemory.writableWrap(byteArr);
+ long hash = wmem.xxHash64(0, byteArr.length, seed);
+ assertEquals(hash, HASHES_OF_LOOPING_BYTES_WITH_SEED_42[i]);
+ }
+ }
+
+ /*This data is from
+ *
+ * OpenHFT/Zero-Allocation-Hashing to test hash compatibility with that implementation.
+ * See LICENSE.
+ */
+ private static final long[] HASHES_OF_LOOPING_BYTES_WITH_SEED_42 = {
+ -7444071767201028348L,
+ -8959994473701255385L,
+ 7116559933691734543L,
+ 6019482000716350659L,
+ -6625277557348586272L,
+ -5507563483608914162L,
+ 1540412690865189709L,
+ 4522324563441226749L,
+ -7143238906056518746L,
+ -7989831429045113014L,
+ -7103973673268129917L,
+ -2319060423616348937L,
+ -7576144055863289344L,
+ -8903544572546912743L,
+ 6376815151655939880L,
+ 5913754614426879871L,
+ 6466567997237536608L,
+ -869838547529805462L,
+ -2416009472486582019L,
+ -3059673981515537339L,
+ 4211239092494362041L,
+ 1414635639471257331L,
+ 166863084165354636L,
+ -3761330575439628223L,
+ 3524931906845391329L,
+ 6070229753198168844L,
+ -3740381894759773016L,
+ -1268276809699008557L,
+ 1518581707938531581L,
+ 7988048690914090770L,
+ -4510281763783422346L,
+ -8988936099728967847L,
+ -8644129751861931918L,
+ 2046936095001747419L,
+ 339737284852751748L,
+ -8493525091666023417L,
+ -3962890767051635164L,
+ -5799948707353228709L,
+ -6503577434416464161L,
+ 7718729912902936653L,
+ 191197390694726650L,
+ -2677870679247057207L,
+ 20411540801847004L,
+ 2738354376741059902L,
+ -3754251900675510347L,
+ -3208495075154651980L,
+ 5505877218642938179L,
+ 6710910171520780908L,
+ -9060809096139575515L,
+ 6936438027860748388L,
+ -6675099569841255629L,
+ -5358120966884144380L,
+ -4970515091611332076L,
+ -1810965683604454696L,
+ -516197887510505242L,
+ 1240864593087756274L,
+ 6033499571835033332L,
+ 7223146028771530185L,
+ 909128106589125206L,
+ 1567720774747329341L,
+ -1867353301780159863L,
+ 4655107429511759333L,
+ 5356891185236995950L,
+ 182631115370802890L,
+ -3582744155969569138L,
+ 595148673029792797L,
+ 495183136068540256L,
+ 5536689004903505647L,
+ -8472683670935785889L,
+ -4335021702965928166L,
+ 7306662983232020244L,
+ 4285260837125010956L,
+ 8288813008819191181L,
+ -3442351913745287612L,
+ 4883297703151707194L,
+ 9135546183059994964L,
+ 123663780425483012L,
+ 509606241253238381L,
+ 5940344208569311369L,
+ -2650142344608291176L,
+ 3232776678942440459L,
+ -922581627593772181L,
+ 7617977317085633049L,
+ 7154902266379028518L,
+ -5806388675416795571L,
+ 4368003766009575737L,
+ -2922716024457242064L,
+ 4771160713173250118L,
+ 3275897444752647349L,
+ -297220751499763878L,
+ 5095659287766176401L,
+ 1181843887132908826L,
+ 9058283605301070357L,
+ 3984713963471276643L,
+ 6050484112980480005L,
+ 1551535065359244224L,
+ 565337293533335618L,
+ 7412521035272884309L,
+ -4735469481351389369L,
+ 6998597101178745656L,
+ -9107075101236275961L,
+ 5879828914430779796L,
+ 6034964979406620806L,
+ 5666406915264701514L,
+ -4666218379625258428L,
+ 2749972203764815656L,
+ -782986256139071446L,
+ 6830581400521008570L,
+ 2588852022632995043L,
+ -5484725487363818922L,
+ -3319556935687817112L,
+ 6481961252981840893L,
+ 2204492445852963006L,
+ -5301091763401031066L,
+ -2615065677047206256L,
+ -6769817545131782460L,
+ -8421640685322953142L,
+ -3669062629317949176L,
+ -9167016978640750490L,
+ 2783671191687959562L,
+ -7599469568522039782L,
+ -7589134103255480011L,
+ -5932706841188717592L,
+ -8689756354284562694L,
+ -3934347391198581249L,
+ -1344748563236040701L,
+ 2172701592984478834L,
+ -5322052340624064417L,
+ -8493945390573620511L,
+ 3349021988137788403L,
+ -1806262525300459538L,
+ -8091524448239736618L,
+ 4022306289903960690L,
+ -8346915997379834224L,
+ -2106001381993805461L,
+ -5784123934724688161L,
+ 6775158099649720388L,
+ -3869682756870293568L,
+ 4356490186652082006L,
+ 8469371446702290916L,
+ -2972961082318458602L,
+ -7188106622222784561L,
+ -4961006366631572412L,
+ 3199991182014172900L,
+ 2917435868590434179L,
+ 8385845305547872127L,
+ 7706824402560674655L,
+ -1587379863634865277L,
+ -4212156212298809650L,
+ -1305209322000720233L,
+ -7866728337506665880L,
+ 8195089740529247049L,
+ -4876930125798534239L,
+ 798222697981617129L,
+ -2441020897729372845L,
+ -3926158482651178666L,
+ -1254795122048514130L,
+ 5192463866522217407L,
+ -5426289318796042964L,
+ -3267454004443530826L,
+ 471043133625225785L,
+ -660956397365869974L,
+ -6149209189144999161L,
+ -2630977660039166559L,
+ 8512219789663151219L,
+ -3309844068134074620L,
+ -6211275327487847132L,
+ -2130171729366885995L,
+ 6569302074205462321L,
+ 4855778342281619706L,
+ 3867211421508653033L,
+ -3002480002418725542L,
+ -8297543107467502696L,
+ 8049642289208775831L,
+ -5439825716055425635L,
+ 7251760070798756432L,
+ -4774526021749797528L,
+ -3892389575184442548L,
+ 5162451061244344424L,
+ 6000530226398686578L,
+ -5713092252241819676L,
+ 8740913206879606081L,
+ -8693282419677309723L,
+ 1576205127972543824L,
+ 5760354502610401246L,
+ 3173225529903529385L,
+ 1785166236732849743L,
+ -1024443476832068882L,
+ -7389053248306187459L,
+ 1171021620017782166L,
+ 1471572212217428724L,
+ 7720766400407679932L,
+ -8844781213239282804L,
+ -7030159830170200877L,
+ 2195066352895261150L,
+ 1343620937208608634L,
+ 9178233160016731645L,
+ -757883447602665223L,
+ 3303032934975960867L,
+ -3685775162104101116L,
+ -4454903657585596656L,
+ -5721532367620482629L,
+ 8453227136542829644L,
+ 5397498317904798888L,
+ 7820279586106842836L,
+ -2369852356421022546L,
+ 3910437403657116169L,
+ 6072677490463894877L,
+ -2651044781586183960L,
+ 5173762670440434510L,
+ -2970017317595590978L,
+ -1024698859439768763L,
+ -3098335260967738522L,
+ -1983156467650050768L,
+ -8132353894276010246L,
+ -1088647368768943835L,
+ -3942884234250555927L,
+ 7169967005748210436L,
+ 2870913702735953746L,
+ -2207022373847083021L,
+ 1104181306093040609L,
+ 5026420573696578749L,
+ -5874879996794598513L,
+ -4777071762424874671L,
+ -7506667858329720470L,
+ -2926679936584725232L,
+ -5530649174168373609L,
+ 5282408526788020384L,
+ 3589529249264153135L,
+ -6220724706210580398L,
+ -7141769650716479812L,
+ 5142537361821482047L,
+ -7029808662366864423L,
+ -6593520217660744466L,
+ 1454581737122410695L,
+ -139542971769349865L,
+ 1727752089112067235L,
+ -775001449688420017L,
+ -5011311035350652032L,
+ -8671171179275033159L,
+ -2850915129917664667L,
+ -5258897903906998781L,
+ -6954153088230718761L,
+ -4070351752166223959L,
+ -6902592976462171099L,
+ -7850366369290661391L,
+ -4562443925864904705L,
+ 3186922928616271015L,
+ 2208521081203400591L,
+ -2727824999830592777L,
+ -3817861137262331295L,
+ 2236720618756809066L,
+ -4888946967413746075L,
+ -446884183491477687L,
+ -43021963625359034L,
+ -5857689226703189898L,
+ -2156533592262354883L,
+ -2027655907961967077L,
+ 7151844076490292500L,
+ -5029149124756905464L,
+ 526404452686156976L,
+ 8741076980297445408L,
+ 7962851518384256467L,
+ -105985852299572102L,
+ -2614605270539434398L,
+ -8265006689379110448L,
+ 8158561071761524496L,
+ -6923530157382047308L,
+ 5551949335037580397L,
+ 565709346370307061L,
+ -4780869469938333359L,
+ 6931895917517004830L,
+ 565234767538051407L,
+ -8663136372880869656L,
+ 1427340323685448983L,
+ 6492705666640232290L,
+ 1481585578088475369L,
+ -1712711110946325531L,
+ 3281685342714380741L,
+ 6441384790483098576L,
+ -1073539554682358394L,
+ 5704050067194788964L,
+ -5495724689443043319L,
+ -5425043165837577535L,
+ 8349736730194941321L,
+ -4123620508872850061L,
+ 4687874980541143573L,
+ -468891940172550975L,
+ -3212254545038049829L,
+ -6830802881920725628L,
+ 9033050533972480988L,
+ 4204031879107709260L,
+ -677513987701096310L,
+ -3286978557209370155L,
+ 1644111582609113135L,
+ 2040089403280131741L,
+ 3323690950628902653L,
+ -7686964480987925756L,
+ -4664519769497402737L,
+ 3358384147145476542L,
+ -4699919744264452277L,
+ -4795197464927839170L,
+ 5051607253379734527L,
+ -8987703459734976898L,
+ 8993686795574431834L,
+ -2688919474688811047L,
+ 375938183536293311L,
+ 1049459889197081920L,
+ -1213022037395838295L,
+ 4932989235110984138L,
+ -6647247877090282452L,
+ -7698817539128166242L,
+ -3264029336002462659L,
+ 6487828018122309795L,
+ -2660821091484592878L,
+ 7104391069028909121L,
+ -1765840012354703384L,
+ 85428166783788931L,
+ -6732726318028261938L,
+ 7566202549055682933L,
+ 229664898114413280L,
+ -1474237851782211353L,
+ -1571058880058007603L,
+ -7926453582850712144L,
+ 2487148368914275243L,
+ 8740031015380673473L,
+ 1908345726881363169L,
+ -2510061320536523178L,
+ 7854780026906019630L,
+ -6023415596650016493L,
+ -6264841978089051107L,
+ 4024998278016087488L,
+ -4266288992025826072L,
+ -3222176619422665563L,
+ -1999258726038299316L,
+ 1715270077442385636L,
+ 6764658837948099754L,
+ -8646962299105812577L,
+ -51484064212171546L,
+ -1482515279051057493L,
+ -8663965522608868414L,
+ -256555202123523670L,
+ 1973279596140303801L,
+ -7280796173024508575L,
+ -5691760367231354704L,
+ -5915786562256300861L,
+ -3697715074906156565L,
+ 3710290115318541949L,
+ 6796151623958134374L,
+ -935299482515386356L,
+ -7078378973978660385L,
+ 5379481350768846927L,
+ -9011221735308556302L,
+ 5936568631579608418L,
+ -6060732654964511813L,
+ -4243141607840017809L,
+ 3198488845875349355L,
+ -7809288876010447646L,
+ 4371587872421472389L,
+ -1304197371105522943L,
+ 7389861473143460103L,
+ -1892352887992004024L,
+ 2214828764044713398L,
+ 6347546952883613388L,
+ 1275694314105480954L,
+ -5262663163358903733L,
+ 1524757505892047607L,
+ 1474285098416162746L,
+ -7976447341881911786L,
+ 4014100291977623265L,
+ 8994982266451461043L,
+ -7737118961020539453L,
+ -2303955536994331092L,
+ 1383016539349937136L,
+ 1771516393548245271L,
+ -5441914919967503849L,
+ 5449813464890411403L,
+ -3321280356474552496L,
+ 4084073849712624363L,
+ 4290039323210935932L,
+ 2449523715173349652L,
+ 7494827882138362156L,
+ 9035007221503623051L,
+ 5722056230130603177L,
+ -5443061851556843748L,
+ -7554957764207092109L,
+ 447883090204372074L,
+ 533916651576859197L,
+ -3104765246501904165L,
+ -4002281505194601516L,
+ -8402008431255610992L,
+ -408273018037005304L,
+ 214196458752109430L,
+ 6458513309998070914L,
+ 2665048360156607904L,
+ 96698248584467992L,
+ -3238403026096269033L,
+ 6759639479763272920L,
+ -4231971627796170796L,
+ -2149574977639731179L,
+ -1437035755788460036L,
+ -6000005629185669767L,
+ 145244292800946348L,
+ -3056352941404947199L,
+ 3748284277779018970L,
+ 7328354565489106580L,
+ -2176895260373660284L,
+ 3077983936372755601L,
+ 1215485830019410079L,
+ 683050801367331140L,
+ -3173237622987755212L,
+ -1951990779107873701L,
+ -4714366021269652421L,
+ 4934690664256059008L,
+ 1674823104333774474L,
+ -3974408282362828040L,
+ 2001478896492417760L,
+ -4115105568354384199L,
+ -2039694725495941666L,
+ -587763432329933431L,
+ -391276713546911316L,
+ -5543400904809469053L,
+ 1882564440421402418L,
+ -4991793588968693036L,
+ 3454088185914578321L,
+ 2290855447126188424L,
+ 3027910585026909453L,
+ 2136873580213167431L,
+ -6243562989966916730L,
+ 5887939953208193029L,
+ -3491821629467655741L,
+ -3138303216306660662L,
+ 8572629205737718669L,
+ 4154439973110146459L,
+ 5542921963475106759L,
+ -2025215496720103521L,
+ -4047933760493641640L,
+ -169455456138383823L,
+ -1164572689128024473L,
+ -8551078127234162906L,
+ -7247713218016599028L,
+ 8725299775220778242L,
+ 6263466461599623132L,
+ 7931568057263751768L,
+ 7365493014712655238L,
+ -7343740914722477108L,
+ 8294118602089088477L,
+ 7677867223984211483L,
+ -7052188421655969232L,
+ -3739992520633991431L,
+ 772835781531324307L,
+ 881441588914692737L,
+ 6321450879891466401L,
+ 5682516032668315027L,
+ 8493068269270840662L,
+ -3895212467022280567L,
+ -3241911302335746277L,
+ -7199586338775635848L,
+ -4606922569968527974L,
+ -806850906331637768L,
+ 2433670352784844513L,
+ -5787982146811444512L,
+ 7852193425348711165L,
+ 8669396209073850051L,
+ -6898875695148963118L,
+ 6523939610287206782L,
+ -8084962379210153174L,
+ 8159432443823995836L,
+ -2631068535470883494L,
+ -338649779993793113L,
+ 6514650029997052016L,
+ 3926259678521802094L,
+ 5443275905907218528L,
+ 7312187582713433551L,
+ -2993773587362997676L,
+ -1068335949405953411L,
+ 4499730398606216151L,
+ 8538015793827433712L,
+ -4057209365270423575L,
+ -1504284818438273559L,
+ -6460688570035010846L,
+ 1765077117408991117L,
+ 8278320303525164177L,
+ 8510128922449361533L,
+ 1305722765578569816L,
+ 7250861238779078656L,
+ -576624504295396147L,
+ -4363714566147521011L,
+ -5932111494795524073L,
+ 1837387625936544674L,
+ -4186755953373944712L,
+ -7657073597826358867L,
+ 140408487263951108L,
+ 5578463635002659628L,
+ 3400326044813475885L,
+ -6092804808386714986L,
+ -2410324417287268694L,
+ 3222007930183458970L,
+ 4932471983280850419L,
+ 3554114546976144528L,
+ -7216067928362857082L,
+ -6115289896923351748L,
+ -6769646077108881947L,
+ 4263895947722578066L,
+ 2939136721007694271L,
+ 1426030606447416658L,
+ -1316192446807442076L,
+ 5366182640480055129L,
+ 6527003877470258527L,
+ 5849680119000207603L,
+ 5263993237214222328L,
+ -6936533648789185663L,
+ -9063642143790846605L,
+ 3795892210758087672L,
+ 4987213125282940176L,
+ 2505500970421590750L,
+ -1014022559552365387L,
+ -3574736245968367770L,
+ 1180676507127340259L,
+ -2261908445207512503L,
+ -8416682633172243509L,
+ 1114990703652673283L,
+ 7753746660364401380L,
+ 1874908722469707905L,
+ 2033421444403047677L,
+ 21412168602505589L,
+ 385957952615286205L,
+ 2053171460074727107L,
+ 1915131899400103774L,
+ 6680879515029368390L,
+ 568807208929724162L,
+ -6211541450459087674L,
+ -5026690733412145448L,
+ 1384781941404886235L,
+ -98027820852587266L,
+ 1806580495924249669L,
+ 6322077317403503963L,
+ 9078162931419569939L,
+ -2809061215428363978L,
+ 7697867577577415733L,
+ -5270063855897737274L,
+ 5649864555290587388L,
+ -6970990547695444247L,
+ 579684606137331754L,
+ 3871931565451195154L,
+ 2030008578322050218L,
+ -5012357307111799829L,
+ -2271365921756144065L,
+ 4551962665158074190L,
+ -3385474923040271312L,
+ -7647625164191633577L,
+ 6634635380316963029L,
+ -5201190933687061585L,
+ 8864818738548593973L,
+ 2855828214210882907L,
+ 9154512990734024165L,
+ -6945306719789457786L,
+ 1200243352799481087L,
+ 875998327415853787L,
+ 1275313054449881011L,
+ -6105772045375948736L,
+ -2926927684328291437L,
+ 9200050852144954779L,
+ 5188726645765880663L,
+ 5197037323312705176L,
+ 3434926231010121611L,
+ -5054013669361906544L,
+ 2582959199749224670L,
+ -6053757512723474059L,
+ -5016308176846054473L,
+ -2509827316698626133L,
+ 7700343644503853204L,
+ -1997627249894596731L,
+ 3993168688325352290L,
+ -8181743677541277704L,
+ 3719056119682565597L,
+ -7264411659282947790L,
+ 7177028972346484464L,
+ -5460831176884283278L,
+ 1799904662416293978L,
+ -6549616005092764514L,
+ 5472403994001122052L,
+ 8683463751708388502L,
+ -7873363037838316398L,
+ 689134758256487260L,
+ -1287443614028696450L,
+ 4452712919702709507L,
+ 762909374167538893L,
+ 6594302592326281411L,
+ 1183786629674781984L,
+ 5021847859620133476L,
+ -2490098069181538915L,
+ 5105145136026716679L,
+ 4437836948098585718L,
+ 1987270426215858862L,
+ 6170312798826946249L,
+ 634297557126003407L,
+ -1672811625495999581L,
+ 6282971595586218191L,
+ 4549149305727581687L,
+ -5652165370435317782L,
+ 1064501550023753890L,
+ -5334885527127139723L,
+ -6904378001629481237L,
+ -1807576691784201230L,
+ -205688432992053911L,
+ 7621619053293393289L,
+ 6258649161313982470L,
+ -1111634238359342096L,
+ -8044260779481691987L,
+ 400270655839010807L,
+ -7806833581382890725L,
+ -2970563349459508036L,
+ -7392591524816802798L,
+ 2918924613160219805L,
+ -6444161627929149002L,
+ 6096497501321778876L,
+ -1477975665655830038L,
+ 1690651307597306138L,
+ -2364076888826085362L,
+ -6521987420014905821L,
+ -4419193480146960582L,
+ 3538587780233092477L,
+ 8374665961716940404L,
+ 7492412312405424500L,
+ 6311662249091276767L,
+ -1240235198282023566L,
+ 5478559631401166447L,
+ 3476714419313462133L,
+ 377427285984503784L,
+ 2570472638778991109L,
+ -2741381313777447835L,
+ -7123472905503039596L,
+ 2493658686946955193L,
+ 1024677789035847585L,
+ -2916713904339582981L,
+ -4532003852004642304L,
+ -2202143560366234111L,
+ 5832267856442755135L,
+ -261740607772957384L,
+ 239435959690278014L,
+ 5755548341947719409L,
+ 6138795458221887696L,
+ -7709506987360146385L,
+ -6657487758065140444L,
+ -7006376793203657499L,
+ 6544409861846502033L,
+ 3171929352014159247L,
+ 1051041925048792869L,
+ 2617300158375649749L,
+ 952652799620095175L,
+ -576661730162168147L,
+ -1634191369221345988L,
+ 4833656816115993519L,
+ 647566759700005786L,
+ 2473810683785291822L,
+ 3005977181064745326L,
+ -3321881966853149523L,
+ 7595337666427588699L,
+ 6004093624251057224L,
+ -563917505657690279L,
+ 6117428527147449302L,
+ -6287297509522976113L,
+ -4527219334756214406L,
+ 742626429298092489L,
+ 3057351806086972041L,
+ 645967551210272605L,
+ -4428701157828864227L,
+ 3236379103879435414L,
+ -8477089892132066300L,
+ -6127365537275859058L,
+ -4052490484706946358L,
+ -8004854976625046469L,
+ -3679456917426613424L,
+ -8212793762082595299L,
+ -818288739465424130L,
+ 1358812099481667095L,
+ 7835987612195254310L,
+ -3663247409614323059L,
+ -2931105150130396604L,
+ 7296136776835614792L,
+ -2014557408985889628L,
+ 7267662411237959788L,
+ 3699280615819277743L,
+ -212010675469091396L,
+ -6518374332458360120L,
+ 145026010541628849L,
+ 1879297324213501001L,
+ -7146296067751816833L,
+ -5002958800391379931L,
+ 6060682439924517608L,
+ -432234782921170964L,
+ -6669688947353256956L,
+ 7728943532792041267L,
+ 830911367341171721L,
+ 3396934884314289432L,
+ -779464156662780749L,
+ 2330041851883352285L,
+ -4783350380736276693L,
+ -5758476056890049254L,
+ -7551552301614791791L,
+ 1253334187723911710L,
+ -2685018208308798978L,
+ 5379636036360946454L,
+ 6154668487114681217L,
+ -8641287462255458898L,
+ 4676087643800649558L,
+ -2405142641398691475L,
+ 1088685126864246881L,
+ 6431149082338374041L,
+ -607357695335069155L,
+ -720970692129524140L,
+ 2648766932394044468L,
+ 8408344790179354573L,
+ -6193808387735667350L,
+ 7722524628524697419L,
+ -6975433852560238120L,
+ -2925851029234475295L,
+ -4274458387165211028L,
+ -8355836377702147319L,
+ 5278146397877332061L,
+ 8502098812383680707L,
+ 2292836642336580326L,
+ -6127608082651070062L,
+ 2222301962240611208L,
+ -1930887695854799378L,
+ 7640503480494894592L,
+ 1162652186586436094L,
+ -1918002592943761683L,
+ 7648998601717261840L,
+ -8472603250832757057L,
+ -988877663117552456L,
+ 2368458128168026494L,
+ -6480813811998475245L,
+ -5896967824416018967L,
+ -2593783161701820446L,
+ 6950098417530252598L,
+ 6362589545555771236L,
+ 7981389665448567125L,
+ 3954017080198558850L,
+ 1626078615050230622L,
+ 6650159066527969109L,
+ 697345338922935394L,
+ -1226816215461768626L,
+ 8740408765973837440L,
+ -4194155864629568323L,
+ 7016680023232424746L,
+ 6043281358142429469L,
+ -4201005667174376809L,
+ 1216727117859013155L,
+ 6367202436544203935L,
+ 35414869396444636L,
+ 3715622794033998412L,
+ 488654435687670554L,
+ -2503747297224687460L,
+ 3147101919441470388L,
+ -8248611218693190922L,
+ 970697264481229955L,
+ 3411465763826851418L,
+ 9117405004661599969L,
+ -5204346498331519734L,
+ -19637460819385174L,
+ -5039124225167977219L,
+ 2990108874601696668L,
+ -2623857460235459202L,
+ 4256291692861397446L,
+ 6724147860870760443L,
+ 3558616688507246537L,
+ 6487680097936412800L,
+ -6470792832935928161L,
+ 4314814550912237614L,
+ -1292878983006062345L,
+ 6791915152630414174L,
+ 5971652079925815310L,
+ 2557529546662864312L,
+ 466175054322801580L,
+ -585216717310746872L,
+ -2486640422147349036L,
+ 7212029603994220134L,
+ 3958995069888972500L,
+ 4950471855791412790L,
+ -3721948842035712763L,
+ -6184503487488243051L,
+ 4079570444585775332L,
+ -3952156172546996872L,
+ 4543894231118208322L,
+ -1739995588466209963L,
+ 9155948355455935530L,
+ 5821980345462207860L,
+ -2431287667309520417L,
+ -3890108130519441316L,
+ -558124689277030490L,
+ 6079823537335801717L,
+ 5409742395192364262L,
+ -2329885777717160453L,
+ -7332804342513677651L,
+ 1466490574975950555L,
+ -420549419907427929L,
+ -5249909814389692516L,
+ -5145692168206210661L,
+ 5934113980649113921L,
+ 3241618428555359661L,
+ -6622110266160980250L,
+ 5048250878669516223L,
+ 5747219637359976174L,
+ 2975906212588223728L,
+ 5730216838646273215L,
+ -176713127129024690L,
+ 6734624279336671146L,
+ 5127866734316017180L,
+ 7111761230887705595L,
+ 3457811808274317235L,
+ 3362961434604932375L,
+ -1877869936854991246L,
+ 7171428594877765665L,
+ -8252167178400462374L,
+ -6306888185035821047L,
+ -6684702191247683887L,
+ -7754928454824190529L,
+ -1902605599135704386L,
+ -4037319846689421239L,
+ 8493746058123583457L,
+ -8156648963857047193L,
+ 2051510355149839497L,
+ -1256416624177218909L,
+ -3344927996254072010L,
+ -1838853051925943568L,
+ 316927471680974556L,
+ -1502257066700798003L,
+ -5836095610125837606L,
+ -1594125583615895424L,
+ 1442211486559637962L,
+ -144295071206619569L,
+ 5159850900959273410L,
+ 4589139881166423678L,
+ -7038726987463097509L,
+ 2886082400772974595L,
+ 2780759114707171916L,
+ 5694649587906297495L,
+ 1260349041268169667L,
+ 4921517488271434890L,
+ 644696475796073018L,
+ 6262811963753436289L,
+ -6128198676595868773L,
+ -3625352083004760261L,
+ -8751453332943236675L,
+ 8749249479868749221L,
+ -2450808199545048250L,
+ -6517435817046180917L,
+ -3433321727429234998L,
+ -2591586258908763451L,
+ 3847750870868804507L,
+ 6603614438546398643L,
+ -7598682191291031287L,
+ 8710261565627204971L,
+ 4753389483755344355L,
+ -4645333069458786881L,
+ -6742695046613492214L,
+ 643070478568866643L,
+ -7543096104151965610L,
+ 7171495384655926161L,
+ 595063872610714431L,
+ 3292310150781130424L,
+ 4326847806055440904L,
+ -4580020566072794152L,
+ 3142286571820373678L,
+ 5530356537440155930L,
+ 546372639737516181L,
+ 7401214477400367500L,
+ 7406531960402873109L,
+ 3287639667219172570L,
+ 4977301681213633671L,
+ 5253257820925174498L,
+ 2906216636104297878L,
+ 6142955758238347523L,
+ -3498651268741727235L,
+ -5875053958265588593L,
+ 3896719087169993883L,
+ -910904726885775073L,
+ 380107493197368177L,
+ -4993591912695447004L,
+ 2970487257212582761L,
+ 2551762717569548774L,
+ 953061649962736812L,
+ 8949739538606589463L,
+ -2962839167079475801L,
+ -1375673191272573835L,
+ 3761793818361866390L,
+ -389577789190726878L,
+ 5661262051502180269L,
+ -6558556411143987683L,
+ -702798336372315031L,
+ -336662820551371779L,
+ 998576401126580155L,
+ -5945021269112582755L,
+ 6108533925730179871L,
+ 2207095297001999618L,
+ -9042779159998880435L,
+ -6177868444342118372L,
+ 6775965402605895077L,
+ -3788428885163306576L,
+ 7790055010527190387L,
+ 3581587652196995358L,
+ -6176354155561607694L,
+ -5859381340906321207L,
+ 395898765763528395L,
+ 8132967590863909348L,
+ -3329092504090544483L,
+ -6785855381158040247L,
+ 1497218517051796750L,
+ -5352392845588925911L,
+ -6271364901230559194L,
+ 2314830370653350118L,
+ -7617588269001325450L,
+ 1423166885758213795L,
+ 8538612578307869519L,
+ -61918791718295474L,
+ -8177103503192338593L,
+ -4740086042584326695L,
+ 3677931948215558698L,
+ 6558856291580149558L,
+ 2674975452453336335L,
+ 5133796555646930522L,
+ 5139252693299337100L,
+ 7949476871295347205L,
+ 4407815324662880678L,
+ -3758305875280581215L,
+ 6066309507576587415L,
+ -7368508486398350973L,
+ -3181640264332856492L,
+ 6905100869343314145L,
+ 3677177673848733417L,
+ 8862933624870506941L,
+ -8575223195813810568L,
+ 9178470351355678144L,
+ 4677809017145408358L,
+ -1194833416287894989L,
+ 3436364743255571183L,
+ -5204770725795363579L,
+ 560599448536335263L,
+ -3192077522964776200L,
+ -751575299648803575L,
+ 6334581746534596579L,
+ -8358187891202563300L,
+ -1462480609823525055L,
+ 5605961062646987941L,
+ 4968399805931440889L,
+ 7968693270782626653L,
+ -5868205923557518188L,
+ 1830234928743560617L,
+ -8435261076693154407L,
+ 2138416970728681332L,
+ 8088740745199685138L,
+ 806532400344230520L,
+ 1800590379902909333L,
+ -8909128842071238901L,
+ -7357495566969170860L,
+ 3679766664126940553L,
+ 2060050474865839094L,
+ 2363972840121763414L,
+ 525695004292982714L,
+ -1224842191746529593L,
+ 7011317848855545003L,
+ -6337167558180299938L,
+ -5184688833363785939L,
+ -8426673387248359061L,
+ -5035438815930785229L,
+ 3521810320608058994L,
+ 4803742557254962242L,
+ 6623527039545786598L,
+ -1221475882122634738L,
+ -3344794405518401087L,
+ 6510298498414053658L,
+ 2844753907937720338L,
+ 90502309714994895L,
+ -750403235344282494L,
+ -4825474181021465833L,
+ -3405519947983849510L,
+ 3503875590944089793L,
+ 7286294700691822468L,
+ 7828126881500292486L,
+ 8437899353709338096L,
+ 136052254470293480L,
+ 1113259077339995086L,
+ -8244887265606191121L,
+ 8089569503800461649L,
+ -1429698194850157567L,
+ 1575595674002364989L,
+ 3576095286627428675L,
+ -7653655285807569222L,
+ -6053506977362539111L,
+ -3923855345805787169L,
+ -8001149080454232377L,
+ -4382867706931832271L,
+ 4212860258835896297L,
+ 4207674254247034014L,
+ 5519424058779519159L,
+ -754483042161434654L,
+ 1434113479814210082L,
+ -6416645032698336896L,
+ 5624329676066514819L,
+ -8229557208322175959L,
+ 3922640911653270376L,
+ 7826932478782081910L,
+ -4862787164488635842L,
+ 1449234668827944573L,
+ -1781657689570106327L,
+ 5442827552725289699L,
+ 3589862161007644641L,
+ 4787115581650652778L,
+ -3512152721942525726L,
+ -6750103117958685206L,
+ 5012970446659949261L,
+ 6797752795961689017L,
+ 5086454597639943700L,
+ -7616068364979994076L,
+ 1492846825433110217L,
+ 2967476304433704510L,
+ -8413824338284112078L,
+ -1319049442043273974L,
+ -1756090916806844109L,
+ -9061091728950139525L,
+ -6864767830358160810L,
+ 4879532090226251157L,
+ 5528644708740739488L
+ };
+}
diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
new file mode 100644
index 000000000..cda076131
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.hash;
+
+import static org.apache.datasketches.memory.XxHash.*;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+
+import org.apache.datasketches.memory.Resource;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class XxHash64Test {
+
+  /** Hashes a 96-byte region from each start offset in [0, 16); each hash must be non-zero. */
+  @Test
+  public void offsetChecks() {
+    final long seed = 12345;
+    final int blocks = 6;
+    final int cap = blocks * 16;
+
+    final WritableMemory wmem = WritableMemory.allocate(cap);
+    for (int i = 0; i < cap; i++) { wmem.putByte(i, (byte)(-128 + i)); }
+
+    for (int offset = 0; offset < 16; offset++) {
+      final int arrLen = cap - offset;
+      final long hash = wmem.xxHash64(offset, arrLen, seed);
+      assertTrue(hash != 0);
+    }
+  }
+
+  /** Hashes a 16-byte array with its first j bytes set, j in [1, 16); each hash must be non-zero. */
+  @Test
+  public void byteArrChecks() {
+    final long seed = 0;
+    final int offset = 0;
+    final int bytes = 16;
+
+    for (int j = 1; j < bytes; j++) {
+      final byte[] in = new byte[bytes];
+      final WritableMemory wmem = WritableMemory.writableWrap(in);
+      for (int i = 0; i < j; i++) { wmem.putByte(i, (byte) (-128 + i)); }
+
+      final long hash = wmem.xxHash64(offset, bytes, seed);
+      assertTrue(hash != 0);
+    }
+  }
+
+  /*
+   * This test is adapted from the OpenHFT/Zero-Allocation-Hashing project
+   * (https://github.com/OpenHFT/Zero-Allocation-Hashing)
+   * to test hash compatibility with that implementation.
+   * It is licensed under Apache License, version 2.0. See LICENSE.
+   */
+  @Test
+  public void collisionTest() {
+    final WritableMemory wmem = WritableMemory.allocate(128);
+    wmem.putLong(0, 1);
+    wmem.putLong(16, 42);
+    wmem.putLong(32, 2);
+    final long h1 = wmem.xxHash64(0, wmem.getCapacity(), 0);
+
+    wmem.putLong(0, 1L + 0xBA79078168D4BAFL);
+    wmem.putLong(32, 2L + 0x9C90005B80000000L);
+    final long h2 = wmem.xxHash64(0, wmem.getCapacity(), 0);
+    assertEquals(h1, h2);
+
+    wmem.putLong(0, 1L + (0xBA79078168D4BAFL * 2));
+    wmem.putLong(32, 2L + (0x392000b700000000L)); // == (0x9C90005B80000000L * 2) wrapped to 64 bits; avoids overflow false positive
+
+    final long h3 = wmem.xxHash64(0, wmem.getCapacity(), 0);
+    assertEquals(h2, h3);
+  }
+
+// This test had to be disabled because the net.openhft.hashing.LongHashFunction is obsolete and depends on sun.misc.Unsafe.
+//  /**
+//   * This simple test compares the output of {@link Resource#xxHash64(long, long, long)} with the
+//   * output of {@link net.openhft.hashing.LongHashFunction}, that itself is tested against the
+//   * reference implementation in C. This increases confidence that the xxHash function implemented
+//   * in this package is in fact the same xxHash function implemented in C.
+//   *
+//   * @author Roman Leventov
+//   * @author Lee Rhodes
+//   */
+//  @Test
+//  public void testXxHash() {
+//    Random random = ThreadLocalRandom.current();
+//    for (int len = 0; len < 100; len++) {
+//      byte[] bytes = new byte[len];
+//      for (int i = 0; i < 10; i++) {
+//        long zahXxHash = LongHashFunction.xx().hashBytes(bytes);
+//        long memoryXxHash = Memory.wrap(bytes).xxHash64(0, len, 0);
+//        assertEquals(memoryXxHash, zahXxHash);
+//        random.nextBytes(bytes);
+//      }
+//    }
+//  }
+
+  private static final byte[] barr = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+  /** Bytes 8-15 of barr must hash identically via Memory, each primitive-array view, and hashLong. */
+  @Test
+  public void testArrHashes() {
+    final WritableMemory wmem = WritableMemory.writableWrap(barr);
+    final long hash0 = wmem.xxHash64(8, 8, 0);
+    long hash1 = hashByteArr(barr, 8, 8, 0);
+    assertEquals(hash1, hash0);
+
+    final char[] carr = new char[8];
+    wmem.getCharArray(0, carr, 0, 8);
+    hash1 = hashCharArr(carr, 4, 4, 0);
+    assertEquals(hash1, hash0);
+
+    final short[] sarr = new short[8];
+    wmem.getShortArray(0, sarr, 0, 8);
+    hash1 = hashShortArr(sarr, 4, 4, 0);
+    assertEquals(hash1, hash0);
+
+    final int[] iarr = new int[4];
+    wmem.getIntArray(0, iarr, 0, 4);
+    hash1 = hashIntArr(iarr, 2, 2, 0);
+    assertEquals(hash1, hash0);
+
+    final float[] farr = new float[4];
+    wmem.getFloatArray(0, farr, 0, 4);
+    hash1 = hashFloatArr(farr, 2, 2, 0);
+    assertEquals(hash1, hash0);
+
+    final long[] larr = new long[2];
+    wmem.getLongArray(0, larr, 0, 2);
+    hash1 = hashLongArr(larr, 1, 1, 0);
+    final long in = wmem.getLong(8);
+    final long hash2 = hashLong(in, 0); //tests the single long hash
+    assertEquals(hash1, hash0);
+    assertEquals(hash2, hash0);
+
+    final double[] darr = new double[2];
+    wmem.getDoubleArray(0, darr, 0, 2);
+    hash1 = hashDoubleArr(darr, 1, 1, 0);
+    assertEquals(hash1, hash0);
+  }
+
+  /** Hashing a String must give the same result as hashing its char array. */
+  @Test
+  public void testString() {
+    final String s = "Now is the time for all good men to come to the aid of their country.";
+    final long hash0 = hashString(s, 0, s.length(), 0);
+    final long hash1 = hashCharArr(s.toCharArray(), 0, s.length(), 0);
+    assertEquals(hash1, hash0);
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/hash/XxHashTest.java b/src/test/java/org/apache/datasketches/hash/XxHashTest.java
deleted file mode 100644
index 4ad433b2d..000000000
--- a/src/test/java/org/apache/datasketches/hash/XxHashTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.hash;
-
-import static org.testng.Assert.assertEquals;
-
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-
-/**
- * @author Lee Rhodes
- */
-public class XxHashTest {
-
- @Test
- public void longCheck() {
- long seed = 0;
- long hash1 = XxHash.hash(123L, seed);
- long[] arr = new long[1];
- arr[0] = 123L;
- Memory mem = Memory.wrap(arr);
- long hash2 = XxHash.hash(mem, 0, 8, 0);
- assertEquals(hash2, hash1);
- }
-
-}
From 256ee592d88ca3b0125f632845a00947553d775d Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Sat, 10 May 2025 16:57:57 -0700
Subject: [PATCH 05/25] Disabled workflows.
filters.BloomFilter, common.Util uses XxHash now in
o.a.datasketches.hash
POM aligned with ds-memory pom
---
.github/workflows/auto-jdk-matrix.yml | 14 +++++++-------
.github/workflows/auto-os-matrix.yml | 14 +++++++-------
.github/workflows/check_cpp_files.yml | 14 +++++++-------
.github/workflows/codeql-analysis.yml | 14 +++++++-------
.github/workflows/javadoc.yml | 4 ++--
pom.xml | 8 +++++---
.../filters/bloomfilter/BloomFilter.java | 2 +-
.../java/org/apache/datasketches/tuple/Util.java | 4 ++--
8 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml
index 0afbaf065..176d1fd6e 100644
--- a/.github/workflows/auto-jdk-matrix.yml
+++ b/.github/workflows/auto-jdk-matrix.yml
@@ -1,13 +1,13 @@
name: Auto JDK Matrix Test & Install
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
env:
diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml
index 83ecc1ffe..413b7a957 100644
--- a/.github/workflows/auto-os-matrix.yml
+++ b/.github/workflows/auto-os-matrix.yml
@@ -1,13 +1,13 @@
name: Auto OS Matrix Test & Install
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
env:
diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml
index 243eda985..778859d0d 100644
--- a/.github/workflows/check_cpp_files.yml
+++ b/.github/workflows/check_cpp_files.yml
@@ -1,13 +1,13 @@
name: CPP SerDe Compatibility Test
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
jobs:
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index bb42fe345..f3fde1de0 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -1,13 +1,13 @@
name: "CodeQL"
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
jobs:
diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml
index 4862d64e4..977c87b27 100644
--- a/.github/workflows/javadoc.yml
+++ b/.github/workflows/javadoc.yml
@@ -1,8 +1,8 @@
name: JavaDoc
on:
- push:
- branches: main
+# push:
+# branches: main
workflow_dispatch:
jobs:
diff --git a/pom.xml b/pom.xml
index bcc76b53b..8da8c5777 100644
--- a/pom.xml
+++ b/pom.xml
@@ -86,7 +86,7 @@ under the License.
6.0.0
- 7.10.2
+ 7.11.0
generate_java_files
check_cpp_files
@@ -114,9 +114,9 @@ under the License.
3.4.2
3.11.2
3.1.1
- 3.2.0
3.3.1
- 3.5.2
+
+ 3.5.2
3.2.0
4.9.10
@@ -340,6 +340,7 @@ under the License.
**/*.sk
LICENSE
NOTICE
+ **/*.code-workspace
@@ -668,5 +669,6 @@ under the License.
+
diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
index 7c166a29d..a56a5eeef 100644
--- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
@@ -26,11 +26,11 @@
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.hash.XxHash;
import org.apache.datasketches.memory.Buffer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.memory.XxHash;
/**
* A Bloom filter is a data structure that can be used for probabilistic
diff --git a/src/main/java/org/apache/datasketches/tuple/Util.java b/src/main/java/org/apache/datasketches/tuple/Util.java
index 92193ca56..bda6e7c25 100644
--- a/src/main/java/org/apache/datasketches/tuple/Util.java
+++ b/src/main/java/org/apache/datasketches/tuple/Util.java
@@ -22,8 +22,8 @@
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
import static org.apache.datasketches.hash.MurmurHash3.hash;
-import static org.apache.datasketches.memory.XxHash.hashCharArr;
-import static org.apache.datasketches.memory.XxHash.hashString;
+import static org.apache.datasketches.hash.XxHash.hashCharArr;
+import static org.apache.datasketches.hash.XxHash.hashString;
import java.lang.reflect.Array;
From d90e5a572d36264a0ee11ca9db96c3acb0e83ced Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Sat, 10 May 2025 17:20:42 -0700
Subject: [PATCH 06/25] Update ds-java dependency on ds-memory to local
ds-memory-6.1.0-SNAPSHOT.
---
pom.xml | 4 ++--
src/test/java/org/apache/datasketches/hash/XxHash64Test.java | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/pom.xml b/pom.xml
index 8da8c5777..01d0cc65c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,7 +83,7 @@ under the License.
- 6.0.0
+ 6.1.0-SNAPSHOT
7.11.0
@@ -126,7 +126,7 @@ under the License.
4.3.0
- 0.8.12
+ 0.8.13
2.18.0
diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
index cda076131..8e6aeef0e 100644
--- a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
+++ b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.hash;
-import static org.apache.datasketches.memory.XxHash.*;
+import static org.apache.datasketches.hash.XxHash.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
From 4572611e6e3a390dbf43b0af75e15f973bb3c9a7 Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Mon, 12 May 2025 10:18:43 -0700
Subject: [PATCH 07/25] Fix imports
---
.../apache/datasketches/hash/XxHash64Test.java | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
index 8e6aeef0e..4b9c31ab9 100644
--- a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
+++ b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java
@@ -19,15 +19,18 @@
package org.apache.datasketches.hash;
-import static org.apache.datasketches.hash.XxHash.*;
+import static org.apache.datasketches.hash.XxHash.hashByteArr;
+import static org.apache.datasketches.hash.XxHash.hashCharArr;
+import static org.apache.datasketches.hash.XxHash.hashDoubleArr;
+import static org.apache.datasketches.hash.XxHash.hashFloatArr;
+import static org.apache.datasketches.hash.XxHash.hashIntArr;
+import static org.apache.datasketches.hash.XxHash.hashLong;
+import static org.apache.datasketches.hash.XxHash.hashLongArr;
+import static org.apache.datasketches.hash.XxHash.hashShortArr;
+import static org.apache.datasketches.hash.XxHash.hashString;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
-import java.util.Random;
-import java.util.concurrent.ThreadLocalRandom;
-
-import org.apache.datasketches.memory.Resource;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
From d948ce71ff976ae60268fea6f69cb01280ed0ddb Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Fri, 16 May 2025 09:59:26 -0700
Subject: [PATCH 08/25] Update .asf.yaml
---
.asf.yaml | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/.asf.yaml b/.asf.yaml
index a149bf396..57e258daa 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -45,11 +45,11 @@ github:
edit_comment_discussion: "Re: [D] {title} ({repository})"
delete_comment_discussion: "Re: [D] {title} ({repository})"
- notifications:
- commits: commits@dataskethces.apache.org
- issues: dev@dataskethces.apache.org
- discussions: dev@dataskethces.apache.org
- pullrequests_status: dev@dataskethces.apache.org
- pullrequests_comment: dev@dataskethces.apache.org
- # Send dependabot PRs to commits@ instead
- pullrequests_bot_dependabot: commits@dataskethces.apache.org
+notifications:
+ commits: commits@datasketches.apache.org
+ issues: dev@datasketches.apache.org
+ discussions: dev@datasketches.apache.org
+ pullrequests_status: dev@datasketches.apache.org
+ pullrequests_comment: dev@datasketches.apache.org
+ # Send dependabot PRs to commits@ instead
+ pullrequests_bot_dependabot: commits@datasketches.apache.org
From 6008cb35f7dc075d24d0a47af687f15b36a79eee Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Fri, 23 May 2025 16:04:37 -0700
Subject: [PATCH 09/25] Theta rework, phase 1, no testing yet!
---
.../org/apache/datasketches/common/Util.java | 112 +-
.../org/apache/datasketches/theta2/AnotB.java | 205 +
.../apache/datasketches/theta2/AnotBimpl.java | 241 +
.../datasketches/theta2/BitPacking.java | 6292 +++++++++++++++++
.../BytesCompactCompressedHashIterator.java | 93 +
.../theta2/BytesCompactHashIterator.java | 53 +
.../theta2/CompactOperations.java | 388 +
.../datasketches/theta2/CompactSketch.java | 478 ++
.../ConcurrentBackgroundThetaPropagation.java | 110 +
.../ConcurrentDirectQuickSelectSketch.java | 270 +
.../ConcurrentHeapQuickSelectSketch.java | 266 +
.../theta2/ConcurrentHeapThetaBuffer.java | 223 +
.../theta2/ConcurrentPropagationService.java | 72 +
.../theta2/ConcurrentSharedThetaSketch.java | 187 +
.../theta2/DirectCompactCompressedSketch.java | 142 +
.../theta2/DirectCompactSketch.java | 174 +
.../theta2/DirectQuickSelectSketch.java | 339 +
.../theta2/DirectQuickSelectSketchR.java | 284 +
.../theta2/EmptyCompactSketch.java | 147 +
.../theta2/ForwardCompatibility.java | 164 +
.../datasketches/theta2/HashIterator.java | 40 +
.../datasketches/theta2/HeapAlphaSketch.java | 601 ++
.../theta2/HeapCompactHashIterator.java | 41 +
.../theta2/HeapCompactSketch.java | 158 +
.../datasketches/theta2/HeapHashIterator.java | 54 +
.../theta2/HeapQuickSelectSketch.java | 326 +
.../datasketches/theta2/HeapUpdateSketch.java | 139 +
.../datasketches/theta2/Intersection.java | 218 +
.../datasketches/theta2/IntersectionImpl.java | 561 ++
.../MemoryCompactCompressedHashIterator.java | 108 +
.../theta2/MemoryHashIterator.java | 62 +
.../datasketches/theta2/PreambleUtil.java | 533 ++
.../apache/datasketches/theta2/Rebuilder.java | 175 +
.../datasketches/theta2/SetOperation.java | 259 +
.../theta2/SetOperationBuilder.java | 275 +
.../datasketches/theta2/SingleItemSketch.java | 413 ++
.../apache/datasketches/theta2/Sketch.java | 695 ++
.../org/apache/datasketches/theta2/Union.java | 231 +
.../apache/datasketches/theta2/UnionImpl.java | 365 +
.../theta2/UpdateReturnState.java | 79 +
.../datasketches/theta2/UpdateSketch.java | 469 ++
.../theta2/UpdateSketchBuilder.java | 493 ++
.../WrappedCompactCompressedSketch.java | 111 +
.../theta2/WrappedCompactSketch.java | 159 +
.../datasketches/theta2/package-info.java | 27 +
.../thetacommon/HashOperations.java | 104 +
46 files changed, 16934 insertions(+), 2 deletions(-)
create mode 100644 src/main/java/org/apache/datasketches/theta2/AnotB.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/AnotBimpl.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/BitPacking.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/CompactOperations.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/CompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/Intersection.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/Rebuilder.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/SetOperation.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/Sketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/Union.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/UnionImpl.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/package-info.java
diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java
index 19a8ee614..969cdc389 100644
--- a/src/main/java/org/apache/datasketches/common/Util.java
+++ b/src/main/java/org/apache/datasketches/common/Util.java
@@ -24,9 +24,11 @@
import static java.lang.Math.log;
import static java.lang.Math.pow;
import static java.lang.Math.round;
-import static java.util.Arrays.fill;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import java.lang.foreign.MemorySegment;
import java.util.Comparator;
+import java.util.Objects;
/**
* Common utility functions.
@@ -257,7 +259,7 @@ public static String characterPad(final String s, final int fieldLength, final c
final int sLen = s.length();
if (sLen < fieldLength) {
final char[] cArr = new char[fieldLength - sLen];
- fill(cArr, padChar);
+ java.util.Arrays.fill(cArr, padChar);
final String addstr = String.valueOf(cArr);
return (postpend) ? s.concat(addstr) : addstr.concat(s);
}
@@ -798,4 +800,110 @@ public static boolean le(final Object item1, final Object item2, final Compa
return c.compare((T)item1, (T)item2) <= 0;
}
+ //MemorySegment related
+
+ /**
+ * Returns true if the two given MemorySegments refer to the same backing resource,
+ * which is either an off-heap memory location and size, or the same on-heap array object.
+ *
+ * If both segments are off-heap, they both must have the same starting address and the same size.
+ *
+ * For on-heap segments, both segments must be based on or derived from the same array object and neither segment
+ * can be read-only, because a read-only heap segment does not expose its backing array.
+ *
+ * @param seg1 The first given MemorySegment. It must not be null and its scope must be alive.
+ * @param seg2 The second given MemorySegment. It must not be null and its scope must be alive.
+ * @return true if both MemorySegments are determined to be the same backing memory.
+ * @throws IllegalArgumentException if either scope is not alive, or both are on-heap and either is read-only.
+ */
+ public static boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
+ Objects.requireNonNull(seg1, "seg1 must not be null.");
+ Objects.requireNonNull(seg2, "seg2 must not be null.");
+ if (!seg1.scope().isAlive() || !seg2.scope().isAlive()) {
+ throw new IllegalArgumentException("Both arguments must be alive.");
+ }
+ final boolean seg1Native = seg1.isNative();
+ if (seg1Native != seg2.isNative()) { return false; } //one off-heap, one on-heap
+ if (seg1Native) { //both off-heap
+ return (seg1.address() == seg2.address()) && (seg1.byteSize() == seg2.byteSize());
+ }
+ //both on-heap: heapBase() is empty for read-only segments, so identity cannot be determined
+ if (seg1.isReadOnly() || seg2.isReadOnly()) {
+ throw new IllegalArgumentException("Cannot determine 'isSameResource(..)' on heap if either MemorySegment is Read-only.");
+ }
+ return (seg1.heapBase().orElse(null) == seg2.heapBase().orElse(null));
+ }
+
+ /**
+ * Request a new heap MemorySegment with the given capacityBytes.
+ *
+ * The returned MemorySegment will be constructed from a long[] array.
+ * As a result, it will be on-heap and have a memory alignment of 8. If the requested capacity
+ * is not divisible by eight, the returned size will be rolled up to the next multiple of eight.
+ *
+ * @param capacityBytes The new capacity being requested. It must not be negative.
+ * @return a new MemorySegment with at least the requested capacity.
+ * @throws IllegalArgumentException if capacityBytes is negative.
+ */
+ public static MemorySegment newHeapSegment(final int capacityBytes) {
+ if (capacityBytes < 0) {
+ throw new IllegalArgumentException("Requested capacity must not be negative: " + capacityBytes);
+ }
+ final int extra = ((capacityBytes & 0x7) == 0) ? 0 : 1; //one extra long holds a partial last 8 bytes
+ return MemorySegment.ofArray(new long[(capacityBytes >>> 3) + extra]);
+ }
+
+ /**
+ * Clears all bytes of the given MemorySegment to zero by filling it with zero bytes.
+ * @param seg the given MemorySegment
+ */
+ public static void clear(final MemorySegment seg) {
+ seg.fill((byte)0);
+ }
+
+ /**
+ * Zeroes the bytes of a sub-range of the given MemorySegment.
+ * The sub-range is obtained as a slice of seg, which is then filled with zero bytes.
+ * @param seg the MemorySegment to modify
+ * @param offsetBytes start of the sub-range, in bytes from the start of seg
+ * @param lengthBytes size of the sub-range in bytes
+ */
+ public static void clear(final MemorySegment seg, final long offsetBytes, final long lengthBytes) {
+ seg.asSlice(offsetBytes, lengthBytes).fill((byte)0);
+ }
+
+ /**
+ * Sets every byte of a sub-range of the given MemorySegment to the given value.
+ * The sub-range is obtained as a slice of seg, which is then filled.
+ * @param seg the MemorySegment to modify
+ * @param offsetBytes start of the sub-range, in bytes from the start of seg
+ * @param lengthBytes size of the sub-range in bytes
+ * @param value the byte value written to each position of the sub-range
+ */
+ public static void fill(final MemorySegment seg, final long offsetBytes, final long lengthBytes, final byte value) {
+ seg.asSlice(offsetBytes, lengthBytes).fill(value);
+ }
+
+ /**
+ * Clears, in the single byte at the given offset, every bit that is set in bitMask.
+ * Bits of that byte that are zero in bitMask are left unchanged.
+ * @param seg the MemorySegment to modify
+ * @param offsetBytes position of the target byte, in bytes from the start of seg
+ * @param bitMask mask whose one-bits select the bits to clear
+ */
+ public static void clearBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) {
+ seg.set(JAVA_BYTE, offsetBytes, (byte)(seg.get(JAVA_BYTE, offsetBytes) & ~bitMask));
+ }
+
+ /**
+ * Sets, in the single byte at the given offset, every bit that is set in bitMask.
+ * Bits of that byte that are zero in bitMask are left unchanged.
+ * @param seg the MemorySegment to modify
+ * @param offsetBytes position of the target byte, in bytes from the start of seg
+ * @param bitMask mask whose one-bits select the bits to set
+ */
+ public static void setBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) {
+ seg.set(JAVA_BYTE, offsetBytes, (byte)(seg.get(JAVA_BYTE, offsetBytes) | bitMask));
+ }
+
}
diff --git a/src/main/java/org/apache/datasketches/theta2/AnotB.java b/src/main/java/org/apache/datasketches/theta2/AnotB.java
new file mode 100644
index 000000000..72f7e89b2
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/AnotB.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+
+/**
+ * Computes a set difference, A-AND-NOT-B, of two theta sketches.
+ * This class includes both stateful and stateless operations.
+ *
+ * The stateful operation is as follows:
+ *
+ * AnotB anotb = SetOperationBuilder.buildAnotB();
+ *
+ * anotb.setA(Sketch skA); //The first argument.
+ * anotb.notB(Sketch skB); //The second (subtraction) argument.
+ * anotb.notB(Sketch skC); // ...any number of additional subtractions...
+ * anotb.getResult(false); //Get an interim result.
+ * anotb.notB(Sketch skD); //Additional subtractions.
+ * anotb.getResult(true); //Final result and resets the AnotB operator.
+ *
+ *
+ * The stateless operation is as follows:
+ *
+ * AnotB anotb = SetOperationBuilder.buildAnotB();
+ *
+ * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
+ *
+ *
+ * Calling the setA operation a second time essentially clears the internal state and loads
+ * the new sketch.
+ *
+ * The stateless and stateful operations are independent of each other with the exception of
+ * sharing the same update hash seed loaded as the default seed or specified by the user as an
+ * argument to the builder.
+ *
+ * @author Lee Rhodes
+ */
+public abstract class AnotB extends SetOperation {
+
+ /**
+ * Package-private constructor.
+ */
+ AnotB() {}
+
+ @Override
+ public Family getFamily() {
+ return Family.A_NOT_B;
+ }
+
+ /**
+ * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the
+ * first argument A of A-AND-NOT-B. This overwrites the internal state of this
+ * AnotB operator with the contents of the given sketch.
+ * This sets the stage for multiple following notB steps.
+ *
+ * An input argument of null will throw an exception.
+ *
+ * Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
+ * That is distinctly different from the java null, which represents a nonexistent object.
+ * In most cases it is a programming error due to some object that was not properly initialized.
+ * With a null as the first argument, we cannot know what the user's intent is.
+ * Since it is very likely that a null is a programming error, we throw an exception.
+ *
+ * An empty input argument will set the internal state to empty.
+ *
+ * Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent,
+ * valid argument for B irrelevant, we must allow this and assume the user knows what they are
+ * doing.
+ *
+ * Performing {@link #getResult(boolean)} just after this step will return a compact form of
+ * the given argument.
+ *
+ * @param skA The incoming sketch for the first argument, A.
+ */
+ public abstract void setA(Sketch skA);
+
+ /**
+ * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the
+ * second (or n+1th) argument B of A-AND-NOT-B.
+ * Performs an AND NOT operation with the existing internal state of this AnotB operator.
+ *
+ * An input argument of null or empty is ignored.
+ *
+ * Rationale: A null for the second or following arguments is more tolerable because
+ * A NOT null is still A even if we don't know exactly what the null represents. It
+ * clearly does not have any content that overlaps with A. Also, because this can be part of
+ * a multistep operation with multiple notB steps, other following steps can still produce
+ * a valid result.
+ *
+ * Use {@link #getResult(boolean)} to obtain the result.
+ *
+ * @param skB The incoming Theta sketch for the second (or following) argument B.
+ */
+ public abstract void notB(Sketch skB);
+
+ /**
+ * Gets the result of the multistep, stateful AnotB operation that has been
+ * executed with calls to {@link #setA(Sketch)} and {@link #notB(Sketch)}.
+ * The result is returned as an ordered, on-heap {@link CompactSketch}.
+ *
+ * @param reset If true, clears this operator to the empty state after this result is
+ * returned. Set this to false if you wish to obtain an intermediate result.
+ *
+ * @return the result of this operation as an ordered, on-heap {@link CompactSketch}.
+ */
+ public abstract CompactSketch getResult(boolean reset);
+
+ /**
+ * Gets the result of the multistep, stateful AnotB operation that has been
+ * executed with calls to {@link #setA(Sketch)} and {@link #notB(Sketch)}.
+ * The result is returned as a {@link CompactSketch}.
+ *
+ * @param dstOrdered If true, the result will be an ordered {@link CompactSketch}.
+ * See Destination Ordered.
+ *
+ * @param dstSeg if not null the given MemorySegment will be the target location of the result.
+ * See Destination MemorySegment.
+ *
+ * @param reset If true, clears this operator to the empty state after this result is
+ * returned. Set this to false if you wish to obtain an intermediate result.
+ *
+ * @return the result of this operation as a {@link CompactSketch} in the given dstSeg.
+ */
+ public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg, boolean reset);
+
+ /**
+ * Perform A-and-not-B set operation on the two given sketches and return the result as an
+ * ordered CompactSketch on the heap.
+ *
+ * This is a stateless operation and has no impact on the internal state of this operator.
+ * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)},
+ * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or
+ * {@link #getResult(boolean, MemorySegment, boolean)} methods.
+ *
+ * If either argument is null an exception is thrown.
+ *
+ * Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
+ * That is distinctly different from the java null, which represents a nonexistent object.
+ * In most cases null is a programming error due to a non-initialized object.
+ *
+ * With a null as the first argument we cannot know what the user's intent is and throw an
+ * exception. With a null as the second argument for this method we must return a result and
+ * there are no possible following arguments for the second argument, so we throw an
+ * exception.
+ *
+ * @param skA The incoming sketch for the first argument. It must not be null.
+ * @param skB The incoming sketch for the second argument. It must not be null.
+ * @return an ordered CompactSketch on the heap
+ */
+ public CompactSketch aNotB(final Sketch skA, final Sketch skB) {
+ return aNotB(skA, skB, true, null);
+ }
+
+ /**
+ * Perform A-and-not-B set operation on the two given sketches and return the result as a
+ * CompactSketch.
+ *
+ * This is a stateless operation and has no impact on the internal state of this operator.
+ * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)},
+ * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or
+ * {@link #getResult(boolean, MemorySegment, boolean)} methods.
+ *
+ * If either argument is null an exception is thrown.
+ *
+ * Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
+ * That is distinctly different from the java null, which represents a nonexistent object.
+ * In most cases null is a programming error due to a non-initialized object.
+ *
+ * With a null as the first argument we cannot know what the user's intent is and throw an
+ * exception. With a null as the second argument for this method we must return a result and
+ * there are no possible following arguments for the second argument, so we throw an
+ * exception.
+ *
+ * @param skA The incoming sketch for the first argument. It must not be null.
+ * @param skB The incoming sketch for the second argument. It must not be null.
+ * @param dstOrdered
+ * See Destination Ordered.
+ * @param dstSeg
+ * See Destination MemorySegment.
+ * @return the result as a CompactSketch.
+ */
+ public abstract CompactSketch aNotB(Sketch skA, Sketch skB, boolean dstOrdered,
+ MemorySegment dstSeg);
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java
new file mode 100644
index 000000000..4931bb680
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.Util.exactLog2OfLong;
+import static org.apache.datasketches.thetacommon.HashOperations.checkThetaCorruption;
+import static org.apache.datasketches.thetacommon.HashOperations.continueCondition;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsert;
+import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Implements the A-and-not-B operations.
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+final class AnotBimpl extends AnotB {
+ private final short seedHash_; //seed hash that every non-empty input sketch is checked against
+ private boolean empty_; //empty flag of the stateful result; true after reset()
+ private long thetaLong_; //theta of the stateful result; Long.MAX_VALUE after reset()
+ private long[] hashArr_ = new long[0]; //compact array w curCount_ entries
+ private int curCount_; //number of hashes held in hashArr_
+
+ /**
+ * Construct a new AnotB SetOperation on the java heap. Called by SetOperation.Builder.
+ *
+ * @param seed the update seed; only its seed hash from ThetaUtil.computeSeedHash is retained
+ */
+ AnotBimpl(final long seed) {
+ this(ThetaUtil.computeSeedHash(seed));
+ }
+
+ /**
+ * Construct a new AnotB SetOperation on the java heap.
+ *
+ * @param seedHash 16 bit hash of the chosen update seed.
+ */
+ private AnotBimpl(final short seedHash) {
+ seedHash_ = seedHash;
+ reset();
+ }
+
+ @Override
+ public void setA(final Sketch skA) { //replaces the stateful result with the content of skA
+ if (skA == null) {
+ reset(); //clear any prior state before rejecting the null
+ throw new SketchesArgumentException("The input argument A must not be null");
+ }
+ if (skA.isEmpty()) {
+ reset(); //an empty A leaves this operator in its initial, empty state
+ return;
+ }
+ //skA is not empty
+ ThetaUtil.checkSeedHashes(seedHash_, skA.getSeedHash());
+
+ //process A
+ hashArr_ = getHashArrA(skA);
+ empty_ = false;
+ thetaLong_ = skA.getThetaLong();
+ curCount_ = hashArr_.length;
+ }
+
+ @Override
+ public void notB(final Sketch skB) { //removes the hashes of skB from the stateful result
+ if (empty_ || skB == null || skB.isEmpty()) { return; } //nothing to remove; a null skB is ignored here
+ //local and skB is not empty
+ ThetaUtil.checkSeedHashes(seedHash_, skB.getSeedHash());
+
+ thetaLong_ = Math.min(thetaLong_, skB.getThetaLong());
+
+ //process B
+ hashArr_ = getResultHashArr(thetaLong_, curCount_, hashArr_, skB);
+ curCount_ = hashArr_.length;
+ empty_ = curCount_ == 0 && thetaLong_ == Long.MAX_VALUE; //empty only if no hashes remain and theta is still at max
+ }
+
+ @Override
+ public CompactSketch getResult(final boolean reset) {
+ return getResult(true, null, reset); //dstOrdered = true, dstSeg = null
+ }
+
+ @Override
+ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg,
+ final boolean reset) {
+ final CompactSketch result = CompactOperations.componentsToCompact( //is given a clone of hashArr_
+ thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstSeg, hashArr_.clone());
+ if (reset) { reset(); }
+ return result;
+ }
+
+ @Override
+ public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dstOrdered,
+ final MemorySegment dstSeg) { //stateless: the only instance field used is seedHash_, read-only
+ if (skA == null || skB == null) {
+ throw new SketchesArgumentException("Neither argument may be null");
+ }
+ //Both skA & skB are not null
+
+ final long minThetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong());
+
+ if (skA.isEmpty()) { return skA.compact(dstOrdered, dstSeg); } //empty A: returned before any seed hash check
+ //A is not Empty
+ ThetaUtil.checkSeedHashes(skA.getSeedHash(), seedHash_);
+
+ if (skB.isEmpty()) {
+ return skA.compact(dstOrdered, dstSeg); //empty B: the result is A compacted
+ }
+ ThetaUtil.checkSeedHashes(skB.getSeedHash(), seedHash_);
+ //Both skA & skB are not empty
+
+ //process A
+ final long[] hashArrA = getHashArrA(skA);
+ final int countA = hashArrA.length;
+
+ //process B
+ final long[] hashArrOut = getResultHashArr(minThetaLong, countA, hashArrA, skB); //out is clone
+ final int countOut = hashArrOut.length;
+ final boolean empty = countOut == 0 && minThetaLong == Long.MAX_VALUE;
+
+ final CompactSketch result = CompactOperations.componentsToCompact(
+ minThetaLong, countOut, seedHash_, empty, true, false, dstOrdered, dstSeg, hashArrOut);
+ return result;
+ }
+
+ @Override
+ int getRetainedEntries() {
+ return curCount_;
+ }
+
+ //restricted
+
+ private static long[] getHashArrA(final Sketch skA) { //returns a new array
+ //Get skA cache as array
+ final CompactSketch cskA = skA.compact(false, null); //sorting not required
+ final long[] hashArrA = cskA.getCache().clone();
+ return hashArrA;
+ }
+
+ private static long[] getResultHashArr( //returns a new array
+ final long minThetaLong,
+ final int countA,
+ final long[] hashArrA,
+ final Sketch skB) {
+
+ // Rebuild or get hashtable of skB
+ final long[] hashTableB; //read only
+ if (skB instanceof CompactSketch) {
+ hashTableB = convertToHashTable(skB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD);
+ } else {
+ hashTableB = skB.getCache(); //used directly as the hash table, no rebuild
+ }
+
+ //build temporary result arrays of skA
+ final long[] tmpHashArrA = new long[countA];
+
+ //search for non matches and build temp arrays
+ final int lgHTBLen = exactLog2OfLong(hashTableB.length);
+ int nonMatches = 0;
+ for (int i = 0; i < countA; i++) {
+ final long hash = hashArrA[i];
+ if (hash != 0 && hash < minThetaLong) { //only allows hashes of A < minTheta
+ final int index = hashSearch(hashTableB, lgHTBLen, hash);
+ if (index == -1) { //-1 is treated as: hash not present in B, so it is kept
+ tmpHashArrA[nonMatches] = hash;
+ nonMatches++;
+ }
+ }
+ }
+ return Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); //trimmed to the kept hashes
+ }
+
+ private static long[] convertToHashTable( //returns a new table of 1 << lgArrLongs slots
+ final Sketch sketch,
+ final long thetaLong,
+ final double rebuildThreshold) {
+ final int lgArrLongs = minLgHashTableSize(sketch.getRetainedEntries(true), rebuildThreshold);
+ final int arrLongs = 1 << lgArrLongs;
+ final long[] hashTable = new long[arrLongs];
+ checkThetaCorruption(thetaLong);
+ final HashIterator it = sketch.iterator();
+ while (it.next()) {
+ final long hash = it.get();
+ if (continueCondition(thetaLong, hash) ) { //skip rejected hashes; presumably hash <= 0 or >= thetaLong, verify
+ continue;
+ }
+ hashSearchOrInsert(hashTable, lgArrLongs, hash);
+ }
+ return hashTable;
+ }
+
+ private void reset() { //restores the initial state: empty, theta at max, no hashes
+ thetaLong_ = Long.MAX_VALUE;
+ empty_ = true;
+ hashArr_ = new long[0];
+ curCount_ = 0;
+ }
+
+ @Override
+ long[] getCache() {
+ return hashArr_.clone(); //a copy, so callers cannot modify hashArr_
+ }
+
+ @Override
+ short getSeedHash() {
+ return seedHash_;
+ }
+
+ @Override
+ long getThetaLong() {
+ return thetaLong_;
+ }
+
+ @Override
+ boolean isEmpty() {
+ return empty_;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/BitPacking.java b/src/main/java/org/apache/datasketches/theta2/BitPacking.java
new file mode 100644
index 000000000..e2b6be2fd
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/BitPacking.java
@@ -0,0 +1,6292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Used as part of Theta compression.
+ */
+public class BitPacking {
+
+ /**
+  * Packs the low-order bits of a value into a byte array, most significant bit first.
+  * @param value the value whose low-order bits are packed
+  * @param bits the number of bits to pack
+  * @param buffer the destination byte array
+  * @param bufOffset the index of the first destination byte
+  * @param bitOffset the number of bits already occupied in that first byte
+  */
+ public static void packBits(final long value, int bits, final byte[] buffer, int bufOffset, final int bitOffset) {
+   if (bitOffset > 0) { //top up the partially filled byte first
+     final int freeBits = 8 - bitOffset;
+     final int freeMask = (1 << freeBits) - 1;
+     if (bits >= freeBits) {
+       buffer[bufOffset++] |= (value >>> (bits - freeBits)) & freeMask;
+       bits -= freeBits;
+     } else { //everything fits inside the current byte
+       buffer[bufOffset] |= (value << (freeBits - bits)) & freeMask;
+       return;
+     }
+   }
+   for (; bits >= 8; bits -= 8) { //whole bytes, highest remaining bits first
+     buffer[bufOffset++] = (byte)(value >>> (bits - 8));
+   }
+   if (bits > 0) { //leftover bits go into the high end of the next byte
+     buffer[bufOffset] = (byte)(value << (8 - bits));
+   }
+ }
+
+ /**
+  * Unpacks a bit field from a byte array into one element of a long array.
+  * @param value the destination array
+  * @param index the element of value that receives the unpacked bits
+  * @param bits the number of bits to unpack
+  * @param buffer the source byte array holding the packed bits
+  * @param bufOffset the index of the first source byte
+  * @param bitOffset the number of leading bits to skip in that first byte
+  */
+ public static void unpackBits(final long[] value, final int index, int bits, final byte[] buffer,
+     int bufOffset, final int bitOffset) {
+   final int availBits = 8 - bitOffset;
+   final int headBits = Math.min(availBits, bits);
+   final int headShift = availBits - headBits;
+   final int headMask = (1 << headBits) - 1;
+   value[index] = (buffer[bufOffset] >>> headShift) & headMask;
+   if (availBits == headBits) { bufOffset++; } //first byte fully consumed
+   bits -= headBits;
+   for (; bits >= 8; bits -= 8) { //whole bytes
+     value[index] <<= 8;
+     value[index] |= Byte.toUnsignedLong(buffer[bufOffset++]);
+   }
+   if (bits > 0) { //remaining bits sit at the high end of the next byte
+     value[index] <<= bits;
+     value[index] |= Byte.toUnsignedLong(buffer[bufOffset]) >>> (8 - bits);
+   }
+ }
+
+ // pack given number of bits from a block of 8 64-bit values into bytes
+ // we don't need 0 and 64 bits
+ // we assume that higher bits (which we are not packing) are zeros
+ // this assumption allows us to avoid masking operations
+
+ static void packBitsBlock8(final long[] values, final int i, final byte[] buf, final int off, final int bits) {
+   switch (bits) { //one specialized routine per bit width 1..63
+     case 1 -> packBits1(values, i, buf, off);
+     case 2 -> packBits2(values, i, buf, off);
+     case 3 -> packBits3(values, i, buf, off);
+     case 4 -> packBits4(values, i, buf, off);
+     case 5 -> packBits5(values, i, buf, off);
+     case 6 -> packBits6(values, i, buf, off);
+     case 7 -> packBits7(values, i, buf, off);
+     case 8 -> packBits8(values, i, buf, off);
+     case 9 -> packBits9(values, i, buf, off);
+     case 10 -> packBits10(values, i, buf, off);
+     case 11 -> packBits11(values, i, buf, off);
+     case 12 -> packBits12(values, i, buf, off);
+     case 13 -> packBits13(values, i, buf, off);
+     case 14 -> packBits14(values, i, buf, off);
+     case 15 -> packBits15(values, i, buf, off);
+     case 16 -> packBits16(values, i, buf, off);
+     case 17 -> packBits17(values, i, buf, off);
+     case 18 -> packBits18(values, i, buf, off);
+     case 19 -> packBits19(values, i, buf, off);
+     case 20 -> packBits20(values, i, buf, off);
+     case 21 -> packBits21(values, i, buf, off);
+     case 22 -> packBits22(values, i, buf, off);
+     case 23 -> packBits23(values, i, buf, off);
+     case 24 -> packBits24(values, i, buf, off);
+     case 25 -> packBits25(values, i, buf, off);
+     case 26 -> packBits26(values, i, buf, off);
+     case 27 -> packBits27(values, i, buf, off);
+     case 28 -> packBits28(values, i, buf, off);
+     case 29 -> packBits29(values, i, buf, off);
+     case 30 -> packBits30(values, i, buf, off);
+     case 31 -> packBits31(values, i, buf, off);
+     case 32 -> packBits32(values, i, buf, off);
+     case 33 -> packBits33(values, i, buf, off);
+     case 34 -> packBits34(values, i, buf, off);
+     case 35 -> packBits35(values, i, buf, off);
+     case 36 -> packBits36(values, i, buf, off);
+     case 37 -> packBits37(values, i, buf, off);
+     case 38 -> packBits38(values, i, buf, off);
+     case 39 -> packBits39(values, i, buf, off);
+     case 40 -> packBits40(values, i, buf, off);
+     case 41 -> packBits41(values, i, buf, off);
+     case 42 -> packBits42(values, i, buf, off);
+     case 43 -> packBits43(values, i, buf, off);
+     case 44 -> packBits44(values, i, buf, off);
+     case 45 -> packBits45(values, i, buf, off);
+     case 46 -> packBits46(values, i, buf, off);
+     case 47 -> packBits47(values, i, buf, off);
+     case 48 -> packBits48(values, i, buf, off);
+     case 49 -> packBits49(values, i, buf, off);
+     case 50 -> packBits50(values, i, buf, off);
+     case 51 -> packBits51(values, i, buf, off);
+     case 52 -> packBits52(values, i, buf, off);
+     case 53 -> packBits53(values, i, buf, off);
+     case 54 -> packBits54(values, i, buf, off);
+     case 55 -> packBits55(values, i, buf, off);
+     case 56 -> packBits56(values, i, buf, off);
+     case 57 -> packBits57(values, i, buf, off);
+     case 58 -> packBits58(values, i, buf, off);
+     case 59 -> packBits59(values, i, buf, off);
+     case 60 -> packBits60(values, i, buf, off);
+     case 61 -> packBits61(values, i, buf, off);
+     case 62 -> packBits62(values, i, buf, off);
+     case 63 -> packBits63(values, i, buf, off);
+     default -> throw new SketchesArgumentException("wrong number of bits in packBitsBlock8: " + bits);
+   }
+ }
+
+ static void unpackBitsBlock8(final long[] values, final int i, final byte[] buf, final int off, final int bits) {
+   switch (bits) { //one specialized routine per bit width 1..63
+     case 1 -> unpackBits1(values, i, buf, off);
+     case 2 -> unpackBits2(values, i, buf, off);
+     case 3 -> unpackBits3(values, i, buf, off);
+     case 4 -> unpackBits4(values, i, buf, off);
+     case 5 -> unpackBits5(values, i, buf, off);
+     case 6 -> unpackBits6(values, i, buf, off);
+     case 7 -> unpackBits7(values, i, buf, off);
+     case 8 -> unpackBits8(values, i, buf, off);
+     case 9 -> unpackBits9(values, i, buf, off);
+     case 10 -> unpackBits10(values, i, buf, off);
+     case 11 -> unpackBits11(values, i, buf, off);
+     case 12 -> unpackBits12(values, i, buf, off);
+     case 13 -> unpackBits13(values, i, buf, off);
+     case 14 -> unpackBits14(values, i, buf, off);
+     case 15 -> unpackBits15(values, i, buf, off);
+     case 16 -> unpackBits16(values, i, buf, off);
+     case 17 -> unpackBits17(values, i, buf, off);
+     case 18 -> unpackBits18(values, i, buf, off);
+     case 19 -> unpackBits19(values, i, buf, off);
+     case 20 -> unpackBits20(values, i, buf, off);
+     case 21 -> unpackBits21(values, i, buf, off);
+     case 22 -> unpackBits22(values, i, buf, off);
+     case 23 -> unpackBits23(values, i, buf, off);
+     case 24 -> unpackBits24(values, i, buf, off);
+     case 25 -> unpackBits25(values, i, buf, off);
+     case 26 -> unpackBits26(values, i, buf, off);
+     case 27 -> unpackBits27(values, i, buf, off);
+     case 28 -> unpackBits28(values, i, buf, off);
+     case 29 -> unpackBits29(values, i, buf, off);
+     case 30 -> unpackBits30(values, i, buf, off);
+     case 31 -> unpackBits31(values, i, buf, off);
+     case 32 -> unpackBits32(values, i, buf, off);
+     case 33 -> unpackBits33(values, i, buf, off);
+     case 34 -> unpackBits34(values, i, buf, off);
+     case 35 -> unpackBits35(values, i, buf, off);
+     case 36 -> unpackBits36(values, i, buf, off);
+     case 37 -> unpackBits37(values, i, buf, off);
+     case 38 -> unpackBits38(values, i, buf, off);
+     case 39 -> unpackBits39(values, i, buf, off);
+     case 40 -> unpackBits40(values, i, buf, off);
+     case 41 -> unpackBits41(values, i, buf, off);
+     case 42 -> unpackBits42(values, i, buf, off);
+     case 43 -> unpackBits43(values, i, buf, off);
+     case 44 -> unpackBits44(values, i, buf, off);
+     case 45 -> unpackBits45(values, i, buf, off);
+     case 46 -> unpackBits46(values, i, buf, off);
+     case 47 -> unpackBits47(values, i, buf, off);
+     case 48 -> unpackBits48(values, i, buf, off);
+     case 49 -> unpackBits49(values, i, buf, off);
+     case 50 -> unpackBits50(values, i, buf, off);
+     case 51 -> unpackBits51(values, i, buf, off);
+     case 52 -> unpackBits52(values, i, buf, off);
+     case 53 -> unpackBits53(values, i, buf, off);
+     case 54 -> unpackBits54(values, i, buf, off);
+     case 55 -> unpackBits55(values, i, buf, off);
+     case 56 -> unpackBits56(values, i, buf, off);
+     case 57 -> unpackBits57(values, i, buf, off);
+     case 58 -> unpackBits58(values, i, buf, off);
+     case 59 -> unpackBits59(values, i, buf, off);
+     case 60 -> unpackBits60(values, i, buf, off);
+     case 61 -> unpackBits61(values, i, buf, off);
+     case 62 -> unpackBits62(values, i, buf, off);
+     case 63 -> unpackBits63(values, i, buf, off);
+     default -> throw new SketchesArgumentException("wrong number of bits unpackBitsBlock8: " + bits);
+   }
+ }
+
+ static void packBits1(final long[] values, final int i, final byte[] buf, final int off) { //8 x 1 bit -> 1 byte at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 7); //'=' initializes the byte; the '|=' lines below only add bits
+ buf[off] |= values[i + 1] << 6;
+ buf[off] |= values[i + 2] << 5;
+ buf[off] |= values[i + 3] << 4;
+ buf[off] |= values[i + 4] << 3;
+ buf[off] |= values[i + 5] << 2;
+ buf[off] |= values[i + 6] << 1;
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits2(final long[] values, final int i, final byte[] buf, int off) { //8 x 2 bits -> 2 bytes at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 6);
+ buf[off] |= values[i + 1] << 4;
+ buf[off] |= values[i + 2] << 2;
+ buf[off++] |= values[i + 3];
+
+ buf[off] = (byte) (values[i + 4] << 6);
+ buf[off] |= values[i + 5] << 4;
+ buf[off] |= values[i + 6] << 2;
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits3(final long[] values, final int i, final byte[] buf, int off) { //8 x 3 bits -> 3 bytes at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 5);
+ buf[off] |= values[i + 1] << 2;
+ buf[off++] |= values[i + 2] >>> 1;
+
+ buf[off] = (byte) (values[i + 2] << 7);
+ buf[off] |= values[i + 3] << 4;
+ buf[off] |= values[i + 4] << 1;
+ buf[off++] |= values[i + 5] >>> 2;
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off] |= values[i + 6] << 3;
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits4(final long[] values, final int i, final byte[] buf, int off) { //8 x 4 bits -> 4 bytes at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 4);
+ buf[off++] |= values[i + 1];
+
+ buf[off] = (byte) (values[i + 2] << 4);
+ buf[off++] |= values[i + 3];
+
+ buf[off] = (byte) (values[i + 4] << 4);
+ buf[off++] |= values[i + 5];
+
+ buf[off] = (byte) (values[i + 6] << 4);
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits5(final long[] values, final int i, final byte[] buf, int off) { //8 x 5 bits -> 5 bytes at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 3);
+ buf[off++] |= values[i + 1] >>> 2;
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off] |= values[i + 2] << 1;
+ buf[off++] |= values[i + 3] >>> 4;
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 1;
+
+ buf[off] = (byte) (values[i + 4] << 7);
+ buf[off] |= values[i + 5] << 2;
+ buf[off++] |= values[i + 6] >>> 3;
+
+ buf[off] = (byte) (values[i + 6] << 5);
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits6(final long[] values, final int i, final byte[] buf, int off) { //8 x 6 bits -> 6 bytes at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 4;
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 2;
+
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3];
+
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 4;
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 2;
+
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits7(final long[] values, final int i, final byte[] buf, int off) { //8 x 7 bits -> 7 bytes at buf[off], MSB first
+ buf[off] = (byte) (values[i + 0] << 1);
+ buf[off++] |= values[i + 1] >>> 6;
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 5;
+
+ buf[off] = (byte) (values[i + 2] << 3);
+ buf[off++] |= values[i + 3] >>> 4;
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 3;
+
+ buf[off] = (byte) (values[i + 4] << 5);
+ buf[off++] |= values[i + 5] >>> 2;
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 1;
+
+ buf[off] = (byte) (values[i + 6] << 7);
+ buf[off] |= values[i + 7];
+ }
+
+ static void packBits8(final long[] values, final int i, final byte[] buf, int off) { //8 x 8 bits -> 8 bytes at buf[off], one byte per value
+ buf[off++] = (byte) (values[i + 0]);
+ buf[off++] = (byte) (values[i + 1]);
+ buf[off++] = (byte) (values[i + 2]);
+ buf[off++] = (byte) (values[i + 3]);
+ buf[off++] = (byte) (values[i + 4]);
+ buf[off++] = (byte) (values[i + 5]);
+ buf[off++] = (byte) (values[i + 6]);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits9(final long[] values, final int i, final byte[] buf, int off) { //8 x 9 bits -> 9 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+
+ buf[off] = (byte) (values[i + 0] << 7);
+ buf[off++] |= values[i + 1] >>> 2;
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 3;
+
+ buf[off] = (byte) (values[i + 2] << 5);
+ buf[off++] |= values[i + 3] >>> 4;
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 5;
+
+ buf[off] = (byte) (values[i + 4] << 3);
+ buf[off++] |= values[i + 5] >>> 6;
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 7;
+
+ buf[off] = (byte) (values[i + 6] << 1);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits10(final long[] values, final int i, final byte[] buf, int off) { //8 x 10 bits -> 10 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+
+ buf[off] = (byte) (values[i + 0] << 6);
+ buf[off++] |= values[i + 1] >>> 4;
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 6;
+
+ buf[off] = (byte) (values[i + 2] << 2);
+ buf[off++] |= values[i + 3] >>> 8;
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+
+ buf[off] = (byte) (values[i + 4] << 6);
+ buf[off++] |= values[i + 5] >>> 4;
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 6;
+
+ buf[off] = (byte) (values[i + 6] << 2);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits11(final long[] values, final int i, final byte[] buf, int off) { //8 x 11 bits -> 11 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 3);
+
+ buf[off] = (byte) (values[i + 0] << 5);
+ buf[off++] |= values[i + 1] >>> 6;
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 9;
+
+ buf[off++] = (byte) (values[i + 2] >>> 1);
+
+ buf[off] = (byte) (values[i + 2] << 7);
+ buf[off++] |= values[i + 3] >>> 4;
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 7;
+
+ buf[off] = (byte) (values[i + 4] << 1);
+ buf[off++] |= values[i + 5] >>> 10;
+
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 5;
+
+ buf[off] = (byte) (values[i + 6] << 3);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits12(final long[] values, final int i, final byte[] buf, int off) { //8 x 12 bits -> 12 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+
+ buf[off] = (byte) (values[i + 0] << 4);
+ buf[off++] |= values[i + 1] >>> 8;
+
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+
+ buf[off] = (byte) (values[i + 2] << 4);
+ buf[off++] |= values[i + 3] >>> 8;
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+
+ buf[off] = (byte) (values[i + 4] << 4);
+ buf[off++] |= values[i + 5] >>> 8;
+
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+
+ buf[off] = (byte) (values[i + 6] << 4);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits13(final long[] values, final int i, final byte[] buf, int off) { //8 x 13 bits -> 13 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+
+ buf[off] = (byte) (values[i + 0] << 3);
+ buf[off++] |= values[i + 1] >>> 10;
+
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 7;
+
+ buf[off] = (byte) (values[i + 2] << 1);
+ buf[off++] |= values[i + 3] >>> 12;
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 9;
+
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+
+ buf[off] = (byte) (values[i + 4] << 7);
+ buf[off++] |= values[i + 5] >>> 6;
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 11;
+
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+
+ buf[off] = (byte) (values[i + 6] << 5);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits14(final long[] values, final int i, final byte[] buf, int off) { //8 x 14 bits -> 14 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 12;
+
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 10;
+
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3] >>> 8;
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 12;
+
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 10;
+
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits15(final long[] values, final int i, final byte[] buf, int off) { //8 x 15 bits -> 15 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+
+ buf[off] = (byte) (values[i + 0] << 1);
+ buf[off++] |= values[i + 1] >>> 14;
+
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 13;
+
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+
+ buf[off] = (byte) (values[i + 2] << 3);
+ buf[off++] |= values[i + 3] >>> 12;
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 11;
+
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+
+ buf[off] = (byte) (values[i + 4] << 5);
+ buf[off++] |= values[i + 5] >>> 10;
+
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 9;
+
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+
+ buf[off] = (byte) (values[i + 6] << 7);
+ buf[off++] |= values[i + 7] >>> 8;
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits16(final long[] values, final int i, final byte[] buf, int off) { //8 x 16 bits -> 16 bytes at buf[off], high byte first
+ buf[off++] = (byte) (values[i + 0] >>> 8);
+ buf[off++] = (byte) (values[i + 0]);
+
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 8);
+ buf[off++] = (byte) (values[i + 2]);
+
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 8);
+ buf[off++] = (byte) (values[i + 4]);
+
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 8);
+ buf[off++] = (byte) (values[i + 6]);
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits17(final long[] values, final int i, final byte[] buf, int off) { //8 x 17 bits -> 17 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 9);
+
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+
+ buf[off] = (byte) (values[i + 0] << 7);
+ buf[off++] |= values[i + 1] >>> 10;
+
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 11;
+
+ buf[off++] = (byte) (values[i + 2] >>> 3);
+
+ buf[off] = (byte) (values[i + 2] << 5);
+ buf[off++] |= values[i + 3] >>> 12;
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 13;
+
+ buf[off++] = (byte) (values[i + 4] >>> 5);
+
+ buf[off] = (byte) (values[i + 4] << 3);
+ buf[off++] |= values[i + 5] >>> 14;
+
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 15;
+
+ buf[off++] = (byte) (values[i + 6] >>> 7);
+
+ buf[off] = (byte) (values[i + 6] << 1);
+ buf[off++] |= values[i + 7] >>> 16;
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits18(final long[] values, final int i, final byte[] buf, int off) { //8 x 18 bits -> 18 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 10);
+
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+
+ buf[off] = (byte) (values[i + 0] << 6);
+ buf[off++] |= values[i + 1] >>> 12;
+
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 14;
+
+ buf[off++] = (byte) (values[i + 2] >>> 6);
+
+ buf[off] = (byte) (values[i + 2] << 2);
+ buf[off++] |= values[i + 3] >>> 16;
+
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 10);
+
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+
+ buf[off] = (byte) (values[i + 4] << 6);
+ buf[off++] |= values[i + 5] >>> 12;
+
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 14;
+
+ buf[off++] = (byte) (values[i + 6] >>> 6);
+
+ buf[off] = (byte) (values[i + 6] << 2);
+ buf[off++] |= values[i + 7] >>> 16;
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits19(final long[] values, final int i, final byte[] buf, int off) { //8 x 19 bits -> 19 bytes at buf[off], MSB first
+   buf[off++] = (byte) (values[i + 0] >>> 11);
+
+   buf[off++] = (byte) (values[i + 0] >>> 3);
+
+   buf[off] = (byte) (values[i + 0] << 5);
+   buf[off++] |= values[i + 1] >>> 14;
+
+   buf[off++] = (byte) (values[i + 1] >>> 6);
+
+   buf[off] = (byte) (values[i + 1] << 2);
+   buf[off++] |= values[i + 2] >>> 17;
+
+   buf[off++] = (byte) (values[i + 2] >>> 9);
+
+   buf[off++] = (byte) (values[i + 2] >>> 1);
+
+   buf[off] = (byte) (values[i + 2] << 7);
+   buf[off++] |= values[i + 3] >>> 12;
+
+   buf[off++] = (byte) (values[i + 3] >>> 4);
+
+   buf[off] = (byte) (values[i + 3] << 4);
+   buf[off++] |= values[i + 4] >>> 15;
+
+   buf[off++] = (byte) (values[i + 4] >>> 7); //plain store: nothing was written to this byte yet, so '|=' would keep stale buffer bits
+
+   buf[off] = (byte) (values[i + 4] << 1);
+   buf[off++] |= values[i + 5] >>> 18;
+
+   buf[off++] = (byte) (values[i + 5] >>> 10);
+
+   buf[off++] = (byte) (values[i + 5] >>> 2);
+
+   buf[off] = (byte) (values[i + 5] << 6);
+   buf[off++] |= values[i + 6] >>> 13;
+
+   buf[off++] = (byte) (values[i + 6] >>> 5);
+
+   buf[off] = (byte) (values[i + 6] << 3);
+   buf[off++] |= values[i + 7] >>> 16;
+
+   buf[off++] = (byte) (values[i + 7] >>> 8);
+
+   buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits20(final long[] values, final int i, final byte[] buf, int off) { //8 x 20 bits -> 20 bytes at buf[off], MSB first
+ buf[off++] = (byte) (values[i + 0] >>> 12);
+
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+
+ buf[off] = (byte) (values[i + 0] << 4);
+ buf[off++] |= values[i + 1] >>> 16;
+
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 12);
+
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+
+ buf[off] = (byte) (values[i + 2] << 4);
+ buf[off++] |= values[i + 3] >>> 16;
+
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 12);
+
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+
+ buf[off] = (byte) (values[i + 4] << 4);
+ buf[off++] |= values[i + 5] >>> 16;
+
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 12);
+
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+
+ buf[off] = (byte) (values[i + 6] << 4);
+ buf[off++] |= values[i + 7] >>> 16;
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits21(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 13);
+
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+
+ buf[off] = (byte) (values[i + 0] << 3);
+ buf[off++] |= values[i + 1] >>> 18;
+
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 15;
+
+ buf[off++] = (byte) (values[i + 2] >>> 7);
+
+ buf[off] = (byte) (values[i + 2] << 1);
+ buf[off++] |= values[i + 3] >>> 20;
+
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 17;
+
+ buf[off++] = (byte) (values[i + 4] >>> 9);
+
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+
+ buf[off] = (byte) (values[i + 4] << 7);
+ buf[off++] |= values[i + 5] >>> 14;
+
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 19;
+
+ buf[off++] = (byte) (values[i + 6] >>> 11);
+
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+
+ buf[off] = (byte) (values[i + 6] << 5);
+ buf[off++] |= values[i + 7] >>> 16;
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits22(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 14);
+
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 20;
+
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 18;
+
+ buf[off++] = (byte) (values[i + 2] >>> 10);
+
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3] >>> 16;
+
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 14);
+
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 20;
+
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 18;
+
+ buf[off++] = (byte) (values[i + 6] >>> 10);
+
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off++] |= values[i + 7] >>> 16;
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits23(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 15);
+
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+
+ buf[off] = (byte) (values[i + 0] << 1);
+ buf[off++] |= values[i + 1] >>> 22;
+
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 21;
+
+ buf[off++] = (byte) (values[i + 2] >>> 13);
+
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+
+ buf[off] = (byte) (values[i + 2] << 3);
+ buf[off++] |= values[i + 3] >>> 20;
+
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 19;
+
+ buf[off++] = (byte) (values[i + 4] >>> 11);
+
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+
+ buf[off] = (byte) (values[i + 4] << 5);
+ buf[off++] |= values[i + 5] >>> 18;
+
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 17;
+
+ buf[off++] = (byte) (values[i + 6] >>> 9);
+
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+
+ buf[off] = (byte) (values[i + 6] << 7);
+ buf[off++] |= values[i + 7] >>> 16;
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits24(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 16);
+ buf[off++] = (byte) (values[i + 0] >>> 8);
+ buf[off++] = (byte) (values[i + 0]);
+
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 16);
+ buf[off++] = (byte) (values[i + 2] >>> 8);
+ buf[off++] = (byte) (values[i + 2]);
+
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 16);
+ buf[off++] = (byte) (values[i + 4] >>> 8);
+ buf[off++] = (byte) (values[i + 4]);
+
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 16);
+ buf[off++] = (byte) (values[i + 6] >>> 8);
+ buf[off++] = (byte) (values[i + 6]);
+
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits25(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 17);
+
+ buf[off++] = (byte) (values[i + 0] >>> 9);
+
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+
+ buf[off] = (byte) (values[i + 0] << 7);
+ buf[off++] |= values[i + 1] >>> 18;
+
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 19;
+
+ buf[off++] = (byte) (values[i + 2] >>> 11);
+
+ buf[off++] = (byte) (values[i + 2] >>> 3);
+
+ buf[off] = (byte) (values[i + 2] << 5);
+ buf[off++] |= values[i + 3] >>> 20;
+
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 21;
+
+ buf[off++] = (byte) (values[i + 4] >>> 13);
+
+ buf[off++] = (byte) (values[i + 4] >>> 5);
+
+ buf[off] = (byte) (values[i + 4] << 3);
+ buf[off++] |= values[i + 5] >>> 22;
+
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 23;
+
+ buf[off++] = (byte) (values[i + 6] >>> 15);
+
+ buf[off++] = (byte) (values[i + 6] >>> 7);
+
+ buf[off] = (byte) (values[i + 6] << 1);
+ buf[off++] |= values[i + 7] >>> 24;
+
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits26(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 18);
+
+ buf[off++] = (byte) (values[i + 0] >>> 10);
+
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+
+ buf[off] = (byte) (values[i + 0] << 6);
+ buf[off++] |= values[i + 1] >>> 20;
+
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 22;
+
+ buf[off++] = (byte) (values[i + 2] >>> 14);
+
+ buf[off++] = (byte) (values[i + 2] >>> 6);
+
+ buf[off] = (byte) (values[i + 2] << 2);
+ buf[off++] |= values[i + 3] >>> 24;
+
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 18);
+
+ buf[off++] = (byte) (values[i + 4] >>> 10);
+
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+
+ buf[off] = (byte) (values[i + 4] << 6);
+ buf[off++] |= values[i + 5] >>> 20;
+
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 22;
+
+ buf[off++] = (byte) (values[i + 6] >>> 14);
+
+ buf[off++] = (byte) (values[i + 6] >>> 6);
+
+ buf[off] = (byte) (values[i + 6] << 2);
+ buf[off++] |= values[i + 7] >>> 24;
+
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits27(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 19);
+
+ buf[off++] = (byte) (values[i + 0] >>> 11);
+
+ buf[off++] = (byte) (values[i + 0] >>> 3);
+
+ buf[off] = (byte) (values[i + 0] << 5);
+ buf[off++] |= values[i + 1] >>> 22;
+
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 25;
+
+ buf[off++] = (byte) (values[i + 2] >>> 17);
+
+ buf[off++] = (byte) (values[i + 2] >>> 9);
+
+ buf[off++] = (byte) (values[i + 2] >>> 1);
+
+ buf[off] = (byte) (values[i + 2] << 7);
+ buf[off++] |= values[i + 3] >>> 20;
+
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 23;
+
+ buf[off++] = (byte) (values[i + 4] >>> 15);
+
+ buf[off++] = (byte) (values[i + 4] >>> 7);
+
+ buf[off] = (byte) (values[i + 4] << 1);
+ buf[off++] |= values[i + 5] >>> 26;
+
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 21;
+
+ buf[off++] = (byte) (values[i + 6] >>> 13);
+
+ buf[off++] = (byte) (values[i + 6] >>> 5);
+
+ buf[off] = (byte) (values[i + 6] << 3);
+ buf[off++] |= values[i + 7] >>> 24;
+
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits28(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 20);
+ buf[off++] = (byte) (values[i + 0] >>> 12);
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+ buf[off] = (byte) (values[i + 0] << 4);
+ buf[off++] |= values[i + 1] >>> 24;
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+ buf[off++] = (byte) (values[i + 2] >>> 20);
+ buf[off++] = (byte) (values[i + 2] >>> 12);
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+ buf[off] = (byte) (values[i + 2] << 4);
+ buf[off++] |= values[i + 3] >>> 24;
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+ buf[off++] = (byte) (values[i + 4] >>> 20);
+ buf[off++] = (byte) (values[i + 4] >>> 12);
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+ buf[off] = (byte) (values[i + 4] << 4);
+ buf[off++] |= values[i + 5] >>> 24;
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+ buf[off++] = (byte) (values[i + 6] >>> 20);
+ buf[off++] = (byte) (values[i + 6] >>> 12);
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+ buf[off] = (byte) (values[i + 6] << 4);
+ buf[off++] |= values[i + 7] >>> 24;
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits29(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 21);
+
+ buf[off++] = (byte) (values[i + 0] >>> 13);
+
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+
+ buf[off] = (byte) (values[i + 0] << 3);
+ buf[off++] |= values[i + 1] >>> 26;
+
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 23;
+
+ buf[off++] = (byte) (values[i + 2] >>> 15);
+
+ buf[off++] = (byte) (values[i + 2] >>> 7);
+
+ buf[off] = (byte) (values[i + 2] << 1);
+ buf[off++] |= values[i + 3] >>> 28;
+
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 25;
+
+ buf[off++] = (byte) (values[i + 4] >>> 17);
+
+ buf[off++] = (byte) (values[i + 4] >>> 9);
+
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+
+ buf[off] = (byte) (values[i + 4] << 7);
+ buf[off++] |= values[i + 5] >>> 22;
+
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 27;
+
+ buf[off++] = (byte) (values[i + 6] >>> 19);
+
+ buf[off++] = (byte) (values[i + 6] >>> 11);
+
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+
+ buf[off] = (byte) (values[i + 6] << 5);
+ buf[off++] |= values[i + 7] >>> 24;
+
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits30(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 22);
+ buf[off++] = (byte) (values[i + 0] >>> 14);
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 28;
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 26;
+ buf[off++] = (byte) (values[i + 2] >>> 18);
+ buf[off++] = (byte) (values[i + 2] >>> 10);
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3] >>> 24;
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 22);
+ buf[off++] = (byte) (values[i + 4] >>> 14);
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 28;
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 26;
+ buf[off++] = (byte) (values[i + 6] >>> 18);
+ buf[off++] = (byte) (values[i + 6] >>> 10);
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off++] |= values[i + 7] >>> 24;
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits31(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 23);
+ buf[off++] = (byte) (values[i + 0] >>> 15);
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+
+ buf[off] = (byte) (values[i + 0] << 1);
+ buf[off++] |= values[i + 1] >>> 30;
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 29;
+ buf[off++] = (byte) (values[i + 2] >>> 21);
+ buf[off++] = (byte) (values[i + 2] >>> 13);
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+
+ buf[off] = (byte) (values[i + 2] << 3);
+ buf[off++] |= values[i + 3] >>> 28;
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 27;
+ buf[off++] = (byte) (values[i + 4] >>> 19);
+ buf[off++] = (byte) (values[i + 4] >>> 11);
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+
+ buf[off] = (byte) (values[i + 4] << 5);
+ buf[off++] |= values[i + 5] >>> 26;
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 25;
+ buf[off++] = (byte) (values[i + 6] >>> 17);
+ buf[off++] = (byte) (values[i + 6] >>> 9);
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+
+ buf[off] = (byte) (values[i + 6] << 7);
+ buf[off++] |= values[i + 7] >>> 24;
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits32(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 24);
+ buf[off++] = (byte) (values[i + 0] >>> 16);
+ buf[off++] = (byte) (values[i + 0] >>> 8);
+ buf[off++] = (byte) (values[i + 0]);
+
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 24);
+ buf[off++] = (byte) (values[i + 2] >>> 16);
+ buf[off++] = (byte) (values[i + 2] >>> 8);
+ buf[off++] = (byte) (values[i + 2]);
+
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 24);
+ buf[off++] = (byte) (values[i + 4] >>> 16);
+ buf[off++] = (byte) (values[i + 4] >>> 8);
+ buf[off++] = (byte) (values[i + 4]);
+
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 24);
+ buf[off++] = (byte) (values[i + 6] >>> 16);
+ buf[off++] = (byte) (values[i + 6] >>> 8);
+ buf[off++] = (byte) (values[i + 6]);
+
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits33(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 25);
+ buf[off++] = (byte) (values[i + 0] >>> 17);
+ buf[off++] = (byte) (values[i + 0] >>> 9);
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+
+ buf[off] = (byte) (values[i + 0] << 7);
+ buf[off++] |= values[i + 1] >>> 26;
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 27;
+ buf[off++] = (byte) (values[i + 2] >>> 19);
+ buf[off++] = (byte) (values[i + 2] >>> 11);
+ buf[off++] = (byte) (values[i + 2] >>> 3);
+
+ buf[off] = (byte) (values[i + 2] << 5);
+ buf[off++] |= values[i + 3] >>> 28;
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 29;
+ buf[off++] = (byte) (values[i + 4] >>> 21);
+ buf[off++] = (byte) (values[i + 4] >>> 13);
+ buf[off++] = (byte) (values[i + 4] >>> 5);
+
+ buf[off] = (byte) (values[i + 4] << 3);
+ buf[off++] |= values[i + 5] >>> 30;
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 31;
+ buf[off++] = (byte) (values[i + 6] >>> 23);
+ buf[off++] = (byte) (values[i + 6] >>> 15);
+ buf[off++] = (byte) (values[i + 6] >>> 7);
+
+ buf[off] = (byte) (values[i + 6] << 1);
+ buf[off++] |= values[i + 7] >>> 32;
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits34(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 26);
+ buf[off++] = (byte) (values[i + 0] >>> 18);
+ buf[off++] = (byte) (values[i + 0] >>> 10);
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+
+ buf[off] = (byte) (values[i + 0] << 6);
+ buf[off++] |= values[i + 1] >>> 28;
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 30;
+ buf[off++] = (byte) (values[i + 2] >>> 22);
+ buf[off++] = (byte) (values[i + 2] >>> 14);
+ buf[off++] = (byte) (values[i + 2] >>> 6);
+
+ buf[off] = (byte) (values[i + 2] << 2);
+ buf[off++] |= values[i + 3] >>> 32;
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 26);
+ buf[off++] = (byte) (values[i + 4] >>> 18);
+ buf[off++] = (byte) (values[i + 4] >>> 10);
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+
+ buf[off] = (byte) (values[i + 4] << 6);
+ buf[off++] |= values[i + 5] >>> 28;
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 30;
+ buf[off++] = (byte) (values[i + 6] >>> 22);
+ buf[off++] = (byte) (values[i + 6] >>> 14);
+ buf[off++] = (byte) (values[i + 6] >>> 6);
+
+ buf[off] = (byte) (values[i + 6] << 2);
+ buf[off++] |= values[i + 7] >>> 32;
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits35(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 27);
+ buf[off++] = (byte) (values[i + 0] >>> 19);
+ buf[off++] = (byte) (values[i + 0] >>> 11);
+ buf[off++] = (byte) (values[i + 0] >>> 3);
+
+ buf[off] = (byte) (values[i + 0] << 5);
+ buf[off++] |= values[i + 1] >>> 30;
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 33;
+ buf[off++] = (byte) (values[i + 2] >>> 25);
+ buf[off++] = (byte) (values[i + 2] >>> 17);
+ buf[off++] = (byte) (values[i + 2] >>> 9);
+ buf[off++] = (byte) (values[i + 2] >>> 1);
+
+ buf[off] = (byte) (values[i + 2] << 7);
+ buf[off++] |= values[i + 3] >>> 28;
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 31;
+ buf[off++] = (byte) (values[i + 4] >>> 23);
+ buf[off++] = (byte) (values[i + 4] >>> 15);
+ buf[off++] = (byte) (values[i + 4] >>> 7);
+
+ buf[off] = (byte) (values[i + 4] << 1);
+ buf[off++] |= values[i + 5] >>> 34;
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 29;
+ buf[off++] = (byte) (values[i + 6] >>> 21);
+ buf[off++] = (byte) (values[i + 6] >>> 13);
+ buf[off++] = (byte) (values[i + 6] >>> 5);
+
+ buf[off] = (byte) (values[i + 6] << 3);
+ buf[off++] |= values[i + 7] >>> 32;
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits36(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 28);
+ buf[off++] = (byte) (values[i + 0] >>> 20);
+ buf[off++] = (byte) (values[i + 0] >>> 12);
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+
+ buf[off] = (byte) (values[i + 0] << 4);
+ buf[off++] |= values[i + 1] >>> 32;
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 28);
+ buf[off++] = (byte) (values[i + 2] >>> 20);
+ buf[off++] = (byte) (values[i + 2] >>> 12);
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+
+ buf[off] = (byte) (values[i + 2] << 4);
+ buf[off++] |= values[i + 3] >>> 32;
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 28);
+ buf[off++] = (byte) (values[i + 4] >>> 20);
+ buf[off++] = (byte) (values[i + 4] >>> 12);
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+
+ buf[off] = (byte) (values[i + 4] << 4);
+ buf[off++] |= values[i + 5] >>> 32;
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 28);
+ buf[off++] = (byte) (values[i + 6] >>> 20);
+ buf[off++] = (byte) (values[i + 6] >>> 12);
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+
+ buf[off] = (byte) (values[i + 6] << 4);
+ buf[off++] |= values[i + 7] >>> 32;
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits37(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 29);
+ buf[off++] = (byte) (values[i + 0] >>> 21);
+ buf[off++] = (byte) (values[i + 0] >>> 13);
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+
+ buf[off] = (byte) (values[i + 0] << 3);
+ buf[off++] |= values[i + 1] >>> 34;
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 31;
+ buf[off++] = (byte) (values[i + 2] >>> 23);
+ buf[off++] = (byte) (values[i + 2] >>> 15);
+ buf[off++] = (byte) (values[i + 2] >>> 7);
+
+ buf[off] = (byte) (values[i + 2] << 1);
+ buf[off++] |= values[i + 3] >>> 36;
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 33;
+ buf[off++] = (byte) (values[i + 4] >>> 25);
+ buf[off++] = (byte) (values[i + 4] >>> 17);
+ buf[off++] = (byte) (values[i + 4] >>> 9);
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+
+ buf[off] = (byte) (values[i + 4] << 7);
+ buf[off++] |= values[i + 5] >>> 30;
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 35;
+ buf[off++] = (byte) (values[i + 6] >>> 27);
+ buf[off++] = (byte) (values[i + 6] >>> 19);
+ buf[off++] = (byte) (values[i + 6] >>> 11);
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+
+ buf[off] = (byte) (values[i + 6] << 5);
+ buf[off++] |= values[i + 7] >>> 32;
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits38(final long[] values, final int i, final byte[] buf, int off) {
+ buf[off++] = (byte) (values[i + 0] >>> 30);
+ buf[off++] = (byte) (values[i + 0] >>> 22);
+ buf[off++] = (byte) (values[i + 0] >>> 14);
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 36;
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 34;
+ buf[off++] = (byte) (values[i + 2] >>> 26);
+ buf[off++] = (byte) (values[i + 2] >>> 18);
+ buf[off++] = (byte) (values[i + 2] >>> 10);
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3] >>> 32;
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 30);
+ buf[off++] = (byte) (values[i + 4] >>> 22);
+ buf[off++] = (byte) (values[i + 4] >>> 14);
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 36;
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 34;
+ buf[off++] = (byte) (values[i + 6] >>> 26);
+ buf[off++] = (byte) (values[i + 6] >>> 18);
+ buf[off++] = (byte) (values[i + 6] >>> 10);
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off++] |= values[i + 7] >>> 32;
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits39(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 39 bits each, MSB-first into the 39 bytes buf[off..off+38].
+ buf[off++] = (byte) (values[i + 0] >>> 31); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 23);
+ buf[off++] = (byte) (values[i + 0] >>> 15);
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+
+ buf[off] = (byte) (values[i + 0] << 1); // shared byte, high end: low 7 bits of value 0
+ buf[off++] |= values[i + 1] >>> 38; // shared byte, low end: top 1 bit of value 1; |= implicitly narrows the long to byte, and bits above bit 38 would be OR-ed into value 0's bits, so inputs are assumed to fit in 39 bits
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2); // shared byte, high end: low 6 bits of value 1
+ buf[off++] |= values[i + 2] >>> 37; // shared byte, low end: top 2 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 29);
+ buf[off++] = (byte) (values[i + 2] >>> 21);
+ buf[off++] = (byte) (values[i + 2] >>> 13);
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+
+ buf[off] = (byte) (values[i + 2] << 3); // shared byte, high end: low 5 bits of value 2
+ buf[off++] |= values[i + 3] >>> 36; // shared byte, low end: top 3 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 35; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 27);
+ buf[off++] = (byte) (values[i + 4] >>> 19);
+ buf[off++] = (byte) (values[i + 4] >>> 11);
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+
+ buf[off] = (byte) (values[i + 4] << 5); // shared byte, high end: low 3 bits of value 4
+ buf[off++] |= values[i + 5] >>> 34; // shared byte, low end: top 5 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6); // shared byte, high end: low 2 bits of value 5
+ buf[off++] |= values[i + 6] >>> 33; // shared byte, low end: top 6 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 25);
+ buf[off++] = (byte) (values[i + 6] >>> 17);
+ buf[off++] = (byte) (values[i + 6] >>> 9);
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+
+ buf[off] = (byte) (values[i + 6] << 7); // shared byte, high end: low 1 bit of value 6
+ buf[off++] |= values[i + 7] >>> 32; // shared byte, low end: top 7 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits40(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 40 bits each, MSB-first into the 40 bytes buf[off..off+39].
+ buf[off++] = (byte) (values[i + 0] >>> 32); // 40 bits = exactly 5 bytes per value, so no byte is shared; bits above bit 39 are dropped by the byte casts
+ buf[off++] = (byte) (values[i + 0] >>> 24);
+ buf[off++] = (byte) (values[i + 0] >>> 16);
+ buf[off++] = (byte) (values[i + 0] >>> 8);
+ buf[off++] = (byte) (values[i + 0]);
+
+ buf[off++] = (byte) (values[i + 1] >>> 32);
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 32);
+ buf[off++] = (byte) (values[i + 2] >>> 24);
+ buf[off++] = (byte) (values[i + 2] >>> 16);
+ buf[off++] = (byte) (values[i + 2] >>> 8);
+ buf[off++] = (byte) (values[i + 2]);
+
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 32);
+ buf[off++] = (byte) (values[i + 4] >>> 24);
+ buf[off++] = (byte) (values[i + 4] >>> 16);
+ buf[off++] = (byte) (values[i + 4] >>> 8);
+ buf[off++] = (byte) (values[i + 4]);
+
+ buf[off++] = (byte) (values[i + 5] >>> 32);
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 32);
+ buf[off++] = (byte) (values[i + 6] >>> 24);
+ buf[off++] = (byte) (values[i + 6] >>> 16);
+ buf[off++] = (byte) (values[i + 6] >>> 8);
+ buf[off++] = (byte) (values[i + 6]);
+
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits41(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 41 bits each, MSB-first into the 41 bytes buf[off..off+40].
+ buf[off++] = (byte) (values[i + 0] >>> 33); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 25);
+ buf[off++] = (byte) (values[i + 0] >>> 17);
+ buf[off++] = (byte) (values[i + 0] >>> 9);
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+
+ buf[off] = (byte) (values[i + 0] << 7); // shared byte, high end: low 1 bit of value 0
+ buf[off++] |= values[i + 1] >>> 34; // shared byte, low end: top 7 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 40 would be OR-ed into value 0's bit, so inputs are assumed to fit in 41 bits
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6); // shared byte, high end: low 2 bits of value 1
+ buf[off++] |= values[i + 2] >>> 35; // shared byte, low end: top 6 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 27);
+ buf[off++] = (byte) (values[i + 2] >>> 19);
+ buf[off++] = (byte) (values[i + 2] >>> 11);
+ buf[off++] = (byte) (values[i + 2] >>> 3);
+
+ buf[off] = (byte) (values[i + 2] << 5); // shared byte, high end: low 3 bits of value 2
+ buf[off++] |= values[i + 3] >>> 36; // shared byte, low end: top 5 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 37; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 29);
+ buf[off++] = (byte) (values[i + 4] >>> 21);
+ buf[off++] = (byte) (values[i + 4] >>> 13);
+ buf[off++] = (byte) (values[i + 4] >>> 5);
+
+ buf[off] = (byte) (values[i + 4] << 3); // shared byte, high end: low 5 bits of value 4
+ buf[off++] |= values[i + 5] >>> 38; // shared byte, low end: top 3 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 30);
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2); // shared byte, high end: low 6 bits of value 5
+ buf[off++] |= values[i + 6] >>> 39; // shared byte, low end: top 2 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 31);
+ buf[off++] = (byte) (values[i + 6] >>> 23);
+ buf[off++] = (byte) (values[i + 6] >>> 15);
+ buf[off++] = (byte) (values[i + 6] >>> 7);
+
+ buf[off] = (byte) (values[i + 6] << 1); // shared byte, high end: low 7 bits of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 1 bit of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits42(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 42 bits each, MSB-first into the 42 bytes buf[off..off+41].
+ buf[off++] = (byte) (values[i + 0] >>> 34); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 26);
+ buf[off++] = (byte) (values[i + 0] >>> 18);
+ buf[off++] = (byte) (values[i + 0] >>> 10);
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+
+ buf[off] = (byte) (values[i + 0] << 6); // shared byte, high end: low 2 bits of value 0
+ buf[off++] |= values[i + 1] >>> 36; // shared byte, low end: top 6 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 41 would be OR-ed into value 0's bits, so inputs are assumed to fit in 42 bits
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4); // shared byte, high end: low 4 bits of value 1
+ buf[off++] |= values[i + 2] >>> 38; // shared byte, low end: top 4 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 30);
+ buf[off++] = (byte) (values[i + 2] >>> 22);
+ buf[off++] = (byte) (values[i + 2] >>> 14);
+ buf[off++] = (byte) (values[i + 2] >>> 6);
+
+ buf[off] = (byte) (values[i + 2] << 2); // shared byte, high end: low 6 bits of value 2
+ buf[off++] |= values[i + 3] >>> 40; // shared byte, low end: top 2 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]); // value 3 ends on a byte boundary (4 * 42 bits = 21 bytes); value 4 starts a fresh byte
+
+ buf[off++] = (byte) (values[i + 4] >>> 34);
+ buf[off++] = (byte) (values[i + 4] >>> 26);
+ buf[off++] = (byte) (values[i + 4] >>> 18);
+ buf[off++] = (byte) (values[i + 4] >>> 10);
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+
+ buf[off] = (byte) (values[i + 4] << 6); // shared byte, high end: low 2 bits of value 4
+ buf[off++] |= values[i + 5] >>> 36; // shared byte, low end: top 6 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4); // shared byte, high end: low 4 bits of value 5
+ buf[off++] |= values[i + 6] >>> 38; // shared byte, low end: top 4 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 30);
+ buf[off++] = (byte) (values[i + 6] >>> 22);
+ buf[off++] = (byte) (values[i + 6] >>> 14);
+ buf[off++] = (byte) (values[i + 6] >>> 6);
+
+ buf[off] = (byte) (values[i + 6] << 2); // shared byte, high end: low 6 bits of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 2 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits43(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 43 bits each, MSB-first into the 43 bytes buf[off..off+42].
+ buf[off++] = (byte) (values[i + 0] >>> 35); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 27);
+ buf[off++] = (byte) (values[i + 0] >>> 19);
+ buf[off++] = (byte) (values[i + 0] >>> 11);
+ buf[off++] = (byte) (values[i + 0] >>> 3);
+
+ buf[off] = (byte) (values[i + 0] << 5); // shared byte, high end: low 3 bits of value 0
+ buf[off++] |= values[i + 1] >>> 38; // shared byte, low end: top 5 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 42 would be OR-ed into value 0's bits, so inputs are assumed to fit in 43 bits
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2); // shared byte, high end: low 6 bits of value 1
+ buf[off++] |= values[i + 2] >>> 41; // shared byte, low end: top 2 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 33);
+ buf[off++] = (byte) (values[i + 2] >>> 25);
+ buf[off++] = (byte) (values[i + 2] >>> 17);
+ buf[off++] = (byte) (values[i + 2] >>> 9);
+ buf[off++] = (byte) (values[i + 2] >>> 1);
+
+ buf[off] = (byte) (values[i + 2] << 7); // shared byte, high end: low 1 bit of value 2
+ buf[off++] |= values[i + 3] >>> 36; // shared byte, low end: top 7 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 39; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 31);
+ buf[off++] = (byte) (values[i + 4] >>> 23);
+ buf[off++] = (byte) (values[i + 4] >>> 15);
+ buf[off++] = (byte) (values[i + 4] >>> 7);
+
+ buf[off] = (byte) (values[i + 4] << 1); // shared byte, high end: low 7 bits of value 4
+ buf[off++] |= values[i + 5] >>> 42; // shared byte, low end: top 1 bit of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 34);
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6); // shared byte, high end: low 2 bits of value 5
+ buf[off++] |= values[i + 6] >>> 37; // shared byte, low end: top 6 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 29);
+ buf[off++] = (byte) (values[i + 6] >>> 21);
+ buf[off++] = (byte) (values[i + 6] >>> 13);
+ buf[off++] = (byte) (values[i + 6] >>> 5);
+
+ buf[off] = (byte) (values[i + 6] << 3); // shared byte, high end: low 5 bits of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 3 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits44(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 44 bits each, MSB-first into the 44 bytes buf[off..off+43].
+ buf[off++] = (byte) (values[i + 0] >>> 36); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 28);
+ buf[off++] = (byte) (values[i + 0] >>> 20);
+ buf[off++] = (byte) (values[i + 0] >>> 12);
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+
+ buf[off] = (byte) (values[i + 0] << 4); // shared byte, high end: low 4 bits of value 0
+ buf[off++] |= values[i + 1] >>> 40; // shared byte, low end: top 4 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 43 would be OR-ed into value 0's bits, so inputs are assumed to fit in 44 bits
+ buf[off++] = (byte) (values[i + 1] >>> 32);
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]); // each pair of values fills exactly 11 bytes, so value 2 starts a fresh byte
+
+ buf[off++] = (byte) (values[i + 2] >>> 36);
+ buf[off++] = (byte) (values[i + 2] >>> 28);
+ buf[off++] = (byte) (values[i + 2] >>> 20);
+ buf[off++] = (byte) (values[i + 2] >>> 12);
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+
+ buf[off] = (byte) (values[i + 2] << 4); // shared byte, high end: low 4 bits of value 2
+ buf[off++] |= values[i + 3] >>> 40; // shared byte, low end: top 4 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]); // value 3 ends on a byte boundary
+
+ buf[off++] = (byte) (values[i + 4] >>> 36);
+ buf[off++] = (byte) (values[i + 4] >>> 28);
+ buf[off++] = (byte) (values[i + 4] >>> 20);
+ buf[off++] = (byte) (values[i + 4] >>> 12);
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+
+ buf[off] = (byte) (values[i + 4] << 4); // shared byte, high end: low 4 bits of value 4
+ buf[off++] |= values[i + 5] >>> 40; // shared byte, low end: top 4 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 32);
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]); // value 5 ends on a byte boundary
+
+ buf[off++] = (byte) (values[i + 6] >>> 36);
+ buf[off++] = (byte) (values[i + 6] >>> 28);
+ buf[off++] = (byte) (values[i + 6] >>> 20);
+ buf[off++] = (byte) (values[i + 6] >>> 12);
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+
+ buf[off] = (byte) (values[i + 6] << 4); // shared byte, high end: low 4 bits of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 4 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits45(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 45 bits each, MSB-first into the 45 bytes buf[off..off+44].
+ buf[off++] = (byte) (values[i + 0] >>> 37); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 29);
+ buf[off++] = (byte) (values[i + 0] >>> 21);
+ buf[off++] = (byte) (values[i + 0] >>> 13);
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+
+ buf[off] = (byte) (values[i + 0] << 3); // shared byte, high end: low 5 bits of value 0
+ buf[off++] |= values[i + 1] >>> 42; // shared byte, low end: top 3 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 44 would be OR-ed into value 0's bits, so inputs are assumed to fit in 45 bits
+ buf[off++] = (byte) (values[i + 1] >>> 34);
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6); // shared byte, high end: low 2 bits of value 1
+ buf[off++] |= values[i + 2] >>> 39; // shared byte, low end: top 6 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 31);
+ buf[off++] = (byte) (values[i + 2] >>> 23);
+ buf[off++] = (byte) (values[i + 2] >>> 15);
+ buf[off++] = (byte) (values[i + 2] >>> 7);
+
+ buf[off] = (byte) (values[i + 2] << 1); // shared byte, high end: low 7 bits of value 2
+ buf[off++] |= values[i + 3] >>> 44; // shared byte, low end: top 1 bit of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 41; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 33);
+ buf[off++] = (byte) (values[i + 4] >>> 25);
+ buf[off++] = (byte) (values[i + 4] >>> 17);
+ buf[off++] = (byte) (values[i + 4] >>> 9);
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+
+ buf[off] = (byte) (values[i + 4] << 7); // shared byte, high end: low 1 bit of value 4
+ buf[off++] |= values[i + 5] >>> 38; // shared byte, low end: top 7 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 30);
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2); // shared byte, high end: low 6 bits of value 5
+ buf[off++] |= values[i + 6] >>> 43; // shared byte, low end: top 2 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 35);
+ buf[off++] = (byte) (values[i + 6] >>> 27);
+ buf[off++] = (byte) (values[i + 6] >>> 19);
+ buf[off++] = (byte) (values[i + 6] >>> 11);
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+
+ buf[off] = (byte) (values[i + 6] << 5); // shared byte, high end: low 3 bits of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 5 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits46(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 46 bits each, MSB-first into the 46 bytes buf[off..off+45].
+ buf[off++] = (byte) (values[i + 0] >>> 38); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 30);
+ buf[off++] = (byte) (values[i + 0] >>> 22);
+ buf[off++] = (byte) (values[i + 0] >>> 14);
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+
+ buf[off] = (byte) (values[i + 0] << 2); // shared byte, high end: low 6 bits of value 0
+ buf[off++] |= values[i + 1] >>> 44; // shared byte, low end: top 2 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 45 would be OR-ed into value 0's bits, so inputs are assumed to fit in 46 bits
+ buf[off++] = (byte) (values[i + 1] >>> 36);
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4); // shared byte, high end: low 4 bits of value 1
+ buf[off++] |= values[i + 2] >>> 42; // shared byte, low end: top 4 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 34);
+ buf[off++] = (byte) (values[i + 2] >>> 26);
+ buf[off++] = (byte) (values[i + 2] >>> 18);
+ buf[off++] = (byte) (values[i + 2] >>> 10);
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+
+ buf[off] = (byte) (values[i + 2] << 6); // shared byte, high end: low 2 bits of value 2
+ buf[off++] |= values[i + 3] >>> 40; // shared byte, low end: top 6 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]); // value 3 ends on a byte boundary (4 * 46 bits = 23 bytes); value 4 starts a fresh byte
+
+ buf[off++] = (byte) (values[i + 4] >>> 38);
+ buf[off++] = (byte) (values[i + 4] >>> 30);
+ buf[off++] = (byte) (values[i + 4] >>> 22);
+ buf[off++] = (byte) (values[i + 4] >>> 14);
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+
+ buf[off] = (byte) (values[i + 4] << 2); // shared byte, high end: low 6 bits of value 4
+ buf[off++] |= values[i + 5] >>> 44; // shared byte, low end: top 2 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 36);
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4); // shared byte, high end: low 4 bits of value 5
+ buf[off++] |= values[i + 6] >>> 42; // shared byte, low end: top 4 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 34);
+ buf[off++] = (byte) (values[i + 6] >>> 26);
+ buf[off++] = (byte) (values[i + 6] >>> 18);
+ buf[off++] = (byte) (values[i + 6] >>> 10);
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+
+ buf[off] = (byte) (values[i + 6] << 6); // shared byte, high end: low 2 bits of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 6 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits47(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 47 bits each, MSB-first into the 47 bytes buf[off..off+46].
+ buf[off++] = (byte) (values[i + 0] >>> 39); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 31);
+ buf[off++] = (byte) (values[i + 0] >>> 23);
+ buf[off++] = (byte) (values[i + 0] >>> 15);
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+
+ buf[off] = (byte) (values[i + 0] << 1); // shared byte, high end: low 7 bits of value 0
+ buf[off++] |= values[i + 1] >>> 46; // shared byte, low end: top 1 bit of value 1; |= implicitly narrows the long to byte, and bits above bit 46 would be OR-ed into value 0's bits, so inputs are assumed to fit in 47 bits
+ buf[off++] = (byte) (values[i + 1] >>> 38);
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2); // shared byte, high end: low 6 bits of value 1
+ buf[off++] |= values[i + 2] >>> 45; // shared byte, low end: top 2 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 37);
+ buf[off++] = (byte) (values[i + 2] >>> 29);
+ buf[off++] = (byte) (values[i + 2] >>> 21);
+ buf[off++] = (byte) (values[i + 2] >>> 13);
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+
+ buf[off] = (byte) (values[i + 2] << 3); // shared byte, high end: low 5 bits of value 2
+ buf[off++] |= values[i + 3] >>> 44; // shared byte, low end: top 3 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 43; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 35);
+ buf[off++] = (byte) (values[i + 4] >>> 27);
+ buf[off++] = (byte) (values[i + 4] >>> 19);
+ buf[off++] = (byte) (values[i + 4] >>> 11);
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+
+ buf[off] = (byte) (values[i + 4] << 5); // shared byte, high end: low 3 bits of value 4
+ buf[off++] |= values[i + 5] >>> 42; // shared byte, low end: top 5 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 34);
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6); // shared byte, high end: low 2 bits of value 5
+ buf[off++] |= values[i + 6] >>> 41; // shared byte, low end: top 6 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 33);
+ buf[off++] = (byte) (values[i + 6] >>> 25);
+ buf[off++] = (byte) (values[i + 6] >>> 17);
+ buf[off++] = (byte) (values[i + 6] >>> 9);
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+
+ buf[off] = (byte) (values[i + 6] << 7); // shared byte, high end: low 1 bit of value 6
+ buf[off++] |= values[i + 7] >>> 40; // shared byte, low end: top 7 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits48(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 48 bits each, MSB-first into the 48 bytes buf[off..off+47].
+ buf[off++] = (byte) (values[i + 0] >>> 40); // 48 bits = exactly 6 bytes per value, so no byte is shared; bits above bit 47 are dropped by the byte casts
+ buf[off++] = (byte) (values[i + 0] >>> 32);
+ buf[off++] = (byte) (values[i + 0] >>> 24);
+ buf[off++] = (byte) (values[i + 0] >>> 16);
+ buf[off++] = (byte) (values[i + 0] >>> 8);
+ buf[off++] = (byte) (values[i + 0]);
+
+ buf[off++] = (byte) (values[i + 1] >>> 40);
+ buf[off++] = (byte) (values[i + 1] >>> 32);
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+
+ buf[off++] = (byte) (values[i + 2] >>> 40);
+ buf[off++] = (byte) (values[i + 2] >>> 32);
+ buf[off++] = (byte) (values[i + 2] >>> 24);
+ buf[off++] = (byte) (values[i + 2] >>> 16);
+ buf[off++] = (byte) (values[i + 2] >>> 8);
+ buf[off++] = (byte) (values[i + 2]);
+
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+
+ buf[off++] = (byte) (values[i + 4] >>> 40);
+ buf[off++] = (byte) (values[i + 4] >>> 32);
+ buf[off++] = (byte) (values[i + 4] >>> 24);
+ buf[off++] = (byte) (values[i + 4] >>> 16);
+ buf[off++] = (byte) (values[i + 4] >>> 8);
+ buf[off++] = (byte) (values[i + 4]);
+
+ buf[off++] = (byte) (values[i + 5] >>> 40);
+ buf[off++] = (byte) (values[i + 5] >>> 32);
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+
+ buf[off++] = (byte) (values[i + 6] >>> 40);
+ buf[off++] = (byte) (values[i + 6] >>> 32);
+ buf[off++] = (byte) (values[i + 6] >>> 24);
+ buf[off++] = (byte) (values[i + 6] >>> 16);
+ buf[off++] = (byte) (values[i + 6] >>> 8);
+ buf[off++] = (byte) (values[i + 6]);
+
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits49(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 49 bits each, MSB-first into the 49 bytes buf[off..off+48].
+ buf[off++] = (byte) (values[i + 0] >>> 41); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 33);
+ buf[off++] = (byte) (values[i + 0] >>> 25);
+ buf[off++] = (byte) (values[i + 0] >>> 17);
+ buf[off++] = (byte) (values[i + 0] >>> 9);
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+
+ buf[off] = (byte) (values[i + 0] << 7); // shared byte, high end: low 1 bit of value 0
+ buf[off++] |= values[i + 1] >>> 42; // shared byte, low end: top 7 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 48 would be OR-ed into value 0's bit, so inputs are assumed to fit in 49 bits
+ buf[off++] = (byte) (values[i + 1] >>> 34);
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6); // shared byte, high end: low 2 bits of value 1
+ buf[off++] |= values[i + 2] >>> 43; // shared byte, low end: top 6 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 35);
+ buf[off++] = (byte) (values[i + 2] >>> 27);
+ buf[off++] = (byte) (values[i + 2] >>> 19);
+ buf[off++] = (byte) (values[i + 2] >>> 11);
+ buf[off++] = (byte) (values[i + 2] >>> 3);
+
+ buf[off] = (byte) (values[i + 2] << 5); // shared byte, high end: low 3 bits of value 2
+ buf[off++] |= values[i + 3] >>> 44; // shared byte, low end: top 5 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 45; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 37);
+ buf[off++] = (byte) (values[i + 4] >>> 29);
+ buf[off++] = (byte) (values[i + 4] >>> 21);
+ buf[off++] = (byte) (values[i + 4] >>> 13);
+ buf[off++] = (byte) (values[i + 4] >>> 5);
+
+ buf[off] = (byte) (values[i + 4] << 3); // shared byte, high end: low 5 bits of value 4
+ buf[off++] |= values[i + 5] >>> 46; // shared byte, low end: top 3 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 38);
+ buf[off++] = (byte) (values[i + 5] >>> 30);
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2); // shared byte, high end: low 6 bits of value 5
+ buf[off++] |= values[i + 6] >>> 47; // shared byte, low end: top 2 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 39);
+ buf[off++] = (byte) (values[i + 6] >>> 31);
+ buf[off++] = (byte) (values[i + 6] >>> 23);
+ buf[off++] = (byte) (values[i + 6] >>> 15);
+ buf[off++] = (byte) (values[i + 6] >>> 7);
+
+ buf[off] = (byte) (values[i + 6] << 1); // shared byte, high end: low 7 bits of value 6
+ buf[off++] |= values[i + 7] >>> 48; // shared byte, low end: top 1 bit of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits50(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 50 bits each, MSB-first into the 50 bytes buf[off..off+49].
+ buf[off++] = (byte) (values[i + 0] >>> 42); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 34);
+ buf[off++] = (byte) (values[i + 0] >>> 26);
+ buf[off++] = (byte) (values[i + 0] >>> 18);
+ buf[off++] = (byte) (values[i + 0] >>> 10);
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+
+ buf[off] = (byte) (values[i + 0] << 6); // shared byte, high end: low 2 bits of value 0
+ buf[off++] |= values[i + 1] >>> 44; // shared byte, low end: top 6 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 49 would be OR-ed into value 0's bits, so inputs are assumed to fit in 50 bits
+ buf[off++] = (byte) (values[i + 1] >>> 36);
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+
+ buf[off] = (byte) (values[i + 1] << 4); // shared byte, high end: low 4 bits of value 1
+ buf[off++] |= values[i + 2] >>> 46; // shared byte, low end: top 4 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 38);
+ buf[off++] = (byte) (values[i + 2] >>> 30);
+ buf[off++] = (byte) (values[i + 2] >>> 22);
+ buf[off++] = (byte) (values[i + 2] >>> 14);
+ buf[off++] = (byte) (values[i + 2] >>> 6);
+
+ buf[off] = (byte) (values[i + 2] << 2); // shared byte, high end: low 6 bits of value 2
+ buf[off++] |= values[i + 3] >>> 48; // shared byte, low end: top 2 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]); // value 3 ends on a byte boundary (4 * 50 bits = 25 bytes); value 4 starts a fresh byte
+
+ buf[off++] = (byte) (values[i + 4] >>> 42);
+ buf[off++] = (byte) (values[i + 4] >>> 34);
+ buf[off++] = (byte) (values[i + 4] >>> 26);
+ buf[off++] = (byte) (values[i + 4] >>> 18);
+ buf[off++] = (byte) (values[i + 4] >>> 10);
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+
+ buf[off] = (byte) (values[i + 4] << 6); // shared byte, high end: low 2 bits of value 4
+ buf[off++] |= values[i + 5] >>> 44; // shared byte, low end: top 6 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 36);
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+
+ buf[off] = (byte) (values[i + 5] << 4); // shared byte, high end: low 4 bits of value 5
+ buf[off++] |= values[i + 6] >>> 46; // shared byte, low end: top 4 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 38);
+ buf[off++] = (byte) (values[i + 6] >>> 30);
+ buf[off++] = (byte) (values[i + 6] >>> 22);
+ buf[off++] = (byte) (values[i + 6] >>> 14);
+ buf[off++] = (byte) (values[i + 6] >>> 6);
+
+ buf[off] = (byte) (values[i + 6] << 2); // shared byte, high end: low 6 bits of value 6
+ buf[off++] |= values[i + 7] >>> 48; // shared byte, low end: top 2 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits51(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 51 bits each, MSB-first into the 51 bytes buf[off..off+50].
+ buf[off++] = (byte) (values[i + 0] >>> 43); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 35);
+ buf[off++] = (byte) (values[i + 0] >>> 27);
+ buf[off++] = (byte) (values[i + 0] >>> 19);
+ buf[off++] = (byte) (values[i + 0] >>> 11);
+ buf[off++] = (byte) (values[i + 0] >>> 3);
+
+ buf[off] = (byte) (values[i + 0] << 5); // shared byte, high end: low 3 bits of value 0
+ buf[off++] |= values[i + 1] >>> 46; // shared byte, low end: top 5 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 50 would be OR-ed into value 0's bits, so inputs are assumed to fit in 51 bits
+ buf[off++] = (byte) (values[i + 1] >>> 38);
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+
+ buf[off] = (byte) (values[i + 1] << 2); // shared byte, high end: low 6 bits of value 1
+ buf[off++] |= values[i + 2] >>> 49; // shared byte, low end: top 2 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 41);
+ buf[off++] = (byte) (values[i + 2] >>> 33);
+ buf[off++] = (byte) (values[i + 2] >>> 25);
+ buf[off++] = (byte) (values[i + 2] >>> 17);
+ buf[off++] = (byte) (values[i + 2] >>> 9);
+ buf[off++] = (byte) (values[i + 2] >>> 1);
+
+ buf[off] = (byte) (values[i + 2] << 7); // shared byte, high end: low 1 bit of value 2
+ buf[off++] |= values[i + 3] >>> 44; // shared byte, low end: top 7 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 47; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 39);
+ buf[off++] = (byte) (values[i + 4] >>> 31);
+ buf[off++] = (byte) (values[i + 4] >>> 23);
+ buf[off++] = (byte) (values[i + 4] >>> 15);
+ buf[off++] = (byte) (values[i + 4] >>> 7);
+
+ buf[off] = (byte) (values[i + 4] << 1); // shared byte, high end: low 7 bits of value 4
+ buf[off++] |= values[i + 5] >>> 50; // shared byte, low end: top 1 bit of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 42);
+ buf[off++] = (byte) (values[i + 5] >>> 34);
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+
+ buf[off] = (byte) (values[i + 5] << 6); // shared byte, high end: low 2 bits of value 5
+ buf[off++] |= values[i + 6] >>> 45; // shared byte, low end: top 6 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 37);
+ buf[off++] = (byte) (values[i + 6] >>> 29);
+ buf[off++] = (byte) (values[i + 6] >>> 21);
+ buf[off++] = (byte) (values[i + 6] >>> 13);
+ buf[off++] = (byte) (values[i + 6] >>> 5);
+
+ buf[off] = (byte) (values[i + 6] << 3); // shared byte, high end: low 5 bits of value 6
+ buf[off++] |= values[i + 7] >>> 48; // shared byte, low end: top 3 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits52(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 52 bits each, MSB-first into the 52 bytes buf[off..off+51].
+ buf[off++] = (byte) (values[i + 0] >>> 44); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 36);
+ buf[off++] = (byte) (values[i + 0] >>> 28);
+ buf[off++] = (byte) (values[i + 0] >>> 20);
+ buf[off++] = (byte) (values[i + 0] >>> 12);
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+
+ buf[off] = (byte) (values[i + 0] << 4); // shared byte, high end: low 4 bits of value 0
+ buf[off++] |= values[i + 1] >>> 48; // shared byte, low end: top 4 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 51 would be OR-ed into value 0's bits, so inputs are assumed to fit in 52 bits
+ buf[off++] = (byte) (values[i + 1] >>> 40);
+ buf[off++] = (byte) (values[i + 1] >>> 32);
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]); // each pair of values fills exactly 13 bytes, so value 2 starts a fresh byte
+
+ buf[off++] = (byte) (values[i + 2] >>> 44);
+ buf[off++] = (byte) (values[i + 2] >>> 36);
+ buf[off++] = (byte) (values[i + 2] >>> 28);
+ buf[off++] = (byte) (values[i + 2] >>> 20);
+ buf[off++] = (byte) (values[i + 2] >>> 12);
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+
+ buf[off] = (byte) (values[i + 2] << 4); // shared byte, high end: low 4 bits of value 2
+ buf[off++] |= values[i + 3] >>> 48; // shared byte, low end: top 4 bits of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]); // value 3 ends on a byte boundary
+
+ buf[off++] = (byte) (values[i + 4] >>> 44);
+ buf[off++] = (byte) (values[i + 4] >>> 36);
+ buf[off++] = (byte) (values[i + 4] >>> 28);
+ buf[off++] = (byte) (values[i + 4] >>> 20);
+ buf[off++] = (byte) (values[i + 4] >>> 12);
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+
+ buf[off] = (byte) (values[i + 4] << 4); // shared byte, high end: low 4 bits of value 4
+ buf[off++] |= values[i + 5] >>> 48; // shared byte, low end: top 4 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 40);
+ buf[off++] = (byte) (values[i + 5] >>> 32);
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]); // value 5 ends on a byte boundary
+
+ buf[off++] = (byte) (values[i + 6] >>> 44);
+ buf[off++] = (byte) (values[i + 6] >>> 36);
+ buf[off++] = (byte) (values[i + 6] >>> 28);
+ buf[off++] = (byte) (values[i + 6] >>> 20);
+ buf[off++] = (byte) (values[i + 6] >>> 12);
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+
+ buf[off] = (byte) (values[i + 6] << 4); // shared byte, high end: low 4 bits of value 6
+ buf[off++] |= values[i + 7] >>> 48; // shared byte, low end: top 4 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits53(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 53 bits each, MSB-first into the 53 bytes buf[off..off+52].
+ buf[off++] = (byte) (values[i + 0] >>> 45); // value 0 starts byte-aligned: its most significant 8 bits
+ buf[off++] = (byte) (values[i + 0] >>> 37);
+ buf[off++] = (byte) (values[i + 0] >>> 29);
+ buf[off++] = (byte) (values[i + 0] >>> 21);
+ buf[off++] = (byte) (values[i + 0] >>> 13);
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+
+ buf[off] = (byte) (values[i + 0] << 3); // shared byte, high end: low 5 bits of value 0
+ buf[off++] |= values[i + 1] >>> 50; // shared byte, low end: top 3 bits of value 1; |= implicitly narrows the long to byte, and bits above bit 52 would be OR-ed into value 0's bits, so inputs are assumed to fit in 53 bits
+ buf[off++] = (byte) (values[i + 1] >>> 42);
+ buf[off++] = (byte) (values[i + 1] >>> 34);
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+
+ buf[off] = (byte) (values[i + 1] << 6); // shared byte, high end: low 2 bits of value 1
+ buf[off++] |= values[i + 2] >>> 47; // shared byte, low end: top 6 bits of value 2
+ buf[off++] = (byte) (values[i + 2] >>> 39);
+ buf[off++] = (byte) (values[i + 2] >>> 31);
+ buf[off++] = (byte) (values[i + 2] >>> 23);
+ buf[off++] = (byte) (values[i + 2] >>> 15);
+ buf[off++] = (byte) (values[i + 2] >>> 7);
+
+ buf[off] = (byte) (values[i + 2] << 1); // shared byte, high end: low 7 bits of value 2
+ buf[off++] |= values[i + 3] >>> 52; // shared byte, low end: top 1 bit of value 3
+ buf[off++] = (byte) (values[i + 3] >>> 44);
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+
+ buf[off] = (byte) (values[i + 3] << 4); // shared byte, high end: low 4 bits of value 3
+ buf[off++] |= values[i + 4] >>> 49; // shared byte, low end: top 4 bits of value 4
+ buf[off++] = (byte) (values[i + 4] >>> 41);
+ buf[off++] = (byte) (values[i + 4] >>> 33);
+ buf[off++] = (byte) (values[i + 4] >>> 25);
+ buf[off++] = (byte) (values[i + 4] >>> 17);
+ buf[off++] = (byte) (values[i + 4] >>> 9);
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+
+ buf[off] = (byte) (values[i + 4] << 7); // shared byte, high end: low 1 bit of value 4
+ buf[off++] |= values[i + 5] >>> 46; // shared byte, low end: top 7 bits of value 5
+ buf[off++] = (byte) (values[i + 5] >>> 38);
+ buf[off++] = (byte) (values[i + 5] >>> 30);
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+
+ buf[off] = (byte) (values[i + 5] << 2); // shared byte, high end: low 6 bits of value 5
+ buf[off++] |= values[i + 6] >>> 51; // shared byte, low end: top 2 bits of value 6
+ buf[off++] = (byte) (values[i + 6] >>> 43);
+ buf[off++] = (byte) (values[i + 6] >>> 35);
+ buf[off++] = (byte) (values[i + 6] >>> 27);
+ buf[off++] = (byte) (values[i + 6] >>> 19);
+ buf[off++] = (byte) (values[i + 6] >>> 11);
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+
+ buf[off] = (byte) (values[i + 6] << 5); // shared byte, high end: low 3 bits of value 6
+ buf[off++] |= values[i + 7] >>> 48; // shared byte, low end: top 5 bits of value 7
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]); // final byte, written without advancing the local off
+ }
+
+ static void packBits54(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 54 bits each, MSB-first into 54 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 46); // NOTE: the |= lines below do not mask, so a value wider than 54 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 38);
+ buf[off++] = (byte) (values[i + 0] >>> 30);
+ buf[off++] = (byte) (values[i + 0] >>> 22);
+ buf[off++] = (byte) (values[i + 0] >>> 14);
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+ // shared byte: 6 low bits of values[i + 0] | 2 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 52;
+ buf[off++] = (byte) (values[i + 1] >>> 44);
+ buf[off++] = (byte) (values[i + 1] >>> 36);
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+ // shared byte: 4 low bits of values[i + 1] | 4 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 50;
+ buf[off++] = (byte) (values[i + 2] >>> 42);
+ buf[off++] = (byte) (values[i + 2] >>> 34);
+ buf[off++] = (byte) (values[i + 2] >>> 26);
+ buf[off++] = (byte) (values[i + 2] >>> 18);
+ buf[off++] = (byte) (values[i + 2] >>> 10);
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+ // shared byte: 2 low bits of values[i + 2] | 6 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3] >>> 48;
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+ // byte-aligned again after 4 values (27 bytes); values[i + 4..i + 7] repeat the same pattern
+ buf[off++] = (byte) (values[i + 4] >>> 46);
+ buf[off++] = (byte) (values[i + 4] >>> 38);
+ buf[off++] = (byte) (values[i + 4] >>> 30);
+ buf[off++] = (byte) (values[i + 4] >>> 22);
+ buf[off++] = (byte) (values[i + 4] >>> 14);
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+ // shared byte: 6 low bits of values[i + 4] | 2 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 52;
+ buf[off++] = (byte) (values[i + 5] >>> 44);
+ buf[off++] = (byte) (values[i + 5] >>> 36);
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+ // shared byte: 4 low bits of values[i + 5] | 4 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 50;
+ buf[off++] = (byte) (values[i + 6] >>> 42);
+ buf[off++] = (byte) (values[i + 6] >>> 34);
+ buf[off++] = (byte) (values[i + 6] >>> 26);
+ buf[off++] = (byte) (values[i + 6] >>> 18);
+ buf[off++] = (byte) (values[i + 6] >>> 10);
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+ // shared byte: 2 low bits of values[i + 6] | 6 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off++] |= values[i + 7] >>> 48;
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits55(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 55 bits each, MSB-first into 55 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 47); // NOTE: the |= lines below do not mask, so a value wider than 55 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 39);
+ buf[off++] = (byte) (values[i + 0] >>> 31);
+ buf[off++] = (byte) (values[i + 0] >>> 23);
+ buf[off++] = (byte) (values[i + 0] >>> 15);
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+ // shared byte: 7 low bits of values[i + 0] | 1 high bit of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 1);
+ buf[off++] |= values[i + 1] >>> 54;
+ buf[off++] = (byte) (values[i + 1] >>> 46);
+ buf[off++] = (byte) (values[i + 1] >>> 38);
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+ // shared byte: 6 low bits of values[i + 1] | 2 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 53;
+ buf[off++] = (byte) (values[i + 2] >>> 45);
+ buf[off++] = (byte) (values[i + 2] >>> 37);
+ buf[off++] = (byte) (values[i + 2] >>> 29);
+ buf[off++] = (byte) (values[i + 2] >>> 21);
+ buf[off++] = (byte) (values[i + 2] >>> 13);
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+ // shared byte: 5 low bits of values[i + 2] | 3 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 3);
+ buf[off++] |= values[i + 3] >>> 52;
+ buf[off++] = (byte) (values[i + 3] >>> 44);
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+ // shared byte: 4 low bits of values[i + 3] | 4 high bits of values[i + 4]
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 51;
+ buf[off++] = (byte) (values[i + 4] >>> 43);
+ buf[off++] = (byte) (values[i + 4] >>> 35);
+ buf[off++] = (byte) (values[i + 4] >>> 27);
+ buf[off++] = (byte) (values[i + 4] >>> 19);
+ buf[off++] = (byte) (values[i + 4] >>> 11);
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+ // shared byte: 3 low bits of values[i + 4] | 5 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 5);
+ buf[off++] |= values[i + 5] >>> 50;
+ buf[off++] = (byte) (values[i + 5] >>> 42);
+ buf[off++] = (byte) (values[i + 5] >>> 34);
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+ // shared byte: 2 low bits of values[i + 5] | 6 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 49;
+ buf[off++] = (byte) (values[i + 6] >>> 41);
+ buf[off++] = (byte) (values[i + 6] >>> 33);
+ buf[off++] = (byte) (values[i + 6] >>> 25);
+ buf[off++] = (byte) (values[i + 6] >>> 17);
+ buf[off++] = (byte) (values[i + 6] >>> 9);
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+ // shared byte: 1 low bit of values[i + 6] | 7 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 7);
+ buf[off++] |= values[i + 7] >>> 48;
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits56(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 56 bits each, MSB-first into 56 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 48); // 56 bits = 7 whole bytes per value, so no byte is shared; bits 56..63 of each value are never written.
+ buf[off++] = (byte) (values[i + 0] >>> 40);
+ buf[off++] = (byte) (values[i + 0] >>> 32);
+ buf[off++] = (byte) (values[i + 0] >>> 24);
+ buf[off++] = (byte) (values[i + 0] >>> 16);
+ buf[off++] = (byte) (values[i + 0] >>> 8);
+ buf[off++] = (byte) (values[i + 0]);
+ // values[i + 1]: next 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 1] >>> 48);
+ buf[off++] = (byte) (values[i + 1] >>> 40);
+ buf[off++] = (byte) (values[i + 1] >>> 32);
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+ // values[i + 2]: next 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 2] >>> 48);
+ buf[off++] = (byte) (values[i + 2] >>> 40);
+ buf[off++] = (byte) (values[i + 2] >>> 32);
+ buf[off++] = (byte) (values[i + 2] >>> 24);
+ buf[off++] = (byte) (values[i + 2] >>> 16);
+ buf[off++] = (byte) (values[i + 2] >>> 8);
+ buf[off++] = (byte) (values[i + 2]);
+ // values[i + 3]: next 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 3] >>> 48);
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+ // values[i + 4]: next 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 4] >>> 48);
+ buf[off++] = (byte) (values[i + 4] >>> 40);
+ buf[off++] = (byte) (values[i + 4] >>> 32);
+ buf[off++] = (byte) (values[i + 4] >>> 24);
+ buf[off++] = (byte) (values[i + 4] >>> 16);
+ buf[off++] = (byte) (values[i + 4] >>> 8);
+ buf[off++] = (byte) (values[i + 4]);
+ // values[i + 5]: next 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 5] >>> 48);
+ buf[off++] = (byte) (values[i + 5] >>> 40);
+ buf[off++] = (byte) (values[i + 5] >>> 32);
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+ // values[i + 6]: next 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 6] >>> 48);
+ buf[off++] = (byte) (values[i + 6] >>> 40);
+ buf[off++] = (byte) (values[i + 6] >>> 32);
+ buf[off++] = (byte) (values[i + 6] >>> 24);
+ buf[off++] = (byte) (values[i + 6] >>> 16);
+ buf[off++] = (byte) (values[i + 6] >>> 8);
+ buf[off++] = (byte) (values[i + 6]);
+ // values[i + 7]: last 7 bytes, high byte first
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits57(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 57 bits each, MSB-first into 57 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 49); // NOTE: the |= lines below do not mask, so a value wider than 57 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 41);
+ buf[off++] = (byte) (values[i + 0] >>> 33);
+ buf[off++] = (byte) (values[i + 0] >>> 25);
+ buf[off++] = (byte) (values[i + 0] >>> 17);
+ buf[off++] = (byte) (values[i + 0] >>> 9);
+ buf[off++] = (byte) (values[i + 0] >>> 1);
+ // shared byte: 1 low bit of values[i + 0] | 7 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 7);
+ buf[off++] |= values[i + 1] >>> 50;
+ buf[off++] = (byte) (values[i + 1] >>> 42);
+ buf[off++] = (byte) (values[i + 1] >>> 34);
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+ // shared byte: 2 low bits of values[i + 1] | 6 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 51;
+ buf[off++] = (byte) (values[i + 2] >>> 43);
+ buf[off++] = (byte) (values[i + 2] >>> 35);
+ buf[off++] = (byte) (values[i + 2] >>> 27);
+ buf[off++] = (byte) (values[i + 2] >>> 19);
+ buf[off++] = (byte) (values[i + 2] >>> 11);
+ buf[off++] = (byte) (values[i + 2] >>> 3);
+ // shared byte: 3 low bits of values[i + 2] | 5 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 5);
+ buf[off++] |= values[i + 3] >>> 52;
+ buf[off++] = (byte) (values[i + 3] >>> 44);
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+ // shared byte: 4 low bits of values[i + 3] | 4 high bits of values[i + 4]
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 53;
+ buf[off++] = (byte) (values[i + 4] >>> 45);
+ buf[off++] = (byte) (values[i + 4] >>> 37);
+ buf[off++] = (byte) (values[i + 4] >>> 29);
+ buf[off++] = (byte) (values[i + 4] >>> 21);
+ buf[off++] = (byte) (values[i + 4] >>> 13);
+ buf[off++] = (byte) (values[i + 4] >>> 5);
+ // shared byte: 5 low bits of values[i + 4] | 3 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 3);
+ buf[off++] |= values[i + 5] >>> 54;
+ buf[off++] = (byte) (values[i + 5] >>> 46);
+ buf[off++] = (byte) (values[i + 5] >>> 38);
+ buf[off++] = (byte) (values[i + 5] >>> 30);
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+ // shared byte: 6 low bits of values[i + 5] | 2 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 55;
+ buf[off++] = (byte) (values[i + 6] >>> 47);
+ buf[off++] = (byte) (values[i + 6] >>> 39);
+ buf[off++] = (byte) (values[i + 6] >>> 31);
+ buf[off++] = (byte) (values[i + 6] >>> 23);
+ buf[off++] = (byte) (values[i + 6] >>> 15);
+ buf[off++] = (byte) (values[i + 6] >>> 7);
+ // shared byte: 7 low bits of values[i + 6] | 1 high bit of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 1);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits58(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 58 bits each, MSB-first into 58 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 50); // NOTE: the |= lines below do not mask, so a value wider than 58 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 42);
+ buf[off++] = (byte) (values[i + 0] >>> 34);
+ buf[off++] = (byte) (values[i + 0] >>> 26);
+ buf[off++] = (byte) (values[i + 0] >>> 18);
+ buf[off++] = (byte) (values[i + 0] >>> 10);
+ buf[off++] = (byte) (values[i + 0] >>> 2);
+ // shared byte: 2 low bits of values[i + 0] | 6 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 6);
+ buf[off++] |= values[i + 1] >>> 52;
+ buf[off++] = (byte) (values[i + 1] >>> 44);
+ buf[off++] = (byte) (values[i + 1] >>> 36);
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+ // shared byte: 4 low bits of values[i + 1] | 4 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 54;
+ buf[off++] = (byte) (values[i + 2] >>> 46);
+ buf[off++] = (byte) (values[i + 2] >>> 38);
+ buf[off++] = (byte) (values[i + 2] >>> 30);
+ buf[off++] = (byte) (values[i + 2] >>> 22);
+ buf[off++] = (byte) (values[i + 2] >>> 14);
+ buf[off++] = (byte) (values[i + 2] >>> 6);
+ // shared byte: 6 low bits of values[i + 2] | 2 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 2);
+ buf[off++] |= values[i + 3] >>> 56;
+ buf[off++] = (byte) (values[i + 3] >>> 48);
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+ // byte-aligned again after 4 values (29 bytes); values[i + 4..i + 7] repeat the same pattern
+ buf[off++] = (byte) (values[i + 4] >>> 50);
+ buf[off++] = (byte) (values[i + 4] >>> 42);
+ buf[off++] = (byte) (values[i + 4] >>> 34);
+ buf[off++] = (byte) (values[i + 4] >>> 26);
+ buf[off++] = (byte) (values[i + 4] >>> 18);
+ buf[off++] = (byte) (values[i + 4] >>> 10);
+ buf[off++] = (byte) (values[i + 4] >>> 2);
+ // shared byte: 2 low bits of values[i + 4] | 6 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 6);
+ buf[off++] |= values[i + 5] >>> 52;
+ buf[off++] = (byte) (values[i + 5] >>> 44);
+ buf[off++] = (byte) (values[i + 5] >>> 36);
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+ // shared byte: 4 low bits of values[i + 5] | 4 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 54;
+ buf[off++] = (byte) (values[i + 6] >>> 46);
+ buf[off++] = (byte) (values[i + 6] >>> 38);
+ buf[off++] = (byte) (values[i + 6] >>> 30);
+ buf[off++] = (byte) (values[i + 6] >>> 22);
+ buf[off++] = (byte) (values[i + 6] >>> 14);
+ buf[off++] = (byte) (values[i + 6] >>> 6);
+ // shared byte: 6 low bits of values[i + 6] | 2 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 2);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits59(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 59 bits each, MSB-first into 59 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 51); // NOTE: the |= lines below do not mask, so a value wider than 59 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 43);
+ buf[off++] = (byte) (values[i + 0] >>> 35);
+ buf[off++] = (byte) (values[i + 0] >>> 27);
+ buf[off++] = (byte) (values[i + 0] >>> 19);
+ buf[off++] = (byte) (values[i + 0] >>> 11);
+ buf[off++] = (byte) (values[i + 0] >>> 3);
+ // shared byte: 3 low bits of values[i + 0] | 5 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 5);
+ buf[off++] |= values[i + 1] >>> 54;
+ buf[off++] = (byte) (values[i + 1] >>> 46);
+ buf[off++] = (byte) (values[i + 1] >>> 38);
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+ // shared byte: 6 low bits of values[i + 1] | 2 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 57;
+ buf[off++] = (byte) (values[i + 2] >>> 49);
+ buf[off++] = (byte) (values[i + 2] >>> 41);
+ buf[off++] = (byte) (values[i + 2] >>> 33);
+ buf[off++] = (byte) (values[i + 2] >>> 25);
+ buf[off++] = (byte) (values[i + 2] >>> 17);
+ buf[off++] = (byte) (values[i + 2] >>> 9);
+ buf[off++] = (byte) (values[i + 2] >>> 1);
+ // shared byte: 1 low bit of values[i + 2] | 7 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 7);
+ buf[off++] |= values[i + 3] >>> 52;
+ buf[off++] = (byte) (values[i + 3] >>> 44);
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+ // shared byte: 4 low bits of values[i + 3] | 4 high bits of values[i + 4]
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 55;
+ buf[off++] = (byte) (values[i + 4] >>> 47);
+ buf[off++] = (byte) (values[i + 4] >>> 39);
+ buf[off++] = (byte) (values[i + 4] >>> 31);
+ buf[off++] = (byte) (values[i + 4] >>> 23);
+ buf[off++] = (byte) (values[i + 4] >>> 15);
+ buf[off++] = (byte) (values[i + 4] >>> 7);
+ // shared byte: 7 low bits of values[i + 4] | 1 high bit of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 1);
+ buf[off++] |= values[i + 5] >>> 58;
+ buf[off++] = (byte) (values[i + 5] >>> 50);
+ buf[off++] = (byte) (values[i + 5] >>> 42);
+ buf[off++] = (byte) (values[i + 5] >>> 34);
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+ // shared byte: 2 low bits of values[i + 5] | 6 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 53;
+ buf[off++] = (byte) (values[i + 6] >>> 45);
+ buf[off++] = (byte) (values[i + 6] >>> 37);
+ buf[off++] = (byte) (values[i + 6] >>> 29);
+ buf[off++] = (byte) (values[i + 6] >>> 21);
+ buf[off++] = (byte) (values[i + 6] >>> 13);
+ buf[off++] = (byte) (values[i + 6] >>> 5);
+ // shared byte: 5 low bits of values[i + 6] | 3 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 3);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits60(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 60 bits each, MSB-first into 60 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 52); // NOTE: the |= lines below do not mask, so a value wider than 60 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 44);
+ buf[off++] = (byte) (values[i + 0] >>> 36);
+ buf[off++] = (byte) (values[i + 0] >>> 28);
+ buf[off++] = (byte) (values[i + 0] >>> 20);
+ buf[off++] = (byte) (values[i + 0] >>> 12);
+ buf[off++] = (byte) (values[i + 0] >>> 4);
+ // shared byte: 4 low bits of values[i + 0] | 4 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 4);
+ buf[off++] |= values[i + 1] >>> 56;
+ buf[off++] = (byte) (values[i + 1] >>> 48);
+ buf[off++] = (byte) (values[i + 1] >>> 40);
+ buf[off++] = (byte) (values[i + 1] >>> 32);
+ buf[off++] = (byte) (values[i + 1] >>> 24);
+ buf[off++] = (byte) (values[i + 1] >>> 16);
+ buf[off++] = (byte) (values[i + 1] >>> 8);
+ buf[off++] = (byte) (values[i + 1]);
+ // byte-aligned again after each pair of values (15 bytes); values[i + 2], values[i + 3] repeat the pattern
+ buf[off++] = (byte) (values[i + 2] >>> 52);
+ buf[off++] = (byte) (values[i + 2] >>> 44);
+ buf[off++] = (byte) (values[i + 2] >>> 36);
+ buf[off++] = (byte) (values[i + 2] >>> 28);
+ buf[off++] = (byte) (values[i + 2] >>> 20);
+ buf[off++] = (byte) (values[i + 2] >>> 12);
+ buf[off++] = (byte) (values[i + 2] >>> 4);
+ // shared byte: 4 low bits of values[i + 2] | 4 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 4);
+ buf[off++] |= values[i + 3] >>> 56;
+ buf[off++] = (byte) (values[i + 3] >>> 48);
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+ // byte-aligned: values[i + 4], values[i + 5]
+ buf[off++] = (byte) (values[i + 4] >>> 52);
+ buf[off++] = (byte) (values[i + 4] >>> 44);
+ buf[off++] = (byte) (values[i + 4] >>> 36);
+ buf[off++] = (byte) (values[i + 4] >>> 28);
+ buf[off++] = (byte) (values[i + 4] >>> 20);
+ buf[off++] = (byte) (values[i + 4] >>> 12);
+ buf[off++] = (byte) (values[i + 4] >>> 4);
+ // shared byte: 4 low bits of values[i + 4] | 4 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 4);
+ buf[off++] |= values[i + 5] >>> 56;
+ buf[off++] = (byte) (values[i + 5] >>> 48);
+ buf[off++] = (byte) (values[i + 5] >>> 40);
+ buf[off++] = (byte) (values[i + 5] >>> 32);
+ buf[off++] = (byte) (values[i + 5] >>> 24);
+ buf[off++] = (byte) (values[i + 5] >>> 16);
+ buf[off++] = (byte) (values[i + 5] >>> 8);
+ buf[off++] = (byte) (values[i + 5]);
+ // byte-aligned: values[i + 6], values[i + 7]
+ buf[off++] = (byte) (values[i + 6] >>> 52);
+ buf[off++] = (byte) (values[i + 6] >>> 44);
+ buf[off++] = (byte) (values[i + 6] >>> 36);
+ buf[off++] = (byte) (values[i + 6] >>> 28);
+ buf[off++] = (byte) (values[i + 6] >>> 20);
+ buf[off++] = (byte) (values[i + 6] >>> 12);
+ buf[off++] = (byte) (values[i + 6] >>> 4);
+ // shared byte: 4 low bits of values[i + 6] | 4 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 4);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits61(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 61 bits each, MSB-first into 61 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 53); // NOTE: the |= lines below do not mask, so a value wider than 61 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 45);
+ buf[off++] = (byte) (values[i + 0] >>> 37);
+ buf[off++] = (byte) (values[i + 0] >>> 29);
+ buf[off++] = (byte) (values[i + 0] >>> 21);
+ buf[off++] = (byte) (values[i + 0] >>> 13);
+ buf[off++] = (byte) (values[i + 0] >>> 5);
+ // shared byte: 5 low bits of values[i + 0] | 3 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 3);
+ buf[off++] |= values[i + 1] >>> 58;
+ buf[off++] = (byte) (values[i + 1] >>> 50);
+ buf[off++] = (byte) (values[i + 1] >>> 42);
+ buf[off++] = (byte) (values[i + 1] >>> 34);
+ buf[off++] = (byte) (values[i + 1] >>> 26);
+ buf[off++] = (byte) (values[i + 1] >>> 18);
+ buf[off++] = (byte) (values[i + 1] >>> 10);
+ buf[off++] = (byte) (values[i + 1] >>> 2);
+ // shared byte: 2 low bits of values[i + 1] | 6 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 6);
+ buf[off++] |= values[i + 2] >>> 55;
+ buf[off++] = (byte) (values[i + 2] >>> 47);
+ buf[off++] = (byte) (values[i + 2] >>> 39);
+ buf[off++] = (byte) (values[i + 2] >>> 31);
+ buf[off++] = (byte) (values[i + 2] >>> 23);
+ buf[off++] = (byte) (values[i + 2] >>> 15);
+ buf[off++] = (byte) (values[i + 2] >>> 7);
+ // shared byte: 7 low bits of values[i + 2] | 1 high bit of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 1);
+ buf[off++] |= values[i + 3] >>> 60;
+ buf[off++] = (byte) (values[i + 3] >>> 52);
+ buf[off++] = (byte) (values[i + 3] >>> 44);
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+ // shared byte: 4 low bits of values[i + 3] | 4 high bits of values[i + 4]
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 57;
+ buf[off++] = (byte) (values[i + 4] >>> 49);
+ buf[off++] = (byte) (values[i + 4] >>> 41);
+ buf[off++] = (byte) (values[i + 4] >>> 33);
+ buf[off++] = (byte) (values[i + 4] >>> 25);
+ buf[off++] = (byte) (values[i + 4] >>> 17);
+ buf[off++] = (byte) (values[i + 4] >>> 9);
+ buf[off++] = (byte) (values[i + 4] >>> 1);
+ // shared byte: 1 low bit of values[i + 4] | 7 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 7);
+ buf[off++] |= values[i + 5] >>> 54;
+ buf[off++] = (byte) (values[i + 5] >>> 46);
+ buf[off++] = (byte) (values[i + 5] >>> 38);
+ buf[off++] = (byte) (values[i + 5] >>> 30);
+ buf[off++] = (byte) (values[i + 5] >>> 22);
+ buf[off++] = (byte) (values[i + 5] >>> 14);
+ buf[off++] = (byte) (values[i + 5] >>> 6);
+ // shared byte: 6 low bits of values[i + 5] | 2 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 2);
+ buf[off++] |= values[i + 6] >>> 59;
+ buf[off++] = (byte) (values[i + 6] >>> 51);
+ buf[off++] = (byte) (values[i + 6] >>> 43);
+ buf[off++] = (byte) (values[i + 6] >>> 35);
+ buf[off++] = (byte) (values[i + 6] >>> 27);
+ buf[off++] = (byte) (values[i + 6] >>> 19);
+ buf[off++] = (byte) (values[i + 6] >>> 11);
+ buf[off++] = (byte) (values[i + 6] >>> 3);
+ // shared byte: 3 low bits of values[i + 6] | 5 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 5);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits62(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 62 bits each, MSB-first into 62 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 54); // NOTE: the |= lines below do not mask, so a value wider than 62 bits would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 46);
+ buf[off++] = (byte) (values[i + 0] >>> 38);
+ buf[off++] = (byte) (values[i + 0] >>> 30);
+ buf[off++] = (byte) (values[i + 0] >>> 22);
+ buf[off++] = (byte) (values[i + 0] >>> 14);
+ buf[off++] = (byte) (values[i + 0] >>> 6);
+ // shared byte: 6 low bits of values[i + 0] | 2 high bits of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 2);
+ buf[off++] |= values[i + 1] >>> 60;
+ buf[off++] = (byte) (values[i + 1] >>> 52);
+ buf[off++] = (byte) (values[i + 1] >>> 44);
+ buf[off++] = (byte) (values[i + 1] >>> 36);
+ buf[off++] = (byte) (values[i + 1] >>> 28);
+ buf[off++] = (byte) (values[i + 1] >>> 20);
+ buf[off++] = (byte) (values[i + 1] >>> 12);
+ buf[off++] = (byte) (values[i + 1] >>> 4);
+ // shared byte: 4 low bits of values[i + 1] | 4 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 4);
+ buf[off++] |= values[i + 2] >>> 58;
+ buf[off++] = (byte) (values[i + 2] >>> 50);
+ buf[off++] = (byte) (values[i + 2] >>> 42);
+ buf[off++] = (byte) (values[i + 2] >>> 34);
+ buf[off++] = (byte) (values[i + 2] >>> 26);
+ buf[off++] = (byte) (values[i + 2] >>> 18);
+ buf[off++] = (byte) (values[i + 2] >>> 10);
+ buf[off++] = (byte) (values[i + 2] >>> 2);
+ // shared byte: 2 low bits of values[i + 2] | 6 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 6);
+ buf[off++] |= values[i + 3] >>> 56;
+ buf[off++] = (byte) (values[i + 3] >>> 48);
+ buf[off++] = (byte) (values[i + 3] >>> 40);
+ buf[off++] = (byte) (values[i + 3] >>> 32);
+ buf[off++] = (byte) (values[i + 3] >>> 24);
+ buf[off++] = (byte) (values[i + 3] >>> 16);
+ buf[off++] = (byte) (values[i + 3] >>> 8);
+ buf[off++] = (byte) (values[i + 3]);
+ // byte-aligned again after 4 values (31 bytes); values[i + 4..i + 7] repeat the same pattern
+ buf[off++] = (byte) (values[i + 4] >>> 54);
+ buf[off++] = (byte) (values[i + 4] >>> 46);
+ buf[off++] = (byte) (values[i + 4] >>> 38);
+ buf[off++] = (byte) (values[i + 4] >>> 30);
+ buf[off++] = (byte) (values[i + 4] >>> 22);
+ buf[off++] = (byte) (values[i + 4] >>> 14);
+ buf[off++] = (byte) (values[i + 4] >>> 6);
+ // shared byte: 6 low bits of values[i + 4] | 2 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 2);
+ buf[off++] |= values[i + 5] >>> 60;
+ buf[off++] = (byte) (values[i + 5] >>> 52);
+ buf[off++] = (byte) (values[i + 5] >>> 44);
+ buf[off++] = (byte) (values[i + 5] >>> 36);
+ buf[off++] = (byte) (values[i + 5] >>> 28);
+ buf[off++] = (byte) (values[i + 5] >>> 20);
+ buf[off++] = (byte) (values[i + 5] >>> 12);
+ buf[off++] = (byte) (values[i + 5] >>> 4);
+ // shared byte: 4 low bits of values[i + 5] | 4 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 4);
+ buf[off++] |= values[i + 6] >>> 58;
+ buf[off++] = (byte) (values[i + 6] >>> 50);
+ buf[off++] = (byte) (values[i + 6] >>> 42);
+ buf[off++] = (byte) (values[i + 6] >>> 34);
+ buf[off++] = (byte) (values[i + 6] >>> 26);
+ buf[off++] = (byte) (values[i + 6] >>> 18);
+ buf[off++] = (byte) (values[i + 6] >>> 10);
+ buf[off++] = (byte) (values[i + 6] >>> 2);
+ // shared byte: 2 low bits of values[i + 6] | 6 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 6);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) (values[i + 7]);
+ }
+
+ static void packBits63(final long[] values, final int i, final byte[] buf, int off) { // Packs values[i..i+7], 63 bits each, MSB-first into 63 consecutive bytes starting at buf[off].
+ buf[off++] = (byte) (values[i + 0] >>> 55); // NOTE: the |= lines below do not mask, so a value with bit 63 set would corrupt the previous value's low bits.
+ buf[off++] = (byte) (values[i + 0] >>> 47);
+ buf[off++] = (byte) (values[i + 0] >>> 39);
+ buf[off++] = (byte) (values[i + 0] >>> 31);
+ buf[off++] = (byte) (values[i + 0] >>> 23);
+ buf[off++] = (byte) (values[i + 0] >>> 15);
+ buf[off++] = (byte) (values[i + 0] >>> 7);
+ // shared byte: 7 low bits of values[i + 0] | 1 high bit of values[i + 1]
+ buf[off] = (byte) (values[i + 0] << 1);
+ buf[off++] |= values[i + 1] >>> 62;
+ buf[off++] = (byte) (values[i + 1] >>> 54);
+ buf[off++] = (byte) (values[i + 1] >>> 46);
+ buf[off++] = (byte) (values[i + 1] >>> 38);
+ buf[off++] = (byte) (values[i + 1] >>> 30);
+ buf[off++] = (byte) (values[i + 1] >>> 22);
+ buf[off++] = (byte) (values[i + 1] >>> 14);
+ buf[off++] = (byte) (values[i + 1] >>> 6);
+ // shared byte: 6 low bits of values[i + 1] | 2 high bits of values[i + 2]
+ buf[off] = (byte) (values[i + 1] << 2);
+ buf[off++] |= values[i + 2] >>> 61;
+ buf[off++] = (byte) (values[i + 2] >>> 53);
+ buf[off++] = (byte) (values[i + 2] >>> 45);
+ buf[off++] = (byte) (values[i + 2] >>> 37);
+ buf[off++] = (byte) (values[i + 2] >>> 29);
+ buf[off++] = (byte) (values[i + 2] >>> 21);
+ buf[off++] = (byte) (values[i + 2] >>> 13);
+ buf[off++] = (byte) (values[i + 2] >>> 5);
+ // shared byte: 5 low bits of values[i + 2] | 3 high bits of values[i + 3]
+ buf[off] = (byte) (values[i + 2] << 3);
+ buf[off++] |= values[i + 3] >>> 60;
+ buf[off++] = (byte) (values[i + 3] >>> 52);
+ buf[off++] = (byte) (values[i + 3] >>> 44);
+ buf[off++] = (byte) (values[i + 3] >>> 36);
+ buf[off++] = (byte) (values[i + 3] >>> 28);
+ buf[off++] = (byte) (values[i + 3] >>> 20);
+ buf[off++] = (byte) (values[i + 3] >>> 12);
+ buf[off++] = (byte) (values[i + 3] >>> 4);
+ // shared byte: 4 low bits of values[i + 3] | 4 high bits of values[i + 4]
+ buf[off] = (byte) (values[i + 3] << 4);
+ buf[off++] |= values[i + 4] >>> 59;
+ buf[off++] = (byte) (values[i + 4] >>> 51);
+ buf[off++] = (byte) (values[i + 4] >>> 43);
+ buf[off++] = (byte) (values[i + 4] >>> 35);
+ buf[off++] = (byte) (values[i + 4] >>> 27);
+ buf[off++] = (byte) (values[i + 4] >>> 19);
+ buf[off++] = (byte) (values[i + 4] >>> 11);
+ buf[off++] = (byte) (values[i + 4] >>> 3);
+ // shared byte: 3 low bits of values[i + 4] | 5 high bits of values[i + 5]
+ buf[off] = (byte) (values[i + 4] << 5);
+ buf[off++] |= values[i + 5] >>> 58;
+ buf[off++] = (byte) (values[i + 5] >>> 50);
+ buf[off++] = (byte) (values[i + 5] >>> 42);
+ buf[off++] = (byte) (values[i + 5] >>> 34);
+ buf[off++] = (byte) (values[i + 5] >>> 26);
+ buf[off++] = (byte) (values[i + 5] >>> 18);
+ buf[off++] = (byte) (values[i + 5] >>> 10);
+ buf[off++] = (byte) (values[i + 5] >>> 2);
+ // shared byte: 2 low bits of values[i + 5] | 6 high bits of values[i + 6]
+ buf[off] = (byte) (values[i + 5] << 6);
+ buf[off++] |= values[i + 6] >>> 57;
+ buf[off++] = (byte) (values[i + 6] >>> 49);
+ buf[off++] = (byte) (values[i + 6] >>> 41);
+ buf[off++] = (byte) (values[i + 6] >>> 33);
+ buf[off++] = (byte) (values[i + 6] >>> 25);
+ buf[off++] = (byte) (values[i + 6] >>> 17);
+ buf[off++] = (byte) (values[i + 6] >>> 9);
+ buf[off++] = (byte) (values[i + 6] >>> 1);
+ // shared byte: 1 low bit of values[i + 6] | 7 high bits of values[i + 7]
+ buf[off] = (byte) (values[i + 6] << 7);
+ buf[off++] |= values[i + 7] >>> 56;
+ buf[off++] = (byte) (values[i + 7] >>> 48);
+ buf[off++] = (byte) (values[i + 7] >>> 40);
+ buf[off++] = (byte) (values[i + 7] >>> 32);
+ buf[off++] = (byte) (values[i + 7] >>> 24);
+ buf[off++] = (byte) (values[i + 7] >>> 16);
+ buf[off++] = (byte) (values[i + 7] >>> 8);
+ buf[off] = (byte) values[i + 7];
+ }
+
+ static void unpackBits1(final long[] values, final int i, final byte[] buf, final int off) { // Expands the 8 bits of buf[off] into values[i..i+7] (each 0 or 1), most significant bit first.
+ values[i + 0] = (Byte.toUnsignedLong(buf[off]) >>> 7) & 1; // toUnsignedLong avoids sign extension of the byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 1;
+ values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 5) & 1;
+ values[i + 3] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 1;
+ values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 3) & 1;
+ values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 1;
+ values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 1;
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 1; // least significant bit
+ }
+
+ /**
+ * Unpacks eight 2-bit values from two consecutive bytes of {@code buf}.
+ * Each byte holds four values, most-significant pair first; no value crosses a byte boundary.
+ * Every result is in the range 0 to 3.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} and {@code off + 1} are read
+ */ static void unpackBits2(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 3;
+ values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 3;
+ values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 3;
+ values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 3;
+ values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 3;
+ values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 3;
+ values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 3;
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 3;
+ }
+
+ /**
+ * Unpacks eight 3-bit values from three consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Values 2 and 5 straddle a byte boundary, so each
+ * is assembled from the low bits of one byte (read with {@code off++}) and the high bits of
+ * the next byte. Every result is in the range 0 to 7.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 2} are read
+ */ static void unpackBits3(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 5;
+ values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 7;
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ values[i + 3] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 7;
+ values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 7;
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 3) & 7;
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 7;
+ }
+
+ /**
+ * Unpacks eight 4-bit values from four consecutive bytes of {@code buf}.
+ * Each byte holds two values: the high nibble comes first, then the low nibble.
+ * Every result is in the range 0 to 15.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 3} are read
+ */ static void unpackBits4(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 4;
+ values[i + 1] = Byte.toUnsignedLong(buf[off++]) & 0xf;
+ values[i + 2] = Byte.toUnsignedLong(buf[off]) >>> 4;
+ values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 0xf;
+ values[i + 4] = Byte.toUnsignedLong(buf[off]) >>> 4;
+ values[i + 5] = Byte.toUnsignedLong(buf[off++]) & 0xf;
+ values[i + 6] = Byte.toUnsignedLong(buf[off]) >>> 4;
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0xf;
+ }
+
+ /**
+ * Unpacks eight 5-bit values from five consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Values 1, 3, 4 and 6 straddle a byte boundary and
+ * are assembled from the masked low bits of one byte (read with {@code off++}) shifted left,
+ * OR-ed with the high bits of the next byte. Every result is in the range 0 to 31.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 4} are read
+ */ static void unpackBits5(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 0x1f;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 0x1f;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x1f;
+ }
+
+ /**
+ * Unpacks eight 6-bit values from six consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every three bytes: four values
+ * are taken from bytes 0-2 and the same shifts and masks are applied again to bytes 3-5.
+ * Every result is in the range 0 to 63.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 5} are read
+ */ static void unpackBits6(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 0x3f;
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x3f;
+ }
+
+ /**
+ * Unpacks eight 7-bit values from seven consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Value 0 is the top 7 bits of the first byte and
+ * value 7 is the low 7 bits of the last byte; values 1 through 6 each combine the masked low
+ * bits of one byte (read with {@code off++}) with the high bits of the next byte.
+ * Every result is in the range 0 to 127.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 6} are read
+ */ static void unpackBits7(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x7f;
+ }
+
+ /**
+ * Unpacks eight 8-bit values from eight consecutive bytes of {@code buf}.
+ * Each source byte is widened to an unsigned {@code long} and stored as one value, so every
+ * result is in the range 0 to 255.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 7} are read
+ */ static void unpackBits8(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 1] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 9-bit values from nine consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Every value spans two adjacent bytes: the bits
+ * left over in the current byte (read with {@code off++}) are masked and shifted left, then
+ * OR-ed with the high bits of the next byte. The last value ends exactly on the final byte.
+ * Every result is in the range 0 to 511.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 8} are read
+ */ static void unpackBits9(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 10-bit values from ten consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every five bytes: four values
+ * are taken from bytes 0-4 and the same shifts and masks are applied again to bytes 5-9.
+ * Every result is in the range 0 to 1023.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 9} are read
+ */ static void unpackBits10(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 11-bit values from eleven consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Each value spans two bytes, except values 2 and 5,
+ * which span three (leftover low bits, one whole byte, then the high bits of a third byte).
+ * A byte shared by two values is read without advancing for the first value and with
+ * {@code off++} for the second. Every result is in the range 0 to 2047.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 10} are read
+ */ static void unpackBits11(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 12-bit values from twelve consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every three bytes: the first
+ * value of a pair is a whole byte plus the high nibble of the next, the second is that byte's
+ * low nibble plus the following whole byte. Every result is in the range 0 to 4095.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 11} are read
+ */ static void unpackBits12(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 13-bit values from thirteen consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Each value spans two or three bytes: any bits left
+ * over in the current byte are masked and shifted to the top of the value, whole bytes follow,
+ * and the high bits of the next byte fill the bottom. A byte shared by two values is read
+ * without advancing for the first value and with {@code off++} for the second.
+ * Every result is in the range 0 to 8191.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 12} are read
+ */ static void unpackBits13(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 14-bit values from fourteen consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every seven bytes: four values
+ * are taken from bytes 0-6 and the same shifts and masks are applied again to bytes 7-13.
+ * Every result is in the range 0 to 16383.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 13} are read
+ */ static void unpackBits14(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 15-bit values from fifteen consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Value 0 is one whole byte plus the top 7 bits of
+ * the next; each later value starts with the masked low bits left in the current byte (read
+ * with {@code off++}), followed by one whole byte and, except for the last value, the high
+ * bits of a third byte. Every result is in the range 0 to 32767.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 14} are read
+ */ static void unpackBits15(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 16-bit values from sixteen consecutive bytes of {@code buf}.
+ * Each value is two whole bytes in big-endian order: the first byte becomes bits 15-8 and
+ * the second becomes bits 7-0. Every result is in the range 0 to 65535.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 15} are read
+ */ static void unpackBits16(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 1] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 17-bit values from seventeen consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Every value spans three bytes: the bits left in
+ * the current byte (masked, read with {@code off++}) form the top, a whole byte forms the
+ * middle, and the high bits of a third byte form the bottom. The last value ends exactly on
+ * the final byte. Every result is in the range 0 to 2^17 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 16} are read
+ */ static void unpackBits17(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 11;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 13;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 15;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 18-bit values from eighteen consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every nine bytes: four values
+ * are taken from bytes 0-8 and the same shifts and masks are applied again to bytes 9-17.
+ * Every result is in the range 0 to 2^18 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 17} are read
+ */ static void unpackBits18(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 14;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 14;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 19-bit values from nineteen consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Each value spans three bytes, except values 2 and
+ * 5, which span four (leftover low bits, two whole bytes, then the high bits of a fourth).
+ * A byte shared by two values is read without advancing for the first value and with
+ * {@code off++} for the second. Every result is in the range 0 to 2^19 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 18} are read
+ */ static void unpackBits19(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 17;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 15;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 13;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 20-bit values from twenty consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every five bytes: the first
+ * value of a pair is two whole bytes plus the high nibble of a third, the second is that
+ * byte's low nibble plus two whole bytes. Every result is in the range 0 to 2^20 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 19} are read
+ */ static void unpackBits20(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 21-bit values from twenty-one consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Each value spans three or four bytes: any bits
+ * left over in the current byte are masked and shifted to the top of the value, whole bytes
+ * follow, and the high bits of the next byte fill the bottom. A byte shared by two values is
+ * read without advancing for the first value and with {@code off++} for the second.
+ * Every result is in the range 0 to 2^21 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 20} are read
+ */ static void unpackBits21(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 15;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 17;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 19;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 22-bit values from twenty-two consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every eleven bytes: four
+ * values are taken from bytes 0-10 and the same shifts and masks are applied again to
+ * bytes 11-21. Every result is in the range 0 to 2^22 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 21} are read
+ */ static void unpackBits22(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 18;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 18;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 23-bit values from twenty-three consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Value 0 is two whole bytes plus the top 7 bits of
+ * a third; each later value starts with the masked low bits left in the current byte (read
+ * with {@code off++}), followed by two whole bytes and, except for the last value, the high
+ * bits of a fourth byte. Every result is in the range 0 to 2^23 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 22} are read
+ */ static void unpackBits23(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 21;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 19;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 17;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 24-bit values from twenty-four consecutive bytes of {@code buf}.
+ * Each value is three whole bytes in big-endian order: the first byte becomes bits 23-16,
+ * the second bits 15-8 and the third bits 7-0. Every result is in the range 0 to 2^24 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 23} are read
+ */ static void unpackBits24(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 1] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 25-bit values from twenty-five consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Every value spans four bytes: the bits left in
+ * the current byte (masked, read with {@code off++}) form the top, two whole bytes form the
+ * middle, and the high bits of a fourth byte form the bottom. The last value ends exactly on
+ * the final byte. Every result is in the range 0 to 2^25 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 24} are read
+ */ static void unpackBits25(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 19;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 21;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 23;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 26-bit values from twenty-six consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. The layout repeats every thirteen bytes: four
+ * values are taken from bytes 0-12 and the same shifts and masks are applied again to
+ * bytes 13-25. Every result is in the range 0 to 2^26 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 25} are read
+ */ static void unpackBits26(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 22;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 22;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ /**
+ * Unpacks eight 27-bit values from twenty-seven consecutive bytes of {@code buf}.
+ * Bits are consumed most-significant-first. Each value spans four bytes, except values 2 and
+ * 5, which span five (leftover low bits, three whole bytes, then the high bits of a fifth).
+ * A byte shared by two values is read without advancing for the first value and with
+ * {@code off++} for the second. Every result is in the range 0 to 2^27 - 1.
+ *
+ * @param values destination array; elements {@code i} through {@code i + 7} are overwritten
+ * @param i index of the first destination element
+ * @param buf source bytes; each byte is widened with {@code Byte.toUnsignedLong} before use
+ * @param off index of the first source byte; bytes {@code off} through {@code off + 26} are read
+ */ static void unpackBits27(final long[] values, final int i, final byte[] buf, int off) {
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 25;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 23;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 21;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits28(final long[] values, final int i, final byte[] buf, int off) { // 8 x 28-bit values, MSB first: 28 bytes of buf -> values[i..i+7]
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 20; // [0]: bytes 0-2, top 4 bits of byte 3; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; // no off++: byte 3 also holds the high bits of [1]
+ // [1]: low 4 bits of byte 3, bytes 4-6
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ // [2]: bytes 7-9, top 4 bits of byte 10
+ values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [3]: low 4 bits of byte 10, bytes 11-13
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // [4]: bytes 14-16, top 4 bits of byte 17
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [5]: low 4 bits of byte 17, bytes 18-20
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ // [6]: bytes 21-23, top 4 bits of byte 24
+ values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [7]: low 4 bits of byte 24, bytes 25-27
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits29(final long[] values, final int i, final byte[] buf, int off) { // 8 x 29-bit values, MSB first: 29 bytes of buf -> values[i..i+7]
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 21; // [0]: bytes 0-2, top 5 bits of byte 3; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; // no off++: byte 3 also holds the high bits of [1]
+ // [1]: low 3 bits of byte 3, bytes 4-6, top 2 bits of byte 7
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [2]: low 6 bits of byte 7, bytes 8-9, top 7 bits of byte 10
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 23;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // [3]: low bit of byte 10, bytes 11-13, top 4 bits of byte 14
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [4]: low 4 bits of byte 14, bytes 15-17, top bit of byte 18
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 25;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // [5]: low 7 bits of byte 18, bytes 19-20, top 6 bits of byte 21
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [6]: low 2 bits of byte 21, bytes 22-24, top 3 bits of byte 25
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 27;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // [7]: low 5 bits of byte 25, bytes 26-28
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits30(final long[] values, final int i, final byte[] buf, int off) { // 8 x 30-bit values, MSB first: 30 bytes of buf -> values[i..i+7]
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 22; // [0]: bytes 0-2, top 6 bits of byte 3; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; // no off++: byte 3 also holds the high bits of [1]
+ // [1]: low 2 bits of byte 3, bytes 4-6, top 4 bits of byte 7
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [2]: low 4 bits of byte 7, bytes 8-10, top 2 bits of byte 11
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 26;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [3]: low 6 bits of byte 11, bytes 12-14
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // [4]: bytes 15-17, top 6 bits of byte 18
+ values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [5]: low 2 bits of byte 18, bytes 19-21, top 4 bits of byte 22
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [6]: low 4 bits of byte 22, bytes 23-25, top 2 bits of byte 26
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 26;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [7]: low 6 bits of byte 26, bytes 27-29
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits31(final long[] values, final int i, final byte[] buf, int off) { // 8 x 31-bit values, MSB first: 31 bytes of buf -> values[i..i+7]
+ values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 23; // [0]: bytes 0-2, top 7 bits of byte 3; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; // no off++: byte 3 also holds the high bit of [1]
+ // [1]: low bit of byte 3, bytes 4-6, top 6 bits of byte 7
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [2]: low 2 bits of byte 7, bytes 8-10, top 5 bits of byte 11
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 29;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // [3]: low 3 bits of byte 11, bytes 12-14, top 4 bits of byte 15
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [4]: low 4 bits of byte 15, bytes 16-18, top 3 bits of byte 19
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 27;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // [5]: low 5 bits of byte 19, bytes 20-22, top 2 bits of byte 23
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [6]: low 6 bits of byte 23, bytes 24-26, top bit of byte 27
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 25;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // [7]: low 7 bits of byte 27, bytes 28-30
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits32(final long[] values, final int i, final byte[] buf, int off) { // 8 x 32-bit values, MSB first: 32 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 24; // [0]: bytes 0-3; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]); // values are byte-aligned, so no byte is shared and no masking is needed
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 24; // [1]: bytes 4-7
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 24; // [2]: bytes 8-11
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 24; // [3]: bytes 12-15
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 24; // [4]: bytes 16-19
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 24; // [5]: bytes 20-23
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 24; // [6]: bytes 24-27
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 24; // [7]: bytes 28-31
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits33(final long[] values, final int i, final byte[] buf, int off) { // 8 x 33-bit values, MSB first: 33 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 25; // [0]: bytes 0-3, top bit of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; // no off++: byte 4 also holds the high bits of [1]
+ // [1]: low 7 bits of byte 4, bytes 5-7, top 2 bits of byte 8
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [2]: low 6 bits of byte 8, bytes 9-11, top 3 bits of byte 12
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 27;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // [3]: low 5 bits of byte 12, bytes 13-15, top 4 bits of byte 16
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [4]: low 4 bits of byte 16, bytes 17-19, top 5 bits of byte 20
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 29;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // [5]: low 3 bits of byte 20, bytes 21-23, top 6 bits of byte 24
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 30;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [6]: low 2 bits of byte 24, bytes 25-27, top 7 bits of byte 28
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 31;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // [7]: low bit of byte 28, bytes 29-32
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 32;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits34(final long[] values, final int i, final byte[] buf, int off) { // 8 x 34-bit values, MSB first: 34 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 26; // [0]: bytes 0-3, top 2 bits of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; // no off++: byte 4 also holds the high bits of [1]
+ // [1]: low 6 bits of byte 4, bytes 5-7, top 4 bits of byte 8
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [2]: low 4 bits of byte 8, bytes 9-11, top 6 bits of byte 12
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 30;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [3]: low 2 bits of byte 12, bytes 13-16
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // [4]: bytes 17-20, top 2 bits of byte 21
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [5]: low 6 bits of byte 21, bytes 22-24, top 4 bits of byte 25
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [6]: low 4 bits of byte 25, bytes 26-28, top 6 bits of byte 29
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 30;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [7]: low 2 bits of byte 29, bytes 30-33
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]); // this last off++ has no further effect: off is a by-value parameter and is not read again
+ }
+
+ static void unpackBits35(final long[] values, final int i, final byte[] buf, int off) { // 8 x 35-bit values, MSB first: 35 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 27; // [0]: bytes 0-3, top 3 bits of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; // no off++: byte 4 also holds the high bits of [1]
+ // [1]: low 5 bits of byte 4, bytes 5-7, top 6 bits of byte 8
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [2]: low 2 bits of byte 8, bytes 9-12, top bit of byte 13
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 33;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // [3]: low 7 bits of byte 13, bytes 14-16, top 4 bits of byte 17
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [4]: low 4 bits of byte 17, bytes 18-20, top 7 bits of byte 21
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 31;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // [5]: low bit of byte 21, bytes 22-25, top 2 bits of byte 26
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [6]: low 6 bits of byte 26, bytes 27-29, top 5 bits of byte 30
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 29;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // [7]: low 3 bits of byte 30, bytes 31-34
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits36(final long[] values, final int i, final byte[] buf, int off) { // 8 x 36-bit values, MSB first: 36 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 28; // [0]: bytes 0-3, top 4 bits of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; // no off++: byte 4 also holds the high bits of [1]
+ // [1]: low 4 bits of byte 4, bytes 5-8
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ // [2]: bytes 9-12, top 4 bits of byte 13
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [3]: low 4 bits of byte 13, bytes 14-17
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // [4]: bytes 18-21, top 4 bits of byte 22
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [5]: low 4 bits of byte 22, bytes 23-26
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ // [6]: bytes 27-30, top 4 bits of byte 31
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [7]: low 4 bits of byte 31, bytes 32-35
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits37(final long[] values, final int i, final byte[] buf, int off) { // 8 x 37-bit values, MSB first: 37 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 29; // [0]: bytes 0-3, top 5 bits of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; // no off++: byte 4 also holds the high bits of [1]
+ // [1]: low 3 bits of byte 4, bytes 5-8, top 2 bits of byte 9
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [2]: low 6 bits of byte 9, bytes 10-12, top 7 bits of byte 13
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 31;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // [3]: low bit of byte 13, bytes 14-17, top 4 bits of byte 18
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [4]: low 4 bits of byte 18, bytes 19-22, top bit of byte 23
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 33;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // [5]: low 7 bits of byte 23, bytes 24-26, top 6 bits of byte 27
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 30;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [6]: low 2 bits of byte 27, bytes 28-31, top 3 bits of byte 32
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 35;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // [7]: low 5 bits of byte 32, bytes 33-36
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits38(final long[] values, final int i, final byte[] buf, int off) { // 8 x 38-bit values, MSB first: 38 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 30; // [0]: bytes 0-3, top 6 bits of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; // no off++: byte 4 also holds the high bits of [1]
+ // [1]: low 2 bits of byte 4, bytes 5-8, top 4 bits of byte 9
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [2]: low 4 bits of byte 9, bytes 10-13, top 2 bits of byte 14
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 34;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [3]: low 6 bits of byte 14, bytes 15-18
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // [4]: bytes 19-22, top 6 bits of byte 23
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [5]: low 2 bits of byte 23, bytes 24-27, top 4 bits of byte 28
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [6]: low 4 bits of byte 28, bytes 29-32, top 2 bits of byte 33
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 34;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [7]: low 6 bits of byte 33, bytes 34-37
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits39(final long[] values, final int i, final byte[] buf, int off) { // 8 x 39-bit values, MSB first: 39 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 31; // [0]: bytes 0-3, top 7 bits of byte 4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; // no off++: byte 4 also holds the high bit of [1]
+ // [1]: low bit of byte 4, bytes 5-8, top 6 bits of byte 9
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // [2]: low 2 bits of byte 9, bytes 10-13, top 5 bits of byte 14
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 37;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // [3]: low 3 bits of byte 14, bytes 15-18, top 4 bits of byte 19
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // [4]: low 4 bits of byte 19, bytes 20-23, top 3 bits of byte 24
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 35;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // [5]: low 5 bits of byte 24, bytes 25-28, top 2 bits of byte 29
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // [6]: low 6 bits of byte 29, bytes 30-33, top bit of byte 34
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 33;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // [7]: low 7 bits of byte 34, bytes 35-38
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits40(final long[] values, final int i, final byte[] buf, int off) { // 8 x 40-bit values, MSB first: 40 bytes of buf -> values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 32; // [0]: bytes 0-4; byte n means buf[off + n] with off as passed in
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]); // values are byte-aligned, so no byte is shared and no masking is needed
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 32; // [1]: bytes 5-9
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 32; // [2]: bytes 10-14
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 32; // [3]: bytes 15-19
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 32; // [4]: bytes 20-24
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 32; // [5]: bytes 25-29
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 32; // [6]: bytes 30-34
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 32; // [7]: bytes 35-39
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits41(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 41-bit values, MSB first: buf[off..off+40] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 33; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 7 bits of the shared byte | 4 whole bytes | top 2 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 2] = low 6 bits of the shared byte | 4 whole bytes | top 3 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 35;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // values[i + 3] = low 5 bits of the shared byte | 4 whole bytes | top 4 bits of the next byte.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 4] = low 4 bits of the shared byte | 4 whole bytes | top 5 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 37;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // values[i + 5] = low 3 bits of the shared byte | 4 whole bytes | top 6 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 38;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 6] = low 2 bits of the shared byte | 4 whole bytes | top 7 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 39;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // values[i + 7] = low 1 bit of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits42(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 42-bit values, MSB first: buf[off..off+41] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 34; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 6 bits of the shared byte | 4 whole bytes | top 4 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 2] = low 4 bits of the shared byte | 4 whole bytes | top 6 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 38;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 3] = low 2 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // values[i + 4] starts on a byte boundary: 5 whole bytes | top 2 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 5] = low 6 bits of the shared byte | 4 whole bytes | top 4 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 6] = low 4 bits of the shared byte | 4 whole bytes | top 6 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 38;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 7] = low 2 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits43(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 43-bit values, MSB first: buf[off..off+42] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 35; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 5 bits of the shared byte | 4 whole bytes | top 6 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 2] = low 2 bits of the shared byte | 5 whole bytes | top 1 bit of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 41;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // values[i + 3] = low 7 bits of the shared byte | 4 whole bytes | top 4 bits of the next byte.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 4] = low 4 bits of the shared byte | 4 whole bytes | top 7 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 39;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // values[i + 5] = low 1 bit of the shared byte | 5 whole bytes | top 2 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 42;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 6] = low 6 bits of the shared byte | 4 whole bytes | top 5 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 37;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // values[i + 7] = low 3 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits44(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 44-bit values, MSB first: buf[off..off+43] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 36; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 4 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ // values[i + 2] starts on a byte boundary: 5 whole bytes | top 4 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 3] = low 4 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // values[i + 4] starts on a byte boundary: 5 whole bytes | top 4 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 5] = low 4 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ // values[i + 6] starts on a byte boundary: 5 whole bytes | top 4 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 7] = low 4 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits45(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 45-bit values, MSB first: buf[off..off+44] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 37; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 3 bits of the shared byte | 5 whole bytes | top 2 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 42;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 2] = low 6 bits of the shared byte | 4 whole bytes | top 7 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 39;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // values[i + 3] = low 1 bit of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 4] = low 4 bits of the shared byte | 5 whole bytes | top 1 bit of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 41;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // values[i + 5] = low 7 bits of the shared byte | 4 whole bytes | top 6 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 38;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 6] = low 2 bits of the shared byte | 5 whole bytes | top 3 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 43;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // values[i + 7] = low 5 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits46(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 46-bit values, MSB first: buf[off..off+45] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 38; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 2 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 44;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 2] = low 4 bits of the shared byte | 5 whole bytes | top 2 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 42;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 3] = low 6 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // values[i + 4] starts on a byte boundary: 5 whole bytes | top 6 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 5] = low 2 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 44;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 6] = low 4 bits of the shared byte | 5 whole bytes | top 2 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 42;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 7] = low 6 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits47(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 47-bit values, MSB first: buf[off..off+46] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 39; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 1 bit of the shared byte | 5 whole bytes | top 6 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 46;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 2] = low 2 bits of the shared byte | 5 whole bytes | top 5 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 45;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // values[i + 3] = low 3 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 4] = low 4 bits of the shared byte | 5 whole bytes | top 3 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 43;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // values[i + 5] = low 5 bits of the shared byte | 5 whole bytes | top 2 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 42;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 6] = low 6 bits of the shared byte | 5 whole bytes | top 1 bit of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 41;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // values[i + 7] = low 7 bits of the shared byte | 5 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits48(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 48-bit values, MSB first: buf[off..off+47] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 40; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]); // 48 bits = 6 whole bytes, so no byte is split between values
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]); // last byte read; off is a local copy, so no final increment is needed
+ }
+
+ static void unpackBits49(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 49-bit values, MSB first: buf[off..off+48] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 41; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 7 bits of the shared byte | 5 whole bytes | top 2 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 42;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 2] = low 6 bits of the shared byte | 5 whole bytes | top 3 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 43;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // values[i + 3] = low 5 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 4] = low 4 bits of the shared byte | 5 whole bytes | top 5 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 45;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // values[i + 5] = low 3 bits of the shared byte | 5 whole bytes | top 6 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 46;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 6] = low 2 bits of the shared byte | 5 whole bytes | top 7 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 47;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // values[i + 7] = low 1 bit of the shared byte | 6 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits50(final long[] values, final int i, final byte[] buf, int off) { // Unpacks 8 x 50-bit values, MSB first: buf[off..off+49] -> values[i..i+7].
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 42; // toUnsignedLong: no sign extension for negative bytes
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; // no off++: this byte's low bits are re-read below for the next value
+ // values[i + 1] = low 6 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 44;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 2] = low 4 bits of the shared byte | 5 whole bytes | top 6 bits of the next byte.
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 46;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 3] = low 2 bits of the shared byte | 6 whole bytes; ends on a byte boundary.
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 48;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // values[i + 4] starts on a byte boundary: 6 whole bytes | top 2 bits of the next byte.
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // values[i + 5] = low 6 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte.
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 44;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // values[i + 6] = low 4 bits of the shared byte | 5 whole bytes | top 6 bits of the next byte.
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 46;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // values[i + 7] = low 2 bits of the shared byte | 6 whole bytes; ends on a byte boundary.
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits51(final long[] values, final int i, final byte[] buf, int off) { // reads 51 bytes of buf from off, writes eight 51-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 43; // value 0 = 6 whole bytes (first byte most significant) | top 3 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; // off is not advanced: the low 5 bits of this byte start value 1
+ // value 1 = low 5 bits of the shared byte | 5 whole bytes | top 6 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 46;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 2 = low 2 bits of the shared byte | 6 whole bytes | top 1 bit of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 49;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // value 3 = low 7 bits of the shared byte | 5 whole bytes | top 4 bits of the next byte
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 4 = low 4 bits of the shared byte | 5 whole bytes | top 7 bits of the next byte
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 47;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // value 5 = low 1 bit of the shared byte | 6 whole bytes | top 2 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 50;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 6 = low 6 bits of the shared byte | 5 whole bytes | top 5 bits of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 45;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // value 7 = low 3 bits of the shared byte | 6 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits52(final long[] values, final int i, final byte[] buf, int off) { // reads 52 bytes of buf from off, writes eight 52-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 44; // value 0 = 6 whole bytes (first byte most significant) | top 4 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; // off is not advanced: the low 4 bits of this byte start value 1
+ // value 1 = low 4 bits of the shared byte | 6 whole bytes (ends on a byte boundary)
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ // value 2 = 6 whole bytes | top 4 bits of the next byte (same layout as value 0)
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 3 = low 4 bits of the shared byte | 6 whole bytes (same layout as value 1)
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // value 4 = 6 whole bytes | top 4 bits of the next byte (same layout as value 0)
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 5 = low 4 bits of the shared byte | 6 whole bytes (same layout as value 1)
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ // value 6 = 6 whole bytes | top 4 bits of the next byte (same layout as value 0)
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 7 = low 4 bits of the shared byte | 6 whole bytes (same layout as value 1)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits53(final long[] values, final int i, final byte[] buf, int off) { // reads 53 bytes of buf from off, writes eight 53-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 45; // value 0 = 6 whole bytes (first byte most significant) | top 5 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; // off is not advanced: the low 3 bits of this byte start value 1
+ // value 1 = low 3 bits of the shared byte | 6 whole bytes | top 2 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 50;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 2 = low 6 bits of the shared byte | 5 whole bytes | top 7 bits of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 47;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // value 3 = low 1 bit of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 52;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 4 = low 4 bits of the shared byte | 6 whole bytes | top 1 bit of the next byte
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 49;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // value 5 = low 7 bits of the shared byte | 5 whole bytes | top 6 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 46;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 6 = low 2 bits of the shared byte | 6 whole bytes | top 3 bits of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 51;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 43;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // value 7 = low 5 bits of the shared byte | 6 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits54(final long[] values, final int i, final byte[] buf, int off) { // reads 54 bytes of buf from off, writes eight 54-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 46; // value 0 = 6 whole bytes (first byte most significant) | top 6 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; // off is not advanced: the low 2 bits of this byte start value 1
+ // value 1 = low 2 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 52;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 2 = low 4 bits of the shared byte | 6 whole bytes | top 2 bits of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 50;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 3 = low 6 bits of the shared byte | 6 whole bytes (ends on a byte boundary)
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 48;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // value 4 = 6 whole bytes | top 6 bits of the next byte (same layout as value 0)
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 5 = low 2 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 52;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 6 = low 4 bits of the shared byte | 6 whole bytes | top 2 bits of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 50;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 7 = low 6 bits of the shared byte | 6 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]); // this last off++ has no effect: off is a local parameter and is not read again
+ }
+
+ static void unpackBits55(final long[] values, final int i, final byte[] buf, int off) { // reads 55 bytes of buf from off, writes eight 55-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 47; // value 0 = 6 whole bytes (first byte most significant) | top 7 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; // off is not advanced: the low 1 bit of this byte starts value 1
+ // value 1 = low 1 bit of the shared byte | 6 whole bytes | top 6 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 54;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 2 = low 2 bits of the shared byte | 6 whole bytes | top 5 bits of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 53;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 45;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // value 3 = low 3 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 52;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 4 = low 4 bits of the shared byte | 6 whole bytes | top 3 bits of the next byte
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 51;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 43;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // value 5 = low 5 bits of the shared byte | 6 whole bytes | top 2 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 50;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 6 = low 6 bits of the shared byte | 6 whole bytes | top 1 bit of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 49;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // value 7 = low 7 bits of the shared byte | 6 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits56(final long[] values, final int i, final byte[] buf, int off) { // reads 56 bytes of buf from off, writes eight 56-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 0 = 7 whole bytes, first byte most significant; no byte is shared between values
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 1 = next 7 whole bytes
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 2 = next 7 whole bytes
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 3 = next 7 whole bytes
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 4 = next 7 whole bytes
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 5 = next 7 whole bytes
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 6 = next 7 whole bytes
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]);
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 48; // value 7 = last 7 whole bytes
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits57(final long[] values, final int i, final byte[] buf, int off) { // reads 57 bytes of buf from off, writes eight 57-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 49; // value 0 = 7 whole bytes (first byte most significant) | top 1 bit of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; // off is not advanced: the low 7 bits of this byte start value 1
+ // value 1 = low 7 bits of the shared byte | 6 whole bytes | top 2 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 50;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 2 = low 6 bits of the shared byte | 6 whole bytes | top 3 bits of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 51;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 43;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5;
+ // value 3 = low 5 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 52;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 4 = low 4 bits of the shared byte | 6 whole bytes | top 5 bits of the next byte
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 53;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 45;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // value 5 = low 3 bits of the shared byte | 6 whole bytes | top 6 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 54;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 6 = low 2 bits of the shared byte | 6 whole bytes | top 7 bits of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 55;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 47;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // value 7 = low 1 bit of the shared byte | 7 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 56;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits58(final long[] values, final int i, final byte[] buf, int off) { // reads 58 bytes of buf from off, writes eight 58-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 50; // value 0 = 7 whole bytes (first byte most significant) | top 2 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; // off is not advanced: the low 6 bits of this byte start value 1
+ // value 1 = low 6 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 52;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 2 = low 4 bits of the shared byte | 6 whole bytes | top 6 bits of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 54;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 3 = low 2 bits of the shared byte | 7 whole bytes (ends on a byte boundary)
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 56;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]);
+ // value 4 = 7 whole bytes | top 2 bits of the next byte (same layout as value 0)
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 50;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 5 = low 6 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 52;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 6 = low 4 bits of the shared byte | 6 whole bytes | top 6 bits of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 54;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 7 = low 2 bits of the shared byte | 7 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 56;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]); // this last off++ has no effect: off is a local parameter and is not read again
+ }
+
+ static void unpackBits59(final long[] values, final int i, final byte[] buf, int off) { // reads 59 bytes of buf from off, writes eight 59-bit values to values[i..i+7]
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 51; // value 0 = 7 whole bytes (first byte most significant) | top 3 bits of the next byte
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 43;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; // off is not advanced: the low 5 bits of this byte start value 1
+ // value 1 = low 5 bits of the shared byte | 6 whole bytes | top 6 bits of the next byte
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 54;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2;
+ // value 2 = low 2 bits of the shared byte | 7 whole bytes | top 1 bit of the next byte
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 57;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 49;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7;
+ // value 3 = low 7 bits of the shared byte | 6 whole bytes | top 4 bits of the next byte
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 52;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4;
+ // value 4 = low 4 bits of the shared byte | 6 whole bytes | top 7 bits of the next byte
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 55;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 47;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1;
+ // value 5 = low 1 bit of the shared byte | 7 whole bytes | top 2 bits of the next byte
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 58;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 50;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6;
+ // value 6 = low 6 bits of the shared byte | 6 whole bytes | top 5 bits of the next byte
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 53;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 45;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3;
+ // value 7 = low 3 bits of the shared byte | 7 whole bytes (ends on a byte boundary)
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 56;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]);
+ }
+
+ static void unpackBits60(final long[] values, final int i, final byte[] buf, int off) { //8 values x 60 bits = 60 bytes
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 52; //first byte read holds the most significant bits
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits; off is not advanced, the next value reuses this byte
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; //low 4 bits of the shared byte
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]); //ends on a byte boundary
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 52; //starts on a fresh byte
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; //low 4 bits of the shared byte
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]); //ends on a byte boundary
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 52; //starts on a fresh byte
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; //low 4 bits of the shared byte
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]); //ends on a byte boundary
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 52; //starts on a fresh byte
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; //low 4 bits of the shared byte
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]); //last of the 60 bytes
+ }
+
+ static void unpackBits61(final long[] values, final int i, final byte[] buf, int off) { //8 values x 61 bits = 61 bytes
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 53; //first byte read holds the most significant bits
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 45;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; //top 5 bits; off is not advanced, the next value reuses this byte
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 58; //low 3 bits of the shared byte
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 50;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; //top 2 bits of the shared byte
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 55; //low 6 bits of the shared byte
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 47;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; //top 7 bits of the shared byte
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 60; //low 1 bit of the shared byte
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 52;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 57; //low 4 bits of the shared byte
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 49;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; //top 1 bit of the shared byte
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 54; //low 7 bits of the shared byte
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; //top 6 bits of the shared byte
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 59; //low 2 bits of the shared byte
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 51;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 43;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; //top 3 bits of the shared byte
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 56; //low 5 bits of the shared byte
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]); //last of the 61 bytes
+ }
+
+ static void unpackBits62(final long[] values, final int i, final byte[] buf, int off) { //8 values x 62 bits = 62 bytes
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 54; //first byte read holds the most significant bits
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; //top 6 bits; off is not advanced, the next value reuses this byte
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 60; //low 2 bits of the shared byte
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 52;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 58; //low 4 bits of the shared byte
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 50;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; //top 2 bits of the shared byte
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 56; //low 6 bits of the shared byte
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]); //ends on a byte boundary
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 54; //starts on a fresh byte
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; //top 6 bits of the shared byte
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 60; //low 2 bits of the shared byte
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 52;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 58; //low 4 bits of the shared byte
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 50;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; //top 2 bits of the shared byte
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 56; //low 6 bits of the shared byte
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]); //last of the 62 bytes
+ }
+
+ static void unpackBits63(final long[] values, final int i, final byte[] buf, int off) { //8 values x 63 bits = 63 bytes
+ values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 55; //first byte read holds the most significant bits
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 47;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 39;
+ values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7;
+ values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; //top 7 bits; off is not advanced, the next value reuses this byte
+
+ values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 62; //low 1 bit of the shared byte
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 54;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38;
+ values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6;
+ values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; //top 6 bits of the shared byte
+
+ values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 61; //low 2 bits of the shared byte
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 53;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 45;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37;
+ values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5;
+ values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; //top 5 bits of the shared byte
+
+ values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 60; //low 3 bits of the shared byte
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 52;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36;
+ values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4;
+ values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; //top 4 bits of the shared byte
+
+ values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 59; //low 4 bits of the shared byte
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 51;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 43;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35;
+ values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3;
+ values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; //top 3 bits of the shared byte
+
+ values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 58; //low 5 bits of the shared byte
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 50;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34;
+ values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2;
+ values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; //top 2 bits of the shared byte
+
+ values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 57; //low 6 bits of the shared byte
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 49;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 41;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33;
+ values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1;
+ values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; //top 1 bit of the shared byte
+
+ values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 56; //low 7 bits of the shared byte
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32;
+ values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8;
+ values[i + 7] |= Byte.toUnsignedLong(buf[off]); //last of the 63 bytes
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java
new file mode 100644
index 000000000..81a985922
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+/**
+ * Incrementally decodes the hashes of a serial version 4 (compressed) compact sketch image.
+ */
+class BytesCompactCompressedHashIterator implements HashIterator {
+  private final byte[] bytes;
+  private final int entryBits;
+  private final int numEntries;
+  private final long[] buffer = new long[8]; //decoded hashes; get() reads slot (index & 7)
+  private int offset; //byte position in bytes of the next packed entry
+  private int offsetBits; //bit position within that byte; only unpack1() changes it
+  private int index = -1; //current entry; -1 until the first call to next()
+  private long previous; //last decoded hash; every packed entry is a delta added to it
+  private boolean isBlockMode; //true while 8 entries at a time can still be decoded
+
+  BytesCompactCompressedHashIterator(
+      final byte[] bytes,
+      final int offset,
+      final int entryBits,
+      final int numEntries
+  ) {
+    this.bytes = bytes;
+    this.offset = offset;
+    this.entryBits = entryBits;
+    this.numEntries = numEntries;
+    isBlockMode = numEntries >= 8;
+  }
+
+  @Override
+  public long get() {
+    return buffer[index & 7];
+  }
+
+  @Override
+  public boolean next() {
+    if ((index + 1) >= numEntries) { //exhausted: repeated calls stay false and decode nothing
+      index = numEntries;
+      return false;
+    }
+    index++;
+    if (!isBlockMode) {
+      unpack1();
+    } else if ((index & 7) == 0) { //all 8 buffered hashes have been consumed
+      if ((numEntries - index) >= 8) {
+        unpack8();
+      } else { //fewer than 8 entries remain: finish one at a time
+        isBlockMode = false;
+        unpack1();
+      }
+    }
+    return true;
+  }
+
+  //Decodes a single entry into buffer[index & 7], then advances the position by entryBits bits.
+  private void unpack1() {
+    final int i = index & 7;
+    BitPacking.unpackBits(buffer, i, entryBits, bytes, offset, offsetBits);
+    offset += (offsetBits + entryBits) >>> 3;
+    offsetBits = (offsetBits + entryBits) & 7;
+    buffer[i] += previous; //undo the delta encoding
+    previous = buffer[i];
+  }
+
+  //Decodes 8 entries into buffer[0..7]; 8 entries of entryBits bits fill entryBits whole bytes.
+  private void unpack8() {
+    BitPacking.unpackBitsBlock8(buffer, 0, bytes, offset, entryBits);
+    offset += entryBits;
+    for (int i = 0; i < 8; i++) {
+      buffer[i] += previous;
+      previous = buffer[i];
+    }
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java
new file mode 100644
index 000000000..9a4754574
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+
+/*
+ * This is to iterate over serial version 3 sketch representation
+ */
+class BytesCompactHashIterator implements HashIterator {
+  private final byte[] bytes;
+  private final int offset; //byte position in bytes of the first hash
+  private final int numEntries;
+  private int index = -1; //current entry; -1 until the first call to next()
+
+  BytesCompactHashIterator(
+      final byte[] bytes,
+      final int offset,
+      final int numEntries
+  ) {
+    this.bytes = bytes;
+    this.offset = offset;
+    this.numEntries = numEntries;
+  }
+
+  @Override
+  public long get() { //entry k is the long stored at byte position offset + 8 * k
+    return ByteArrayUtil.getLongLE(bytes, offset + (Long.BYTES * index));
+  }
+
+  @Override
+  public boolean next() {
+    index++;
+    return index < numEntries;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/CompactOperations.java b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java
new file mode 100644
index 000000000..ab342a1f9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER;
+import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.insertP;
+import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesStateException;
+
+/**
+ * @author Lee Rhodes
+ */
+final class CompactOperations {
+
+ private CompactOperations() {} //private and empty: this class cannot be instantiated from outside
+
+ static CompactSketch componentsToCompact( //No error checking
+ final long thetaLong,
+ final int curCount,
+ final short seedHash,
+ final boolean srcEmpty,
+ final boolean srcCompact,
+ final boolean srcOrdered,
+ final boolean dstOrdered,
+ final MemorySegment dstWSeg,
+ final long[] hashArr) //may not be compacted, ordered or unordered, may be null
+ {
+ final boolean direct = dstWSeg != null;
+ final boolean empty = srcEmpty || ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
+ final boolean single = (curCount == 1) && (thetaLong == Long.MAX_VALUE);
+ final long[] hashArrOut;
+ if (!srcCompact) {
+ hashArrOut = CompactOperations.compactCache(hashArr, curCount, thetaLong, dstOrdered);
+ } else {
+ hashArrOut = hashArr;
+ }
+ if (!srcOrdered && dstOrdered && !empty && !single) {
+ Arrays.sort(hashArrOut);
+ }
+ //Note: for empty or single we always output the ordered form.
+ final boolean dstOrderedOut = (empty || single) ? true : dstOrdered;
+ if (direct) {
+ final int preLongs = computeCompactPreLongs(empty, curCount, thetaLong);
+ int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK; //always LE
+ flags |= empty ? EMPTY_FLAG_MASK : 0;
+ flags |= dstOrderedOut ? ORDERED_FLAG_MASK : 0;
+ flags |= single ? SINGLEITEM_FLAG_MASK : 0;
+
+ final MemorySegment seg =
+ loadCompactMemory(hashArrOut, seedHash, curCount, thetaLong, dstWSeg, (byte)flags, preLongs);
+ return new DirectCompactSketch(seg);
+
+ } else { //Heap
+ if (empty) {
+ return EmptyCompactSketch.getInstance();
+ }
+ if (single) {
+ return new SingleItemSketch(hashArrOut[0], seedHash);
+ }
+ return new HeapCompactSketch(hashArrOut, empty, seedHash, curCount, thetaLong, dstOrderedOut);
+ }
+ }
+
+  /**
+   * Heapify or convert a source Theta Sketch MemorySegment image into a heap or target MemorySegment
+   * CompactSketch. This assumes hashSeed is OK; serVer = 3.
+   *
+   * <p>If an ordered result is requested from a compact source whose ordered flag is not set, the
+   * copied hashes are sorted, so the ordered flag of the result always matches its contents.</p>
+   *
+   * @param srcSeg the given input source MemorySegment image. Can be Read Only.
+   * @param dstOrdered the desired ordering of the resulting CompactSketch
+   * @param dstWSeg the writable target for a MemorySegment-based result, or null for a heap result.
+   * @return a CompactSketch of the correct form.
+   */
+  static CompactSketch memoryToCompact(
+      final MemorySegment srcSeg,
+      final boolean dstOrdered,
+      final MemorySegment dstWSeg)
+  {
+    //extract Pre0 fields and Flags from srcSeg
+    final int srcPreLongs = extractPreLongs(srcSeg);
+    final int srcSerVer = extractSerVer(srcSeg);
+    final int srcFamId = extractFamilyID(srcSeg);
+    final int srcLgArrLongs = extractLgArrLongs(srcSeg);
+    final int srcFlags = extractFlags(srcSeg);
+    final short srcSeedHash = (short) extractSeedHash(srcSeg);
+
+    //srcFlags
+    final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0;
+    final boolean srcEmptyFlag = (srcFlags & EMPTY_FLAG_MASK) > 0;
+    final boolean srcCompactFlag = (srcFlags & COMPACT_FLAG_MASK) > 0;
+    final boolean srcOrderedFlag = (srcFlags & ORDERED_FLAG_MASK) > 0;
+    final boolean srcSingleFlag = (srcFlags & SINGLEITEM_FLAG_MASK) > 0;
+
+    final boolean single = srcSingleFlag
+        || SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags);
+
+    //extract pre1 and pre2 fields
+    final int curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcSeg) : 0;
+    final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcSeg) : Long.MAX_VALUE;
+
+    //do some basic checks ...
+    if (srcEmptyFlag) { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); }
+    if (single) { assert (curCount == 1) && (thetaLong == Long.MAX_VALUE); }
+    checkFamilyAndFlags(srcFamId, srcCompactFlag, srcReadOnlyFlag);
+
+    //dispatch empty and single cases
+    //Note: for empty and single we always output the ordered form.
+    final boolean dstOrderedOut = (srcEmptyFlag || single) ? true : dstOrdered;
+    if (srcEmptyFlag) {
+      if (dstWSeg == null) { return EmptyCompactSketch.getInstance(); }
+      MemorySegment.copy(EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8);
+      return new DirectCompactSketch(dstWSeg);
+    }
+    if (single) {
+      final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, srcPreLongs << 3);
+      final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash);
+      if (dstWSeg == null) { return sis; } //heap
+      MemorySegment.copy(sis.toByteArray(), 0, dstWSeg, JAVA_BYTE, 0, 16);
+      return new DirectCompactSketch(dstWSeg);
+    }
+
+    //extract hashArr > 1
+    final long[] hashArr;
+    if (srcCompactFlag) {
+      hashArr = new long[curCount];
+      MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, hashArr, 0, curCount);
+      //The copy keeps the source order. If the source is not flagged as ordered and an ordered
+      // result is required, sort here; otherwise the result would be marked ordered while unsorted.
+      if (dstOrderedOut && !srcOrderedFlag) { Arrays.sort(hashArr); }
+    } else { //update sketch, thus hashTable form
+      final int srcCacheLen = 1 << srcLgArrLongs;
+      final long[] tempHashArr = new long[srcCacheLen];
+      MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, tempHashArr, 0, srcCacheLen);
+      hashArr = compactCache(tempHashArr, curCount, thetaLong, dstOrderedOut);
+    }
+
+    final int flagsOut = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK
+        | ((dstOrderedOut) ? ORDERED_FLAG_MASK : 0);
+
+    //load the destination.
+    if (dstWSeg != null) {
+      final MemorySegment tgtSeg = loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstWSeg,
+          (byte)flagsOut, srcPreLongs);
+      return new DirectCompactSketch(tgtSeg);
+    } else { //heap
+      return new HeapCompactSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong,
+          dstOrderedOut);
+    }
+  }
+
+ private static final void checkFamilyAndFlags(
+ final int srcFamId,
+ final boolean srcCompactFlag,
+ final boolean srcReadOnlyFlag) {
+ final Family srcFamily = Family.idToFamily(srcFamId);
+ if (srcCompactFlag) {
+ if ((srcFamily == Family.COMPACT) && srcReadOnlyFlag) { return; }
+ } else {
+ if (srcFamily == Family.ALPHA) { return; }
+ if (srcFamily == Family.QUICKSELECT) { return; }
+ }
+ throw new SketchesArgumentException(
+ "Possible Corruption: Family does not match flags: Family: "
+ + srcFamily.toString()
+ + ", Compact Flag: " + srcCompactFlag
+ + ", ReadOnly Flag: " + srcReadOnlyFlag);
+ }
+
+  //Writes a compact sketch image, the preamble followed by curCount hashes, into dstWSeg.
+  //All arguments must be valid and correct including flags.
+  //Used as a helper to create byte arrays and to load direct compact sketches.
+  //Input must be writable, return can be Read Only
+  //Throws SketchesArgumentException if dstWSeg is smaller than (preLongs + curCount) * 8 bytes.
+  static final MemorySegment loadCompactMemory(
+      final long[] compactHashArr,
+      final short seedHash,
+      final int curCount,
+      final long thetaLong,
+      final MemorySegment dstWSeg,
+      final byte flags,
+      final int preLongs)
+  {
+    assert (dstWSeg != null) && (compactHashArr != null);
+    //Sizes are longs: narrowing byteSize() to an int can make a segment over 2GB look too small.
+    final long outBytes = ((long) preLongs + curCount) << 3;
+    final long dstBytes = dstWSeg.byteSize();
+    if (outBytes > dstBytes) {
+      throw new SketchesArgumentException("Insufficient Memory: " + dstBytes + ", Need: " + outBytes);
+    }
+    final byte famID = (byte) Family.COMPACT.getID();
+
+    //Caution: The following loads directly into the MemorySegment without creating a heap byte[]
+    // first, which would act as a pre-clearing, initialization mechanism. So it is important to
+    // make sure that all fields are initialized, even those that are not used by the CompactSketch.
+    // Otherwise, uninitialized fields could be filled with off-heap garbage, which could cause
+    // other problems downstream if those fields are not filtered out first.
+    // As written below, all fields are initialized avoiding an extra copy.
+
+    //The first 8 bytes (pre0)
+    insertPreLongs(dstWSeg, preLongs); //RF not used = 0
+    insertSerVer(dstWSeg, SER_VER);
+    insertFamilyID(dstWSeg, famID);
+    //The following initializes the lgNomLongs and lgArrLongs to 0.
+    //They are not used in CompactSketches.
+    dstWSeg.set(JAVA_SHORT_UNALIGNED, LG_NOM_LONGS_BYTE, (short)0);
+    insertFlags(dstWSeg, flags);
+    insertSeedHash(dstWSeg, seedHash);
+
+    if ((preLongs == 1) && (curCount == 1)) { //singleItem, theta = 1.0
+      dstWSeg.set(JAVA_LONG_UNALIGNED, 8, compactHashArr[0]);
+      return dstWSeg;
+    }
+    if (preLongs > 1) {
+      insertCurCount(dstWSeg, curCount);
+      insertP(dstWSeg, (float) 1.0);
+    }
+    if (preLongs > 2) {
+      insertThetaLong(dstWSeg, thetaLong);
+    }
+    if (curCount > 0) { //theta could be < 1.0.
+      MemorySegment.copy(compactHashArr, 0, dstWSeg, JAVA_LONG_UNALIGNED, preLongs << 3, curCount);
+    }
+    return dstWSeg; //if prelongs == 3 & curCount == 0, theta could be < 1.0. This can be RO
+  }
+
+ /**
+ * Copies then compacts, cleans, and may sort the resulting array.
+ * The source cache can be a hash table with interstitial zeros or
+ * "dirty" values, which are hash values greater than theta.
+ * These can be generated by the Alpha sketch.
+ * @param srcCache anything
+ * @param curCount must be correct
+ * @param thetaLong The correct
+ * thetaLong.
+ * @param dstOrdered true if output array must be sorted
+ * @return the compacted array.
+ */
+ static final long[] compactCache(final long[] srcCache, final int curCount,
+ final long thetaLong, final boolean dstOrdered) {
+ if (curCount == 0) {
+ return new long[0];
+ }
+ final long[] cacheOut = new long[curCount];
+ final int len = srcCache.length;
+ int j = 0; //number of valid hashes found so far
+ for (int i = 0; i < len; i++) { //scan the full srcCache
+ final long v = srcCache[i];
+ if ((v <= 0L) || (v >= thetaLong) ) { continue; } //ignoring zeros or dirty values
+ if (j < curCount) { cacheOut[j] = v; } //never write past the declared count
+ j++;
+ }
+ if (j != curCount) { //too few or too many valid hashes; avoids a raw ArrayIndexOutOfBoundsException
+ throw new SketchesStateException("Possible Corruption: curCount parameter is incorrect.");
+ }
+ if (dstOrdered && (curCount > 1)) {
+ Arrays.sort(cacheOut);
+ }
+ return cacheOut;
+ }
+
+ /*
+ * The truth table for empty, curCount and theta when compacting is as follows:
+ *
+ * Num Theta CurCount Empty State Name, Comments
+ * 0 1.0 0 T OK EMPTY: The Normal Empty State
+ * 1 1.0 0 F Internal This can occur internally as the result of an intersection of two exact,
+ * disjoint sets, or AnotB of two exact, identical sets. There is no probability
+ * distribution, so this is converted internally to EMPTY {1.0, 0, T}.
+ * This is handled in SetOperation.createCompactSketch().
+ * 2 1.0 !0 T Error Empty=T and curCount !0 should never coexist.
+ * This is checked in all compacting operations.
+ * 3 1.0 !0 F OK EXACT: This corresponds to a sketch in exact mode
+ * 4 <1.0 0 T Internal This can be an initial UpdateSketch state if p < 1.0,
+ * so change theta to 1.0. Return {Th = 1.0, 0, T}.
+ * This is handled in UpdateSketch.compact() and toByteArray().
+ * 5 <1.0 0 F OK This can result from set operations
+ * 6 <1.0 !0 T Error Empty=T and curCount !0 should never coexist.
+ * This is checked in all compacting operations.
+ * 7 <1.0 !0 F OK This corresponds to a sketch in estimation mode
+ *
+ * #4 is handled by correctThetaOnCompact(boolean, int, long) (below).
+ * #2 & #6 handled by checkIllegalCurCountAndEmpty(boolean, int)
+ */
+
+ /**
+ * This corrects a temporary anomalous condition where compact() is called on an UpdateSketch
+ * that was initialized with p < 1.0 and update() was never called. In this case Theta < 1.0,
+ * curCount = 0, and empty = true. The correction is to change Theta to 1.0, which makes the
+ * returning sketch empty. This should only be used in the compaction or serialization of an
+ * UpdateSketch.
+ * @param empty the given empty state
+ * @param curCount the given curCount
+ * @param thetaLong the given thetaLong
+ * @return thetaLong
+ */
+ static final long correctThetaOnCompact(final boolean empty, final int curCount,
+ final long thetaLong) { //handles #4 above
+ return (empty && (curCount == 0)) ? Long.MAX_VALUE : thetaLong;
+ }
+
+ /**
+ * This checks for the illegal condition where curCount > 0 and the state of
+ * empty = true. This check can be used anywhere a sketch is returned or a sketch is created
+ * from complete arguments.
+ * @param empty the given empty state
+ * @param curCount the given current count
+ */ //This handles #2 and #6 above
+ static final void checkIllegalCurCountAndEmpty(final boolean empty, final int curCount) {
+ if (empty && (curCount != 0)) { //this handles #2 and #6 above
+ throw new SketchesStateException("Illegal State: Empty=true and Current Count != 0.");
+ }
+ }
+
+ /**
+ * This computes the number of preamble longs for a compact sketch based on empty,
+ * curCount and thetaLong.
+ * This also accommodates for EmptyCompactSketch and SingleItemSketch.
+ * @param empty The given empty state
+ * @param curCount The given current count (retained entries)
+ * @param thetaLong the current thetaLong
+ * @return the number of preamble longs
+ */
+ static final int computeCompactPreLongs(final boolean empty, final int curCount,
+ final long thetaLong) {
+ return (thetaLong < Long.MAX_VALUE) ? 3 : empty ? 1 : (curCount > 1) ? 2 : 1;
+ }
+
+ /**
+ * This checks for the singleItem Compact Sketch.
+ * @param empty the given empty state
+ * @param curCount the given curCount
+ * @param thetaLong the given thetaLong
+ * @return true if notEmpty, curCount = 1 and theta = 1.0;
+ */
+ static final boolean isSingleItem(final boolean empty, final int curCount,
+ final long thetaLong) {
+ return !empty && (curCount == 1) && (thetaLong == Long.MAX_VALUE);
+ }
+}
+
diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java
new file mode 100644
index 000000000..0498eed34
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java
@@ -0,0 +1,478 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+import static org.apache.datasketches.common.ByteArrayUtil.getShortLE;
+import static org.apache.datasketches.common.Family.idToFamily;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT;
+import static org.apache.datasketches.theta2.PreambleUtil.extractEntryBitsV4;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractNumEntriesBytesV4;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLongV4;
+import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits;
+import static org.apache.datasketches.theta2.SingleItemSketch.otherCheckForSingleItem;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The parent class of all the CompactSketches. CompactSketches are never created directly.
+ * They are created as a result of the compact() method of an UpdateSketch, a result of a
+ * getResult() of a SetOperation, or from a heapify method.
+ *
+ * A CompactSketch is the simplest form of a Theta Sketch. It consists of a compact list
+ * (i.e., no intervening spaces) of hash values, which may be ordered or not, a value for theta
+ * and a seed hash. A CompactSketch is immutable (read-only),
+ * and the space required when stored is only the space required for the hash values and 8 to 24
+ * bytes of preamble. An empty CompactSketch consumes only 8 bytes.
+ *
+ * @author Lee Rhodes
+ */
+public abstract class CompactSketch extends Sketch {
+
+ /**
+ * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch.
+ *
+ * The resulting sketch will not retain any link to the source MemorySegment and all of its data will be
+ * copied to the heap CompactSketch.
+ *
+ * This method assumes that the sketch image was created with the correct hash seed, so it is not checked.
+ * The resulting on-heap CompactSketch will be given the seedHash derived from the given sketch image.
+ * However, Serial Version 1 sketch images do not have a seedHash field,
+ * so the resulting heapified CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.
+ *
+ * @param srcSeg an image of a CompactSketch.
+ * @return a CompactSketch on the heap.
+ */
+ public static CompactSketch heapify(final MemorySegment srcSeg) {
+ return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+ }
+
+ /**
+ * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch.
+ *
+ * The resulting sketch will not retain any link to the source MemorySegment and all of its data will be
+ * copied to the heap CompactSketch.
+ *
+ * This method checks if the given expectedSeed was used to create the source MemorySegment image.
+ * However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field,
+ * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.
+ *
+ * @param srcSeg an image of a CompactSketch that was created using the given expectedSeed.
+ * See MemorySegment.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return a CompactSketch on the heap.
+ */
+ public static CompactSketch heapify(final MemorySegment srcSeg, final long expectedSeed) {
+ return heapify(srcSeg, expectedSeed, true);
+ }
+
+ private static CompactSketch heapify(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
+ final int serVer = extractSerVer(srcSeg);
+ final int familyID = extractFamilyID(srcSeg);
+ final Family family = idToFamily(familyID);
+ if (family != Family.COMPACT) {
+ throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!");
+ }
+ if (serVer == 4) {
+ return heapifyV4(srcSeg, seed, enforceSeed);
+ }
+ if (serVer == 3) {
+ final int flags = extractFlags(srcSeg);
+ final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0;
+ final boolean empty = (flags & EMPTY_FLAG_MASK) != 0;
+ if (enforceSeed && !empty) { PreambleUtil.checkMemorySeedHash(srcSeg, seed); }
+ return CompactOperations.memoryToCompact(srcSeg, srcOrdered, null);
+ }
+ //not SerVer 3, assume compact stored form
+ final short seedHash = ThetaUtil.computeSeedHash(seed);
+ if (serVer == 1) {
+ return ForwardCompatibility.heapify1to3(srcSeg, seedHash);
+ }
+ if (serVer == 2) {
+ return ForwardCompatibility.heapify2to3(srcSeg,
+ enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
+ }
+ throw new SketchesArgumentException("Unknown Serialization Version: " + serVer);
+ }
+
+ /**
+ * Wrap takes the CompactSketch image in given MemorySegment and refers to it directly.
+ * There is no data copying onto the java heap.
+ * The wrap operation enables fast read-only merging and access to all the public read-only API.
+ *
+ * Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as direct sketches can be wrapped.
+ * Wrapping earlier serial version sketches will result in a heapify operation.
+ * These early versions were never designed to "wrap".
+ *
+ * Wrapping any subclass of this class that is empty or contains only a single item will
+ * result in heapified forms of empty and single item sketch respectively.
+ * This is actually faster and consumes less overall memory.
+ *
+ * This method assumes that the sketch image was created with the correct hash seed, so it is not checked.
+ * However, Serial Version 1 sketch images do not have a seedHash field,
+ * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.
+ *
+ * @param srcSeg an image of a Sketch.
+ * @return a CompactSketch backed by the given MemorySegment except as above.
+ */
+ public static CompactSketch wrap(final MemorySegment srcSeg) {
+ return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+ }
+
+ /**
+ * Wrap takes the sketch image in the given MemorySegment and refers to it directly.
+ * There is no data copying onto the java heap.
+ * The wrap operation enables fast read-only merging and access to all the public read-only API.
+ *
+ * Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as direct sketches can be wrapped.
+ * Wrapping earlier serial version sketches will result in a heapify operation.
+ * These early versions were never designed to "wrap".
+ *
+ * Wrapping any subclass of this class that is empty or contains only a single item will
+ * result in heapified forms of empty and single item sketch respectively.
+ * This is actually faster and consumes less overall memory.
+ *
+ * This method checks if the given expectedSeed was used to create the source MemorySegment image.
+ * However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field,
+ * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.
+ *
+ * @param srcSeg an image of a Sketch that was created using the given expectedSeed.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return a CompactSketch backed by the given MemorySegment except as above.
+ */
+ public static CompactSketch wrap(final MemorySegment srcSeg, final long expectedSeed) {
+ return wrap(srcSeg, expectedSeed, true);
+ }
+
+ private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
+ final int serVer = extractSerVer(srcSeg);
+ final int familyID = extractFamilyID(srcSeg);
+ final Family family = Family.idToFamily(familyID);
+ if (family != Family.COMPACT) {
+ throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!");
+ }
+ final short seedHash = ThetaUtil.computeSeedHash(seed);
+
+ if (serVer == 4) {
+ return DirectCompactCompressedSketch.wrapInstance(srcSeg,
+ enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
+ }
+ else if (serVer == 3) {
+ if (PreambleUtil.isEmptyFlag(srcSeg)) {
+ return EmptyCompactSketch.getHeapInstance(srcSeg);
+ }
+ if (otherCheckForSingleItem(srcSeg)) {
+ return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
+ }
+ //not empty & not singleItem
+ final int flags = extractFlags(srcSeg);
+ final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
+ if (!compactFlag) {
+ throw new SketchesArgumentException(
+ "Corrupted: COMPACT family sketch image must have compact flag set");
+ }
+ final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
+ if (!readOnly) {
+ throw new SketchesArgumentException(
+ "Corrupted: COMPACT family sketch image must have Read-Only flag set");
+ }
+ return DirectCompactSketch.wrapInstance(srcSeg,
+ enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
+ } //end of serVer 3
+ else if (serVer == 1) {
+ return ForwardCompatibility.heapify1to3(srcSeg, seedHash);
+ }
+ else if (serVer == 2) {
+ return ForwardCompatibility.heapify2to3(srcSeg,
+ enforceSeed ? seedHash : (short) extractSeedHash(srcSeg));
+ }
+ throw new SketchesArgumentException(
+ "Corrupted: Serialization Version " + serVer + " not recognized.");
+ }
+
+ /**
+ * Wrap takes the sketch image in the given byte array and refers to it directly.
+ * There is no data copying onto the java heap.
+ * The wrap operation enables fast read-only merging and access to all the public read-only API.
+ *
+ * Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as direct sketches can be wrapped.
+ * Wrapping earlier serial version sketches will result in a heapify operation.
+ * These early versions were never designed to "wrap".
+ *
+ * Wrapping any subclass of this class that is empty or contains only a single item will
+ * result in heapified forms of empty and single item sketch respectively.
+ * This is actually faster and consumes less overall memory.
+ *
+ * This method assumes that the byte array image was created with the correct hash seed; the seed hash of
+ * Serial Version 2 and 3 images is not checked. Serial Version 1 sketch images do not have a seedHash field,
+ * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.
+ *
+ * @param bytes a byte array image of a Sketch.
+ *
+ * @return a CompactSketch backed by the given byte array except as above.
+ */
+ public static CompactSketch wrap(final byte[] bytes) {
+ return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+ }
+
+ /**
+ * Wrap takes the sketch image in the given byte array and refers to it directly.
+ * There is no data copying onto the java heap.
+ * The wrap operation enables fast read-only merging and access to all the public read-only API.
+ *
+ * Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as direct sketches can be wrapped.
+ * Wrapping earlier serial version sketches will result in a heapify operation.
+ * These early versions were never designed to "wrap".
+ *
+ * Wrapping any subclass of this class that is empty or contains only a single item will
+ * result in heapified forms of empty and single item sketch respectively.
+ * This is actually faster and consumes less overall memory.
+ *
+ * This method checks if the given expectedSeed was used to create the source byte array image.
+ * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field,
+ * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.
+ *
+ * @param bytes a byte array image of a Sketch that was created using the given expectedSeed.
+ * @param expectedSeed the seed used to validate the given byte array image.
+ * See Update Hash Seed.
+ * @return a CompactSketch backed by the given byte array except as above.
+ */
+ public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) {
+ return wrap(bytes, expectedSeed, true);
+ }
+
+ private static CompactSketch wrap(final byte[] bytes, final long seed, final boolean enforceSeed) {
+ final int serVer = bytes[PreambleUtil.SER_VER_BYTE];
+ final int familyId = bytes[PreambleUtil.FAMILY_BYTE];
+ final Family family = Family.idToFamily(familyId);
+ if (family != Family.COMPACT) {
+ throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!");
+ }
+ final short seedHash = ThetaUtil.computeSeedHash(seed); //only used when enforceSeed is true (or serVer 1)
+ if (serVer == 4) { //if the seed is not enforced, use the image's stored seedHash, as the MemorySegment wrap does
+ return WrappedCompactCompressedSketch.wrapInstance(bytes, enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
+ } else if (serVer == 3) {
+ final int flags = bytes[FLAGS_BYTE];
+ if ((flags & EMPTY_FLAG_MASK) > 0) {
+ return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes));
+ }
+ final int preLongs = bytes[PREAMBLE_LONGS_BYTE];
+ if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) {
+ return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
+ }
+ //not empty & not singleItem
+ final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
+ if (!compactFlag) {
+ throw new SketchesArgumentException(
+ "Corrupted: COMPACT family sketch image must have compact flag set");
+ }
+ final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
+ if (!readOnly) {
+ throw new SketchesArgumentException(
+ "Corrupted: COMPACT family sketch image must have Read-Only flag set");
+ }
+ return WrappedCompactSketch.wrapInstance(bytes,
+ enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
+ } else if (serVer == 1) {
+ return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash);
+ } else if (serVer == 2) {
+ return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes),
+ enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT));
+ }
+ throw new SketchesArgumentException(
+ "Corrupted: Serialization Version " + serVer + " not recognized.");
+ }
+
+ //Sketch Overrides
+
+ @Override
+ public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstMem);
+
+ @Override
+ public int getCompactBytes() {
+ return getCurrentBytes();
+ }
+
+ @Override
+ int getCurrentDataLongs() {
+ return getRetainedEntries(true);
+ }
+
+ @Override
+ public Family getFamily() {
+ return Family.COMPACT;
+ }
+
+ @Override
+ public boolean isCompact() {
+ return true;
+ }
+
+ @Override
+ public double getEstimate() {
+ return Sketch.estimate(getThetaLong(), getRetainedEntries());
+ }
+
+ /**
+ * gets the sketch as a compressed byte array
+ * @return the sketch as a compressed byte array
+ */
+ public byte[] toByteArrayCompressed() {
+ if (!isOrdered() || getRetainedEntries() == 0 || (getRetainedEntries() == 1 && !isEstimationMode())) {
+ return toByteArray();
+ }
+ return toByteArrayV4();
+ }
+
+ private int computeMinLeadingZeros() {
+ // compression is based on leading zeros in deltas between ordered hash values
+ // assumes ordered sketch
+ long previous = 0;
+ long ored = 0;
+ final HashIterator it = iterator();
+ while (it.next()) {
+ final long delta = it.get() - previous;
+ ored |= delta;
+ previous = it.get();
+ }
+ return Long.numberOfLeadingZeros(ored);
+ }
+
+ private byte[] toByteArrayV4() {
+ final int preambleLongs = isEstimationMode() ? 2 : 1;
+ final int entryBits = 64 - computeMinLeadingZeros();
+ final long compressedBits = (long) entryBits * getRetainedEntries(); //long: the bit count can exceed Integer.MAX_VALUE
+
+ // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
+ final int numEntriesBytes = wholeBytesToHoldBits(32 - Integer.numberOfLeadingZeros(getRetainedEntries()));
+
+ final int sizeBytes = Math.toIntExact(preambleLongs * Long.BYTES + numEntriesBytes + ((compressedBits + 7) >>> 3));
+ final byte[] bytes = new byte[sizeBytes];
+ final MemorySegment wseg = MemorySegment.ofArray(bytes);
+ int offsetBytes = 0;
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) preambleLongs);
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) 4); // to do: add constant
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) Family.COMPACT.getID());
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) entryBits);
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) numEntriesBytes);
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK));
+ wseg.set(JAVA_SHORT_UNALIGNED, offsetBytes, getSeedHash());
+ offsetBytes += Short.BYTES;
+ if (isEstimationMode()) {
+ wseg.set(JAVA_LONG_UNALIGNED, offsetBytes, getThetaLong());
+ offsetBytes += Long.BYTES;
+ }
+ int numEntries = getRetainedEntries();
+ for (int i = 0; i < numEntriesBytes; i++) { //entry count, least-significant byte first
+ wseg.set(JAVA_BYTE, offsetBytes++, (byte) (numEntries & 0xff));
+ numEntries >>>= 8;
+ }
+ long previous = 0;
+ final long[] deltas = new long[8];
+ final HashIterator it = iterator();
+ int i;
+ for (i = 0; i + 7 < getRetainedEntries(); i += 8) { //full blocks of 8 deltas
+ for (int j = 0; j < 8; j++) {
+ it.next();
+ deltas[j] = it.get() - previous;
+ previous = it.get();
+ }
+ BitPacking.packBitsBlock8(deltas, 0, bytes, offsetBytes, entryBits);
+ offsetBytes += entryBits; //8 entries * entryBits bits = entryBits bytes
+ }
+ int offsetBits = 0;
+ for (; i < getRetainedEntries(); i++) { //remaining (fewer than 8) deltas, packed one at a time
+ it.next();
+ final long delta = it.get() - previous;
+ previous = it.get();
+ BitPacking.packBits(delta, entryBits, bytes, offsetBytes, offsetBits);
+ offsetBytes += (offsetBits + entryBits) >>> 3;
+ offsetBits = (offsetBits + entryBits) & 7;
+ }
+ return bytes;
+ }
+
+ private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) {
+ final int preLongs = extractPreLongs(srcSeg);
+ final int entryBits = extractEntryBitsV4(srcSeg);
+ final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg);
+ final short seedHash = (short) extractSeedHash(srcSeg);
+ if (enforceSeed) { PreambleUtil.checkMemorySeedHash(srcSeg, seed); }
+ int offsetBytes = 8;
+ long theta = Long.MAX_VALUE;
+ if (preLongs > 1) {
+ theta = extractThetaLongV4(srcSeg);
+ offsetBytes += Long.BYTES;
+ }
+ int numEntries = 0;
+ for (int i = 0; i < numEntriesBytes; i++) {
+ numEntries |= Byte.toUnsignedInt(srcSeg.get(JAVA_BYTE, offsetBytes++)) << (i << 3);
+ }
+ final long[] entries = new long[numEntries];
+ final byte[] bytes = new byte[entryBits]; // temporary buffer for unpacking
+ int i;
+ for (i = 0; i + 7 < numEntries; i += 8) {
+ MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, entryBits);
+ BitPacking.unpackBitsBlock8(entries, i, bytes, 0, entryBits);
+ offsetBytes += entryBits;
+ }
+ if (i < numEntries) {
+ MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits));
+ int offsetBits = 0;
+ offsetBytes = 0;
+ for (; i < numEntries; i++) {
+ BitPacking.unpackBits(entries, i, entryBits, bytes, offsetBytes, offsetBits);
+ offsetBytes += (offsetBits + entryBits) >>> 3;
+ offsetBits = (offsetBits + entryBits) & 7;
+ }
+ }
+ // undo deltas
+ long previous = 0;
+ for (i = 0; i < numEntries; i++) {
+ entries[i] += previous;
+ previous = entries[i];
+ }
+ return new HeapCompactSketch(entries, false, seedHash, numEntries, theta, true);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java
new file mode 100644
index 000000000..2d529c4ce
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Background propagation thread. Propagates a given sketch or a hash value from a local thread's
+ * buffer into the shared sketch, which stores the most up-to-date estimate of the number of unique
+ * items. This propagation is done in the background by dedicated threads, which allows
+ * application threads to continue updating their local buffers.
+ *
+ * @author eshcar
+ */
+class ConcurrentBackgroundThetaPropagation implements Runnable {
+
+ // Shared sketch to absorb the data
+ private final ConcurrentSharedThetaSketch sharedThetaSketch;
+
+ // Propagation flag of local buffer that is being processed.
+ // It is the synchronization primitive to coordinate the work of the propagation with the
+ // local buffer. Updated when the propagation completes.
+ private final AtomicBoolean localPropagationInProgress;
+
+ // Sketch to be propagated to shared sketch. Can be null if only a single hash is propagated
+ private final Sketch sketchIn;
+
+ // Hash of the datum to be propagated to shared sketch. Can be ConcurrentSharedThetaSketch.NOT_SINGLE_HASH
+ // if the data is propagated through a sketch.
+ private final long singleHash;
+
+ // The propagation epoch. The data can be propagated only within the context of this epoch.
+ // The data should not be propagated if this epoch is not equal to the
+ // shared sketch epoch.
+ private final long epoch;
+
+ ConcurrentBackgroundThetaPropagation(final ConcurrentSharedThetaSketch sharedThetaSketch,
+ final AtomicBoolean localPropagationInProgress, final Sketch sketchIn, final long singleHash,
+ final long epoch) {
+ this.sharedThetaSketch = sharedThetaSketch;
+ this.localPropagationInProgress = localPropagationInProgress;
+ this.sketchIn = sketchIn;
+ this.singleHash = singleHash;
+ this.epoch = epoch;
+ }
+
+ /**
+ * Propagation protocol:
+ * 1) validate propagation is executed at the context of the right epoch, otherwise abort
+ * 2) handle propagation: either of a single hash or of a sketch
+ * 3) complete propagation: ping local buffer
+ */
+ @Override
+ public void run() {
+ // 1) validate propagation is executed at the context of the right epoch, otherwise abort
+ if (!sharedThetaSketch.validateEpoch(epoch)) {
+ // invalid epoch - should not propagate
+ sharedThetaSketch.endPropagation(null, false);
+ return;
+ }
+
+ // 2) handle propagation: either of a single hash or of a sketch
+ if (singleHash != ConcurrentSharedThetaSketch.NOT_SINGLE_HASH) {
+ sharedThetaSketch.propagate(singleHash);
+ } else if (sketchIn != null) {
+ final long volTheta = sharedThetaSketch.getVolatileTheta();
+ assert volTheta <= sketchIn.getThetaLong() :
+ "volTheta = " + volTheta + ", bufTheta = " + sketchIn.getThetaLong();
+
+ // propagate values from input sketch one by one
+ final long[] cacheIn = sketchIn.getCache();
+
+ if (sketchIn.isOrdered()) { //Ordered compact, Use early stop
+ for (final long hashIn : cacheIn) {
+ if (hashIn >= volTheta) {
+ break; //early stop
+ }
+ sharedThetaSketch.propagate(hashIn);
+ }
+ } else { //not ordered, also may have zeros (gaps) in the array.
+ for (final long hashIn : cacheIn) {
+ if (hashIn > 0) {
+ sharedThetaSketch.propagate(hashIn);
+ }
+ }
+ }
+ }
+
+ // 3) complete propagation: ping local buffer
+ sharedThetaSketch.endPropagation(localPropagationInProgress, false);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java
new file mode 100644
index 000000000..af5917123
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+
+import java.lang.foreign.MemorySegment;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SuppressFBWarnings;
+
+/**
+ * A concurrent shared sketch that is based on DirectQuickSelectSketch.
+ * It reflects all data processed by a single or multiple update threads, and can serve queries at
+ * any time.
+ * Background propagation threads are used to propagate data from thread local buffers into this
+ * sketch which stores the most up-to-date estimation of number of unique items.
+ *
+ * @author eshcar
+ * @author Lee Rhodes
+ */
+final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch
+ implements ConcurrentSharedThetaSketch {
+
+ // The propagation executor. Volatile: reset() replaces it while propagate() callers read it.
+ private volatile ExecutorService executorService_;
+
+ // A flag to coordinate between several eager propagation threads
+ private final AtomicBoolean sharedPropagationInProgress_;
+
+ // Theta value of concurrent sketch
+ private volatile long volatileThetaLong_;
+
+ // A snapshot of the estimated number of unique entries
+ private volatile double volatileEstimate_;
+
+ // Num of retained entries in which the sketch toggles from sync (exact) mode to async
+ // propagation mode
+ private final long exactLimit_;
+
+ // An epoch defines an interval between two resets. A propagation invoked at epoch i cannot
+ // affect the sketch at epoch j > i.
+ private volatile long epoch_;
+
+ /**
+ * Construct a new sketch instance and initialize the given MemorySegment as its backing store.
+ *
+ * @param lgNomLongs See lgNomLongs.
+ * @param seed See Update Hash Seed.
+ * @param maxConcurrencyError the max error value including error induced by concurrency.
+ * @param dstSeg the given MemorySegment object destination. It cannot be null.
+ */
+ ConcurrentDirectQuickSelectSketch(final int lgNomLongs, final long seed,
+ final double maxConcurrencyError, final MemorySegment dstSeg) {
+ super(lgNomLongs, seed, 1.0F, //p
+ ResizeFactor.X1, //rf,
+ dstSeg, false); //unionGadget
+
+ volatileThetaLong_ = Long.MAX_VALUE;
+ volatileEstimate_ = 0;
+ exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(),
+ maxConcurrencyError);
+ sharedPropagationInProgress_ = new AtomicBoolean(false);
+ epoch_ = 0;
+ initBgPropagationService();
+ }
+
+ ConcurrentDirectQuickSelectSketch(final UpdateSketch sketch, final long seed,
+ final double maxConcurrencyError, final MemorySegment dstSeg) {
+ super(sketch.getLgNomLongs(), seed, 1.0F, //p
+ ResizeFactor.X1, //rf,
+ dstSeg,
+ false); //unionGadget
+
+ exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(),
+ maxConcurrencyError);
+ sharedPropagationInProgress_ = new AtomicBoolean(false);
+ epoch_ = 0;
+ initBgPropagationService();
+ for (final long hashIn : sketch.getCache()) {
+ propagate(hashIn);
+ }
+ wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, sketch.getThetaLong());
+ updateVolatileTheta();
+ updateEstimationSnapshot();
+ }
+
+ //Sketch overrides
+
+ @Override
+ public double getEstimate() {
+ return volatileEstimate_;
+ }
+
+ @Override
+ public boolean isEstimationMode() {
+ return (getRetainedEntries(false) > exactLimit_) || super.isEstimationMode();
+ }
+
+ @Override
+ public byte[] toByteArray() {
+   while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free
+   try { //serialize while holding the shared propagation flag
+     return super.toByteArray();
+   } finally { sharedPropagationInProgress_.set(false); } //release even if serialization throws
+ }
+
+ //UpdateSketch overrides
+
+ @Override
+ public UpdateSketch rebuild() {
+ super.rebuild();
+ updateEstimationSnapshot();
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ * Takes care of mutual exclusion with propagation thread.
+ */
+ @Override
+ public void reset() {
+ advanceEpoch();
+ super.reset();
+ volatileThetaLong_ = Long.MAX_VALUE;
+ volatileEstimate_ = 0;
+ }
+
+ @Override
+ UpdateReturnState hashUpdate(final long hash) {
+ final String msg = "No update method should be called directly to a shared theta sketch."
+ + " Updating the shared sketch is only permitted through propagation from local sketches.";
+ throw new UnsupportedOperationException(msg);
+ }
+
+ //ConcurrentSharedThetaSketch declarations
+
+ @Override
+ public long getExactLimit() {
+ return exactLimit_;
+ }
+
+ @Override
+ public boolean startEagerPropagation() {
+ while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free
+ return (!isEstimationMode());// no eager propagation is allowed in estimation mode
+ }
+
+ @Override
+ public void endPropagation(final AtomicBoolean localPropagationInProgress, final boolean isEager) {
+ //update volatile theta, uniques estimate and propagation flag
+ updateVolatileTheta();
+ updateEstimationSnapshot();
+ if (isEager) {
+ sharedPropagationInProgress_.set(false);
+ }
+ if (localPropagationInProgress != null) {
+ localPropagationInProgress.set(false); //clear local propagation flag
+ }
+ }
+
+ @Override
+ public long getVolatileTheta() {
+ return volatileThetaLong_;
+ }
+
+ @Override
+ public void awaitBgPropagationTermination() {
+   try {
+     executorService_.shutdown(); //no new jobs are accepted; already submitted jobs still run
+     //poll until the executor has terminated; awaitTermination blocks up to the timeout itself
+     while (!executorService_.awaitTermination(1, TimeUnit.MILLISECONDS)) { }
+   } catch (final InterruptedException e) {
+     e.printStackTrace();
+     Thread.currentThread().interrupt(); //restore the interrupt status for the caller
+   }
+ }
+
+ @Override
+ public final void initBgPropagationService() {
+ executorService_ = ConcurrentPropagationService.getExecutorService(Thread.currentThread().threadId());
+ }
+
+ @Override
+ public boolean propagate(final AtomicBoolean localPropagationInProgress,
+ final Sketch sketchIn, final long singleHash) {
+ final long epoch = epoch_;
+ if ((singleHash != NOT_SINGLE_HASH) // namely, is a single hash and
+ && (getRetainedEntries(false) < exactLimit_)) { // a small sketch then propagate myself (blocking)
+ if (!startEagerPropagation()) {
+ endPropagation(localPropagationInProgress, true);
+ return false;
+ }
+ if (!validateEpoch(epoch)) {
+ endPropagation(null, true); // do not change local flag
+ return true;
+ }
+ propagate(singleHash);
+ endPropagation(localPropagationInProgress, true);
+ return true;
+ }
+ // otherwise, be nonblocking, let background thread do the work
+ final ConcurrentBackgroundThetaPropagation job = new ConcurrentBackgroundThetaPropagation(
+ this, localPropagationInProgress, sketchIn, singleHash, epoch);
+ executorService_.execute(job);
+ return true;
+ }
+
+ @Override
+ public void propagate(final long singleHash) {
+ super.hashUpdate(singleHash);
+ }
+
+ @Override
+ public void updateEstimationSnapshot() {
+ volatileEstimate_ = super.getEstimate();
+ }
+
+ @Override
+ public void updateVolatileTheta() {
+ volatileThetaLong_ = getThetaLong();
+ }
+
+ @Override
+ public boolean validateEpoch(final long epoch) {
+ return epoch_ == epoch;
+ }
+
+ //Restricted
+
+ /**
+ * Advances the epoch while there is no background propagation
+ * This ensures a propagation invoked before the reset cannot affect the sketch after the reset
+ * is completed. Ignore VO_VOLATILE_INCREMENT findbugs warning, it is False Positive.
+ */
+ @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "Likely False Positive, Fix Later")
+ private void advanceEpoch() {
+ awaitBgPropagationTermination();
+ startEagerPropagation();
+ ConcurrentPropagationService.resetExecutorService(Thread.currentThread().threadId());
+ //no inspection NonAtomicOperationOnVolatileField
+ // this increment of a volatile field is done within the scope of the propagation
+ // synchronization and hence is done by a single thread.
+ epoch_++;
+ endPropagation(null, true);
+ initBgPropagationService();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java
new file mode 100644
index 000000000..56e254b51
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SuppressFBWarnings;
+
+/**
+ * A concurrent shared sketch that is based on HeapQuickSelectSketch.
+ * It reflects all data processed by a single or multiple update threads, and can serve queries at
+ * any time.
+ * Background propagation threads are used to propagate data from thread local buffers into this
+ * sketch which stores the most up-to-date estimation of number of unique items.
+ *
+ * @author eshcar
+ * @author Lee Rhodes
+ */
+final class ConcurrentHeapQuickSelectSketch extends HeapQuickSelectSketch
+ implements ConcurrentSharedThetaSketch {
+
+ // The propagation thread
+ private volatile ExecutorService executorService_;
+
+ //A flag to coordinate between several eager propagation threads
+ private final AtomicBoolean sharedPropagationInProgress_;
+
+ // Theta value of concurrent sketch
+ private volatile long volatileThetaLong_;
+
+ // A snapshot of the estimated number of unique entries
+ private volatile double volatileEstimate_;
+
+ // Num of retained entries in which the sketch toggles from sync (exact) mode to async
+ // propagation mode
+ private final long exactLimit_;
+
+ // An epoch defines an interval between two resets. A propagation invoked at epoch i cannot
+ // affect the sketch at epoch j > i.
+ private volatile long epoch_;
+
+ /**
+ * Construct a new sketch instance on the java heap.
+ *
+ * @param lgNomLongs See lgNomLongs.
+ * @param seed See seed
+ * @param maxConcurrencyError the max error value including error induced by concurrency
+ *
+ */
+ ConcurrentHeapQuickSelectSketch(final int lgNomLongs, final long seed,
+ final double maxConcurrencyError) {
+ super(lgNomLongs, seed, 1.0F, //p
+ ResizeFactor.X1, //rf,
+ false); //unionGadget
+
+ volatileThetaLong_ = Long.MAX_VALUE;
+ volatileEstimate_ = 0;
+ exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(),
+ maxConcurrencyError);
+ sharedPropagationInProgress_ = new AtomicBoolean(false);
+ epoch_ = 0;
+ initBgPropagationService();
+ }
+
+ ConcurrentHeapQuickSelectSketch(final UpdateSketch sketch, final long seed,
+ final double maxConcurrencyError) {
+ super(sketch.getLgNomLongs(), seed, 1.0F, //p
+ ResizeFactor.X1, //rf,
+ false); //unionGadget
+
+ exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(),
+ maxConcurrencyError);
+ sharedPropagationInProgress_ = new AtomicBoolean(false);
+ epoch_ = 0;
+ initBgPropagationService();
+ for (final long hashIn : sketch.getCache()) {
+ propagate(hashIn);
+ }
+ thetaLong_ = sketch.getThetaLong();
+ updateVolatileTheta();
+ updateEstimationSnapshot();
+ }
+
+ //Sketch overrides
+
+ @Override
+ public double getEstimate() {
+ return volatileEstimate_;
+ }
+
+ @Override
+ public boolean isEstimationMode() {
+ return (getRetainedEntries(false) > exactLimit_) || super.isEstimationMode();
+ }
+
+ @Override
+ public byte[] toByteArray() {
+   while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free
+   try { //serialize while holding the shared propagation flag
+     return super.toByteArray();
+   } finally { sharedPropagationInProgress_.set(false); } //release even if serialization throws
+ }
+
+ //UpdateSketch overrides
+
+ @Override
+ public UpdateSketch rebuild() {
+ super.rebuild();
+ updateEstimationSnapshot();
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ * Takes care of mutual exclusion with propagation thread.
+ */
+ @Override
+ public void reset() {
+ advanceEpoch();
+ super.reset();
+ volatileThetaLong_ = Long.MAX_VALUE;
+ volatileEstimate_ = 0;
+ }
+
+ @Override
+ UpdateReturnState hashUpdate(final long hash) {
+ final String msg = "No update method should be called directly to a shared theta sketch."
+ + " Updating the shared sketch is only permitted through propagation from local sketches.";
+ throw new UnsupportedOperationException(msg);
+ }
+
+ //ConcurrentSharedThetaSketch declarations
+
+ @Override
+ public long getExactLimit() {
+ return exactLimit_;
+ }
+
+ @Override
+ public boolean startEagerPropagation() {
+ while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free
+ return (!isEstimationMode());// no eager propagation is allowed in estimation mode
+ }
+
+ @Override
+ public void endPropagation(final AtomicBoolean localPropagationInProgress, final boolean isEager) {
+ //update volatile theta, uniques estimate and propagation flag
+ updateVolatileTheta();
+ updateEstimationSnapshot();
+ if (isEager) {
+ sharedPropagationInProgress_.set(false);
+ }
+ if (localPropagationInProgress != null) {
+ localPropagationInProgress.set(false); //clear local propagation flag
+ }
+ }
+
+ @Override
+ public long getVolatileTheta() {
+ return volatileThetaLong_;
+ }
+
+ @Override
+ public void awaitBgPropagationTermination() {
+   try {
+     executorService_.shutdown(); //no new jobs are accepted; already submitted jobs still run
+     //poll until the executor has terminated; awaitTermination blocks up to the timeout itself
+     while (!executorService_.awaitTermination(1, TimeUnit.MILLISECONDS)) { }
+   } catch (final InterruptedException e) {
+     e.printStackTrace();
+     Thread.currentThread().interrupt(); //restore the interrupt status for the caller
+   }
+ }
+
+ @Override
+ public void initBgPropagationService() {
+ executorService_ = ConcurrentPropagationService.getExecutorService(Thread.currentThread().threadId());
+ }
+
+ @Override
+ public boolean propagate(final AtomicBoolean localPropagationInProgress,
+ final Sketch sketchIn, final long singleHash) {
+ final long epoch = epoch_;
+ if ((singleHash != NOT_SINGLE_HASH) //namely, is a single hash and
+ && (getRetainedEntries(false) < exactLimit_)) { //a small sketch then propagate myself (blocking)
+ if (!startEagerPropagation()) {
+ endPropagation(localPropagationInProgress, true);
+ return false;
+ }
+ if (!validateEpoch(epoch)) {
+ endPropagation(null, true); // do not change local flag
+ return true;
+ }
+ propagate(singleHash);
+ endPropagation(localPropagationInProgress, true);
+ return true;
+ }
+ // otherwise, be nonblocking, let background thread do the work
+ final ConcurrentBackgroundThetaPropagation job = new ConcurrentBackgroundThetaPropagation(
+ this, localPropagationInProgress, sketchIn, singleHash, epoch);
+ executorService_.execute(job);
+ return true;
+ }
+
+ @Override
+ public void propagate(final long singleHash) {
+ super.hashUpdate(singleHash);
+ }
+
+ @Override
+ public void updateEstimationSnapshot() {
+ volatileEstimate_ = super.getEstimate();
+ }
+
+ @Override
+ public void updateVolatileTheta() {
+ volatileThetaLong_ = getThetaLong();
+ }
+
+ @Override
+ public boolean validateEpoch(final long epoch) {
+ return epoch_ == epoch;
+ }
+
+ //Restricted
+
+ /**
+ * Advances the epoch while there is no background propagation
+ * This ensures a propagation invoked before the reset cannot affect the sketch after the reset
+ * is completed.
+ */
+ @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "Likely False Positive, Fix Later")
+ private void advanceEpoch() {
+ awaitBgPropagationTermination();
+ startEagerPropagation();
+ ConcurrentPropagationService.resetExecutorService(Thread.currentThread().threadId());
+ //no inspection NonAtomicOperationOnVolatileField
+ // this increment of a volatile field is done within the scope of the propagation
+ // synchronization and hence is done by a single thread
+ // Ignore a FindBugs warning
+ epoch_++;
+ endPropagation(null, true);
+ initBgPropagationService();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
new file mode 100644
index 000000000..ab1d41a65
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentBufferInserted;
+import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentPropagated;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.thetacommon.HashOperations;
+
+/**
+ * This is a theta filtering, bounded size buffer that operates in the context of a single writing
+ * thread. When the buffer becomes full its content is propagated into the shared sketch, which
+ * may be on a different thread. The limit on the buffer size is configurable. A bound of size 1
+ * allows the combination of buffers and shared sketch to maintain an error bound in real-time
+ * that is close to the error bound of a sequential theta sketch. Allowing larger buffer sizes
+ * enables amortization of the cost of propagations and substantially improves overall system throughput.
+ * The error caused by the buffering is essentially a perspective of time and synchronization
+ * and not really a true error. At the end of a stream, after all the buffers have synchronized with
+ * the shared sketch, there is no additional error.
+ * Propagation is done either synchronously by the updating thread, or asynchronously by a
+ * background propagation thread.
+ *
+ * This is a buffer, not a sketch, and it extends the HeapQuickSelectSketch
+ * in order to leverage some of the sketch machinery to make its work simple. However, if this
+ * buffer receives a query, like getEstimate(), the correct answer does not come from the super
+ * HeapQuickSelectSketch, which knows nothing about the concurrency relationship to the
+ * shared concurrent sketch, it must come from the shared concurrent sketch. As a result nearly all
+ * of the inherited sketch methods are redirected to the shared concurrent sketch.
+ *
+ * @author eshcar
+ * @author Lee Rhodes
+ */
+final class ConcurrentHeapThetaBuffer extends HeapQuickSelectSketch {
+
+ // Shared sketch consisting of the global sample set and theta value.
+ private final ConcurrentSharedThetaSketch shared;
+
+ // A flag indicating whether the shared sketch is in exact mode and requires eager propagation.
+ // Initially this is true. Once it is set to false (estimation mode) only reset() sets it back.
+ private boolean isExactMode;
+
+ // A flag to indicate if we expect the propagated data to be ordered
+ private final boolean propagateOrderedCompact;
+
+ // Propagation flag is set to true while propagation is in progress (or pending).
+ // It is the synchronization primitive to coordinate the work with the propagation thread.
+ private final AtomicBoolean localPropagationInProgress;
+
+ ConcurrentHeapThetaBuffer(final int lgNomLongs, final long seed,
+ final ConcurrentSharedThetaSketch shared, final boolean propagateOrderedCompact,
+ final int maxNumLocalThreads) {
+ super(computeLogBufferSize(lgNomLongs, shared.getExactLimit(), maxNumLocalThreads),
+ seed, 1.0F, //p
+ ResizeFactor.X1, //rf
+ false); //not a union gadget
+
+ this.shared = shared;
+ isExactMode = true;
+ this.propagateOrderedCompact = propagateOrderedCompact;
+ localPropagationInProgress = new AtomicBoolean(false);
+ }
+
+ private static int computeLogBufferSize(final int lgNomLongs, final long exactSize,
+ final int maxNumLocalBuffers) {
+ return Math.min(lgNomLongs, (int)Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers)));
+ }
+
+ //concurrent restricted methods
+
+ /**
+ * Propagates a single hash value to the shared sketch.
+ * @param hash to be propagated
+ * @return the shared sketch's propagate result; when false, hashUpdate buffers the hash locally
+ */
+ private boolean propagateToSharedSketch(final long hash) {
+ //no inspection StatementWithEmptyBody
+ while (localPropagationInProgress.get()) {
+ } //busy wait until previous propagation completed
+ localPropagationInProgress.set(true);
+ final boolean res = shared.propagate(localPropagationInProgress, null, hash);
+ //in this case the parent empty_ and curCount_ were not touched
+ thetaLong_ = shared.getVolatileTheta();
+ return res;
+ }
+
+ /**
+ * Propagates the content of the buffer as a sketch to the shared sketch
+ */
+ private void propagateToSharedSketch() {
+ //no inspection StatementWithEmptyBody
+ while (localPropagationInProgress.get()) {
+ } //busy wait until previous propagation completed
+
+ final CompactSketch compactSketch = compact(propagateOrderedCompact, null);
+ localPropagationInProgress.set(true);
+ shared.propagate(localPropagationInProgress, compactSketch,
+ ConcurrentSharedThetaSketch.NOT_SINGLE_HASH);
+ super.reset();
+ thetaLong_ = shared.getVolatileTheta();
+ }
+
+ //Public Sketch overrides proxies to shared concurrent sketch
+
+ @Override
+ public int getCompactBytes() {
+ return shared.getCompactBytes();
+ }
+
+ @Override
+ public int getCurrentBytes() {
+ return shared.getCurrentBytes();
+ }
+
+ @Override
+ public double getEstimate() {
+ return shared.getEstimate();
+ }
+
+ @Override
+ public double getLowerBound(final int numStdDev) {
+ return shared.getLowerBound(numStdDev);
+ }
+
+ @Override
+ public double getUpperBound(final int numStdDev) {
+ return shared.getUpperBound(numStdDev);
+ }
+
+ @Override
+ public boolean hasMemorySegment() {
+ return false;
+ }
+
+ @Override
+ public boolean isDirect() {
+ return false;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return shared.isEmpty();
+ }
+
+ @Override
+ public boolean isEstimationMode() {
+ return shared.isEstimationMode();
+ }
+
+ //End of proxies
+
+ @Override
+ public byte[] toByteArray() {
+ throw new UnsupportedOperationException("Local theta buffer need not be serialized");
+ }
+
+ //Public UpdateSketch overrides
+
+ @Override
+ public void reset() {
+ super.reset();
+ isExactMode = true;
+ localPropagationInProgress.set(false);
+ }
+
+ //Restricted UpdateSketch overrides
+
+ /**
+ * Updates buffer with given hash value.
+ * Triggers propagation to shared sketch if buffer is full.
+ *
+ * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored.
+ * A negative hash value will throw an exception.
+ * @return
+ * See Update Return State
+ */
+ @Override
+ UpdateReturnState hashUpdate(final long hash) {
+ if (isExactMode) {
+ isExactMode = !shared.isEstimationMode();
+ }
+ HashOperations.checkHashCorruption(hash);
+ if ((getHashTableThreshold() == 0) || isExactMode ) {
+ //The over-theta and zero test
+ if (HashOperations.continueCondition(getThetaLong(), hash)) {
+ return RejectedOverTheta; //signal that hash was rejected due to theta or zero.
+ }
+ if (propagateToSharedSketch(hash)) {
+ return ConcurrentPropagated;
+ }
+ }
+ final UpdateReturnState state = super.hashUpdate(hash);
+ if (isOutOfSpace(getRetainedEntries(true) + 1)) {
+ propagateToSharedSketch();
+ return ConcurrentPropagated;
+ }
+ if (state == UpdateReturnState.InsertedCountIncremented) {
+ return ConcurrentBufferInserted;
+ }
+ return state;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java
new file mode 100644
index 000000000..92ca954fa
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.datasketches.common.SuppressFBWarnings;
+
+/**
+ * Pool of threads to serve all propagation tasks in the system.
+ *
+ * @author Eshcar Hillel
+ */
+final class ConcurrentPropagationService {
+
+ static int NUM_POOL_THREADS = 3; // Default: 3 threads
+ private static volatile ConcurrentPropagationService instance = null; // Singleton
+ private static ExecutorService[] propagationExecutorService = null;
+
+ @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "Fix later")
+ private ConcurrentPropagationService() {
+ propagationExecutorService = new ExecutorService[NUM_POOL_THREADS];
+ }
+
+ //Factory: Get the singleton
+ @SuppressFBWarnings(value = "SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA", justification = "Fix later")
+ private static ConcurrentPropagationService getInstance() {
+ if (instance == null) {
+ synchronized (ConcurrentPropagationService.class) {
+ if (instance == null) {
+ instance = new ConcurrentPropagationService(); //SpotBugs: SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA
+ }
+ }
+ }
+ return instance;
+ }
+
+ public static ExecutorService getExecutorService(final long id) { //lazily creates the slot's executor
+   return getInstance().initExecutorService(Math.floorMod(id, NUM_POOL_THREADS)); //never negative
+ }
+
+ @SuppressWarnings("static-access")
+ public static ExecutorService resetExecutorService(final long id) { //clears the slot, returns null
+   return getInstance().propagationExecutorService[Math.floorMod(id, NUM_POOL_THREADS)] = null;
+ }
+
+ @SuppressWarnings("static-method")
+ private ExecutorService initExecutorService(final int i) {
+ if (propagationExecutorService[i] == null) {
+ propagationExecutorService[i] = Executors.newSingleThreadExecutor();
+ }
+ return propagationExecutorService[i];
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
new file mode 100644
index 000000000..5bf147049
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import java.lang.foreign.MemorySegment;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.datasketches.common.Util;
+
+/**
+ * An internal interface to define the API of a concurrent shared theta sketch.
+ * It reflects all data processed by a single or multiple update threads, and can serve queries at
+ * any time.
+ *
+ * @author eshcar
+ */
+interface ConcurrentSharedThetaSketch {
+
+ long NOT_SINGLE_HASH = -1L;
+ double MIN_ERROR = 0.0000001; //floor applied to the error argument of computeExactLimit
+ //Returns 2 * min(k, ceil(1 / max(error, MIN_ERROR)^2)).
+ static long computeExactLimit(final long k, final double error) {
+ return 2 * Math.min(k, (long) Math.ceil(1.0 / Math.pow(Math.max(error,MIN_ERROR), 2.0)));
+ }
+
+ /**
+ * Returns flip point (number of updates) from exact to estimate mode.
+ * @return flip point from exact to estimate mode
+ */
+ long getExactLimit();
+
+ /**
+ * Ensures mutual exclusion. No other thread can update the shared sketch while propagation is
+ * in progress
+ * @return true if eager propagation was started
+ */
+ boolean startEagerPropagation();
+
+ /**
+ * Completes the propagation: end mutual exclusion block.
+ * Notifies the local thread the propagation is completed
+ *
+ * @param localPropagationInProgress the synchronization primitive through which propagator
+ * notifies local thread the propagation is completed
+ * @param isEager true if the propagation is in eager mode
+ */
+ void endPropagation(AtomicBoolean localPropagationInProgress, boolean isEager);
+
+ /**
+ * Returns the value of the volatile theta managed by the shared sketch
+ * @return the value of the volatile theta managed by the shared sketch
+ */
+ long getVolatileTheta();
+
+ /**
+ * Awaits termination of background (lazy) propagation tasks
+ */
+ void awaitBgPropagationTermination();
+
+ /**
+ * Init background (lazy) propagation service
+ */
+ void initBgPropagationService();
+
+ /**
+ * (Eager) Propagates the given sketch or hash value into this sketch
+ * @param localPropagationInProgress the flag to be updated when propagation is done
+ * @param sketchIn any Theta sketch with the data
+ * @param singleHash a single hash value
+ * @return true if propagation successfully started
+ */
+ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch sketchIn,
+ final long singleHash);
+
+ /**
+ * (Lazy/Eager) Propagates the given hash value into this sketch
+ * @param singleHash a single hash value
+ */
+ void propagate(final long singleHash);
+
+ /**
+ * Updates the estimation of the number of unique entries by capturing a snapshot of the sketch
+ * data, namely, volatile theta and the num of valid entries in the sketch
+ */
+ void updateEstimationSnapshot();
+
+ /**
+ * Updates the value of the volatile theta by extracting it from the underlying sketch managed
+ * by the shared sketch
+ */
+ void updateVolatileTheta();
+
+ /**
+ * Validates the shared sketch is in the context of the given epoch
+ *
+ * @param epoch the epoch number to be validated
+ * @return true iff the shared sketch is in the context of the given epoch
+ */
+ boolean validateEpoch(long epoch);
+
+ //The following mirrors are public methods that already exist on the "extends" side of the dual
+ // inheritance. They are provided here to allow casts to this interface access
+ // to these methods without having to cast back to the extended parent class.
+ //
+ //This allows an internal class to cast either the Concurrent Direct or Concurrent Heap
+ //shared class to this interface and have access to the above special concurrent methods as
+ //well as the methods below.
+ //
+ //For the external user all of the below methods can be obtained by casting the shared
+ //sketch to UpdateSketch. However, these methods here also act as an alias so that an
+ //attempt to access these methods from the local buffer will be diverted to the shared
+ //sketch.
+
+ //From Sketch and MemoryStatus
+
+ int getCompactBytes();
+
+ int getCurrentBytes();
+
+ double getEstimate();
+
+ double getLowerBound(int numStdDev);
+
+ double getUpperBound(int numStdDev);
+
+ /**
+ * Returns true if this object's internal data is backed by a MemorySegment,
+ * which may be on-heap or off-heap.
+ * @return true if this object's internal data is backed by a MemorySegment.
+ */
+ boolean hasMemorySegment();
+
+ /**
+ * Returns true if this object's internal data is backed by a direct (off-heap) MemorySegment.
+ * @return true if this object's internal data is backed by a direct (off-heap) MemorySegment.
+ */
+ boolean isDirect();
+
+ /**
+ * Returns true if the two given MemorySegments refer to the same backing resource,
+ * which is either an off-heap memory location and size, or the same on-heap array object.
+ *
+ * <p>This is a convenient delegate of
+ * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}
+ *
+ * @param seg1 The first given MemorySegment
+ * @param seg2 The second given MemorySegment
+ * @return true if both MemorySegments are determined to be the same backing memory.
+ */
+ default boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
+ return Util.isSameResource(seg1, seg2);
+ }
+
+ boolean isEmpty();
+
+ boolean isEstimationMode();
+
+ byte[] toByteArray();
+
+ int getRetainedEntries(boolean valid);
+
+ CompactSketch compact();
+
+ CompactSketch compact(boolean ordered, MemorySegment wseg);
+
+ UpdateSketch rebuild();
+
+ void reset();
+}
+
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java
new file mode 100644
index 000000000..9be51c379
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.extractEntryBitsV4;
+import static org.apache.datasketches.theta2.PreambleUtil.extractNumEntriesBytesV4;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLongV4;
+import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered.
+ *
+ * This sketch can only be associated with a Serialization Version 4 format binary image.
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ */
+class DirectCompactCompressedSketch extends DirectCompactSketch {
+  /**
+   * Construct this sketch with the given MemorySegment.
+   * @param seg Read-only MemorySegment object.
+   */
+  DirectCompactCompressedSketch(final MemorySegment seg) {
+    super(seg);
+  }
+
+  /**
+   * Wraps the given MemorySegment, which must be a SerVer 4 compressed CompactSketch image.
+   * Must check the validity of the MemorySegment before calling.
+   * @param srcSeg The source MemorySegment
+   * @param seedHash The update seedHash, which is checked against the seed hash read from srcSeg.
+   * See Seed Hash.
+   * @return a new sketch instance that wraps srcSeg
+   */
+  static DirectCompactCompressedSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) {
+    ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash);
+    return new DirectCompactCompressedSketch(srcSeg);
+  }
+
+  //Sketch Overrides
+  //compact(): dstOrdered is not consulted here; isOrdered() of this class always returns true.
+  @Override
+  public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) {
+    if (dstSeg != null) {
+      MemorySegment.copy(seg_, 0, dstSeg, 0, getCurrentBytes());
+      return new DirectCompactCompressedSketch(dstSeg); //the copy is a compressed image too
+    }
+    return CompactSketch.heapify(seg_);
+  }
+  //getCurrentBytes(): preamble bytes + entry-count bytes + whole bytes holding the packed entries.
+  @Override
+  public int getCurrentBytes() {
+    final int preLongs = extractPreLongs(seg_);
+    final int entryBits = extractEntryBitsV4(seg_);
+    final int numEntriesBytes = extractNumEntriesBytesV4(seg_);
+    return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits);
+  }
+  //Offset of the first entry-count byte: 8, or 16 when preLongs > 1 (theta is in the second long).
+  private static final int START_PACKED_DATA_EXACT_MODE = 8;
+  private static final int START_PACKED_DATA_ESTIMATION_MODE = 16;
+
+  @Override
+  public int getRetainedEntries(final boolean valid) { //compact is always valid
+    // number of entries is stored using variable length encoding
+    // most significant bytes with all zeros are not stored
+    // one byte in the preamble has the number of non-zero bytes used
+    final int preLongs = extractPreLongs(seg_); // if > 1 then the second long has theta
+    final int numEntriesBytes = extractNumEntriesBytesV4(seg_);
+    int offsetBytes = preLongs > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE;
+    int numEntries = 0;
+    for (int i = 0; i < numEntriesBytes; i++) { //assemble the count, least significant byte first
+      numEntries |= Byte.toUnsignedInt(seg_.get(JAVA_BYTE, offsetBytes++)) << (i << 3);
+    }
+    return numEntries;
+  }
+
+  @Override
+  public long getThetaLong() {
+    final int preLongs = extractPreLongs(seg_);
+    return (preLongs > 1) ? extractThetaLongV4(seg_) : Long.MAX_VALUE;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return false;
+  }
+
+  @Override
+  public boolean isOrdered() {
+    return true;
+  }
+
+  @Override
+  public HashIterator iterator() {
+    return new MemoryCompactCompressedHashIterator(
+        seg_,
+        (extractPreLongs(seg_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE)
+          + extractNumEntriesBytesV4(seg_),
+        extractEntryBitsV4(seg_),
+        getRetainedEntries()
+    );
+  }
+
+  //restricted methods
+
+  @Override
+  long[] getCache() {
+    final int numEntries = getRetainedEntries();
+    final long[] cache = new long[numEntries];
+    int i = 0;
+    final HashIterator it = iterator();
+    while (it.next()) {
+      cache[i++] = it.get();
+    }
+    return cache;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java
new file mode 100644
index 000000000..188f2cd73
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty;
+import static org.apache.datasketches.theta2.CompactOperations.memoryToCompact;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.SingleItemSketch.otherCheckForSingleItem;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * An off-heap (Direct), compact, read-only sketch. The internal hash array can be either ordered
+ * or unordered. It is not empty, not a single item.
+ *
+ * This sketch can only be associated with a Serialization Version 3 format binary image.
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ *
+ * @author Lee Rhodes
+ */
+class DirectCompactSketch extends CompactSketch {
+  final MemorySegment seg_;
+
+  /**
+   * Creates a sketch that reads its state from the given MemorySegment.
+   * @param seg Read-only MemorySegment object with the order bit properly set.
+   */
+  DirectCompactSketch(final MemorySegment seg) {
+    seg_ = seg;
+  }
+
+  /**
+   * Wraps the given MemorySegment, which must be a SerVer 3, CompactSketch image.
+   * The caller must validate the MemorySegment first, and the order bit must be set properly.
+   * @param srcSeg the given MemorySegment
+   * @param seedHash The update seedHash, which is checked against the seed hash read from srcSeg.
+   * See Seed Hash.
+   * @return a new sketch instance that wraps srcSeg
+   */
+  static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) {
+    final short storedSeedHash = (short) extractSeedHash(srcSeg);
+    ThetaUtil.checkSeedHashes(storedSeedHash, seedHash);
+    return new DirectCompactSketch(srcSeg);
+  }
+
+  //Sketch Overrides
+
+  @Override
+  public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) {
+    return memoryToCompact(seg_, dstOrdered, dstSeg);
+  }
+
+  @Override
+  public int getCurrentBytes() {
+    if (otherCheckForSingleItem(seg_)) { return 2 * Long.BYTES; }
+    final int preLongs = extractPreLongs(seg_);
+    final int totalLongs = (preLongs == 1) ? preLongs : preLongs + extractCurCount(seg_);
+    return totalLongs * Long.BYTES;
+  }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) { //compact is always valid
+    if (otherCheckForSingleItem(seg_)) { return 1; }
+    final int preLongs = extractPreLongs(seg_);
+    return (preLongs == 1) ? 0 : extractCurCount(seg_);
+  }
+
+  @Override
+  public long getThetaLong() {
+    if (extractPreLongs(seg_) > 2) { return extractThetaLong(seg_); }
+    return Long.MAX_VALUE;
+  }
+
+  @Override
+  public boolean hasMemorySegment() {
+    return seg_ != null;
+  }
+
+  @Override
+  public boolean isDirect() {
+    return hasMemorySegment() && seg_.isNative();
+  }
+
+  @Override
+  public boolean isEmpty() {
+    final boolean flaggedEmpty = PreambleUtil.isEmptyFlag(seg_);
+    final boolean thetaIsMax = getThetaLong() == Long.MAX_VALUE;
+    final boolean noEntries = getRetainedEntries(true) == 0;
+    return flaggedEmpty || (thetaIsMax && noEntries);
+  }
+
+  @Override
+  public boolean isOrdered() {
+    return 0 < (extractFlags(seg_) & ORDERED_FLAG_MASK);
+  }
+
+  @Override
+  public HashIterator iterator() {
+    final int numEntries = getRetainedEntries(true);
+    final long thetaLong = getThetaLong();
+    return new MemoryHashIterator(seg_, numEntries, thetaLong);
+  }
+
+  @Override
+  public byte[] toByteArray() {
+    checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries());
+    final byte[] image = new byte[getCurrentBytes()];
+    MemorySegment.copy(seg_, JAVA_BYTE, 0, image, 0, image.length);
+    return image;
+  }
+
+  //restricted methods
+
+  @Override
+  long[] getCache() {
+    if (otherCheckForSingleItem(seg_)) { return new long[] { seg_.get(JAVA_LONG_UNALIGNED, 8) }; }
+    final int preLongs = extractPreLongs(seg_);
+    final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_);
+    if (curCount <= 0) { return new long[0]; }
+    final long[] hashes = new long[curCount];
+    //the copy starts at the byte offset just past the preamble longs
+    MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, preLongs << 3, hashes, 0, curCount);
+    return hashes;
+  }
+
+  @Override
+  int getCompactPreambleLongs() {
+    return extractPreLongs(seg_);
+  }
+
+  @Override
+  int getCurrentPreambleLongs() {
+    return extractPreLongs(seg_);
+  }
+
+  @Override
+  MemorySegment getMemorySegment() {
+    return seg_;
+  }
+
+  @Override
+  short getSeedHash() {
+    return (short) extractSeedHash(seg_);
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
new file mode 100644
index 000000000..32ae0d14d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+import static org.apache.datasketches.common.Util.newHeapSegment;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT;
+import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.getMemBytes;
+import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgNomLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor;
+import static org.apache.datasketches.theta2.PreambleUtil.insertP;
+import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong;
+import static org.apache.datasketches.theta2.Rebuilder.actLgResizeFactor;
+import static org.apache.datasketches.theta2.Rebuilder.moveAndResize;
+import static org.apache.datasketches.theta2.Rebuilder.quickSelectAndRebuild;
+import static org.apache.datasketches.theta2.Rebuilder.resize;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedRebuilt;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The default Theta Sketch using the QuickSelect algorithm.
+ * This subclass implements methods, which affect the state (update, rebuild, reset)
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+class DirectQuickSelectSketch extends DirectQuickSelectSketchR {
+
+ private DirectQuickSelectSketch(
+ final long seed,
+ final MemorySegment wseg) {
+ super(seed, wseg);
+ }
+
+ /**
+ * Construct a new sketch instance and initialize the given MemorySegment as its backing store.
+ *
+ * @param lgNomLongs See lgNomLongs.
+ * @param seed See Update Hash Seed.
+ * @param p
+ * See Sampling Probability, p
+ * @param rf the resize factor. If rf.lg() is 0 the hash table starts at its full size of
+ * 2^(lgNomLongs + 1) longs and dstSeg must be large enough for it; otherwise the table
+ * starts at 2^ThetaUtil.MIN_LG_ARR_LONGS longs.
+ * See Resize Factor
+ * @param dstSeg the given MemorySegment destination. It cannot be null.
+ * Its preamble is written and its hash table area is cleared prior to use.
+ * If it is smaller than the preamble plus the initial hash table, construction fails.
+ * @param unionGadget true if this sketch is implementing the Union gadget function.
+ * Otherwise, it is behaving as a normal QuickSelectSketch.
+ */
+ DirectQuickSelectSketch(
+ final int lgNomLongs,
+ final long seed,
+ final float p,
+ final ResizeFactor rf,
+ final MemorySegment dstSeg,
+ final boolean unionGadget) {
+ this(
+ checkMemSize(lgNomLongs, rf, dstSeg, unionGadget),
+ //SpotBugs CT_CONSTRUCTOR_THROW is false positive.
+ //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J
+ lgNomLongs,
+ seed,
+ p,
+ rf,
+ dstSeg,
+ unionGadget);
+ }
+
+ private DirectQuickSelectSketch(
+ final boolean secure, //required part of Finalizer Attack prevention
+ final int lgNomLongs,
+ final long seed,
+ final float p,
+ final ResizeFactor rf,
+ final MemorySegment dstSeg,
+ final boolean unionGadget) {
+ super(seed, dstSeg);
+ //Choose family, preambleLongs
+ final Family family;
+ final int preambleLongs;
+ if (unionGadget) {
+ preambleLongs = Family.UNION.getMinPreLongs();
+ family = Family.UNION;
+ }
+ else {
+ preambleLongs = Family.QUICKSELECT.getMinPreLongs();
+ family = Family.QUICKSELECT;
+ }
+
+ //Choose RF, minReqBytes, lgArrLongs.
+ final int lgRF = rf.lg();
+ final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS;
+
+ //@formatter:off
+ //Build preamble
+ insertPreLongs(dstSeg, preambleLongs); //byte 0
+ insertLgResizeFactor(dstSeg, lgRF); //byte 0
+ insertSerVer(dstSeg, SER_VER); //byte 1
+ insertFamilyID(dstSeg, family.getID()); //byte 2
+ insertLgNomLongs(dstSeg, lgNomLongs); //byte 3
+ insertLgArrLongs(dstSeg, lgArrLongs); //byte 4
+ //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4
+ insertFlags(dstSeg, EMPTY_FLAG_MASK); //byte 5
+ insertSeedHash(dstSeg, ThetaUtil.computeSeedHash(seed)); //bytes 6,7
+ insertCurCount(dstSeg, 0); //bytes 8-11
+ insertP(dstSeg, p); //bytes 12-15
+ final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
+ insertThetaLong(dstSeg, thetaLong); //bytes 16-23
+ if (unionGadget) {
+ insertUnionThetaLong(dstSeg, thetaLong);
+ }
+ //@formatter:on
+
+ //clear hash table area
+ dstSeg.asSlice(preambleLongs << 3, Long.BYTES << lgArrLongs).fill((byte)0);
+
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+ }
+ //Returns true, or throws SketchesArgumentException if dstSeg cannot hold the preamble plus the initial table.
+ private static final boolean checkMemSize(
+ final int lgNomLongs, final ResizeFactor rf, final MemorySegment dstSeg, final boolean unionGadget) {
+ final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs();
+ final int lgRF = rf.lg();
+ final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS;
+ final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs);
+ final long curMemCapBytes = dstSeg.byteSize();
+ if (curMemCapBytes < minReqBytes) {
+ throw new SketchesArgumentException(
+ "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes);
+ }
+ return true;
+ }
+
+ /**
+ * Wrap a sketch around the given source MemorySegment containing sketch data that originated from
+ * this sketch.
+ * @param srcSeg The given MemorySegment object must be in hash table form and not read only.
+ * @param seed See Update Hash Seed
+ * @return instance of this sketch
+ */
+ static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final long seed) {
+ final int preambleLongs = extractPreLongs(srcSeg); //byte 0
+ final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
+ final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
+
+ UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs);
+ checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs);
+
+ if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) {
+ //If incorrect it sets it to X2 which always works.
+ insertLgResizeFactor(srcSeg, ResizeFactor.X2.lg());
+ }
+
+ final DirectQuickSelectSketch dqss =
+ new DirectQuickSelectSketch(seed, srcSeg);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+ return dqss;
+ }
+
+ /**
+ * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from
+ * this sketch. This does NO validity checking of the given MemorySegment.
+ * @param srcSeg The given MemorySegment must be in hash table form and not read only.
+ * @param seed See Update Hash Seed
+ * @return instance of this sketch
+ */
+ static DirectQuickSelectSketch fastWritableWrap(final MemorySegment srcSeg, final long seed) {
+ final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
+ final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
+
+ final DirectQuickSelectSketch dqss =
+ new DirectQuickSelectSketch(seed, srcSeg);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+ return dqss;
+ }
+
+ //Sketch
+
+ //UpdateSketch
+
+ @Override
+ public UpdateSketch rebuild() {
+ final int lgNomLongs = getLgNomLongs();
+ final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+ if (getRetainedEntries(true) > (1 << lgNomLongs)) { //only when more than 2^lgNomLongs entries are retained
+ quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs);
+ }
+ return this;
+ }
+
+ @Override
+ public void reset() {
+ //clear hash table
+ //hash table size and hashTableThreshold stays the same
+ //lgArrLongs stays the same
+ //thetaLongs resets to p
+ final int arrLongs = 1 << getLgArrLongs();
+ final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+ final int preBytes = preambleLongs << 3;
+ wseg_.asSlice(preBytes, arrLongs * 8L).fill((byte)0);
+ //flags: bigEndian = readOnly = compact = ordered = false; empty = true.
+ wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK);
+ wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0);
+ final float p = wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT);
+ final long thetaLong = (long) (p * LONG_MAX_VALUE_AS_DOUBLE);
+ wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong);
+ }
+
+ //restricted methods
+
+ @Override
+ UpdateReturnState hashUpdate(final long hash) {
+ HashOperations.checkHashCorruption(hash);
+
+ wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) (wseg_.get(JAVA_BYTE, FLAGS_BYTE) & ~EMPTY_FLAG_MASK));
+ final long thetaLong = getThetaLong();
+ final int lgNomLongs = getLgNomLongs();
+ //The over-theta test
+ if (HashOperations.continueCondition(thetaLong, hash)) {
+ return RejectedOverTheta; //signal that hash was rejected due to theta or zero.
+ }
+
+ final int lgArrLongs = getLgArrLongs();
+ final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+
+ //The duplicate test
+ final int index =
+ HashOperations.hashSearchOrInsertMemory(wseg_, lgArrLongs, hash, preambleLongs << 3);
+ if (index >= 0) {
+ return RejectedDuplicate; //Duplicate, not inserted
+ }
+ //insertion occurred, increment curCount
+ final int curCount = getRetainedEntries(true) + 1;
+ wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); //update curCount
+
+ if (isOutOfSpace(curCount)) { //we need to do something, we are out of space
+
+ if (lgArrLongs > lgNomLongs) { //at full size, rebuild
+ //Assumes no dirty values, changes thetaLong, curCount_
+ assert (lgArrLongs == (lgNomLongs + 1)) : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs;
+ //rebuild, refresh curCount based on # values in the hashtable.
+ quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs);
+ return InsertedCountIncrementedRebuilt;
+ } //end of rebuild, exit
+
+ else { //Not at full size, resize. Should not get here if lgRF = 0 and memCap is too small.
+ final int lgRF = getLgRF();
+ final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF);
+ int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1);
+
+ if (actLgRF > 0) { //Expand in current MemorySegment
+ //lgArrLongs will change; thetaLong, curCount will not
+ resize(wseg_, preambleLongs, lgArrLongs, tgtLgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
+ return InsertedCountIncrementedResized;
+ } //end of Expand in current memory, exit.
+
+ else {
+ //Request more memory, then resize. lgArrLongs will change; thetaLong, curCount will not
+ final int preBytes = preambleLongs << 3;
+ tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1);
+ final int tgtArrBytes = 8 << tgtLgArrLongs;
+ final int reqBytes = tgtArrBytes + preBytes;
+
+ //There is no MemoryRequestServer here: the larger table is always placed in a segment
+ //obtained from newHeapSegment(reqBytes), the contents are moved into it by moveAndResize,
+ //and wseg_ is switched to the new segment.
+ //NOTE(review): after this point the segment originally supplied by the caller is no
+ //longer the one being updated - confirm that callers are aware of this.
+
+ final MemorySegment newDstSeg = newHeapSegment(reqBytes);
+
+ moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong);
+ wseg_ = newDstSeg;
+
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
+ return InsertedCountIncrementedResized;
+ } //end of Request more memory to resize
+ } //end of resize
+ } //end of isOutOfSpace
+ return InsertedCountIncremented;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
new file mode 100644
index 000000000..33d371554
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty;
+import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs;
+import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_RESIZE_FACTOR_BIT;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT;
+import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesReadOnlyException;
+import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The default Theta Sketch using the QuickSelect algorithm. This is the read-only implementation:
+ * the state-changing methods (rebuild, reset, hashUpdate) throw SketchesReadOnlyException.
+ *
+ * <p>This implementation reads data from a given MemorySegment that is owned and managed by the
+ * caller. This MemorySegment can be off-heap, which if managed properly will greatly reduce the
+ * need for the JVM to perform garbage collection.</p>
+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+class DirectQuickSelectSketchR extends UpdateSketch {
+  static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space
+  final long seed_; //provided, kept only on heap, never serialized.
+  int hashTableThreshold_; //computed, kept only on heap, never serialized.
+  MemorySegment wseg_; //writable for the child class, but this class only reads from it
+
+  //only called by DirectQuickSelectSketch and below
+  DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) {
+    seed_ = seed;
+    wseg_ = wseg;
+  }
+
+  /**
+   * Wrap a sketch around the given source MemorySegment containing sketch data that originated from
+   * this sketch. The preamble is validated before the sketch is returned.
+   * @param srcSeg the source MemorySegment.
+   * The given MemorySegment object must be in hash table form and not read only.
+   * @param seed the update hash seed
+   * @return instance of this sketch
+   */
+  static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final long seed) {
+    final int preambleLongs = extractPreLongs(srcSeg);                  //byte 0
+    final int lgNomLongs = extractLgNomLongs(srcSeg);                   //byte 3
+    final int lgArrLongs = extractLgArrLongs(srcSeg);                   //byte 4
+
+    UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs);
+    checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs);
+
+    final DirectQuickSelectSketchR dqssr =
+        new DirectQuickSelectSketchR(seed, srcSeg);
+    dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+    return dqssr;
+  }
+
+  /**
+   * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from
+   * this sketch. This does NO validity checking of the given MemorySegment.
+   * @param srcSeg The given MemorySegment object must be in hash table form and not read only.
+   * @param seed the update hash seed
+   * @return instance of this sketch
+   */
+  static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, final long seed) {
+    final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF;
+    final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+
+    final DirectQuickSelectSketchR dqss =
+        new DirectQuickSelectSketchR(seed, srcSeg);
+    dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+    return dqss;
+  }
+
+  //Sketch
+
+  @Override
+  public int getCurrentBytes() {
+    //not compact: preamble plus the full hash table. The byte is masked like every other reader.
+    final int lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+    final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+    final int lengthBytes = (preLongs + (1 << lgArrLongs)) << 3;
+    return lengthBytes;
+  }
+
+  @Override
+  public double getEstimate() {
+    final int curCount = extractCurCount(wseg_);
+    final long thetaLong = extractThetaLong(wseg_);
+    return Sketch.estimate(thetaLong, curCount);
+  }
+
+  @Override
+  public Family getFamily() {
+    final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+    return Family.idToFamily(familyID);
+  }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) { //always valid
+    return wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+  }
+
+  @Override
+  public long getThetaLong() {
+    return isEmpty() ? Long.MAX_VALUE : wseg_.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+  }
+
+  @Override
+  public boolean hasMemorySegment() {
+    return wseg_ != null;
+  }
+
+  @Override
+  public boolean isDirect() {
+    return hasMemorySegment() && wseg_.isNative();
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return PreambleUtil.isEmptyFlag(wseg_);
+  }
+
+  @Override
+  public HashIterator iterator() {
+    return new MemoryHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong());
+  }
+
+  @Override
+  public byte[] toByteArray() { //MY_FAMILY is stored in wseg_
+    checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wseg_));
+    final int lengthBytes = getCurrentBytes();
+    final byte[] byteArray = new byte[lengthBytes];
+    final MemorySegment seg = MemorySegment.ofArray(byteArray); //view over the returned array
+    MemorySegment.copy(wseg_, 0, seg, 0, lengthBytes);
+    final long thetaLong =
+        correctThetaOnCompact(isEmpty(), extractCurCount(wseg_), extractThetaLong(wseg_));
+    insertThetaLong(seg, thetaLong); //correct the copy; a read-only sketch must not write to wseg_
+    return byteArray;
+  }
+
+  //UpdateSketch
+
+  @Override
+  public final int getLgNomLongs() {
+    return PreambleUtil.extractLgNomLongs(wseg_);
+  }
+
+  @Override
+  float getP() {
+    return wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT);
+  }
+
+  @Override
+  public ResizeFactor getResizeFactor() {
+    return ResizeFactor.getRF(getLgRF());
+  }
+
+  @Override
+  long getSeed() {
+    return seed_;
+  }
+
+  @Override
+  public UpdateSketch rebuild() {
+    throw new SketchesReadOnlyException();
+  }
+
+  @Override
+  public void reset() {
+    throw new SketchesReadOnlyException();
+  }
+
+  //restricted methods
+
+  @Override
+  long[] getCache() { //returns a heap copy of the hash table that follows the preamble
+    final int lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+    final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+    final long[] cacheArr = new long[1 << lgArrLongs];
+    final MemorySegment seg = MemorySegment.ofArray(cacheArr);
+    MemorySegment.copy(wseg_, preambleLongs << 3, seg, 0, 8 << lgArrLongs);
+    return cacheArr;
+  }
+
+  @Override
+  int getCompactPreambleLongs() {
+    return computeCompactPreLongs(isEmpty(), getRetainedEntries(true), getThetaLong());
+  }
+
+  @Override
+  int getCurrentPreambleLongs() {
+    return PreambleUtil.extractPreLongs(wseg_);
+  }
+
+  @Override
+  MemorySegment getMemorySegment() {
+    return wseg_;
+  }
+
+  @Override
+  short getSeedHash() {
+    return (short) PreambleUtil.extractSeedHash(wseg_);
+  }
+
+  @Override
+  boolean isDirty() {
+    return false; //Always false for QuickSelectSketch
+  }
+
+  @Override
+  boolean isOutOfSpace(final int numEntries) {
+    return numEntries > hashTableThreshold_;
+  }
+
+  @Override
+  int getLgArrLongs() {
+    return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+  }
+
+  int getLgRF() { //only Direct needs this
+    return (wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3;
+  }
+
+  @Override
+  UpdateReturnState hashUpdate(final long hash) {
+    throw new SketchesReadOnlyException();
+  }
+
+  /**
+   * Returns the cardinality limit given the current size of the hash table array.
+   *
+   * @param lgNomLongs log-base2 of the nominal size; selects which threshold fraction applies.
+   * @param lgArrLongs log-base2 of the current hash table array size in longs.
+   * @return the hash table threshold
+   */
+  @SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments")
+  protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+    //SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
+    //but this allows us to tune these constants for different sketches.
+    final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
+    return (int) (fraction * (1 << lgArrLongs));
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
new file mode 100644
index 000000000..c9c6dd609
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Singleton empty CompactSketch.
+ *
+ * @author Lee Rhodes
+ */
+final class EmptyCompactSketch extends CompactSketch {
+
+  //For backward compatibility, a candidate long must have Flags= compact, read-only,
+  // COMPACT-Family=3, SerVer=3, PreLongs=1, and be exactly 8 bytes long. The seedHash is ignored.
+  // NOTE: The empty and ordered flags may or may not be set
+  private static final long EMPTY_SKETCH_MASK = 0X00_00_EB_00_00_FF_FF_FFL;
+  private static final long EMPTY_SKETCH_TEST = 0X00_00_0A_00_00_03_03_01L;
+  //Shared 8-byte image; the empty and ordered bits are also set. Never hand this array to callers.
+  static final byte[] EMPTY_COMPACT_SKETCH_ARR = { 1, 3, 3, 0, 0, 0x1E, 0, 0 };
+  private static final EmptyCompactSketch EMPTY_COMPACT_SKETCH = new EmptyCompactSketch();
+
+  private EmptyCompactSketch() {}
+
+  static EmptyCompactSketch getInstance() { //no lock needed: the field is a static final
+    return EMPTY_COMPACT_SKETCH;
+  }
+
+  //This should be a heapify. Returns the singleton if the first 8 bytes match the empty preamble.
+  static EmptyCompactSketch getHeapInstance(final MemorySegment srcSeg) {
+    final long pre0 = srcSeg.get(JAVA_LONG_UNALIGNED, 0);
+    if (testCandidatePre0(pre0)) {
+      return EMPTY_COMPACT_SKETCH;
+    }
+    final long maskedPre0 = pre0 & EMPTY_SKETCH_MASK;
+    throw new SketchesArgumentException("Input Memory does not match required Preamble. "
+        + "Memory Pre0: " + Long.toHexString(maskedPre0)
+        + ", required Pre0: " + Long.toHexString(EMPTY_SKETCH_TEST));
+  }
+
+  @Override
+  // This returns with ordered flag = true independent of dstOrdered.
+  // This is required for fast detection.
+  // The hashSeed is ignored and set == 0.
+  public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) {
+    if (dstWSeg == null) { return EmptyCompactSketch.getInstance(); }
+    //write the 8-byte empty image at offset 0 of the destination and wrap it
+    MemorySegment.copy(EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8);
+    return new DirectCompactSketch(dstWSeg);
+  }
+
+  //static
+
+  static boolean testCandidatePre0(final long candidate) {
+    return (candidate & EMPTY_SKETCH_MASK) == EMPTY_SKETCH_TEST;
+  }
+
+  @Override
+  public int getCurrentBytes() {
+    return 8;
+  }
+
+  @Override
+  public double getEstimate() { return 0; }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) {
+    return 0;
+  }
+
+  @Override
+  public long getThetaLong() {
+    return Long.MAX_VALUE;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return true;
+  }
+
+  @Override
+  public boolean isOrdered() {
+    return true;
+  }
+
+  @Override
+  public HashIterator iterator() {
+    return new HeapCompactHashIterator(new long[0]);
+  }
+
+  /**
+   * Returns a new 8-byte array representing a CompactSketch that has the following flags set:
+   * ordered, compact, empty, readOnly. The SerVer is 3, the Family is COMPACT(3),
+   * and the PreLongs = 1. The seedHash is zero.
+   */
+  @Override
+  public byte[] toByteArray() {
+    return EMPTY_COMPACT_SKETCH_ARR.clone(); //defensive copy: callers cannot alter the shared image
+  }
+
+  @Override
+  long[] getCache() {
+    return new long[0];
+  }
+
+  @Override
+  int getCompactPreambleLongs() {
+    return 1;
+  }
+
+  @Override
+  int getCurrentPreambleLongs() {
+    return 1;
+  }
+
+  @Override
+  MemorySegment getMemorySegment() {
+    return null;
+  }
+
+  @Override
+  short getSeedHash() {
+    return 0;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
new file mode 100644
index 000000000..9791a7902
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Used to convert older serialization versions 1 and 2 to version 3. The Serialization
+ * Version is the version of the sketch binary image format and should not be confused with the
+ * version number of the Open Source DataSketches Library.
+ *
+ * @author Lee Rhodes
+ */
+final class ForwardCompatibility {
+
+  /**
+   * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch.
+   * Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored
+   * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will
+   * be converted to a SerVer 3 sketches. There is no concept of p-sampling, no empty bit.
+   *
+   * @param srcSeg the image of a SerVer 1 sketch
+   *
+   * @param seedHash See Seed Hash.
+   * The seedHash that matches the seedHash of the original seed used to construct the sketch.
+   * Note: SerVer 1 sketches do not have the concept of the SeedHash, so the seedHash provided here
+   * MUST be derived from the actual seed that was used when the SerVer 1 sketches were built.
+   * @return a SerVer 3 {@link CompactSketch}.
+   */
+  static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short seedHash) {
+    final long segCap = srcSeg.byteSize(); //kept as long: an int cast could truncate to a negative
+    final int preLongs = extractPreLongs(srcSeg); //always 3 for serVer 1
+    if (preLongs != 3) {
+      throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs);
+    }
+    final int familyId = extractFamilyID(srcSeg); //1,2,3
+    if ((familyId < 1) || (familyId > 3)) {
+      throw new SketchesArgumentException("Family ID (Sketch Type) must be 1 to 3: " + familyId);
+    }
+    final int curCount = extractCurCount(srcSeg);
+    final long thetaLong = extractThetaLong(srcSeg);
+    final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);
+
+    if (empty || (segCap <= 24)) { //return empty
+      return EmptyCompactSketch.getInstance();
+    }
+
+    final long reqCap = ((long) curCount + preLongs) << 3; //long math: no int overflow
+    validateInputSize(reqCap, segCap);
+
+    if ((thetaLong == Long.MAX_VALUE) && (curCount == 1)) {
+      final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3);
+      return new SingleItemSketch(hash, seedHash);
+    }
+    //theta < 1.0 and/or curCount > 1
+
+    final long[] compactOrderedCache = new long[curCount];
+    MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount);
+    return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true);
+  }
+
+  /**
+   * Convert a serialization version (SerVer) 2 sketch to a SerVer 3 HeapCompactOrderedSketch.
+   * Note: SerVer 2 sketches can have metadata-longs of 1,2 or 3 and are always stored
+   * in a compact ordered form (not as a hash table), but with 4 different sketch types.
+   * @param srcSeg the image of a SerVer 2 sketch
+   * @param seedHash See Seed Hash.
+   * The seedHash of the seed used for building the sketch image in srcSeg
+   * @return a SerVer 3 HeapCompactOrderedSketch
+   */
+  static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short seedHash) {
+    final long segCap = srcSeg.byteSize(); //kept as long: an int cast could truncate to a negative
+    final int preLongs = extractPreLongs(srcSeg); //1,2 or 3
+    final int familyId = extractFamilyID(srcSeg); //1,2,3,4
+    if ((familyId < 1) || (familyId > 4)) {
+      throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId);
+    }
+    long reqBytesIn = 8; //long so that (curCount + preLongs) << 3 cannot overflow
+    int curCount = 0;
+    long thetaLong = Long.MAX_VALUE;
+    if (preLongs == 1) {
+      reqBytesIn = 8;
+      validateInputSize(reqBytesIn, segCap);
+      return EmptyCompactSketch.getInstance();
+    }
+    if (preLongs == 2) { //includes pre0 + count, no theta (== 1.0)
+      reqBytesIn = preLongs << 3;
+      validateInputSize(reqBytesIn, segCap);
+      curCount = extractCurCount(srcSeg);
+      if (curCount == 0) {
+        return EmptyCompactSketch.getInstance();
+      }
+      if (curCount == 1) {
+        reqBytesIn = (preLongs + 1) << 3;
+        validateInputSize(reqBytesIn, segCap);
+        final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3);
+        return new SingleItemSketch(hash, seedHash);
+      }
+      //curCount > 1
+      reqBytesIn = ((long) curCount + preLongs) << 3;
+      validateInputSize(reqBytesIn, segCap);
+      final long[] compactOrderedCache = new long[curCount];
+      MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount);
+      return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong,true);
+    }
+    if (preLongs == 3) { //pre0 + count + theta
+      reqBytesIn = preLongs << 3; //the preamble alone must fit before count and theta are read
+      validateInputSize(reqBytesIn, segCap);
+      curCount = extractCurCount(srcSeg);
+      thetaLong = extractThetaLong(srcSeg);
+      if ((curCount == 0) && (thetaLong == Long.MAX_VALUE)) {
+        return EmptyCompactSketch.getInstance();
+      }
+      if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) {
+        reqBytesIn = (preLongs + 1) << 3;
+        validateInputSize(reqBytesIn, segCap);
+        final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3);
+        return new SingleItemSketch(hash, seedHash);
+      }
+      //curCount > 1 and/or theta < 1.0
+      reqBytesIn = ((long) curCount + preLongs) << 3;
+      validateInputSize(reqBytesIn, segCap);
+      final long[] compactOrderedCache = new long[curCount];
+      //copy the curCount hash values that follow the preamble
+      MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount);
+      return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true);
+    }
+    throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs);
+  }
+
+  private static final void validateInputSize(final long reqBytesIn, final long memCap) {
+    if (reqBytesIn > memCap) { //the image needs more bytes than the segment holds
+      throw new SketchesArgumentException(
+        "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn
+        + ", bytesIn: " + memCap);
+    }
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HashIterator.java b/src/main/java/org/apache/datasketches/theta2/HashIterator.java
new file mode 100644
index 000000000..c8cf9aba9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HashIterator.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+/**
+ * Iterates over the retained hash values of a Theta sketch.
+ * @author Lee Rhodes
+ */
+public interface HashIterator {
+
+ /**
+ * Gets the hash value at the iterator's current position.
+ * @return the hash value
+ */
+ long get();
+
+ /**
+ * Returns true if there is a next hash value in sequence.
+ * If false, the iteration is done.
+ * @return true at the next hash value in sequence.
+ */
+ boolean next();
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
new file mode 100644
index 000000000..5bc11d712
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
@@ -0,0 +1,601 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static java.lang.Math.sqrt;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+import static org.apache.datasketches.common.Util.checkBounds;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor;
+import static org.apache.datasketches.theta2.PreambleUtil.extractP;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountNotIncremented;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta;
+import static org.apache.datasketches.thetacommon.HashOperations.STRIDE_MASK;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Objects;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * This sketch uses the
+ * Theta Sketch Framework
+ * and the
+ * Alpha TCF algorithm
+ * with a single cache.
+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+final class HeapAlphaSketch extends HeapUpdateSketch {
+ private static final int ALPHA_MIN_LG_NOM_LONGS = 9; //The smallest Log2 k allowed => 512.
+ private final double alpha_; // computed from lgNomLongs
+ private final long split1_; // computed from alpha and p
+
+ private int lgArrLongs_;
+ private int hashTableThreshold_; //never serialized
+ private int curCount_ = 0;
+ private long thetaLong_;
+ private boolean empty_ = true;
+
+ private long[] cache_;
+ private boolean dirty_ = false;
+
+ private HeapAlphaSketch(final int lgNomLongs, final long seed, final float p,
+ final ResizeFactor rf, final double alpha, final long split1) {
+ super(lgNomLongs, seed, p, rf);
+ alpha_ = alpha; //the static factories compute this from lgNomLongs
+ split1_ = split1; //the static factories compute this from alpha and p
+ }
+
+  /**
+   * Creates a new, empty instance of this sketch on the Java heap.
+   *
+   * @param lgNomLongs log-base2 of the nominal entries; must be at least ALPHA_MIN_LG_NOM_LONGS
+   * @param seed the update hash seed
+   * @param p the sampling probability, which also sets the initial theta
+   * @param rf the resize factor, used here to choose the starting hash table size
+   * @return a new, empty instance of this sketch
+   */
+  static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, final float p,
+      final ResizeFactor rf) {
+    if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) {
+      throw new SketchesArgumentException(
+          "This sketch requires a minimum nominal entries of " + (1 << ALPHA_MIN_LG_NOM_LONGS));
+    }
+
+    final double k = (1L << lgNomLongs);
+    final double alphaValue = k / (k + 1.0);
+    final double splitFraction = (p * (alphaValue + 1.0)) / 2.0;
+    final long splitPoint = (long) (splitFraction * LONG_MAX_VALUE_AS_DOUBLE);
+
+    final HeapAlphaSketch sk = new HeapAlphaSketch(lgNomLongs, seed, p, rf, alphaValue, splitPoint);
+    final int startLgArrLongs =
+        ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
+    sk.lgArrLongs_ = startLgArrLongs;
+    sk.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, startLgArrLongs);
+    sk.cache_ = new long[1 << startLgArrLongs];
+    sk.thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
+    sk.curCount_ = 0;
+    sk.empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
+    return sk;
+  }
+
+  /**
+   * Builds an on-heap instance of this sketch from a MemorySegment holding a sketch image.
+   * @param srcSeg The source MemorySegment object; must not be null.
+   * It must have a size of at least 24 bytes.
+   * @param expectedSeed the seed used to validate the given MemorySegment image;
+   * it is also passed to the new sketch.
+   * @return instance of this sketch
+   */
+  static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) {
+    Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null");
+    checkBounds(0, 24, srcSeg.byteSize());
+    final int preLongs = extractPreLongs(srcSeg);         //byte 0
+    final int lgNom = extractLgNomLongs(srcSeg);          //byte 3
+    final int lgArr = extractLgArrLongs(srcSeg);          //byte 4
+
+    checkAlphaFamily(srcSeg, preLongs, lgNom);
+    checkMemIntegrity(srcSeg, expectedSeed, preLongs, lgNom, lgArr);
+
+    final float p = extractP(srcSeg);                     //bytes 12-15
+    final int storedLgRF = extractLgResizeFactor(srcSeg); //byte 0
+    final ResizeFactor storedRF = ResizeFactor.getRF(storedLgRF);
+
+    final double k = (1L << lgNom);
+    final double alpha = k / (k + 1.0);
+    final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE);
+
+    //X2 always works, so it replaces a stored resize factor that is found to be incorrect.
+    final ResizeFactor rf =
+        isResizeFactorIncorrect(srcSeg, lgNom, lgArr) ? ResizeFactor.X2 : storedRF;
+    final int arrLongs = 1 << lgArr;
+    final HeapAlphaSketch sk = new HeapAlphaSketch(lgNom, expectedSeed, p, rf, alpha, split1);
+    sk.lgArrLongs_ = lgArr;
+    sk.hashTableThreshold_ = setHashTableThreshold(lgNom, lgArr);
+    sk.curCount_ = extractCurCount(srcSeg);
+    sk.thetaLong_ = extractThetaLong(srcSeg);
+    sk.empty_ = PreambleUtil.isEmptyFlag(srcSeg);
+    sk.cache_ = new long[arrLongs];
+    MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, sk.cache_, 0, arrLongs); //read in as hash table
+    return sk;
+  }
+
+ //Sketch
+
+ @Override
+ public Family getFamily() {
+ return Family.ALPHA; //this sketch always reports the ALPHA family
+ }
+
+ @Override
+ public HashIterator iterator() {
+ return new HeapHashIterator(cache_, thetaLong_); //over the on-heap hash table with the current theta
+ }
+
+  @Override
+  public double getEstimate() {
+    if (thetaLong_ > split1_) { return Sketch.estimate(thetaLong_, curCount_); }
+    //theta at or below split1: nominal entries divided by theta as a fraction; curCount_ is unused
+    return (1 << lgNomLongs_) * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong_);
+  }
+
+  @Override
+  public double getLowerBound(final int numStdDev) {
+    if ((numStdDev < 1) || (numStdDev > 3)) {
+      throw new SketchesArgumentException("numStdDev can only be the values 1, 2 or 3.");
+    }
+
+    //Not in estimation mode: the retained count is the bound.
+    if (!isEstimationMode()) {
+      return curCount_;
+    }
+
+    //Estimation mode with no valid retained entries: the bound is zero.
+    final int validCount = getRetainedEntries(true);
+    if (validCount <= 0) {
+      return 0.0;
+    }
+
+    //Estimation mode: estimate minus numStdDev standard deviations, floored at zero.
+    final double estimate = getEstimate();
+    final double variance =
+        getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), validCount);
+    return max(estimate - (numStdDev * sqrt(variance)), 0.0);
+  }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) {
+    final boolean recount = (curCount_ > 0) && valid && isDirty();
+    if (!recount) {
+      return curCount_;
+    }
+    //A valid count was requested and the sketch is dirty, so the count is taken from the
+    //cache with HashOperations.countPart instead of from curCount_.
+    return HashOperations.countPart(getCache(), getLgArrLongs(), getThetaLong());
+  }
+
+ @Override
+ public long getThetaLong() {
+ return thetaLong_; //the theta value held in this sketch's heap field
+ }
+
+  @Override
+  public double getUpperBound(final int numStdDev) {
+    if ((numStdDev < 1) || (numStdDev > 3)) {
+      throw new SketchesArgumentException("numStdDev can only be the values 1, 2 or 3.");
+    }
+
+    if (!isEstimationMode()) { return curCount_; } //not estimating: the retained count is the bound
+    //Estimation mode: estimate plus numStdDev standard deviations.
+    final double stdDev =
+        sqrt(getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), getRetainedEntries(true)));
+    return getEstimate() + (numStdDev * stdDev);
+  }
+
+ @Override
+ public boolean isEmpty() {
+ return empty_; //set true by reset(); cleared by every hashUpdate() call, even one that rejects the hash
+ }
+
+ /*
+ * Alpha Sketch Preamble Layout ( same as Theta UpdateSketch )
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7    |    6    |   5   |   4   |   3   |   2   |   1    |         0         |
+ *  0   ||     Seed Hash     | Flags | LgArr | LgNom | FamID | SerVer | lgRF | PreLongs=3 |
+ *
+ *      ||   15    |   14    |  13   |  12   |  11   |  10   |   9    |         8         |
+ *  1   ||-----------------p-----------------|-----------Retained Entries Count-----------|
+ *
+ *      ||   23    |   22    |  21   |  20   |  19   |  18   |   17   |        16         |
+ *  2   ||-------------------------------------Theta--------------------------------------|
+ *
+ */
+
+ @Override
+ public byte[] toByteArray() {
+ return toByteArray(Family.ALPHA.getMinPreLongs(), (byte) Family.ALPHA.getID()); //delegates with ALPHA's minimum preamble size and family ID
+ }
+
+ //UpdateSketch
+
+ @Override
+ public UpdateSketch rebuild() {
+ if (isDirty()) { //a clean cache needs no rebuild
+ rebuildDirty();
+ }
+ return this; //always this same instance
+ }
+
+ @Override
+ public final void reset() {
+ final int lgArrLongs = //starting table size derived from the target lgNomLongs_ + 1 and the resize factor
+ ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, getResizeFactor().lg(), ThetaUtil.MIN_LG_ARR_LONGS);
+ if (lgArrLongs == lgArrLongs_) { //already at the starting size: reuse the array and just zero it
+ final int arrLongs = cache_.length;
+ assert (1 << lgArrLongs_) == arrLongs;
+ java.util.Arrays.fill(cache_, 0L);
+ }
+ else { //different size: allocate a fresh, zeroed table of the starting size
+ cache_ = new long[1 << lgArrLongs];
+ lgArrLongs_ = lgArrLongs;
+ }
+ hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ empty_ = true;
+ curCount_ = 0;
+ thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); //theta restarts at p scaled to the long range
+ dirty_ = false; //an all-zero table has no over-theta values
+ }
+
+ //restricted methods
+
+ @Override
+ int getCompactPreambleLongs() {
+ return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_); //computed from the current empty flag, stored count and theta
+ }
+
+ @Override
+ int getCurrentPreambleLongs() {
+ return Family.ALPHA.getMinPreLongs(); //same preamble size that toByteArray() passes on
+ }
+
+ @Override
+ MemorySegment getMemorySegment() {
+ return null; //always null: this implementation keeps its hashes in the long[] cache_
+ }
+
+ @Override
+ long[] getCache() {
+ return cache_; //the live hash table itself, not a copy
+ }
+
+ @Override
+ boolean isDirty() {
+ return dirty_; //set whenever theta is lowered after an insert; cleared by forceRebuildDirtyCache() and reset()
+ }
+
+ @Override
+ boolean isOutOfSpace(final int numEntries) {
+ return numEntries > hashTableThreshold_; //strictly greater than the threshold for the current table size
+ }
+
+ @Override
+ int getLgArrLongs() {
+ return lgArrLongs_; //log-base-2 of the current cache_ length
+ }
+
+ @Override
+ UpdateReturnState hashUpdate(final long hash) { //rejects the hash, or inserts it and maintains count, theta and the dirty flag
+ HashOperations.checkHashCorruption(hash);
+ empty_ = false; //cleared before the theta test, so even a rejected hash marks the sketch non-empty
+
+ //The over-theta test
+ if (HashOperations.continueCondition(thetaLong_, hash)) {
+ return RejectedOverTheta; //signal that hash was rejected due to theta.
+ }
+
+ //The duplicate/inserted tests
+ if (dirty_) { //may have dirty values, must be at tgt size
+ return enhancedHashInsert(cache_, hash); //this path also updates theta, count and dirty itself
+ }
+
+ //NOT dirty, the other duplicate or inserted test
+ if (HashOperations.hashSearchOrInsert(cache_, lgArrLongs_, hash) >= 0) {
+ return UpdateReturnState.RejectedDuplicate;
+ }
+ //insertion occurred, must increment
+ curCount_++;
+ final int r = (thetaLong_ > split1_) ? 0 : 1; //are we in sketch mode? (i.e., seen k+1 inserts?)
+ if (r == 0) { //not yet sketch mode (has not seen k+1 inserts), but could be sampling
+ if (curCount_ > (1 << lgNomLongs_)) { // > k
+ //Reached the k+1 insert. Must be at tgt size or larger.
+ //Transition to Sketch Mode. Happens only once.
+ //Decrement theta, make dirty, don't bother checking size, already not-empty.
+ thetaLong_ = (long) (thetaLong_ * alpha_);
+ dirty_ = true; //now may have dirty values
+ }
+ else {
+ //inserts (not entries!) <= k. It may not be at tgt size.
+ //Check size, don't decrement theta. cnt already ++, empty_ already false;
+ if (isOutOfSpace(curCount_)) {
+ resizeClean(); //not dirty, not at tgt size.
+ }
+ }
+ }
+ else { //r > 0: sketch mode and not dirty (e.g., after a rebuild).
+ //dec theta, make dirty, cnt already ++, must be at tgt size or larger. check for rebuild
+ assert (lgArrLongs_ > lgNomLongs_) : "lgArr: " + lgArrLongs_ + ", lgNom: " + lgNomLongs_;
+ thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
+ dirty_ = true; //now may have dirty values
+ if (isOutOfSpace(curCount_)) {
+ rebuildDirty(); // at tgt size and maybe dirty
+ }
+ }
+ return UpdateReturnState.InsertedCountIncremented; //every path that reaches here incremented curCount_ above
+ }
+
+ /**
+ * Enhanced Knuth-style Open Addressing, Double Hash insert.
+ * The insertion process will overwrite an already existing, dirty (over-theta) value if one is
+ * found in the search, but only after the rest of the probe path shows no duplicate of the hash.
+ * If an empty cell is found first, it will be inserted normally. Either insert multiplies theta by alpha_.
+ *
+ * @param hashTable the hash table to insert into
+ * @param hash must not be 0. If not a duplicate, it will be inserted into the hash array
+ * @return RejectedDuplicate, InsertedCountNotIncremented (dirty slot reused) or InsertedCountIncremented (empty slot filled)
+ */
+ final UpdateReturnState enhancedHashInsert(final long[] hashTable, final long hash) {
+ final int arrayMask = (1 << lgArrLongs_) - 1; // arrayLongs -1
+ // make odd and independent of curProbe:
+ final int stride = (2 * (int) ((hash >>> lgArrLongs_) & STRIDE_MASK)) + 1;
+ int curProbe = (int) (hash & arrayMask);
+ long curTableHash = hashTable[curProbe];
+ final int loopIndex = curProbe; // starting slot, used below to detect a full wrap-around
+
+ // This is the enhanced part
+ // Search for duplicate or zero, or opportunity to replace garbage.
+ while ((curTableHash != hash) && (curTableHash != 0)) {
+ // curTableHash is not a duplicate and not zero
+
+ if (curTableHash >= thetaLong_) { // curTableHash is garbage, do enhanced insert
+ final int rememberPos = curProbe; // remember its position.
+ // Now we must make sure there are no duplicates in this search path,
+ // so we keep searching
+ curProbe = (curProbe + stride) & arrayMask; // move forward
+ curTableHash = hashTable[curProbe];
+ while ((curTableHash != hash) && (curTableHash != 0)) { // NOTE(review): no wrap-around guard here; relies on an empty slot existing
+ curProbe = (curProbe + stride) & arrayMask;
+ curTableHash = hashTable[curProbe];
+ }
+ // curTableHash is a duplicate or zero
+ if (curTableHash == hash) {
+ return RejectedDuplicate; // duplicate, just return
+ }
+ assert (curTableHash == 0); // must be zero
+ // Now that we know there are no duplicates we can
+ // go back and insert at first garbage value position
+ hashTable[rememberPos] = hash;
+ thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
+ dirty_ = true; //the decremented theta could have produced a new dirty value
+ return InsertedCountNotIncremented; // a garbage slot was reused, so curCount_ is left unchanged
+ }
+
+ // curTableHash was not a duplicate, not zero, and NOT garbage,
+ // so we keep searching
+ assert (curTableHash < thetaLong_);
+ curProbe = (curProbe + stride) & arrayMask;
+ curTableHash = hashTable[curProbe];
+
+ // ensure no infinite loop
+ if (curProbe == loopIndex) {
+ throw new SketchesArgumentException("No empty slot in table!");
+ }
+ // end of Enhanced insert
+ } // end while and search
+
+ // curTableHash is a duplicate or zero and NOT garbage
+ if (curTableHash == hash) {
+ return RejectedDuplicate; // duplicate, just return
+ }
+ // must be zero, so insert and increment
+ assert (curTableHash == 0);
+ hashTable[curProbe] = hash;
+ thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
+ dirty_ = true; //the decremented theta could have produced a new dirty value
+ if (++curCount_ > hashTableThreshold_) {
+ rebuildDirty(); //at tgt size and maybe dirty
+ }
+ return InsertedCountIncremented;
+ }
+
+ //At tgt size or greater
+ //Checks for rare lockup condition
+ // Used by hashUpdate(), enhancedHashInsert(), rebuild()
+ private final void rebuildDirty() {
+ final int curCountBefore = curCount_;
+ forceRebuildDirtyCache(); //rehashes into a fresh same-size cache_, recomputes curCount_, clears dirty_
+ if (curCountBefore == curCount_) {
+ //clean but unsuccessful at reducing count, must take drastic measures, very rare.
+ forceResizeCleanCache(1); //grow the table by one power of 2
+ }
+ }
+
+ //curCount > hashTableThreshold
+ //Checks for rare lockup condition
+ // Used by hashUpdate()
+ private final void resizeClean() {
+ //must resize, but are we at tgt size?
+ final int lgTgtLongs = lgNomLongs_ + 1; //target table size is 2k
+ if (lgTgtLongs > lgArrLongs_) {
+ //not yet at tgt size
+ final ResizeFactor rf = getResizeFactor();
+ final int lgDeltaLongs = lgTgtLongs - lgArrLongs_; //must be > 0
+ final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0; grow by at least 1, never past tgt size
+ forceResizeCleanCache(lgResizeFactor);
+ }
+ else {
+ //at tgt size or larger, no dirty values, must take drastic measures, very rare.
+ forceResizeCleanCache(1); //grow the table by one power of 2
+ }
+ }
+
+ //Force resize. Changes lgArrLongs_, cache_ and hashTableThreshold_. Theta doesn't change, count doesn't change.
+ // Used by rebuildDirty(), resizeClean()
+ private final void forceResizeCleanCache(final int lgResizeFactor) {
+ assert (!dirty_); // Should never be dirty before a resize.
+ lgArrLongs_ += lgResizeFactor; // new tgt size
+ final long[] tgtArr = new long[1 << lgArrLongs_];
+ final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); //re-insert the old table into the larger one
+ assert (curCount_ == newCount); //a clean table must keep every entry
+ curCount_ = newCount;
+ cache_ = tgtArr;
+ hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ }
+
+ //Cache stays the same size. Must be dirty. Theta doesn't change, count will change.
+ // Used by rebuildDirty()
+ private final void forceRebuildDirtyCache() {
+ final long[] tgtArr = new long[1 << lgArrLongs_];
+ curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); //count returned by the re-insert becomes the new curCount_
+ cache_ = tgtArr;
+ dirty_ = false; //table was just rebuilt against the current theta
+ //hashTableThreshold stays the same
+ }
+
+ // @formatter:off
+ /**
+ * Computes an estimate of the error variance based on Historic Inverse Probability (HIP)
+ * estimators. See Cohen: All-Distances Sketches, Revisited: HIP Estimators for Massive Graph
+ * Analysis, Nov 2014.
+ *
+ * Table of sketch states and how Upper and Lower Bounds are computed
+ *
+ * Theta  P     Count  Empty  EstMode  Est  UB   LB   Comments
+ * 1.0    1.0   0      T      F        0    0    0    Empty Sketch-mode only sketch
+ * 1.0    1.0   N      F      F        N    N    N    Degenerate Sketch-mode only sketch
+ * <1.0   1.0   -      F      T        est  HIP  HIP  Normal Sketch-mode only sketch
+ * P      <1.0  0      T      F        0    0    0    Virgin sampling sketch
+ * P      <1.0  N      F      T        est  HIP  HIP  Degenerate sampling sketch
+ * <P     <1.0  N      F      T        est  HIP  HIP  Sampling sketch also in sketch-mode
+ *
+ * @param k alias for nominal entries.
+ * @param p See Sampling Probability, p.
+ * @param alpha the value of alpha for this sketch
+ * @param theta See theta.
+ * @param count the current valid count.
+ * @return the variance.
+ */
+ // @formatter:on
+ private static final double getVariance(final double k, final double p, final double alpha,
+ final double theta, final int count) {
+ final double kPlus1 = k + 1.0;
+ final double y = 1.0 / p;
+ final double ySq = y * y;
+ final double ySqMinusY = ySq - y; //equals 0 when p == 1.0, which zeroes the sampling terms below
+ final int r = getR(theta, alpha, p); //0, 1 or 2: which range theta falls in relative to p and alpha
+ final double result;
+ if (r == 0) {
+ result = count * ySqMinusY; //only branch that uses count
+ }
+ else if (r == 1) {
+ result = kPlus1 * ySqMinusY; //term1
+ }
+ else { //r > 1
+ final double b = 1.0 / alpha;
+ final double bSq = b * b;
+ final double x = p / theta;
+ final double xSq = x * x;
+ final double term1 = kPlus1 * ySqMinusY;
+ final double term2 = y / (1.0 - bSq);
+ final double term3 = (((y * bSq) - (y * xSq) - b - bSq) + x + (x * b));
+ result = term1 + (term2 * term3);
+ }
+ final double term4 = (1 - theta) / (theta * theta); //added in every case; 0 when theta == 1.0
+ return result + term4;
+ }
+
+ /**
+ * Computes whether there have been 0, 1, or 2 or more actual insertions into the cache in a
+ * numerically safe way, by comparing theta against thresholds derived from p and alpha.
+ * @param theta See Theta.
+ * @param alpha internal computed value alpha.
+ * @param p See Sampling Probability, p.
+ * @return 0 if theta is above split1, 1 if theta is above alpha * split1, otherwise 2.
+ */
+ private static final int getR(final double theta, final double alpha, final double p) {
+ final double split1 = (p * (alpha + 1.0)) / 2.0; //midpoint between p and p * alpha
+ if (theta > split1) { return 0; }
+ if (theta > (alpha * split1)) { return 1; }
+ return 2;
+ }
+
+ /**
+ * Returns the cardinality limit given the current size of the hash table array.
+ * Despite the "set" in its name this is a pure function: it assigns nothing, callers store the result.
+ * @param lgNomLongs See lgNomLongs.
+ * @param lgArrLongs See lgArrLongs.
+ * @return the hash table threshold
+ */
+ private static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+ final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; //resize fraction while the table is no larger than k, rebuild fraction above that
+ return (int) Math.floor(fraction * (1 << lgArrLongs));
+ }
+
+ static void checkAlphaFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { //throws SketchesArgumentException unless seg looks like an ALPHA image
+ //Check Family
+ final int familyID = extractFamilyID(seg); //byte 2
+ final Family family = Family.idToFamily(familyID);
+ if (family.equals(Family.ALPHA)) {
+ if (preambleLongs != Family.ALPHA.getMinPreLongs()) { //ALPHA accepts exactly its minimum preamble size
+ throw new SketchesArgumentException(
+ "Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs);
+ }
+ }
+ else {
+ throw new SketchesArgumentException(
+ "Possible corruption: Invalid Family: " + family.toString());
+ }
+
+ //Check lgNomLongs
+ if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) { //the caller-supplied value is checked; it is not re-read from seg
+ throw new SketchesArgumentException(
+ "Possible corruption: This sketch requires a minimum nominal entries of "
+ + (1 << ALPHA_MIN_LG_NOM_LONGS));
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java
new file mode 100644
index 000000000..b10ffcaaf
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+class HeapCompactHashIterator implements HashIterator { //walks a compact hash array in index order, returning every element
+  private final long[] cache; //array being iterated; kept by reference and never reassigned
+  private int index; //position of the current element; -1 until the first next()
+
+  HeapCompactHashIterator(final long[] cache) {
+    this.cache = cache;
+    index = -1;
+  }
+
+  @Override
+  public long get() { //only valid after next() has returned true; otherwise the array access throws
+    return cache[index];
+  }
+
+  @Override
+  public boolean next() { //advances one position; false once the end of the array has been passed
+    return ++index < cache.length;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
new file mode 100644
index 000000000..065213191
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty;
+import static org.apache.datasketches.theta2.CompactOperations.componentsToCompact;
+import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs;
+import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact;
+import static org.apache.datasketches.theta2.CompactOperations.isSingleItem;
+import static org.apache.datasketches.theta2.CompactOperations.loadCompactMemory;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Parent class of the Heap Compact Sketches. All fields are final and are set once by the constructor.
+ *
+ * @author Lee Rhodes
+ */
+class HeapCompactSketch extends CompactSketch {
+  private final long thetaLong_; //computed
+  private final int curCount_;
+  private final int preLongs_; //computed
+  private final short seedHash_;
+  private final boolean empty_;
+  private final boolean ordered_;
+  private final boolean singleItem_; //computed
+  private final long[] cache_; //kept by reference; never modified by this class
+
+  /**
+   * Constructs this sketch from correct, valid components.
+   *
+   * @param cache in compact form. The array is retained by reference, not copied.
+   * @param empty The correct Empty.
+   * @param seedHash The correct Seed Hash.
+   * @param curCount correct value
+   * @param thetaLong The correct thetaLong.
+   * @param ordered the ordered state of cache, as later reported by isOrdered().
+   */
+  HeapCompactSketch(final long[] cache, final boolean empty, final short seedHash,
+      final int curCount, final long thetaLong, final boolean ordered) {
+    seedHash_ = seedHash;
+    curCount_ = curCount;
+    empty_ = empty;
+    ordered_ = ordered;
+    cache_ = cache;
+    //computed
+    thetaLong_ = correctThetaOnCompact(empty, curCount, thetaLong);
+    preLongs_ = computeCompactPreLongs(empty, curCount, thetaLong); //considers singleItem
+    singleItem_ = isSingleItem(empty, curCount, thetaLong);
+    checkIllegalCurCountAndEmpty(empty, curCount); //validation of the empty/curCount pair
+  }
+
+  //Sketch
+
+  @Override
+  public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) {
+    if (dstSeg == null && (!dstOrdered || ordered_ == dstOrdered)) { return this; } //heap target whose ordering is already satisfied
+    return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(),
+        true, ordered_, dstOrdered, dstSeg, getCache().clone()); //clone so the new sketch never shares this cache
+  }
+
+  @Override
+  public int getCurrentBytes() {
+    return (preLongs_ + curCount_) << 3; //8 bytes for each preamble long and each retained hash
+  }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) {
+    return curCount_; //the valid flag is ignored here
+  }
+
+  @Override
+  public long getThetaLong() {
+    return thetaLong_;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return empty_;
+  }
+
+  @Override
+  public boolean isOrdered() {
+    return ordered_;
+  }
+
+  @Override
+  public HashIterator iterator() {
+    return new HeapCompactHashIterator(cache_);
+  }
+
+  //restricted methods
+
+  @Override
+  long[] getCache() {
+    return cache_; //the internal array itself, not a copy
+  }
+
+  @Override
+  int getCompactPreambleLongs() {
+    return preLongs_;
+  }
+
+  @Override
+  int getCurrentPreambleLongs() { //already compact; ignored
+    return preLongs_;
+  }
+
+  @Override
+  MemorySegment getMemorySegment() {
+    return null; //always null: the hashes live in the long[] cache_
+  }
+
+  @Override
+  short getSeedHash() {
+    return seedHash_;
+  }
+
+  //Wrapping the byte array in a MemorySegment is convenient: writes through the segment fill the byte array directly.
+  @Override
+  public byte[] toByteArray() {
+    final int bytes = getCurrentBytes();
+    final byte[] byteArray = new byte[bytes];
+    final MemorySegment dstSeg = MemorySegment.ofArray(byteArray);
+    final int emptyBit = isEmpty() ? EMPTY_FLAG_MASK : 0;
+    final int orderedBit = ordered_ ? ORDERED_FLAG_MASK : 0;
+    final int singleItemBit = singleItem_ ? SINGLEITEM_FLAG_MASK : 0;
+    final byte flags = (byte) (emptyBit | READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK
+        | orderedBit | singleItemBit); //read-only and compact are always set
+    final int preLongs = getCompactPreambleLongs();
+    loadCompactMemory(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(),
+        dstSeg, flags, preLongs);
+    return byteArray;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java
new file mode 100644
index 000000000..c2b098c25
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+/** Iterator over a heap hash table that returns only the hashes that are non-zero and strictly less than theta.
+ * @author Lee Rhodes
+ */
+class HeapHashIterator implements HashIterator {
+  private final long[] cache; //hash table being scanned; kept by reference and never reassigned
+  private final long thetaLong; //theta captured at construction; later changes in the sketch are not seen
+  private int index; //slot most recently examined; -1 before the first next()
+  private long hash; //value returned by get(); 0 until next() first returns true
+
+  HeapHashIterator(final long[] cache, final long thetaLong) {
+    this.cache = cache;
+    this.thetaLong = thetaLong;
+    index = -1;
+    hash = 0;
+  }
+
+  @Override
+  public long get() {
+    return hash; //the hash found by the latest successful next()
+  }
+
+  @Override
+  public boolean next() {
+    while (++index < cache.length) { //scan forward from the slot after the previous hit
+      hash = cache[index];
+      if ((hash != 0) && (hash < thetaLong)) { //skip empty slots and hashes at or above theta
+        return true;
+      }
+    }
+    return false; //table exhausted; hash holds whatever the final slot contained
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java
new file mode 100644
index 000000000..3096e5e1a
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor;
+import static org.apache.datasketches.theta2.PreambleUtil.extractP;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedRebuilt;
+import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta;
+import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/** Update sketch kept in a long[] hash table and trimmed by quick-select; also used as the Union gadget.
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+class HeapQuickSelectSketch extends HeapUpdateSketch {
+ private final Family MY_FAMILY; //UNION or QUICKSELECT when constructed new; taken from the source preamble when heapified
+
+ private final int preambleLongs_;
+ private int lgArrLongs_; //log-base-2 of the current cache_ length
+ private int hashTableThreshold_; //never serialized
+ int curCount_;
+ long thetaLong_;
+ boolean empty_;
+
+ private long[] cache_; //the hash table; replaced on resize and rebuild
+
+ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float p,
+ final ResizeFactor rf, final int preambleLongs, final Family family) { //used by heapifyInstance(), which fills in the remaining fields
+ super(lgNomLongs, seed, p, rf);
+ preambleLongs_ = preambleLongs;
+ MY_FAMILY = family;
+ }
+
+ /**
+ * Construct a new sketch instance on the java heap.
+ *
+ * @param lgNomLongs See lgNomLongs.
+ * @param seed See seed
+ * @param p See Sampling Probability, p
+ * @param rf See Resize Factor
+ * @param unionGadget true if this sketch is implementing the Union gadget function.
+ * Otherwise, it is behaving as a normal QuickSelectSketch.
+ */
+ HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float p,
+ final ResizeFactor rf, final boolean unionGadget) {
+ super(lgNomLongs, seed, p, rf);
+
+ //Choose family, preambleLongs
+ if (unionGadget) {
+ preambleLongs_ = Family.UNION.getMinPreLongs();
+ MY_FAMILY = Family.UNION;
+ }
+ else {
+ preambleLongs_ = Family.QUICKSELECT.getMinPreLongs();
+ MY_FAMILY = Family.QUICKSELECT;
+ }
+
+ lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); //starting size derived from the 2k target and rf
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_);
+ curCount_ = 0;
+ thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); //theta starts at p scaled to the long range
+ empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
+ cache_ = new long[1 << lgArrLongs_];
+ }
+
+ /**
+ * Heapify a sketch from a MemorySegment UpdateSketch or Union object
+ * containing sketch data.
+ * @param srcSeg The source MemorySegment object.
+ * @param seed See seed
+ * @return instance of this sketch
+ */
+ static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final long seed) {
+ final int preambleLongs = extractPreLongs(srcSeg); //byte 0
+ final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
+ final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
+
+ checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs);
+ checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs);
+
+ final float p = extractP(srcSeg); //bytes 12-15
+ final int memlgRF = extractLgResizeFactor(srcSeg); //byte 0
+ ResizeFactor memRF = ResizeFactor.getRF(memlgRF);
+ final int familyID = extractFamilyID(srcSeg);
+ final Family family = Family.idToFamily(familyID); //the heapified sketch keeps the family found in the source
+
+ if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) {
+ memRF = ResizeFactor.X2; //X2 always works.
+ }
+
+ final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF,
+ preambleLongs, family);
+ hqss.lgArrLongs_ = lgArrLongs;
+ hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs);
+ hqss.curCount_ = extractCurCount(srcSeg);
+ hqss.thetaLong_ = extractThetaLong(srcSeg);
+ hqss.empty_ = PreambleUtil.isEmptyFlag(srcSeg);
+ hqss.cache_ = new long[1 << lgArrLongs];
+ MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table
+ return hqss;
+ }
+
+ //Sketch
+
+ @Override
+ public double getEstimate() {
+ return Sketch.estimate(thetaLong_, curCount_); //uses the raw field, not getThetaLong()
+ }
+
+ @Override
+ public Family getFamily() {
+ return MY_FAMILY;
+ }
+
+ @Override
+ public int getRetainedEntries(final boolean valid) {
+ return curCount_; //the valid flag is ignored; isDirty() is always false for this sketch
+ }
+
+ @Override
+ public long getThetaLong() {
+ return empty_ ? Long.MAX_VALUE : thetaLong_; //an empty sketch reports Long.MAX_VALUE regardless of the stored theta
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return empty_;
+ }
+
+ @Override
+ public HashIterator iterator() {
+ return new HeapHashIterator(cache_, thetaLong_); //iterates the live table using the raw theta field
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ return toByteArray(preambleLongs_, (byte) MY_FAMILY.getID());
+ }
+
+ //UpdateSketch
+
+ @Override
+ public UpdateSketch rebuild() {
+ if (getRetainedEntries(true) > (1 << getLgNomLongs())) { //only trim when more than k entries are retained
+ quickSelectAndRebuild();
+ }
+ return this;
+ }
+
+ @Override
+ public void reset() {
+ final ResizeFactor rf = getResizeFactor();
+ final int lgArrLongsSM = ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
+ if (lgArrLongsSM == lgArrLongs_) { //already at the starting size: reuse the array and just zero it
+ final int arrLongs = cache_.length;
+ assert (1 << lgArrLongs_) == arrLongs;
+ java.util.Arrays.fill(cache_, 0L);
+ }
+ else { //different size: allocate a fresh, zeroed table of the starting size
+ cache_ = new long[1 << lgArrLongsSM];
+ lgArrLongs_ = lgArrLongsSM;
+ }
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ empty_ = true;
+ curCount_ = 0;
+ thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); //theta restarts at p scaled to the long range
+ }
+
+ //restricted methods
+
+ @Override
+ long[] getCache() {
+ return cache_; //the live hash table itself, not a copy
+ }
+
+ @Override
+ int getCompactPreambleLongs() {
+ return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_);
+ }
+
+ @Override
+ int getCurrentPreambleLongs() {
+ return preambleLongs_;
+ }
+
+ //only used by ConcurrentHeapThetaBuffer & Test
+ int getHashTableThreshold() {
+ return hashTableThreshold_;
+ }
+
+ @Override
+ int getLgArrLongs() {
+ return lgArrLongs_;
+ }
+
+ @Override
+ MemorySegment getMemorySegment() {
+ return null; //always null: the hashes live in the long[] cache_
+ }
+
+ @Override
+ UpdateReturnState hashUpdate(final long hash) {
+ HashOperations.checkHashCorruption(hash);
+ empty_ = false; //cleared before the theta test, so even a rejected hash marks the sketch non-empty
+
+ //The over-theta test
+ if (HashOperations.continueCondition(thetaLong_, hash)) {
+ return RejectedOverTheta; //signal that hash was rejected due to theta.
+ }
+
+ //The duplicate test
+ if (HashOperations.hashSearchOrInsert(cache_, lgArrLongs_, hash) >= 0) {
+ return RejectedDuplicate; //Duplicate, not inserted
+ }
+ //insertion occurred, must increment curCount
+ curCount_++;
+
+ if (isOutOfSpace(curCount_)) { //we need to do something, we are out of space
+ //must rebuild or resize
+ if (lgArrLongs_ <= lgNomLongs_) { //resize
+ resizeCache();
+ return InsertedCountIncrementedResized;
+ }
+ //Already at tgt size, must rebuild
+ assert (lgArrLongs_ == (lgNomLongs_ + 1)) : "lgArr: " + lgArrLongs_ + ", lgNom: " + lgNomLongs_;
+ quickSelectAndRebuild(); //Changes thetaLong_, curCount_, reassigns cache
+ return InsertedCountIncrementedRebuilt;
+ }
+ return InsertedCountIncremented;
+ }
+
+ @Override
+ boolean isDirty() {
+ return false; //always false for this sketch
+ }
+
+ @Override
+ boolean isOutOfSpace(final int numEntries) {
+ return numEntries > hashTableThreshold_; //strictly greater than the threshold for the current table size
+ }
+
+ //Must resize. Changes lgArrLongs_, cache_, hashTableThreshold;
+ // theta and count don't change.
+ // Used by hashUpdate()
+ private final void resizeCache() {
+ final ResizeFactor rf = getResizeFactor();
+ final int lgMaxArrLongs = lgNomLongs_ + 1;
+ final int lgDeltaLongs = lgMaxArrLongs - lgArrLongs_;
+ final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0
+ lgArrLongs_ += lgResizeFactor; // new arr size
+
+ final long[] tgtArr = new long[1 << lgArrLongs_];
+ final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); //re-insert the old table into the larger one
+
+ assert newCount == curCount_; //Assumes no dirty values.
+ curCount_ = newCount;
+
+ cache_ = tgtArr;
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ }
+
+ //array stays the same size. Changes theta and thus count
+ private final void quickSelectAndRebuild() {
+ final int arrLongs = 1 << lgArrLongs_; // generally 2 * k,
+
+ final int pivot = (1 << lgNomLongs_) + 1; // pivot for QS = k + 1
+
+ thetaLong_ = selectExcludingZeros(cache_, curCount_, pivot); //messes up the cache_
+
+ // now we rebuild to clean up dirty data, update count, reconfigure as a hash table
+ final long[] tgtArr = new long[arrLongs];
+ curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); //count returned by the re-insert under the new theta
+ cache_ = tgtArr;
+ //hashTableThreshold stays the same
+ }
+
+ /**
+ * Returns the cardinality limit given the current size of the hash table array.
+ *
+ * @param lgNomLongs See lgNomLongs.
+ * @param lgArrLongs See lgArrLongs.
+ * @return the hash table threshold
+ */
+ private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+ final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; //resize fraction while the table is no larger than k, rebuild fraction above that
+ return (int) (fraction * (1 << lgArrLongs));
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java
new file mode 100644
index 000000000..87737bfa8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty;
+import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER;
+import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgNomLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor;
+import static org.apache.datasketches.theta2.PreambleUtil.insertP;
+import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The parent class for Heap Updatable Theta Sketches.
+ *
+ * @author Lee Rhodes
+ */
+abstract class HeapUpdateSketch extends UpdateSketch {
+  final int lgNomLongs_; //never less than ThetaUtil.MIN_LG_NOM_LONGS, see constructor
+  private final long seed_;
+  private final float p_;
+  private final ResizeFactor rf_;
+
+  HeapUpdateSketch(final int lgNomLongs, final long seed, final float p, final ResizeFactor rf) {
+    lgNomLongs_ = Math.max(lgNomLongs, ThetaUtil.MIN_LG_NOM_LONGS);
+    seed_ = seed;
+    p_ = p;
+    rf_ = rf;
+  }
+
+  //Sketch
+
+  @Override
+  public int getCurrentBytes() {
+    final int preLongs = getCurrentPreambleLongs();
+    final int dataLongs = getCurrentDataLongs();
+    return (preLongs + dataLongs) << 3; //longs to bytes
+  }
+
+  //UpdateSketch
+
+  @Override
+  public final int getLgNomLongs() {
+    return lgNomLongs_;
+  }
+
+  @Override
+  float getP() {
+    return p_;
+  }
+
+  @Override
+  public ResizeFactor getResizeFactor() {
+    return rf_;
+  }
+
+  @Override
+  long getSeed() {
+    return seed_;
+  }
+
+  //restricted methods
+
+  @Override
+  short getSeedHash() {
+    return ThetaUtil.computeSeedHash(getSeed());
+  }
+
+  //Used by HeapAlphaSketch and HeapQuickSelectSketch / Theta UpdateSketch. Serializes preamble + hash table.
+  byte[] toByteArray(final int preLongs, final byte familyID) {
+    if (isDirty()) { rebuild(); }
+    checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries(true));
+    final int preBytes = (preLongs & 0X3F) << 3; //mask the 6-bit preLongs field, then convert to bytes (3 -> 24)
+    final int dataBytes = getCurrentDataLongs() << 3;
+    final byte[] byteArrOut = new byte[preBytes + dataBytes];
+
+    final MemorySegment segOut = MemorySegment.ofArray(byteArrOut);
+
+    //preamble first 8 bytes. Note: only compact can be reduced to 8 bytes.
+    final int lgRf = getResizeFactor().lg() & 0x3;
+    insertPreLongs(segOut, preLongs); //byte 0 low 6 bits
+    insertLgResizeFactor(segOut, lgRf); //byte 0 high 2 bits
+    insertSerVer(segOut, SER_VER); //byte 1
+    insertFamilyID(segOut, familyID); //byte 2
+    insertLgNomLongs(segOut, getLgNomLongs()); //byte 3
+    insertLgArrLongs(segOut, getLgArrLongs()); //byte 4
+    insertSeedHash(segOut, getSeedHash()); //bytes 6 & 7
+
+    insertCurCount(segOut, this.getRetainedEntries(true));
+    insertP(segOut, getP());
+    final long thetaLong =
+        correctThetaOnCompact(isEmpty(), getRetainedEntries(true), getThetaLong());
+    insertThetaLong(segOut, thetaLong);
+
+    //Flags: BigEnd=0, ReadOnly=0, Empty=X, compact=0, ordered=0
+    final byte flags = isEmpty() ? (byte) EMPTY_FLAG_MASK : 0;
+    insertFlags(segOut, flags);
+
+    //Data
+    final int arrLongs = 1 << getLgArrLongs();
+    final long[] cache = getCache();
+    //Copy the whole hash table (arrLongs longs) into byteArrOut directly after the preamble.
+
+    MemorySegment.copy(cache, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, arrLongs);
+    return byteArrOut;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java
new file mode 100644
index 000000000..685dd3eac
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/Intersection.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.Util.floorPowerOf2;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The API for intersection operations
+ *
+ * @author Lee Rhodes
+ */
+public abstract class Intersection extends SetOperation {
+
+  @Override
+  public Family getFamily() {
+    return Family.INTERSECTION;
+  }
+
+  /**
+   * Gets the result of this operation as an ordered CompactSketch on the Java heap.
+   * This does not disturb the underlying data structure of this intersection.
+   * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an
+   * exception will be thrown. This is because a virgin Intersection object represents the
+   * Universal Set, which has an infinite number of values.
+   * @return the result of this operation as an ordered CompactSketch on the Java heap
+   */
+  public CompactSketch getResult() {
+    return getResult(true, null);
+  }
+
+  /**
+   * Gets the result of this operation as a CompactSketch in the given dstSeg.
+   * This does not disturb the underlying data structure of this intersection.
+   * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an
+   * exception will be thrown. This is because a virgin Intersection object represents the
+   * Universal Set, which has an infinite number of values.
+   *
+   * Note that presenting an intersection with an empty sketch sets the internal
+   * state of the intersection to empty = true, and current count = 0. This is consistent with
+   * the mathematical definition of the intersection of any set with the empty set is
+   * always empty.
+   *
+   * Presenting an intersection with a null argument will throw an exception.
+   *
+   * @param dstOrdered
+   * See Destination Ordered
+   *
+   * @param dstSeg the destination MemorySegment.
+   *
+   * @return the result of this operation as a CompactSketch stored in the given dstSeg,
+   * which can be either on or off-heap.
+   */
+  public abstract CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg);
+
+  /**
+   * Returns true if there is a valid intersection result available
+   * @return true if there is a valid intersection result available
+   */
+  public abstract boolean hasResult();
+
+  /**
+   * Resets this Intersection for stateful operations only.
+   * The seed remains intact, otherwise reverts to
+   * the Universal Set: theta = 1.0, no retained data and empty = false.
+   */
+  public abstract void reset();
+
+  /**
+   * Serialize this intersection to a byte array form.
+   * @return byte array of this intersection
+   */
+  public abstract byte[] toByteArray();
+
+  /**
+   * Intersect the given sketch with the internal state.
+   * This method can be repeatedly called.
+   * A null sketch will throw an exception; an empty sketch makes the internal state empty.
+   * Theta will become the minimum of thetas seen so far.
+   * @param sketchIn the given sketch, must not be null
+   */
+  public abstract void intersect(final Sketch sketchIn);
+
+  /**
+   * Perform intersect set operation on the two given sketch arguments and return the result as an
+   * ordered CompactSketch on the heap.
+   * @param a The first sketch argument
+   * @param b The second sketch argument
+   * @return an ordered CompactSketch on the heap
+   */
+  public CompactSketch intersect(final Sketch a, final Sketch b) {
+    return intersect(a, b, true, null);
+  }
+
+  /**
+   * Perform intersect set operation on the two given sketches and return the result as a
+   * CompactSketch.
+   * @param a The first sketch argument
+   * @param b The second sketch argument
+   * @param dstOrdered
+   * See Destination Ordered.
+   * @param dstSeg the destination MemorySegment.
+   * @return the result as a CompactSketch.
+   */
+  public abstract CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered,
+      final MemorySegment dstSeg);
+
+  // Restricted
+
+  /**
+   * Returns the maximum lgArrLongs given the capacity of the MemorySegment.
+   * @param dstSeg the given MemorySegment
+   * @return the maximum lgArrLongs given the capacity of the MemorySegment
+   */
+  protected static int getMaxLgArrLongs(final MemorySegment dstSeg) {
+    final int preBytes = CONST_PREAMBLE_LONGS << 3;
+    final long dataBytes = Math.min(dstSeg.byteSize() - preBytes, Integer.MAX_VALUE); //clamp: segments can exceed 2GB
+    return Integer.numberOfTrailingZeros(floorPowerOf2((int) dataBytes) >>> 3);
+  }
+
+  protected static void checkMinSizeMemory(final MemorySegment seg) {
+    final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << ThetaUtil.MIN_LG_ARR_LONGS);//280
+    final long cap = seg.byteSize();
+    if (cap < minBytes) {
+      throw new SketchesArgumentException(
+          "MemorySegment must be at least " + minBytes + " bytes. Actual capacity: " + cap);
+    }
+  }
+
+  /**
+   * Compact first 2^lgArrLongs of given array
+   * @param srcCache anything
+   * @param lgArrLongs The correct
+   * lgArrLongs.
+   * @param curCount must be correct
+   * @param thetaLong The correct
+   * thetaLong.
+   * @param dstOrdered true if output array must be sorted
+   * @return the compacted array
+   */ //Only used in IntersectionImpl & Test
+  static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs,
+      final int curCount, final long thetaLong, final boolean dstOrdered) {
+    if (curCount == 0) {
+      return new long[0];
+    }
+    final long[] cacheOut = new long[curCount];
+    final int len = 1 << lgArrLongs;
+    int j = 0;
+    for (int i = 0; i < len; i++) {
+      final long v = srcCache[i];
+      if (v <= 0L || v >= thetaLong ) { continue; } //skip empty slots and values not below theta
+      cacheOut[j++] = v;
+    }
+    assert curCount == j;
+    if (dstOrdered) {
+      Arrays.sort(cacheOut);
+    }
+    return cacheOut;
+  }
+
+  protected static void memChecks(final MemorySegment srcSeg) {
+    //Get Preamble
+    //Note: Intersection does not use lgNomLongs (or k), per se.
+    //seedHash loaded and checked in private constructor
+    final int preLongs = extractPreLongs(srcSeg);
+    final int serVer = extractSerVer(srcSeg);
+    final int famID = extractFamilyID(srcSeg);
+    final boolean empty = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0;
+    final int curCount = extractCurCount(srcSeg);
+    //Checks
+    if (preLongs != CONST_PREAMBLE_LONGS) {
+      throw new SketchesArgumentException(
+          "MemorySegment PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs);
+    }
+    if (serVer != SER_VER) {
+      throw new SketchesArgumentException("Serialization Version must equal " + SER_VER);
+    }
+    Family.INTERSECTION.checkFamilyID(famID);
+    if (empty) {
+      if (curCount != 0) {
+        throw new SketchesArgumentException(
+            "srcMem empty state inconsistent with curCount: " + empty + "," + curCount);
+      }
+      //empty = true AND curCount_ = 0: OK
+    } //else empty = false, curCount could be anything
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
new file mode 100644
index 000000000..ba7bcd8ac
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
@@ -0,0 +1,561 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.Math.min;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+import static org.apache.datasketches.common.Util.clearBits;
+import static org.apache.datasketches.common.Util.setBits;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT;
+import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT;
+import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertP;
+import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.setEmpty;
+import static org.apache.datasketches.thetacommon.HashOperations.continueCondition;
+import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly;
+import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemory;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
+import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesReadOnlyException;
+import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Intersection operation for Theta Sketches.
+ *
+ * This implementation uses data either on-heap or off-heap in a given MemorySegment
+ * that is owned and managed by the caller.
+ * The off-heap MemorySegment, which if managed properly, will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+class IntersectionImpl extends Intersection {
+  protected final short seedHash_;
+  protected final boolean readOnly_; //True if this sketch is to be treated as read only
+  protected final MemorySegment wseg_; //null when this intersection is purely on-heap
+  protected final int maxLgArrLongs_; //only used with MemorySegment, not serialized
+
+  //Note: Intersection does not use lgNomLongs or k, per se.
+  protected int lgArrLongs_; //current size of hash table
+  protected int curCount_; //curCount of HT, if < 0 means Universal Set (US) is true
+  protected long thetaLong_;
+  protected boolean empty_; //A virgin intersection represents the Universal Set, so empty is FALSE!
+  protected long[] hashTable_; //retained entries of the intersection, on-heap only.
+
+  /**
+   * Constructor: Sets the class finals and computes, sets and checks the seedHash.
+   * @param wseg Can be either a Source(e.g. wrap) or Destination (new Direct) MemorySegment.
+   * @param seed Used to validate incoming sketch arguments.
+   * @param dstMemFlag The given MemorySegment is a Destination (new Direct) MemorySegment.
+   * @param readOnly True if MemorySegment is to be treated as read only.
+   */
+  protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstMemFlag,
+      final boolean readOnly) {
+    readOnly_ = readOnly;
+    if (wseg != null) {
+      wseg_ = wseg;
+      if (dstMemFlag) { //Destination segment: compute & store seedHash, no seedhash checking
+        checkMinSizeMemory(wseg);
+        maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wseg) : 0; //Only Off Heap
+        seedHash_ = ThetaUtil.computeSeedHash(seed);
+        wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_);
+      } else { //Source segment: gets and stores the seedHash, checks the segment's seedHash against the seed
+        seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT);
+        ThetaUtil.checkSeedHashes(seedHash_, ThetaUtil.computeSeedHash(seed)); //check for seed hash conflict
+        maxLgArrLongs_ = 0;
+      }
+    } else { //compute & store seedHash
+      wseg_ = null;
+      maxLgArrLongs_ = 0;
+      seedHash_ = ThetaUtil.computeSeedHash(seed);
+    }
+  }
+
+  /**
+   * Factory: Construct a new Intersection target on the java heap.
+   * Called by SetOperationBuilder, test.
+   *
+   * @param seed See Seed
+   * @return a new IntersectionImpl on the Java heap
+   */
+  static IntersectionImpl initNewHeapInstance(final long seed) {
+    final boolean dstMemFlag = false;
+    final boolean readOnly = false;
+    final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly);
+    impl.hardReset();
+    return impl;
+  }
+
+  /**
+   * Factory: Construct a new Intersection target direct to the given destination MemorySegment.
+   * Called by SetOperationBuilder, test.
+   *
+   * @param seed See Seed
+   * @param dstSeg destination MemorySegment
+   * @return a new IntersectionImpl that may be off-heap
+   */
+  static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegment dstSeg) {
+    //Load Preamble
+    //Pre0
+    dstSeg.asSlice(0, CONST_PREAMBLE_LONGS << 3).fill((byte)0);
+    insertPreLongs(dstSeg, CONST_PREAMBLE_LONGS); //RF not used = 0
+    insertSerVer(dstSeg, SER_VER);
+    insertFamilyID(dstSeg, Family.INTERSECTION.getID());
+    //lgNomLongs not used by Intersection
+    //lgArrLongs set by hardReset
+    //flags are already 0: bigEndian = readOnly = compact = ordered = empty = false;
+    //seedHash loaded and checked in IntersectionImpl constructor
+    //Pre1
+    //CurCount set by hardReset
+    insertP(dstSeg, (float) 1.0); //not used by intersection
+    //Pre2
+    //thetaLong set by hardReset
+
+    //Initialize
+    final boolean dstMemFlag = true;
+    final boolean readOnly = false;
+    final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstMemFlag, readOnly);
+    impl.hardReset();
+    return impl;
+  }
+
+  /**
+   * Factory: Heapify an intersection target from a MemorySegment image containing data.
+   * @param srcSeg The source MemorySegment object.
+   * @param seed See seed
+   * @return a IntersectionImpl instance on the Java heap
+   */
+  static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long seed) {
+    final boolean dstMemFlag = false;
+    final boolean readOnly = false;
+    final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly);
+    memChecks(srcSeg);
+
+    //Initialize
+    impl.lgArrLongs_ = extractLgArrLongs(srcSeg);
+    impl.curCount_ = extractCurCount(srcSeg);
+    impl.thetaLong_ = extractThetaLong(srcSeg);
+    impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0;
+    if (!impl.empty_) {
+      if (impl.curCount_ > 0) {
+        impl.hashTable_ = new long[1 << impl.lgArrLongs_];
+        MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_);
+      }
+    }
+    return impl;
+  }
+
+  /**
+   * Factory: Wrap an Intersection target around the given source MemorySegment containing
+   * intersection data.
+   * @param srcSeg The source MemorySegment image.
+   * @param seed See seed
+   * @param readOnly True if MemorySegment is to be treated as read only
+   * @return a IntersectionImpl that wraps a source MemorySegment that contains an Intersection image
+   */
+  static IntersectionImpl wrapInstance(
+      final MemorySegment srcSeg,
+      final long seed,
+      final boolean readOnly) {
+    final boolean dstMemFlag = false;
+    final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstMemFlag, readOnly);
+    memChecks(srcSeg);
+    impl.lgArrLongs_ = extractLgArrLongs(srcSeg);
+    impl.curCount_ = extractCurCount(srcSeg);
+    impl.thetaLong_ = extractThetaLong(srcSeg);
+    impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0;
+    return impl;
+  }
+
+  @Override
+  public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered,
+      final MemorySegment dstSeg) {
+    if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); }
+    hardReset();
+    intersect(a);
+    intersect(b);
+    final CompactSketch csk = getResult(dstOrdered, dstSeg);
+    hardReset();
+    return csk;
+  }
+
+  @Override
+  public void intersect(final Sketch sketchIn) {
+    if (sketchIn == null) {
+      throw new SketchesArgumentException("Intersection argument must not be null.");
+    }
+    if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); }
+    if (empty_ || sketchIn.isEmpty()) { //empty rule
+      //Because of the def of null above and the Empty Rule (which is OR), empty_ must be true.
+      //Whatever the current internal state, we make our local empty.
+      resetToEmpty();
+      return;
+    }
+    ThetaUtil.checkSeedHashes(seedHash_, sketchIn.getSeedHash());
+    //Set minTheta
+    thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule
+    empty_ = false;
+    if (wseg_ != null) {
+      insertThetaLong(wseg_, thetaLong_);
+      clearEmpty(wseg_); //false
+    }
+
+    // The truth table for the following state machine. MinTheta is set above.
+    // Incoming sketch is not null and not empty, but could have 0 count and Theta < 1.0
+    // Case curCount sketchInEntries | Actions
+    // 1 <0 0 | First intersect, set curCount = 0; HT = null; minTh; exit
+    // 2 0 0 | set curCount = 0; HT = null; minTh; exit
+    // 3 >0 0 | set curCount = 0; HT = null; minTh; exit
+    // 4 | Not used
+    // 5 <0 >0 | First intersect, clone SketchIn; exit
+    // 6 0 >0 | set curCount = 0; HT = null; minTh; exit
+    // 7 >0 >0 | Perform full intersect
+    final int sketchInEntries = sketchIn.getRetainedEntries(true);
+
+    //states 1,2,3,6
+    if (curCount_ == 0 || sketchInEntries == 0) {
+      curCount_ = 0;
+      if (wseg_ != null) { insertCurCount(wseg_, 0); }
+      hashTable_ = null; //No need for a HT. Don't bother clearing the segment if valid
+    } //end of states 1,2,3,6
+
+    // state 5
+    else if (curCount_ < 0 && sketchInEntries > 0) {
+      curCount_ = sketchIn.getRetainedEntries(true);
+      final int requiredLgArrLongs = minLgHashTableSize(curCount_, ThetaUtil.REBUILD_THRESHOLD);
+      final int priorLgArrLongs = lgArrLongs_; //prior only used in error message
+      lgArrLongs_ = requiredLgArrLongs;
+
+      if (wseg_ != null) { //Off heap, check if current dstSeg is large enough
+        insertCurCount(wseg_, curCount_);
+        insertLgArrLongs(wseg_, lgArrLongs_);
+        if (requiredLgArrLongs <= maxLgArrLongs_) {
+          wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0);
+        }
+        else { //not enough space in dstSeg
+          final int requiredBytes = (8 << requiredLgArrLongs) + 24;
+          final int givenBytes = (8 << priorLgArrLongs) + 24;
+          throw new SketchesArgumentException(
+              "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes);
+        }
+      }
+      else { //On the heap, allocate a HT
+        hashTable_ = new long[1 << lgArrLongs_];
+      }
+      moveDataToTgt(sketchIn);
+    } //end of state 5
+
+    //state 7
+    else if (curCount_ > 0 && sketchInEntries > 0) {
+      //Sets resulting hashTable, curCount and adjusts lgArrLongs
+      performIntersect(sketchIn);
+    } //end of state 7
+
+    else {
+      assert false : "Should not happen";
+    }
+  }
+
+  @Override
+  public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) {
+    if (curCount_ < 0) {
+      throw new SketchesStateException(
+          "Calling getResult() with no intervening intersections would represent the infinite set, "
+          + "which is not a legal result.");
+    }
+    long[] compactCache;
+    final boolean srcOrdered, srcCompact;
+    if (curCount_ == 0) {
+      compactCache = new long[0];
+      srcCompact = true;
+      srcOrdered = false; //hashTable, even though empty
+      return CompactOperations.componentsToCompact(
+          thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered,
+          dstSeg, compactCache);
+    }
+    //else curCount > 0
+    final long[] hashTable;
+    if (wseg_ != null) { //the table lives in this operation's own segment, never in dstSeg (which may be null)
+      final int htLen = 1 << lgArrLongs_;
+      hashTable = new long[htLen];
+      MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
+    } else {
+      hashTable = hashTable_;
+    }
+    compactCache = compactCachePart(hashTable, lgArrLongs_, curCount_, thetaLong_, dstOrdered);
+    srcCompact = true;
+    srcOrdered = dstOrdered;
+    return CompactOperations.componentsToCompact(
+        thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered,
+        dstSeg, compactCache);
+  }
+
+  @Override
+  public boolean hasMemorySegment() {
+    return wseg_ != null;
+  }
+
+  @Override
+  public boolean hasResult() {
+    return hasMemorySegment() ? wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0;
+  }
+
+  @Override
+  public boolean isDirect() {
+    return hasMemorySegment() && wseg_.isNative();
+  }
+
+  @Override
+  public void reset() {
+    hardReset();
+  }
+
+  @Override
+  public byte[] toByteArray() {
+    final int preBytes = CONST_PREAMBLE_LONGS << 3;
+    final int dataBytes = curCount_ > 0 ? 8 << lgArrLongs_ : 0;
+    final byte[] byteArrOut = new byte[preBytes + dataBytes];
+    if (wseg_ != null) {
+      MemorySegment.copy(wseg_, JAVA_BYTE, 0, byteArrOut, 0, preBytes + dataBytes);
+    }
+    else {
+      final MemorySegment segOut = MemorySegment.ofArray(byteArrOut);
+
+      //preamble
+      segOut.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0
+      segOut.set(JAVA_BYTE, SER_VER_BYTE, (byte) SER_VER);
+      segOut.set(JAVA_BYTE, FAMILY_BYTE, (byte) Family.INTERSECTION.getID());
+      segOut.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 0); //not used
+      segOut.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs_);
+      if (empty_) { setBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); }
+      else { clearBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); }
+      segOut.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_);
+      segOut.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount_);
+      segOut.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, (float) 1.0);
+      segOut.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+
+      //data
+      if (curCount_ > 0) {
+        MemorySegment.copy(hashTable_, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, 1 << lgArrLongs_);
+      }
+    }
+    return byteArrOut;
+  }
+
+  //restricted
+
+  /**
+   * Gets the number of retained entries from this operation. If negative, it is interpreted
+   * as the infinite Universal Set.
+   */
+  @Override
+  int getRetainedEntries() {
+    return curCount_;
+  }
+
+  @Override
+  boolean isEmpty() {
+    return empty_;
+  }
+
+  @Override
+  long[] getCache() {
+    if (wseg_ == null) {
+      return hashTable_ != null ? hashTable_ : new long[0];
+    }
+    //Direct: return a heap copy of the table stored after the preamble
+    final int arrLongs = 1 << lgArrLongs_;
+    final long[] outArr = new long[arrLongs];
+    MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs);
+    return outArr;
+  }
+
+  @Override
+  short getSeedHash() {
+    return seedHash_;
+  }
+
+  @Override
+  long getThetaLong() {
+    return thetaLong_;
+  }
+
+  private void performIntersect(final Sketch sketchIn) {
+    // curCount and input data are nonzero, match against HT
+    assert curCount_ > 0 && !empty_;
+    final long[] hashTable;
+    if (wseg_ != null) {
+      final int htLen = 1 << lgArrLongs_;
+      hashTable = new long[htLen];
+      MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
+    } else {
+      hashTable = hashTable_;
+    }
+    //allocate space for matching
+    final long[] matchSet = new long[ min(curCount_, sketchIn.getRetainedEntries(true)) ];
+
+    int matchSetCount = 0;
+    final boolean isOrdered = sketchIn.isOrdered();
+    final HashIterator it = sketchIn.iterator();
+    while (it.next()) {
+      final long hashIn = it.get();
+      if (hashIn < thetaLong_) {
+        final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn);
+        if (foundIdx != -1) {
+          matchSet[matchSetCount++] = hashIn;
+        }
+      } else {
+        if (isOrdered) { break; } // early stop
+      }
+    }
+    //reduce effective array size to minimum
+    curCount_ = matchSetCount;
+    lgArrLongs_ = minLgHashTableSize(matchSetCount, ThetaUtil.REBUILD_THRESHOLD);
+    if (wseg_ != null) {
+      insertCurCount(wseg_, matchSetCount);
+      insertLgArrLongs(wseg_, lgArrLongs_);
+      wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); //clear for rebuild
+    } else {
+      Arrays.fill(hashTable_, 0, 1 << lgArrLongs_, 0L); //clear for rebuild
+    }
+
+    if (curCount_ > 0) {
+      moveDataToTgt(matchSet, matchSetCount); //move matchSet to target
+    } else {
+      if (thetaLong_ == Long.MAX_VALUE) {
+        empty_ = true;
+      }
+    }
+  }
+
+  private void moveDataToTgt(final long[] arr, final int count) {
+    final int arrLongsIn = arr.length;
+    int tmpCnt = 0;
+    if (wseg_ != null) { //Off Heap puts directly into the segment
+      final int preBytes = CONST_PREAMBLE_LONGS << 3;
+      final int lgArrLongs = lgArrLongs_;
+      final long thetaLong = thetaLong_;
+      for (int i = 0; i < arrLongsIn; i++ ) {
+        final long hashIn = arr[i];
+        if (continueCondition(thetaLong, hashIn)) { continue; }
+        hashInsertOnlyMemory(wseg_, lgArrLongs, hashIn, preBytes);
+        tmpCnt++;
+      }
+    } else { //On Heap. Assumes HT exists and is large enough
+      for (int i = 0; i < arrLongsIn; i++ ) {
+        final long hashIn = arr[i];
+        if (continueCondition(thetaLong_, hashIn)) { continue; }
+        hashInsertOnly(hashTable_, lgArrLongs_, hashIn);
+        tmpCnt++;
+      }
+    }
+    assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count;
+  }
+
+  private void moveDataToTgt(final Sketch sketch) {
+    final int count = sketch.getRetainedEntries();
+    int tmpCnt = 0;
+    if (wseg_ != null) { //Off Heap puts directly into the segment
+      final int preBytes = CONST_PREAMBLE_LONGS << 3;
+      final int lgArrLongs = lgArrLongs_;
+      final long thetaLong = thetaLong_;
+      final HashIterator it = sketch.iterator();
+      while (it.next()) {
+        final long hash = it.get();
+        if (continueCondition(thetaLong, hash)) { continue; }
+        hashInsertOnlyMemory(wseg_, lgArrLongs, hash, preBytes);
+        tmpCnt++;
+      }
+    } else { //On Heap. Assumes HT exists and is large enough
+      final HashIterator it = sketch.iterator();
+      while (it.next()) {
+        final long hash = it.get();
+        if (continueCondition(thetaLong_, hash)) { continue; }
+        hashInsertOnly(hashTable_, lgArrLongs_, hash);
+        tmpCnt++;
+      }
+    }
+    assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count;
+  }
+
+  private void hardReset() {
+    resetCommon();
+    if (wseg_ != null) {
+      insertCurCount(wseg_, -1); //Universal Set
+      clearEmpty(wseg_); //false
+    }
+    curCount_ = -1; //Universal Set
+    empty_ = false;
+  }
+
+  private void resetToEmpty() {
+    resetCommon();
+    if (wseg_ != null) {
+      insertCurCount(wseg_, 0);
+      setEmpty(wseg_); //true
+    }
+    curCount_ = 0;
+    empty_ = true;
+  }
+
+  private void resetCommon() {
+    if (wseg_ != null) {
+      if (readOnly_) { throw new SketchesReadOnlyException(); }
+      wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS).fill((byte)0);
+      insertLgArrLongs(wseg_, ThetaUtil.MIN_LG_ARR_LONGS);
+      insertThetaLong(wseg_, Long.MAX_VALUE);
+    }
+    lgArrLongs_ = ThetaUtil.MIN_LG_ARR_LONGS;
+    thetaLong_ = Long.MAX_VALUE;
+    hashTable_ = null;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
new file mode 100644
index 000000000..96ccb41e8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits;
+
+import java.lang.foreign.MemorySegment;
+
+/*
+ * This is to uncompress serial version 4 sketch incrementally
+ */
+class MemoryCompactCompressedHashIterator implements HashIterator {
+ private MemorySegment seg;
+ private int offset;
+ private int entryBits;
+ private int numEntries;
+ private int index;
+ private long previous;
+ private int offsetBits;
+ private long[] buffer;
+ private byte[] bytes;
+ private boolean isBlockMode;
+ private boolean isFirstUnpack1;
+
+ MemoryCompactCompressedHashIterator(
+ final MemorySegment srcSeg,
+ final int offset,
+ final int entryBits,
+ final int numEntries
+ ) {
+ this.seg = srcSeg;
+ this.offset = offset;
+ this.entryBits = entryBits;
+ this.numEntries = numEntries;
+ index = -1;
+ previous = 0;
+ offsetBits = 0;
+ buffer = new long[8];
+ bytes = new byte[entryBits];
+ isBlockMode = numEntries >= 8;
+ isFirstUnpack1 = true;
+ }
+
+ @Override
+ public long get() {
+ return buffer[index & 7];
+ }
+
+ @Override
+ public boolean next() {
+ if (++index == numEntries) { return false; }
+ if (isBlockMode) {
+ if ((index & 7) == 0) {
+ if (numEntries - index >= 8) {
+ unpack8();
+ } else {
+ isBlockMode = false;
+ unpack1();
+ }
+ }
+ } else {
+ unpack1();
+ }
+ return true;
+ }
+
+ private void unpack1() {
+ if (isFirstUnpack1) {
+ MemorySegment.copy(seg, JAVA_BYTE, offset, bytes, 0, wholeBytesToHoldBits((numEntries - index) * entryBits));
+ offset = 0;
+ isFirstUnpack1 = false;
+ }
+ final int i = index & 7;
+ BitPacking.unpackBits(buffer, i, entryBits, bytes, offset, offsetBits);
+ offset += (offsetBits + entryBits) >>> 3;
+ offsetBits = (offsetBits + entryBits) & 7;
+ buffer[i] += previous;
+ previous = buffer[i];
+ }
+
+ private void unpack8() {
+ MemorySegment.copy(seg, JAVA_BYTE, offset, bytes, 0, entryBits);
+ BitPacking.unpackBitsBlock8(buffer, 0, bytes, 0, entryBits);
+ offset += entryBits;
+ for (int i = 0; i < 8; i++) {
+ buffer[i] += previous;
+ previous = buffer[i];
+ }
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java
new file mode 100644
index 000000000..eb2137afd
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * @author Lee Rhodes
+ */
+class MemoryHashIterator implements HashIterator {
+ private MemorySegment seg;
+ private int arrLongs;
+ private long thetaLong;
+ private long offsetBytes;
+ private int index;
+ private long hash;
+
+ MemoryHashIterator(final MemorySegment srcSeg, final int arrLongs, final long thetaLong) {
+ this.seg = srcSeg;
+ this.arrLongs = arrLongs;
+ this.thetaLong = thetaLong;
+ offsetBytes = PreambleUtil.extractPreLongs(srcSeg) << 3;
+ index = -1;
+ hash = 0;
+ }
+
+ @Override
+ public long get() {
+ return hash;
+ }
+
+ @Override
+ public boolean next() {
+ while (++index < arrLongs) {
+ hash = seg.get(JAVA_LONG_UNALIGNED, offsetBytes + (index << 3));
+ if ((hash != 0) && (hash < thetaLong)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
new file mode 100644
index 000000000..55035a456
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
@@ -0,0 +1,533 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT;
+import static java.lang.foreign.ValueLayout.JAVA_INT;
+import static java.lang.foreign.ValueLayout.JAVA_LONG;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT;
+import static org.apache.datasketches.common.Util.LS;
+import static org.apache.datasketches.common.Util.zeroPad;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+//@formatter:off
+
+/**
+ * This class defines the preamble data structure and provides basic utilities for some of the key
+ * fields.
+ *
+ * The intent of the design of this class was to isolate the detailed knowledge of the bit and
+ * byte layout of the serialized form of the sketches derived from the Sketch class into one place.
+ * This allows the possibility of the introduction of different serialization
+ * schemes with minimal impact on the rest of the library.
+ *
+ *
+ * MAP: Low significance bytes of this long data structure are on the right. However, the
+ * multi-byte integers (int and long) are stored in native byte order. The
+ * byte values are treated as unsigned.
+ *
+ * An empty CompactSketch only requires 8 bytes.
+ * Flags: notSI, Ordered*, Compact, Empty*, ReadOnly, LE.
+ * (*) Earlier versions did not set these.
+ *
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | | | FamID | SerVer | PreLongs = 1 |
+ *
+ *
+ * A SingleItemSketch (extends CompactSketch) requires an 8 byte preamble plus a single
+ * hash item of 8 bytes. Flags: SingleItem*, Ordered, Compact, notEmpty, ReadOnly, LE.
+ * (*) Earlier versions did not set these.
+ *
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | | | FamID | SerVer | PreLongs = 1 |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 ||---------------------------Single long hash----------------------------------------|
+ *
+ *
+ * An exact (non-estimating) CompactSketch requires 16 bytes of preamble plus a compact array of
+ * longs.
+ *
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | | | FamID | SerVer | PreLongs = 2 |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ * 2 ||----------------------Start of Compact Long Array----------------------------------|
+ *
+ *
+ * An estimating CompactSketch requires 24 bytes of preamble plus a compact array of longs.
+ *
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | | | FamID | SerVer | PreLongs = 3 |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ * 2 ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ * || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 |
+ * 3 ||----------------------Start of Compact Long Array----------------------------------|
+ *
+ *
+ * The UpdateSketch and AlphaSketch require 24 bytes of preamble followed by a non-compact
+ * array of longs representing a hash table.
+ *
+ * The following table applies to both the Theta UpdateSketch and the Alpha Sketch
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | LgArr | lgNom | FamID | SerVer | RF, PreLongs = 3 |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ * 2 ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ * || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 |
+ * 3 ||----------------------Start of Hash Table of longs---------------------------------|
+ *
+ *
+ * Union objects require 32 bytes of preamble plus a non-compact array of longs representing a
+ * hash table.
+ *
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | LgArr | lgNom | FamID | SerVer | RF, PreLongs = 4 |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ * 2 ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ * || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 |
+ * 3 ||---------------------------UNION THETA LONG----------------------------------------|
+ *
+ * || 39 | 38 | 37 | 36 | 35 | 34 | 33 | 32 |
+ * 4 ||----------------------Start of Hash Table of longs---------------------------------|
+ *
+ *
+ *
+ * @author Lee Rhodes
+ */
+ final class PreambleUtil {
+
+   private PreambleUtil() {}
+
+   // ###### DO NOT MESS WITH THIS FROM HERE ...
+   // Preamble byte Addresses
+   static final int PREAMBLE_LONGS_BYTE = 0; //lower 6 bits in byte.
+   static final int LG_RESIZE_FACTOR_BIT = 6; //upper 2 bits in byte. Not used by compact, direct
+   static final int SER_VER_BYTE = 1;
+   static final int FAMILY_BYTE = 2; //SerVer1,2 was SKETCH_TYPE_BYTE
+   static final int LG_NOM_LONGS_BYTE = 3; //not used by compact
+   static final int LG_ARR_LONGS_BYTE = 4; //not used by compact
+   static final int FLAGS_BYTE = 5;
+   static final int SEED_HASH_SHORT = 6; //byte 6,7
+   static final int RETAINED_ENTRIES_INT = 8; //8 byte aligned
+   static final int P_FLOAT = 12; //4 byte aligned, not used by compact
+   static final int THETA_LONG = 16; //8-byte aligned
+   static final int UNION_THETA_LONG = 24; //8-byte aligned, only used by Union
+
+   // flag bit masks
+   static final int BIG_ENDIAN_FLAG_MASK = 1; //SerVer 1, 2, 3
+   static final int READ_ONLY_FLAG_MASK = 2; //Set but not read. Reserved. SerVer 1, 2, 3
+   static final int EMPTY_FLAG_MASK = 4; //SerVer 2, 3
+   static final int COMPACT_FLAG_MASK = 8; //SerVer 2 was NO_REBUILD_FLAG_MASK, 3
+   static final int ORDERED_FLAG_MASK = 16;//SerVer 2 was UNORDERED_FLAG_MASK, 3
+   static final int SINGLEITEM_FLAG_MASK = 32;//SerVer 3
+   //The last 2 bits of the flags byte are reserved and assumed to be zero, for now.
+
+   //Backward compatibility: SerVer1 preamble always 3 longs, SerVer2 preamble: 1, 2, 3 longs
+   // SKETCH_TYPE_BYTE 2 //SerVer1, SerVer2
+   // V1, V2 types: Alpha = 1, QuickSelect = 2, SetSketch = 3; V3 only: Buffered QS = 4
+   static final int LG_RESIZE_RATIO_BYTE_V1 = 5; //used by SerVer 1
+   static final int FLAGS_BYTE_V1 = 6; //used by SerVer 1
+
+   //Other constants
+   static final int SER_VER = 3;
+
+   // serial version 4 compressed ordered sketch, not empty, not single item
+   static final int ENTRY_BITS_BYTE_V4 = 3; // number of bits packed in deltas between hashes
+   static final int NUM_ENTRIES_BYTES_BYTE_V4 = 4; // number of bytes used for the number of entries
+   static final int THETA_LONG_V4 = 8; //8-byte aligned
+
+   static final boolean NATIVE_ORDER_IS_BIG_ENDIAN =
+       (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN);
+
+   //Multi-byte fields are accessed with 1-byte-aligned layouts. The aligned layouts are rejected
+   // by segments that wrap a byte[], which can only guarantee 1-byte alignment.
+   private static final java.lang.foreign.ValueLayout.OfShort SHORT_UNALIGNED = JAVA_SHORT.withByteAlignment(1);
+   private static final java.lang.foreign.ValueLayout.OfInt INT_UNALIGNED = JAVA_INT.withByteAlignment(1);
+   private static final java.lang.foreign.ValueLayout.OfFloat FLOAT_UNALIGNED = JAVA_FLOAT.withByteAlignment(1);
+   private static final java.lang.foreign.ValueLayout.OfLong LONG_UNALIGNED = JAVA_LONG.withByteAlignment(1);
+
+   /**
+    * Computes the number of bytes required for an updatable sketch using a hash-table cache.
+    * This does not apply for compact sketches.
+    * @param lgArrLongs log2(current hash-table size)
+    * @param preambleLongs current preamble size
+    * @return the size in bytes
+    */
+   static final int getMemBytes(final int lgArrLongs, final int preambleLongs) {
+     return (8 << lgArrLongs) + (preambleLongs << 3);
+   }
+
+   // STRINGS
+
+   /**
+    * Returns a human readable string summary of the preamble state of the given byte array.
+    * Used primarily in testing.
+    *
+    * @param byteArr the given byte array.
+    * @return the summary preamble string.
+    */
+   static String preambleToString(final byte[] byteArr) {
+     return preambleToString(MemorySegment.ofArray(byteArr));
+   }
+
+   /**
+    * Returns a human readable string summary of the preamble state of the given MemorySegment.
+    * Note: other than making sure that the given MemorySegment size is large
+    * enough for just the preamble, this does not do much value checking of the contents of the
+    * preamble as this is primarily a tool for debugging the preamble visually.
+    *
+    * @param seg the given MemorySegment.
+    * @return the summary preamble string.
+    */
+   static String preambleToString(final MemorySegment seg) {
+     final int preLongs = getAndCheckPreLongs(seg);
+     final int rfId = extractLgResizeFactor(seg);
+     final ResizeFactor rf = ResizeFactor.getRF(rfId);
+     final int serVer = extractSerVer(seg);
+     final int familyId = extractFamilyID(seg);
+     final Family family = Family.idToFamily(familyId);
+     final int lgNomLongs = extractLgNomLongs(seg);
+     final int lgArrLongs = extractLgArrLongs(seg);
+
+     //Flags
+     final int flags = extractFlags(seg);
+     final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", "
+         + zeroPad(Integer.toBinaryString(flags), 8);
+     final String nativeOrder = ByteOrder.nativeOrder().toString();
+     final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
+     final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
+     final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
+     final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
+     final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0;
+     final boolean singleItem = (flags & SINGLEITEM_FLAG_MASK) > 0; //!empty && (preLongs == 1);
+
+     final int seedHash = extractSeedHash(seg);
+
+     //assumes preLongs == 1; empty or singleItem
+     int curCount = singleItem ? 1 : 0;
+     float p = (float) 1.0; //preLongs 1 or 2
+     long thetaLong = Long.MAX_VALUE; //preLongs 1 or 2
+     long thetaULong = thetaLong; //preLongs 1, 2 or 3
+
+     if (preLongs == 2) { //exact (non-estimating) CompactSketch
+       curCount = extractCurCount(seg);
+       p = extractP(seg);
+     }
+     else if (preLongs == 3) { //Update Sketch
+       curCount = extractCurCount(seg);
+       p = extractP(seg);
+       thetaLong = extractThetaLong(seg);
+       thetaULong = thetaLong;
+     }
+     else if (preLongs == 4) { //Union
+       curCount = extractCurCount(seg);
+       p = extractP(seg);
+       thetaLong = extractThetaLong(seg);
+       thetaULong = extractUnionThetaLong(seg);
+     }
+     //else the same as an empty sketch or singleItem
+
+     final double thetaDbl = thetaLong / Util.LONG_MAX_VALUE_AS_DOUBLE;
+     final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
+     final double thetaUDbl = thetaULong / Util.LONG_MAX_VALUE_AS_DOUBLE;
+     final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16);
+
+     final StringBuilder sb = new StringBuilder();
+     sb.append(LS);
+     sb.append("### SKETCH PREAMBLE SUMMARY:").append(LS);
+     sb.append("Native Byte Order : ").append(nativeOrder).append(LS);
+     sb.append("Byte 0: Preamble Longs : ").append(preLongs).append(LS);
+     sb.append("Byte 0: ResizeFactor : ").append(rfId + ", " + rf.toString()).append(LS);
+     sb.append("Byte 1: Serialization Version: ").append(serVer).append(LS);
+     sb.append("Byte 2: Family : ").append(familyId + ", " + family.toString()).append(LS);
+     sb.append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS);
+     sb.append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS);
+     sb.append("Byte 5: Flags Field : ").append(flagsStr).append(LS);
+     sb.append(" Bit Flag Name : State:").append(LS);
+     sb.append(" 0 BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS);
+     sb.append(" 1 READ_ONLY : ").append(readOnly).append(LS);
+     sb.append(" 2 EMPTY : ").append(empty).append(LS);
+     sb.append(" 3 COMPACT : ").append(compact).append(LS);
+     sb.append(" 4 ORDERED : ").append(ordered).append(LS);
+     sb.append(" 5 SINGLE_ITEM : ").append(singleItem).append(LS);
+     sb.append("Bytes 6-7 : Seed Hash Hex : ").append(Integer.toHexString(seedHash)).append(LS);
+     if (preLongs == 1) {
+       sb.append(" --ABSENT FIELDS, ASSUMED:").append(LS);
+       sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+       sb.append("Bytes 12-15: P : ").append(p).append(LS);
+       sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+       sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+       sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+     }
+     else if (preLongs == 2) {
+       sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+       sb.append("Bytes 12-15: P : ").append(p).append(LS);
+       sb.append(" --ABSENT, ASSUMED:").append(LS);
+       sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+       sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+       sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+     }
+     else if (preLongs == 3) {
+       sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+       sb.append("Bytes 12-15: P : ").append(p).append(LS);
+       sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+       sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+       sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+     }
+     else { //preLongs == 4
+       sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+       sb.append("Bytes 12-15: P : ").append(p).append(LS);
+       sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+       sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+       sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+       sb.append("Bytes 24-31: ThetaU (double) : ").append(thetaUDbl).append(LS);
+       sb.append(" ThetaU (long) : ").append(thetaULong).append(LS);
+       sb.append(" ThetaU (long,hex): ").append(thetaUHex).append(LS);
+     }
+     sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS);
+     sb.append( "Data Bytes : ").append(curCount * 8).append(LS);
+     sb.append( "TOTAL Sketch Bytes : ").append((preLongs + curCount) * 8).append(LS);
+     sb.append( "TOTAL Capacity Bytes : ").append(seg.byteSize()).append(LS);
+     sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS);
+     return sb.toString();
+   }
+
+   //@formatter:on
+
+   static int extractPreLongs(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+   }
+
+   static int extractLgResizeFactor(final MemorySegment seg) {
+     return (seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3;
+   }
+
+   static int extractLgResizeRatioV1(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, LG_RESIZE_RATIO_BYTE_V1) & 0X3;
+   }
+
+   static int extractSerVer(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
+   }
+
+   static int extractFamilyID(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+   }
+
+   static int extractLgNomLongs(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF;
+   }
+
+   static int extractLgArrLongs(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+   }
+
+   static int extractFlags(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF;
+   }
+
+   static int extractFlagsV1(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, FLAGS_BYTE_V1) & 0XFF;
+   }
+
+   static int extractSeedHash(final MemorySegment seg) {
+     return seg.get(SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF;
+   }
+
+   static int extractCurCount(final MemorySegment seg) {
+     return seg.get(INT_UNALIGNED, RETAINED_ENTRIES_INT);
+   }
+
+   static float extractP(final MemorySegment seg) {
+     return seg.get(FLOAT_UNALIGNED, P_FLOAT);
+   }
+
+   static long extractThetaLong(final MemorySegment seg) {
+     return seg.get(LONG_UNALIGNED, THETA_LONG);
+   }
+
+   static long extractUnionThetaLong(final MemorySegment seg) {
+     return seg.get(LONG_UNALIGNED, UNION_THETA_LONG);
+   }
+
+   static int extractEntryBitsV4(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, ENTRY_BITS_BYTE_V4) & 0XFF;
+   }
+
+   static int extractNumEntriesBytesV4(final MemorySegment seg) {
+     return seg.get(JAVA_BYTE, NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF;
+   }
+
+   static long extractThetaLongV4(final MemorySegment seg) {
+     return seg.get(LONG_UNALIGNED, THETA_LONG_V4);
+   }
+
+   /**
+    * Sets PreLongs in the low 6 bits and sets LgRF in the upper 2 bits = 0.
+    * @param seg the target MemorySegment
+    * @param preLongs the given number of preamble longs
+    */
+   static void insertPreLongs(final MemorySegment seg, final int preLongs) {
+     seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F));
+   }
+
+   /**
+    * Sets the top 2 lgRF bits and does not affect the lower 6 bits (PreLongs).
+    * To work properly, this should be called after insertPreLongs().
+    * @param seg the target MemorySegment
+    * @param rf the given lgRF bits
+    */
+   static void insertLgResizeFactor(final MemorySegment seg, final int rf) {
+     final int curByte = seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0xFF;
+     final int mask = 3 << LG_RESIZE_FACTOR_BIT; //the two lgRF bits in their final position
+     final byte newByte = (byte) (((rf << LG_RESIZE_FACTOR_BIT) & mask) | (~mask & curByte));
+     seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, newByte);
+   }
+
+   static void insertSerVer(final MemorySegment seg, final int serVer) {
+     seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) serVer);
+   }
+
+   static void insertFamilyID(final MemorySegment seg, final int famId) {
+     seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) famId);
+   }
+
+   static void insertLgNomLongs(final MemorySegment seg, final int lgNomLongs) {
+     seg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) lgNomLongs);
+   }
+
+   static void insertLgArrLongs(final MemorySegment seg, final int lgArrLongs) {
+     seg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs);
+   }
+
+   static void insertFlags(final MemorySegment seg, final int flags) {
+     seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags);
+   }
+
+   static void insertSeedHash(final MemorySegment seg, final int seedHash) {
+     seg.set(SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash);
+   }
+
+   static void insertCurCount(final MemorySegment seg, final int curCount) {
+     seg.set(INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount);
+   }
+
+   static void insertP(final MemorySegment seg, final float p) {
+     seg.set(FLOAT_UNALIGNED, P_FLOAT, p);
+   }
+
+   static void insertThetaLong(final MemorySegment seg, final long thetaLong) {
+     seg.set(LONG_UNALIGNED, THETA_LONG, thetaLong);
+   }
+
+   static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) {
+     seg.set(LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong);
+   }
+
+   static void setEmpty(final MemorySegment seg) {
+     insertFlags(seg, extractFlags(seg) | EMPTY_FLAG_MASK);
+   }
+
+   static void clearEmpty(final MemorySegment seg) {
+     insertFlags(seg, extractFlags(seg) & ~EMPTY_FLAG_MASK);
+   }
+
+   /** Returns true if the empty flag bit is set in the flags byte of the given MemorySegment. */
+   static boolean isEmptyFlag(final MemorySegment seg) {
+     return ((extractFlags(seg) & EMPTY_FLAG_MASK) > 0);
+   }
+
+   /**
+    * Checks the MemorySegment for capacity to hold the preamble and returns the extracted preLongs.
+    * @param seg the given MemorySegment
+    * @return the extracted prelongs value.
+    */
+   static int getAndCheckPreLongs(final MemorySegment seg) {
+     final long cap = seg.byteSize();
+     if (cap < 8) { throwNotBigEnough(cap, 8); } //must hold at least the first preamble long
+     final int preLongs = extractPreLongs(seg);
+     final int required = Math.max(preLongs << 3, 8);
+     if (cap < required) { throwNotBigEnough(cap, required); }
+     return preLongs;
+   }
+
+   /** Checks the seed hash stored in seg against the hash of the given seed; returns the stored value. */
+   static final short checkMemorySeedHash(final MemorySegment seg, final long seed) {
+     final short seedHashMem = (short) extractSeedHash(seg);
+     ThetaUtil.checkSeedHashes(seedHashMem, ThetaUtil.computeSeedHash(seed)); //throws if bad seedHash
+     return seedHashMem;
+   }
+
+   private static void throwNotBigEnough(final long cap, final int required) {
+     throw new SketchesArgumentException(
+         "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap
+         + ", Required: " + required);
+   }
+
+   /** Returns the number of whole bytes needed to hold the given (non-negative) number of bits. */
+   static int wholeBytesToHoldBits(final int bits) {
+     return (bits >>> 3) + ((bits & 7) > 0 ? 1 : 0);
+   }
+ }
diff --git a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java
new file mode 100644
index 000000000..5e92447f9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong;
+import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.HashOperations;
+
+/**
+ * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments.
+ *
+ * NOTE: These operations copy data from the input MemorySegment into local arrays, perform the required operations on the
+ * arrays, and then copy the result to the destination MemorySegment. Attempting to perform these operations directly on the
+ * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, the bulk copies performed by the MemorySegments are
+ * vectorized at the machine level and are quite fast. Measurements reveal that this is a good tradeoff.
+ *
+ * @author Lee Rhodes
+ */
+ final class Rebuilder {
+
+   private Rebuilder() {}
+
+   /**
+    * Rebuild the hashTable in the given MemorySegment at its current size. Changes theta and thus count.
+    * This assumes a MemorySegment preamble of standard form with correct values of curCount and thetaLong.
+    * ThetaLong and curCount will change.
+    * Afterwards, caller must update local class members curCount and thetaLong from MemorySegment.
+    *
+    * @param seg the given MemorySegment
+    * @param preambleLongs size of preamble in longs
+    * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch
+    */
+   static final void quickSelectAndRebuild(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) {
+
+     //Copy data from input segment into local buffer array for QS algorithm
+     final int lgArrLongs = extractLgArrLongs(seg);
+     final int arrLongs = 1 << lgArrLongs;
+     final long[] tmpArr = new long[arrLongs];
+     final int preBytes = preambleLongs << 3;
+     MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, tmpArr, 0, arrLongs);
+
+     //Do the QuickSelect on a tmp arr to create new thetaLong
+     final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS
+     final long newThetaLong = selectExcludingZeros(tmpArr, extractCurCount(seg), pivot);
+     insertThetaLong(seg, newThetaLong); //UPDATE thetaLong
+
+     //Rebuild to clean up dirty data, update count
+     final long[] tgtArr = new long[arrLongs];
+     final int newCurCount = HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong);
+     insertCurCount(seg, newCurCount); //UPDATE curCount
+
+     //put the rebuilt array back into MemorySegment
+     MemorySegment.copy(tgtArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, arrLongs);
+   }
+
+   /**
+    * Moves me (the entire updatable sketch) to a new larger MemorySegment location and rebuilds the hash table.
+    * This assumes a MemorySegment preamble of standard form with the correct value of thetaLong.
+    * Afterwards, the caller must update the local MemorySegment reference, lgArrLongs
+    * and hashTableThreshold from dstSeg and free the source MemorySegment.
+    *
+    * @param srcSeg the source MemorySegment
+    * @param preambleLongs size of preamble in longs
+    * @param srcLgArrLongs size (log_base2) of source hash table
+    * @param dstSeg the destination MemorySegment, which may be garbage
+    * @param dstLgArrLongs the destination hash table target size
+    * @param thetaLong theta as a long
+    */
+   static final void moveAndResize(final MemorySegment srcSeg, final int preambleLongs,
+       final int srcLgArrLongs, final MemorySegment dstSeg, final int dstLgArrLongs, final long thetaLong) {
+
+     //Move Preamble to destination MemorySegment
+     final int preBytes = preambleLongs << 3;
+     MemorySegment.copy(srcSeg, 0, dstSeg, 0, preBytes);
+
+     //Bulk copy source Hash Table to local buffer array
+     final int srcHTLen = 1 << srcLgArrLongs;
+     final long[] srcHTArr = new long[srcHTLen];
+     MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen);
+
+     //Create destination buffer
+     final int dstHTLen = 1 << dstLgArrLongs;
+     final long[] dstHTArr = new long[dstHTLen];
+
+     //Rebuild hash table in destination buffer
+     HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong);
+
+     //Bulk copy to destination MemorySegment
+     MemorySegment.copy(dstHTArr, 0, dstSeg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen);
+     dstSeg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update lgArrLongs in dstSeg
+   }
+
+   /**
+    * Resizes existing hash array into a larger one within a single MemorySegment, assuming enough space.
+    * This assumes a preamble of standard form with the correct value of thetaLong.
+    * The lgArrLongs will change.
+    * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold
+    * from the given MemorySegment. This method returns nothing.
+    *
+    * @param seg the source and destination MemorySegment
+    * @param preambleLongs the size of the preamble in longs
+    * @param srcLgArrLongs the size of the source hash table
+    * @param tgtLgArrLongs the LgArrLongs value for the new hash table
+    */
+   static final void resize(final MemorySegment seg, final int preambleLongs,
+       final int srcLgArrLongs, final int tgtLgArrLongs) {
+
+     //Preamble stays in place
+     final int preBytes = preambleLongs << 3;
+
+     //Bulk copy source to on-heap buffer
+     final int srcHTLen = 1 << srcLgArrLongs; //current value
+     final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer
+     MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen);
+
+     //Create destination on-heap buffer
+     final int dstHTLen = 1 << tgtLgArrLongs;
+     final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer
+
+     //Rebuild hash table in destination buffer
+     HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, extractThetaLong(seg));
+
+     //Bulk copy to destination memory
+     MemorySegment.copy(dstHTArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen);
+     insertLgArrLongs(seg, tgtLgArrLongs); //update in mem
+   }
+
+   /**
+    * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be
+    * an integer value between zero and the given lgRF, inclusive;
+    * @param capBytes the current memory capacity in bytes
+    * @param lgArrLongs the current lg hash table size in longs
+    * @param preLongs the current preamble size in longs
+    * @param lgRF the configured lg Resize Factor
+    * @return the actual log2 Resize Factor that can be used to grow the hash table
+    */
+   static final int actLgResizeFactor(final long capBytes, final int lgArrLongs, final int preLongs,
+       final int lgRF) {
+     //clamp before narrowing so that a capacity of 16 GB or more cannot wrap to a negative int
+     final int capLongs = (int) Math.min(capBytes >>> 3, Integer.MAX_VALUE);
+     final int maxHTLongs = Util.floorPowerOf2(capLongs - preLongs);
+     final int lgFactor = Math.max(Integer.numberOfTrailingZeros(maxHTLongs) - lgArrLongs, 0);
+     return (lgFactor >= lgRF) ? lgRF : lgFactor;
+   }
+
+ }
diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java
new file mode 100644
index 000000000..5c959cde6
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.common.Family.idToFamily;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The parent API for all Set Operations
+ *
+ * @author Lee Rhodes
+ */
+public abstract class SetOperation {
+ static final int CONST_PREAMBLE_LONGS = 3;
+
+ /**
+ * Constructor
+ */
+ SetOperation() {}
+
+ /**
+ * Makes a new builder
+ *
+ * @return a new builder
+ */
+ public static final SetOperationBuilder builder() {
+ return new SetOperationBuilder();
+ }
+
+ /**
+ * Heapify takes the SetOperation image in MemorySegment and instantiates an on-heap
+ * SetOperation using the
+ * Default Update Seed.
+ * The resulting SetOperation will not retain any link to the source MemorySegment.
+ *
+ * Note: Only certain set operators during stateful operations can be serialized and thus
+ * heapified.
+ *
+ * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash.
+ * @return a Heap-based SetOperation from the given MemorySegment
+ */
+ public static SetOperation heapify(final MemorySegment srcSeg) {
+ return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify takes the SetOperation image in MemorySegment and instantiates an on-heap
+ * SetOperation using the given expectedSeed.
+ * The resulting SetOperation will not retain any link to the source MemorySegment.
+ *
+ * Note: Only certain set operators during stateful operations can be serialized and thus
+ * heapified.
+ *
+ * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return a Heap-based SetOperation from the given MemorySegment
+ */
+ public static SetOperation heapify(final MemorySegment srcSeg, final long expectedSeed) {
+ final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE);
+ final Family family = idToFamily(famID);
+ switch (family) {
+ case UNION : {
+ return UnionImpl.heapifyInstance(srcSeg, expectedSeed);
+ }
+ case INTERSECTION : {
+ return IntersectionImpl.heapifyInstance(srcSeg, expectedSeed);
+ }
+ default: {
+ throw new SketchesArgumentException("SetOperation cannot heapify family: "
+ + family.toString());
+ }
+ }
+ }
+
+ /**
+ * Wrap takes the SetOperation image in MemorySegment and refers to it directly.
+ * There is no data copying onto the java heap.
+ * This method assumes the
+ * Default Update Seed.
+ *
+ * Note: Only certain set operators during stateful operations can be serialized and thus
+ * wrapped.
+ *
+ * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash.
+ * @return a SetOperation backed by the given MemorySegment
+ */
+ public static SetOperation wrap(final MemorySegment srcSeg) {
+ return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap takes the SetOperation image in MemorySegment and refers to it directly.
+ * There is no data copying onto the java heap.
+ *
+ * Note: Only certain set operators during stateful operations can be serialized and thus
+ * wrapped.
+ *
+ * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return a SetOperation backed by the given MemorySegment
+ */
+ public static SetOperation wrap(final MemorySegment srcSeg, final long expectedSeed) {
+ final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE);
+ final Family family = idToFamily(famID);
+ final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE);
+ if (serVer != 3) {
+ throw new SketchesArgumentException("SerVer must be 3: " + serVer);
+ }
+ switch (family) {
+ case UNION : {
+ return UnionImpl.wrapInstance(srcSeg, expectedSeed);
+ }
+ case INTERSECTION : {
+ return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, true);
+ }
+ default:
+ throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString());
+ }
+ }
+
+ /**
+ * Returns the maximum required storage bytes given a nomEntries parameter for Union operations
+ * @param nomEntries Nominal Entries
+ * This will become the ceiling power of 2 if it is not.
+ * @return the maximum required storage bytes given a nomEntries parameter
+ */
+ public static int getMaxUnionBytes(final int nomEntries) {
+   final int hashTableBytes = ceilingPowerOf2(nomEntries) << 4; //16 bytes per nominal entry
+   return hashTableBytes + (Family.UNION.getMaxPreLongs() << 3); //plus 8 bytes per preamble long
+ }
+
+ /**
+ * Returns the maximum required storage bytes given a nomEntries parameter for Intersection
+ * operations
+ * @param nomEntries Nominal Entries
+ * This will become the ceiling power of 2 if it is not.
+ * @return the maximum required storage bytes given a nomEntries parameter
+ */
+ public static int getMaxIntersectionBytes(final int nomEntries) {
+   final int hashTableBytes = ceilingPowerOf2(nomEntries) << 4; //16 bytes per nominal entry
+   final int preambleBytes = Family.INTERSECTION.getMaxPreLongs() << 3; //8 bytes per preamble long
+   return hashTableBytes + preambleBytes;
+ }
+
+ /**
+ * Returns the maximum number of bytes for the returned CompactSketch, given the
+ * value of nomEntries of the first sketch A of AnotB.
+ * @param nomEntries Nominal Entries of sketch A. This will become the ceiling power of 2 if it is not.
+ * @return the maximum number of bytes.
+ */
+ public static int getMaxAnotBResultBytes(final int nomEntries) {
+ final int ceil = ceilingPowerOf2(nomEntries);
+ return 24 + (15 * ceil);
+ }
+
+ /**
+ * Gets the Family of this SetOperation
+ * @return the Family of this SetOperation
+ */
+ public abstract Family getFamily();
+
+ //restricted
+
+ /**
+ * Gets the hash array in compact form.
+ * This is only useful during stateful operations.
+ * This should never be made public.
+ * @return the hash array
+ */
+ abstract long[] getCache();
+
+ /**
+ * Gets the current count of retained entries.
+ * This is only useful during stateful operations.
+ * Intentionally not made public because behavior will be confusing to end user.
+ *
+ * @return Gets the current count of retained entries.
+ */
+ abstract int getRetainedEntries();
+
+ /**
+ * Returns the seedHash established during class construction.
+ * @return the seedHash.
+ */
+ abstract short getSeedHash();
+
+ /**
+ * Gets the current value of ThetaLong.
+ * Only useful during stateful operations.
+ * Intentionally not made public because behavior will be confusing to end user.
+ * @return the current value of ThetaLong.
+ */
+ abstract long getThetaLong();
+
+ /**
+ * Returns true if this object's internal data is backed by a MemorySegment,
+ * which may be on-heap or off-heap.
+ * @return true if this object's internal data is backed by a MemorySegment.
+ */
+ public boolean hasMemorySegment() { return false; }
+
+ /**
+ * Returns true if this object's internal data is backed by an off-heap MemorySegment.
+ * @return true if this object's internal data is backed by an off-heap MemorySegment.
+ */
+ public boolean isDirect() { return false; }
+
+ /**
+ * Returns true if this set operator is empty.
+ * Only useful during stateful operations.
+ * Intentionally not made public because behavior will be confusing to end user.
+ * @return true if this set operator is empty.
+ */
+ abstract boolean isEmpty();
+
+ /**
+ * Returns true if the two given MemorySegments refer to the same backing resource,
+ * which is either an off-heap memory location and size, or the same on-heap array object.
+ *
+ * This is a convenient delegate of
+ * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}
+ *
+ * @param seg1 The first given MemorySegment
+ * @param seg2 The second given MemorySegment
+ * @return true if both MemorySegments are determined to be the same backing memory.
+ */
+ public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
+ return Util.isSameResource(seg1, seg2);
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java
new file mode 100644
index 000000000..cf64326b8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.Util.LS;
+import static org.apache.datasketches.common.Util.TAB;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * For building a new SetOperation.
+ *
+ * @author Lee Rhodes
+ */
+public class SetOperationBuilder {
+ private int bLgNomLongs;
+ private long bSeed;
+ private ResizeFactor bRF;
+ private float bP;
+
+ /**
+ * Constructor for building a new SetOperation. The default configuration is
+ *
+ * - Max Nominal Entries (max K):
+ * {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
+ * - Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
+ * - {@link ResizeFactor#X8}
+ * - Input Sampling Probability: 1.0
+ * - Memory: null
+ *
+ */
+ public SetOperationBuilder() {
+ bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES);
+ bSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ bP = (float) 1.0;
+ bRF = ResizeFactor.X8;
+ }
+
+ /**
+ * Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a
+ * Set Operation can be less than max K, but never greater.
+ * The minimum value is 16 and the maximum value is 67,108,864, which is 2^26.
+ * @param nomEntries Nominal Entries
+ * This will become the ceiling power of 2 if it is not a power of 2.
+ * @return this SetOperationBuilder
+ */
+ public SetOperationBuilder setNominalEntries(final int nomEntries) {
+ bLgNomLongs = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries));
+ if ((bLgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (bLgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) {
+ throw new SketchesArgumentException("Nominal Entries must be >= 16 and <= 67108864: "
+ + nomEntries);
+ }
+ return this;
+ }
+
+ /**
+ * Alternative method of setting the Nominal Entries for this set operation from the log_base2 value.
+ * The minimum value is 4 and the maximum value is 26.
+ * Be aware that set operations as large as this maximum value may not have been
+ * thoroughly characterized for performance.
+ * @param lgNomEntries the log_base2 Nominal Entries.
+ * @return this SetOperationBuilder
+ */
+ public SetOperationBuilder setLogNominalEntries(final int lgNomEntries) {
+   //Java masks int shift distances to 5 bits (36 would act as 4), so clamp to [0, 30] before shifting
+   bLgNomLongs = ThetaUtil.checkNomLongs(1 << Math.max(0, Math.min(lgNomEntries, 30)));
+   return this;
+ }
+
+ /**
+ * Returns Log-base 2 Nominal Entries
+ * @return Log-base 2 Nominal Entries
+ */
+ public int getLgNominalEntries() {
+ return bLgNomLongs;
+ }
+
+ /**
+ * Sets the long seed value that is required by the hashing function.
+ * @param seed See seed
+ * @return this SetOperationBuilder
+ */
+ public SetOperationBuilder setSeed(final long seed) {
+ bSeed = seed;
+ return this;
+ }
+
+ /**
+ * Returns the seed
+ * @return the seed
+ */
+ public long getSeed() {
+ return bSeed;
+ }
+
+ /**
+ * Sets the upfront uniform sampling probability, p. Although this functionality is
+ * implemented for Unions only, it rarely makes sense to use it. The proper use of upfront
+ * sampling is when building the sketches.
+ * @param p See Sampling Probability, p. Must be &gt; 0 and &le; 1.0; NaN is rejected.
+ * @return this SetOperationBuilder
+ */
+ public SetOperationBuilder setP(final float p) {
+   if (!((p > 0.0) && (p <= 1.0))) { //negated range test: NaN fails both comparisons and is rejected
+     throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p);
+   }
+   bP = p;
+   return this;
+ }
+
+ /**
+ * Returns the pre-sampling probability p
+ * @return the pre-sampling probability p
+ */
+ public float getP() {
+ return bP;
+ }
+
+ /**
+ * Sets the cache Resize Factor
+ * @param rf See Resize Factor
+ * @return this SetOperationBuilder
+ */
+ public SetOperationBuilder setResizeFactor(final ResizeFactor rf) {
+ bRF = rf;
+ return this;
+ }
+
+ /**
+ * Returns the Resize Factor
+ * @return the Resize Factor
+ */
+ public ResizeFactor getResizeFactor() {
+ return bRF;
+ }
+
+ /**
+ * Returns a SetOperation with the current configuration of this Builder and the given Family.
+ * @param family the chosen SetOperation family
+ * @return a SetOperation
+ */
+ public SetOperation build(final Family family) {
+ return build(family, null);
+ }
+
+ /**
+ * Returns a SetOperation with the current configuration of this Builder, the given Family
+ * and the given destination memory. Note that the destination MemorySegment cannot be used with AnotB.
+ * @param family the chosen SetOperation family
+ * @param dstSeg The destination MemorySegment.
+ * @return a SetOperation
+ */
+ public SetOperation build(final Family family, final MemorySegment dstSeg) {
+ SetOperation setOp = null;
+ switch (family) {
+ case UNION: {
+ if (dstSeg == null) {
+ setOp = UnionImpl.initNewHeapInstance(bLgNomLongs, bSeed, bP, bRF);
+ }
+ else {
+ setOp = UnionImpl.initNewDirectInstance(bLgNomLongs, bSeed, bP, bRF, dstSeg);
+ }
+ break;
+ }
+ case INTERSECTION: {
+ if (dstSeg == null) {
+ setOp = IntersectionImpl.initNewHeapInstance(bSeed);
+ }
+ else {
+ setOp = IntersectionImpl.initNewDirectInstance(bSeed, dstSeg);
+ }
+ break;
+ }
+ case A_NOT_B: {
+ if (dstSeg == null) {
+ setOp = new AnotBimpl(bSeed);
+ }
+ else {
+ throw new SketchesArgumentException(
+ "AnotB can not be persisted.");
+ }
+ break;
+ }
+ default:
+ throw new SketchesArgumentException(
+ "Given Family cannot be built as a SetOperation: " + family.toString());
+ }
+ return setOp;
+ }
+
+ /**
+ * Convenience method, returns a configured SetOperation Union with
+ * Default Nominal Entries
+ * @return a Union object
+ */
+ public Union buildUnion() {
+ return (Union) build(Family.UNION);
+ }
+
+ /**
+ * Convenience method, returns a configured SetOperation Union with
+ * Default Nominal Entries
+ * and the given destination MemorySegment.
+ * @param dstSeg The destination MemorySegment.
+ * @return a Union object
+ */
+ public Union buildUnion(final MemorySegment dstSeg) {
+ return (Union) build(Family.UNION, dstSeg);
+ }
+
+ /**
+ * Convenience method, returns a configured SetOperation Intersection with
+ * Default Nominal Entries
+ * @return an Intersection object
+ */
+ public Intersection buildIntersection() {
+ return (Intersection) build(Family.INTERSECTION);
+ }
+
+ /**
+ * Convenience method, returns a configured SetOperation Intersection with
+ * Default Nominal Entries
+ * and the given destination MemorySegment.
+ * @param dstSeg The destination MemorySegment.
+ * @return an Intersection object
+ */
+ public Intersection buildIntersection(final MemorySegment dstSeg) {
+ return (Intersection) build(Family.INTERSECTION, dstSeg);
+ }
+
+ /**
+ * Convenience method, returns a configured SetOperation ANotB with
+ * Default Update Seed
+ * @return an ANotB object
+ */
+ public AnotB buildANotB() {
+ return (AnotB) build(Family.A_NOT_B);
+ }
+
+ @Override
+ public String toString() { //a header line, then one labeled setting per line
+   return new StringBuilder()
+       .append("SetOperationBuilder configuration:").append(LS)
+       .append("LgK:").append(TAB).append(bLgNomLongs).append(LS)
+       .append("K:").append(TAB).append(1 << bLgNomLongs).append(LS)
+       .append("Seed:").append(TAB).append(bSeed).append(LS)
+       .append("p:").append(TAB).append(bP).append(LS)
+       .append("ResizeFactor:").append(TAB).append(bRF).append(LS)
+       .toString();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java
new file mode 100644
index 000000000..2659df84b
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java
@@ -0,0 +1,413 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.datasketches.common.ByteArrayUtil.putLongLE;
+import static org.apache.datasketches.hash.MurmurHash3.hash;
+import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * A CompactSketch that holds only one item hash.
+ *
+ * @author Lee Rhodes
+ */
+final class SingleItemSketch extends CompactSketch {
+ private static final long DEFAULT_SEED_HASH = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED) & 0xFFFFL;
+
+ // For backward compatibility, a candidate pre0_ long must have:
+ // Flags (byte 5): Ordered, Compact, NOT Empty, Read Only, LittleEndian = 11010 = 0x1A.
+ // Flags mask will be 0x1F.
+ // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now.
+ // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3,
+ // and the hash seed matches, it is virtually guaranteed that we have a SingleItem Sketch.
+
+ private static final long PRE0_LO6_SI = 0X00_00_3A_00_00_03_03_01L; //with SI flag
+ private long pre0_ = 0;
+ private long hash_ = 0;
+
+ //Internal Constructor. All checking & hashing has been done, assumes default seed
+ private SingleItemSketch(final long hash) {
+ pre0_ = (DEFAULT_SEED_HASH << 48) | PRE0_LO6_SI;
+ hash_ = hash;
+ }
+
+ //All checking & hashing has been done, given the relevant seed
+ SingleItemSketch(final long hash, final long seed) {
+ final long seedHash = ThetaUtil.computeSeedHash(seed) & 0xFFFFL;
+ pre0_ = (seedHash << 48) | PRE0_LO6_SI;
+ hash_ = hash;
+ }
+
+ //All checking & hashing has been done, given the relevant seedHash
+ SingleItemSketch(final long hash, final short seedHash) {
+ final long seedH = seedHash & 0xFFFFL;
+ pre0_ = (seedH << 48) | PRE0_LO6_SI;
+ hash_ = hash;
+ }
+
+ /**
+ * Creates a SingleItemSketch on the heap given a SingleItemSketch MemorySegment image and a seedHash.
+ * Checks the seed hash of the given MemorySegment against the given seedHash.
+ * @param srcSeg the MemorySegment to be heapified.
+ * @param expectedSeedHash the given seedHash to be checked against the srcSeg seedHash
+ * @return a SingleItemSketch
+ */ //does not override Sketch
+ static SingleItemSketch heapify(final MemorySegment srcSeg, final short expectedSeedHash) {
+ ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), expectedSeedHash);
+ final boolean singleItem = otherCheckForSingleItem(srcSeg);
+ if (singleItem) { return new SingleItemSketch(srcSeg.get(JAVA_LONG_UNALIGNED, 8), expectedSeedHash); }
+ throw new SketchesArgumentException("Input MemorySegment is not a SingleItemSketch.");
+ }
+
+ @Override
+ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) {
+   //dstOrdered is not consulted here; with no destination this sketch itself is returned
+   if (dstSeg == null) { return this; }
+   //write the 16-byte image: the preamble long at offset 0, then the single hash at offset 8
+   dstSeg.set(JAVA_LONG_UNALIGNED, 0, pre0_);
+   dstSeg.set(JAVA_LONG_UNALIGNED, 8, hash_);
+   return new DirectCompactSketch(dstSeg);
+ }
+
+ //Create methods using the default seed
+
+ /**
+ * Create this sketch with a long.
+ *
+ * @param datum The given long datum.
+ * @return a SingleItemSketch
+ */
+ static SingleItemSketch create(final long datum) {
+ final long[] data = { datum };
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ /**
+ * Create this sketch with the given double (or float) datum.
+ * The double will be converted to a long using Double.doubleToLongBits(datum),
+ * which normalizes all NaN values to a single NaN representation.
+ * Plus and minus zero will be normalized to plus zero.
+ * The special floating-point values NaN and +/- Infinity are treated as distinct.
+ *
+ * @param datum The given double datum.
+ * @return a SingleItemSketch
+ */
+ static SingleItemSketch create(final double datum) {
+ final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0
+ final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ /**
+ * Create this sketch with the given String.
+ * The string is converted to a byte array using UTF8 encoding.
+ * If the string is null or empty no create attempt is made and the method returns null.
+ *
+ * Note: this will not produce the same hash values as the {@link #create(char[])}
+ * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
+ *
+ *
+ * @param datum The given String.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final String datum) {
+ if ((datum == null) || datum.isEmpty()) { return null; }
+ final byte[] data = datum.getBytes(UTF_8);
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ /**
+ * Create this sketch with the given byte array.
+ * If the byte array is null or empty no create attempt is made and the method returns null.
+ *
+ * @param data The given byte array.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final byte[] data) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ /**
+ * Create this sketch with the given char array.
+ * If the char array is null or empty no create attempt is made and the method returns null.
+ *
+ * Note: this will not produce the same output hash values as the {@link #create(String)}
+ * method but will be a little faster as it avoids the complexity of the UTF8 encoding.
+ *
+ * @param data The given char array.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final char[] data) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ /**
+ * Create this sketch with the given integer array.
+ * If the integer array is null or empty no create attempt is made and the method returns null.
+ *
+ * @param data The given int array.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final int[] data) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ /**
+ * Create this sketch with the given long array.
+ * If the long array is null or empty no create attempt is made and the method returns null.
+ *
+ * @param data The given long array.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final long[] data) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+ }
+
+ //Create methods using a user specified seed
+
+ /**
+ * Create this sketch with a long and a seed.
+ * The seed hash stored in the sketch preamble is computed from the given seed.
+ * @param datum The given long datum.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch
+ */
+ static SingleItemSketch create(final long datum, final long seed) {
+   final long[] data = { datum };
+   return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); //pass seed so the seed hash matches
+ }
+
+ /**
+ * Create this sketch with the given double (or float) datum and a seed.
+ * The double will be converted to a long using Double.doubleToLongBits(datum),
+ * which normalizes all NaN values to a single NaN representation.
+ * Plus and minus zero will be normalized to plus zero.
+ * The special floating-point values NaN and +/- Infinity are treated as distinct.
+ *
+ * @param datum The given double datum.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch
+ */
+ static SingleItemSketch create(final double datum, final long seed) {
+ final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0
+ final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms
+ return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+ }
+
+ /**
+ * Create this sketch with the given String and a seed.
+ * The string is converted to a byte array using UTF8 encoding.
+ * If the string is null or empty no create attempt is made and the method returns null.
+ *
+ * Note: this will not produce the same output hash values as the {@link #create(char[])}
+ * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
+ *
+ *
+ * @param datum The given String.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final String datum, final long seed) {
+ if ((datum == null) || datum.isEmpty()) { return null; }
+ final byte[] data = datum.getBytes(UTF_8);
+ return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+ }
+
+ /**
+ * Create this sketch with the given byte array and a seed.
+ * If the byte array is null or empty no create attempt is made and the method returns null.
+ *
+ * @param data The given byte array.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final byte[] data, final long seed) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+ }
+
+ /**
+ * Create this sketch with the given char array and a seed.
+ * If the char array is null or empty no create attempt is made and the method returns null.
+ *
+ * Note: this will not produce the same output hash values as the {@link #create(String)}
+ * method but will be a little faster as it avoids the complexity of the UTF8 encoding.
+ *
+ * @param data The given char array.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final char[] data, final long seed) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+ }
+
+ /**
+ * Create this sketch with the given integer array and a seed.
+ * If the integer array is null or empty no create attempt is made and the method returns null.
+ *
+ * @param data The given int array.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final int[] data, final long seed) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+ }
+
+ /**
+ * Create this sketch with the given long array (as an item) and a seed.
+ * If the long array is null or empty no create attempt is made and the method returns null.
+ *
+ * @param data The given long array.
+ * @param seed used to hash the given value.
+ * @return a SingleItemSketch or null
+ */
+ static SingleItemSketch create(final long[] data, final long seed) {
+ if ((data == null) || (data.length == 0)) { return null; }
+ return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+ }
+
+ //Sketch
+
+ @Override //much faster
+ public int getCountLessThanThetaLong(final long thetaLong) {
+ return (hash_ < thetaLong) ? 1 : 0;
+ }
+
+ @Override
+ public int getCurrentBytes() {
+ return 16;
+ }
+
+ @Override
+ public double getEstimate() {
+ return 1.0;
+ }
+
+ @Override
+ public HashIterator iterator() {
+ return new HeapCompactHashIterator(new long[] { hash_ });
+ }
+
+ @Override
+ public double getLowerBound(final int numStdDev) {
+ return 1.0;
+ }
+
+ @Override
+ public int getRetainedEntries(final boolean valid) {
+ return 1;
+ }
+
+ @Override
+ public long getThetaLong() {
+ return Long.MAX_VALUE;
+ }
+
+ @Override
+ public double getUpperBound(final int numStdDev) {
+ return 1.0;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return false;
+ }
+
+ @Override
+ public boolean isOrdered() {
+ return true;
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ final byte[] out = new byte[16];
+ putLongLE(out, 0, pre0_);
+ putLongLE(out, 8, hash_);
+ return out;
+ }
+
+ //restricted methods
+
+ @Override
+ long[] getCache() {
+ return new long[] { hash_ };
+ }
+
+ @Override
+ int getCompactPreambleLongs() {
+ return 1;
+ }
+
+ @Override
+ int getCurrentPreambleLongs() {
+ return 1;
+ }
+
+ @Override
+ MemorySegment getMemorySegment() {
+ return null;
+ }
+
+ @Override
+ short getSeedHash() {
+ return (short) (pre0_ >>> 48);
+ }
+
+ static final boolean otherCheckForSingleItem(final MemorySegment seg) {
+ return otherCheckForSingleItem(extractPreLongs(seg), extractSerVer(seg),
+ extractFamilyID(seg), extractFlags(seg) );
+ }
+
+ static final boolean otherCheckForSingleItem(final int preLongs, final int serVer,
+ final int famId, final int flags) {
+ // Flags byte: SI=X, Ordered=T, Compact=T, Empty=F, ReadOnly=T, BigEndian=F = X11010 = 0x1A.
+ // Flags mask will be 0x1F.
+ // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now.
+ // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3,
+ // and the hash seed matches (not done here), it is virtually guaranteed that we have a
+ // SingleItem Sketch.
+ final boolean numPreLongs = preLongs == 1;
+ final boolean numSerVer = serVer >= 3;
+ final boolean numFamId = famId == Family.COMPACT.getID();
+ final boolean numFlags = (flags & 0x1F) == 0x1A; //no SI, yet
+ final boolean singleFlag = (flags & SINGLEITEM_FLAG_MASK) > 0;
+ return (numPreLongs && numSerVer && numFamId && numFlags) || singleFlag;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java
new file mode 100644
index 000000000..3c5650a91
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java
@@ -0,0 +1,695 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.common.Family.idToFamily;
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+import static org.apache.datasketches.common.Util.LS;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.common.Util.zeroPad;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.apache.datasketches.thetacommon.HashOperations.count;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.BinomialBoundsN;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The top-level class for all theta sketches. This class is never constructed directly.
+ * Use the UpdateSketch.builder() methods to create UpdateSketches.
+ *
+ * @author Lee Rhodes
+ */
+public abstract class Sketch {
+
+ Sketch() {}
+
+ //public static factory constructor-type methods
+
+ /**
+ * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch.
+ *
+ * The resulting sketch will not retain any link to the source MemorySegment.
+ *
+ * For Update Sketches this method checks if the
+ * Default Update Seed
+ * was used to create the source MemorySegment image.
+ *
+ * For Compact Sketches this method assumes that the sketch image was created with the
+ * correct hash seed, so it is not checked.
+ *
+ * @param srcSeg an image of a Sketch.
+ *
+ * @return a Sketch on the heap.
+ */
+ public static Sketch heapify(final MemorySegment srcSeg) {
+ final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE);
+ final Family family = idToFamily(familyID);
+ if (family == Family.COMPACT) {
+ return CompactSketch.heapify(srcSeg);
+ }
+ return heapifyUpdateFromMemory(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch.
+ *
+ * The resulting sketch will not retain any link to the source MemorySegment.
+ *
+ * For Update and Compact Sketches this method checks if the given expectedSeed was used to
+ * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.
+ *
+ * @param srcSeg an image of a Sketch that was created using the given expectedSeed.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * Compact sketches store a 16-bit hash of the seed, but not the seed itself.
+ * @return a Sketch on the heap.
+ */
+ public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed) {
+ final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE);
+ final Family family = idToFamily(familyID);
+ if (family == Family.COMPACT) {
+ return CompactSketch.heapify(srcSeg, expectedSeed);
+ }
+ return heapifyUpdateFromMemory(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Wrap takes the sketch image in the given MemorySegment and refers to it directly.
+ * There is no data copying onto the java heap.
+ * The wrap operation enables fast read-only merging and access to all the public read-only API.
+ *
+ * Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as direct sketches can be wrapped.
+ * Wrapping earlier serial version sketches will result in a on-heap CompactSketch
+ * where all data will be copied to the heap. These early versions were never designed to
+ * "wrap".
+ *
+ * Wrapping any subclass of this class that is empty or contains only a single item will
+ * result in on-heap equivalent forms of empty and single item sketch respectively.
+ * This is actually faster and consumes less overall memory.
+ *
+ * For Update Sketches this method checks if the
+ * Default Update Seed
+ * was used to create the source MemorySegment image.
+ *
+ * For Compact Sketches this method assumes that the sketch image was created with the
+ * correct hash seed, so it is not checked.
+ *
+ * @param srcSeg an image of a Sketch.
+ * See Memory.
+ * @return a Sketch backed by the given MemorySegment
+ */
+ public static Sketch wrap(final MemorySegment srcSeg) {
+ final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+ final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
+ final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+ final Family family = Family.idToFamily(familyID);
+ if (family == Family.QUICKSELECT) {
+ if (serVer == 3 && preLongs == 3) {
+ return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ } else {
+ throw new SketchesArgumentException(
+ "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3");
+ }
+ }
+ if (family == Family.COMPACT) {
+ return CompactSketch.wrap(srcSeg);
+ }
+ throw new SketchesArgumentException(
+ "Cannot wrap family: " + family + " as a Sketch");
+ }
+
+ /**
+ * Wrap takes the sketch image in the given MemorySegment and refers to it directly.
+ * There is no data copying onto the java heap.
+ * The wrap operation enables fast read-only merging and access to all the public read-only API.
+ *
+ * Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as direct sketches can be wrapped.
+ * Wrapping earlier serial version sketches will result in an on-heap CompactSketch
+ * where all data will be copied to the heap. These early versions were never designed to
+ * "wrap".
+ *
+ * Wrapping any subclass of this class that is empty or contains only a single item will
+ * result in on-heap equivalent forms of empty and single item sketch respectively.
+ * This is actually faster and consumes less overall memory.
+ *
+ * For Update and Compact Sketches this method checks if the given expectedSeed was used to
+ * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.
+ *
+ * @param srcSeg a MemorySegment with an image of a Sketch.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return a Sketch backed by the given MemorySegment except as above.
+ */
+ public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) {
+ final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+ final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
+ final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+ final Family family = Family.idToFamily(familyID);
+ if (family == Family.QUICKSELECT) {
+ if (serVer == 3 && preLongs == 3) {
+ return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed);
+ } else {
+ throw new SketchesArgumentException(
+ "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3");
+ }
+ }
+ if (family == Family.COMPACT) {
+ return CompactSketch.wrap(srcSeg, expectedSeed);
+ }
+ throw new SketchesArgumentException(
+ "Cannot wrap family: " + family + " as a Sketch");
+ }
+
+ //Sketch interface
+
+ /**
+ * Converts this sketch to an ordered CompactSketch.
+ *
+ * If this.isCompact() == true this method returns this,
+ * otherwise, this method is equivalent to
+ * {@link #compact(boolean, MemorySegment) compact(true, null)}.
+ *
+ * A CompactSketch is always immutable.
+ *
+ * @return this sketch as an ordered CompactSketch.
+ */
+ public CompactSketch compact() {
+ return (this.isCompact()) ? (CompactSketch)this : compact(true, null);
+ }
+
+ /**
+ * Convert this sketch to a CompactSketch.
+ *
+ * If this sketch is a type of UpdateSketch, the compacting process converts the hash table
+ * of the UpdateSketch to a simple list of the valid hash values.
+ * Any hash values of zero or equal-to or greater than theta will be discarded.
+ * The number of valid values remaining in the CompactSketch depends on a number of factors,
+ * but may be larger or smaller than Nominal Entries (or k).
+ * It will never exceed 2k.
+ * If it is critical to always limit the size to no more than k,
+ * then rebuild() should be called on the UpdateSketch prior to calling this method.
+ *
+ * A CompactSketch is always immutable.
+ *
+ * A new CompactSketch object is created:
+ * - if dstSeg != null
+ * - if dstSeg == null and this.hasMemorySegment() == true
+ * - if dstSeg == null and this has more than 1 item and this.isOrdered() == false
+ * and dstOrdered == true.
+ *
+ *
+ * Otherwise, this operation returns this.
+ *
+ * @param dstOrdered assumed true if this sketch is empty or has only one value
+ * See Destination Ordered
+ *
+ * @param dstSeg
+ * See Destination MemorySegment.
+ *
+ * @return this sketch as a CompactSketch.
+ */
+ public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg);
+
+ /**
+ * Returns the number of storage bytes required for this Sketch if its current state were
+ * compacted. If this sketch is already in the compact form this is equivalent to
+ * calling {@link #getCurrentBytes()}.
+ * @return number of compact bytes
+ */
+ public abstract int getCompactBytes();
+
+ /**
+ * Gets the number of hash values less than the given theta expressed as a long.
+ * @param thetaLong the given theta as a long between zero and Long.MAX_VALUE.
+ * @return the number of hash values less than the given thetaLong.
+ */
+ public int getCountLessThanThetaLong(final long thetaLong) {
+ return count(getCache(), thetaLong);
+ }
+
+ /**
+ * Returns the number of storage bytes required for this sketch in its current state.
+ *
+ * @return the number of storage bytes required for this sketch
+ */
+ public abstract int getCurrentBytes();
+
+ /**
+ * Gets the unique count estimate.
+ * @return the sketch's best estimate of the cardinality of the input stream.
+ */
+ public abstract double getEstimate();
+
+ /**
+ * Returns the Family that this sketch belongs to
+ * @return the Family that this sketch belongs to
+ */
+ public abstract Family getFamily();
+
+ /**
+ * Gets the approximate lower error bound given the specified number of Standard Deviations.
+ * This will return getEstimate() if isEmpty() is true.
+ *
+ * @param numStdDev
+ * See Number of Standard Deviations
+ * @return the lower bound.
+ */
+ public double getLowerBound(final int numStdDev) {
+ return isEstimationMode()
+ ? lowerBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty())
+ : getRetainedEntries(true);
+ }
+
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch with the given
+ * number of actual entries.
+ * @param numberOfEntries the actual number of retained entries stored in the sketch.
+ * @return the maximum number of storage bytes required for a CompactSketch with the given number
+ * of retained entries.
+ */
+ public static int getMaxCompactSketchBytes(final int numberOfEntries) {
+ if (numberOfEntries == 0) { return 8; }
+ if (numberOfEntries == 1) { return 16; }
+ return (numberOfEntries << 3) + 24;
+ }
+
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch given the configured
+ * log_base2 of the number of nominal entries, which is a power of 2.
+ * @param lgNomEntries Nominal Entries
+ * @return the maximum number of storage bytes required for a CompactSketch with the given
+ * lgNomEntries.
+ */
+ public static int getCompactSketchMaxBytes(final int lgNomEntries) {
+ return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD
+ + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES;
+ }
+
+ /**
+ * Returns the maximum number of storage bytes required for an UpdateSketch with the given
+ * number of nominal entries (power of 2).
+ * @param nomEntries Nominal Entries
+ * This will become the ceiling power of 2 if it is not.
+ * @return the maximum number of storage bytes required for a UpdateSketch with the given
+ * nomEntries
+ */
+ public static int getMaxUpdateSketchBytes(final int nomEntries) {
+ final int nomEnt = ceilingPowerOf2(nomEntries);
+ return (nomEnt << 4) + (Family.QUICKSELECT.getMaxPreLongs() << 3);
+ }
+
+ /**
+ * Returns the number of valid entries that have been retained by the sketch.
+ * @return the number of valid retained entries
+ */
+ public int getRetainedEntries() {
+ return getRetainedEntries(true);
+ }
+
+ /**
+ * Returns the number of entries that have been retained by the sketch.
+ * @param valid if true, returns the number of valid entries, which are less than theta and used
+ * for estimation.
+ * Otherwise, return the number of all entries, valid or not, that are currently in the internal
+ * sketch cache.
+ * @return the number of retained entries
+ */
+ public abstract int getRetainedEntries(boolean valid);
+
+ /**
+ * Returns the serialization version byte of the given MemorySegment image as an unsigned value.
+ * @param seg the sketch MemorySegment; the byte at offset SER_VER_BYTE is read.
+ * @return the serialization version from the MemorySegment, in the range 0 to 255.
+ */
+ public static int getSerializationVersion(final MemorySegment seg) {
+ return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; //mask: a Java byte is signed, as done in wrap()
+ }
+
+ /**
+ * Gets the value of theta as a double with a value between zero and one
+ * @return the value of theta as a double
+ */
+ public double getTheta() {
+ return getThetaLong() / LONG_MAX_VALUE_AS_DOUBLE;
+ }
+
+ /**
+ * Gets the value of theta as a long
+ * @return the value of theta as a long
+ */
+ public abstract long getThetaLong();
+
+ /**
+ * Gets the approximate upper error bound given the specified number of Standard Deviations.
+ * This will return getEstimate() if isEmpty() is true.
+ *
+ * @param numStdDev
+ * See Number of Standard Deviations
+ * @return the upper bound.
+ */
+ public double getUpperBound(final int numStdDev) {
+ return isEstimationMode()
+ ? upperBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty())
+ : getRetainedEntries(true);
+ }
+
+ /**
+ * Returns true if this object's internal data is backed by a MemorySegment object,
+ * which may be on-heap or off-heap.
+ * @return true if this object's internal data is backed by a MemorySegment object.
+ */
+ public boolean hasMemorySegment() { return false; }
+
+ /**
+ * Returns true if this sketch is in compact form.
+ * @return true if this sketch is in compact form.
+ */
+ public abstract boolean isCompact();
+
+ /**
+ * Returns true if this object's internal data is backed by an off-heap MemorySegment.
+ * @return true if this object's internal data is backed by an off-heap MemorySegment.
+ */
+ public boolean isDirect() { return false; }
+
+ /**
+ * See Empty
+ * @return true if empty.
+ */
+ public abstract boolean isEmpty();
+
+ /**
+ * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode).
+ * This is true if theta < 1.0 AND isEmpty() is false.
+ * @return true if the sketch is in estimation mode.
+ */
+ public boolean isEstimationMode() {
+ return estMode(getThetaLong(), isEmpty());
+ }
+
+ /**
+ * Returns true if internal cache is ordered
+ * @return true if internal cache is ordered
+ */
+ public abstract boolean isOrdered();
+
+ /**
+ * Returns true if the two given MemorySegments refer to the same backing resource,
+ * which is either an off-heap memory location and size, or the same on-heap array object.
+ *
+ * This is a convenient delegate of
+ * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}
+ *
+ * @param seg1 The first given MemorySegment
+ * @param seg2 The second given MemorySegment
+ * @return true if both MemorySegments are determined to be the same backing memory.
+ */
+ public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
+ return Util.isSameResource(seg1, seg2);
+ }
+
+ /**
+ * Returns a HashIterator that can be used to iterate over the retained hash values of the
+ * Theta sketch.
+ * @return a HashIterator that can be used to iterate over the retained hash values of the
+ * Theta sketch.
+ */
+ public abstract HashIterator iterator();
+
+ /**
+ * Serialize this sketch to a byte array form.
+ * @return byte array of this sketch
+ */
+ public abstract byte[] toByteArray();
+
+ /**
+ * Returns a human readable summary of the sketch. This method is equivalent to the parameterized
+ * call:
+ * toString(true, false, 8, true);
+ * @return summary
+ */
+ @Override
+ public String toString() {
+ return toString(true, false, 8, true);
+ }
+
+ /**
+ * Gets a human readable listing of contents and summary of the given sketch.
+ * This can be a very long string. If this sketch is in a "dirty" state there
+ * may be values in the dataDetail view that are ≥ theta.
+ *
+ * @param sketchSummary If true the sketch summary will be output at the end.
+ * @param dataDetail If true, includes all valid hash values in the sketch.
+ * @param width The number of columns of hash values. Default is 8.
+ * @param hexMode If true, hashes will be output in hex.
+ * @return The result string, which can be very long.
+ */
+ public String toString(final boolean sketchSummary, final boolean dataDetail, final int width,
+ final boolean hexMode) {
+ final StringBuilder sb = new StringBuilder();
+
+ int nomLongs = 0;
+ int arrLongs = 0;
+ float p = 0;
+ int rf = 0;
+ final boolean updateSketch = this instanceof UpdateSketch;
+
+ final long thetaLong = getThetaLong();
+ final int curCount = this.getRetainedEntries(true);
+
+ if (updateSketch) {
+ final UpdateSketch uis = (UpdateSketch)this;
+ nomLongs = 1 << uis.getLgNomLongs();
+ arrLongs = 1 << uis.getLgArrLongs();
+ p = uis.getP();
+ rf = uis.getResizeFactor().getValue();
+ }
+
+ if (dataDetail) {
+ final int w = width > 0 ? width : 8; // default is 8 wide
+ if (curCount > 0) {
+ sb.append("### SKETCH DATA DETAIL");
+ final HashIterator it = iterator();
+ int j = 0;
+ while (it.next()) {
+ final long h = it.get();
+ if (j % w == 0) {
+ sb.append(LS).append(String.format(" %6d", j + 1));
+ }
+ if (hexMode) {
+ sb.append(" " + zeroPad(Long.toHexString(h), 16) + ",");
+ }
+ else {
+ sb.append(String.format(" %20d,", h));
+ }
+ j++ ;
+ }
+ sb.append(LS).append("### END DATA DETAIL").append(LS + LS);
+ }
+ }
+
+ if (sketchSummary) {
+ final double thetaDbl = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
+ final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
+ final String thisSimpleName = this.getClass().getSimpleName();
+ final int seedHash = Short.toUnsignedInt(getSeedHash());
+
+ sb.append(LS);
+ sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS);
+ if (updateSketch) {
+ sb.append(" Nominal Entries (k) : ").append(nomLongs).append(LS);
+ }
+ sb.append(" Estimate : ").append(getEstimate()).append(LS);
+ sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS);
+ sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS);
+ if (updateSketch) {
+ sb.append(" p : ").append(p).append(LS);
+ }
+ sb.append(" Theta (double) : ").append(thetaDbl).append(LS);
+ sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+ sb.append(" Theta (long) hex : ").append(thetaHex).append(LS);
+ sb.append(" EstMode? : ").append(isEstimationMode()).append(LS);
+ sb.append(" Empty? : ").append(isEmpty()).append(LS);
+ sb.append(" Ordered? : ").append(isOrdered()).append(LS);
+ if (updateSketch) {
+ sb.append(" Resize Factor : ").append(rf).append(LS);
+ sb.append(" Array Size Entries : ").append(arrLongs).append(LS);
+ }
+ sb.append(" Retained Entries : ").append(curCount).append(LS);
+ sb.append(" Seed Hash : ").append(Integer.toHexString(seedHash))
+ .append(" | ").append(seedHash).append(LS);
+ sb.append("### END SKETCH SUMMARY").append(LS);
+
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Returns a human readable string of the preamble of a byte array image of a Theta Sketch.
+ * @param byteArr the given byte array
+ * @return a human readable string of the preamble of a byte array image of a Theta Sketch.
+ */
+ public static String toString(final byte[] byteArr) {
+ return PreambleUtil.preambleToString(byteArr);
+ }
+
+ /**
+ * Returns a human readable string of the preamble of a MemorySegment image of a Theta Sketch.
+ * @param mem the given MemorySegment object
+ * @return a human readable string of the preamble of a MemorySegment image of a Theta Sketch.
+ */
+ public static String toString(final MemorySegment mem) {
+ return PreambleUtil.preambleToString(mem);
+ }
+
+ //Restricted methods
+
+ /**
+ * Gets the internal cache array. For on-heap sketches this will return a reference to the actual
+ * cache array. For Memory-based sketches this returns a copy.
+ * @return the internal cache array.
+ */
+ abstract long[] getCache();
+
+ /**
+ * Gets preamble longs if stored in compact form. If this sketch is already in compact form,
+ * this is identical to the call {@link #getCurrentPreambleLongs()}.
+ * @return preamble longs if stored in compact form.
+ */
+ abstract int getCompactPreambleLongs();
+
+ /**
+ * Gets the number of data longs if stored in current state.
+ * @return the number of data longs if stored in current state.
+ */
+ abstract int getCurrentDataLongs();
+
+ /**
+ * Returns preamble longs if stored in current state.
+ * @return number of preamble longs if stored.
+ */
+ abstract int getCurrentPreambleLongs();
+
+ /**
+ * Returns the backing MemorySegment object if it exists, otherwise null.
+ * @return the backing MemorySegment object if it exists, otherwise null.
+ */
+ abstract MemorySegment getMemorySegment();
+
+ /**
+ * Gets the 16-bit seed hash
+ * @return the seed hash
+ */
+ abstract short getSeedHash();
+
+ /**
+ * Returns true if given Family id is one of the theta sketches
+ * @param id the given Family id
+ * @return true if given Family id is one of the theta sketches
+ */
+ static final boolean isValidSketchID(final int id) {
+ return id == Family.ALPHA.getID()
+ || id == Family.QUICKSELECT.getID()
+ || id == Family.COMPACT.getID();
+ }
+
+ /**
+ * Checks Ordered and Compact flags for integrity between sketch and Memory
+ * @param sketch the given sketch
+ */
+ static final void checkSketchAndMemoryFlags(final Sketch sketch) {
+ final MemorySegment seg = sketch.getMemorySegment();
+ if (seg == null) { return; }
+ final int flags = PreambleUtil.extractFlags(seg);
+ if ((flags & COMPACT_FLAG_MASK) > 0 ^ sketch.isCompact()) {
+ throw new SketchesArgumentException("Possible corruption: "
+ + "MemorySegment Compact Flag inconsistent with Sketch");
+ }
+ if ((flags & ORDERED_FLAG_MASK) > 0 ^ sketch.isOrdered()) {
+ throw new SketchesArgumentException("Possible corruption: "
+ + "MemorySegment Ordered Flag inconsistent with Sketch");
+ }
+ }
+
+ static final double estimate(final long thetaLong, final int curCount) {
+ return curCount * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong);
+ }
+
+ static final double lowerBound(final int curCount, final long thetaLong, final int numStdDev,
+ final boolean empty) {
+ final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
+ return BinomialBoundsN.getLowerBound(curCount, theta, numStdDev, empty);
+ }
+
+ static final double upperBound(final int curCount, final long thetaLong, final int numStdDev,
+ final boolean empty) {
+ final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
+ return BinomialBoundsN.getUpperBound(curCount, theta, numStdDev, empty);
+ }
+
+ private static final boolean estMode(final long thetaLong, final boolean empty) {
+ return thetaLong < Long.MAX_VALUE && !empty;
+ }
+
+ /**
+ * Instantiates a Heap Update Sketch from MemorySegment. Only SerVer3. SerVer 1 & 2 already handled.
+ * @param srcSeg the source MemorySegment
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return a Sketch
+ */
+ private static final Sketch heapifyUpdateFromMemory(final MemorySegment srcSeg, final long expectedSeed) {
+ final long cap = srcSeg.byteSize();
+ if (cap < 8) {
+ throw new SketchesArgumentException(
+ "Corrupted: valid sketch must be at least 8 bytes.");
+ }
+ final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE);
+ final Family family = idToFamily(familyID);
+
+ if (family == Family.ALPHA) {
+ final int flags = PreambleUtil.extractFlags(srcSeg);
+ final boolean compactFlag = (flags & COMPACT_FLAG_MASK) != 0;
+ if (compactFlag) {
+ throw new SketchesArgumentException(
+ "Corrupted: ALPHA family image: cannot be compact");
+ }
+ return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed);
+ }
+ if (family == Family.QUICKSELECT) {
+ return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed);
+ }
+ throw new SketchesArgumentException(
+ "Sketch cannot heapify family: " + family + " as a Sketch");
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/Union.java b/src/main/java/org/apache/datasketches/theta2/Union.java
new file mode 100644
index 000000000..861857366
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/Union.java
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+
+import org.apache.datasketches.common.Family;
+
+/**
+ * Compute the union of two or more theta sketches.
+ * A new instance represents an empty set.
+ *
+ * @author Lee Rhodes
+ */
+public abstract class Union extends SetOperation {
+
+ /**
+ * Returns the number of storage bytes required for this union in its current state.
+ *
+ * @return the number of storage bytes required for this union in its current state.
+ */
+ public abstract int getCurrentBytes();
+
+ @Override
+ public Family getFamily() {
+ return Family.UNION;
+ }
+
+ /**
+ * Returns the maximum required storage bytes for this union.
+ * @return the maximum required storage bytes for this union.
+ */
+ public abstract int getMaxUnionBytes();
+
+ /**
+ * Gets the result of this operation as an ordered CompactSketch on the Java heap.
+ * This does not disturb the underlying data structure of the union.
+ * Therefore, it is OK to continue updating the union after this operation.
+ * @return the result of this operation as an ordered CompactSketch on the Java heap
+ */
+ public abstract CompactSketch getResult();
+
+ /**
+ * Gets the result of this operation as a CompactSketch of the chosen form.
+ * This does not disturb the underlying data structure of the union.
+ * Therefore, it is OK to continue updating the union after this operation.
+ *
+ * @param dstOrdered
+ * See Destination Ordered
+ *
+ * @param dstSeg destination MemorySegment
+ *
+ * @return the result of this operation as a CompactSketch of the chosen form
+ */
+ public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg);
+
+ /**
+ * Resets this Union. The seed remains intact, everything else reverts back to its virgin state.
+ */
+ public abstract void reset();
+
+ /**
+ * Returns a byte array image of this Union object
+ * @return a byte array image of this Union object
+ */
+ public abstract byte[] toByteArray();
+
+ /**
+ * This implements a stateless, pair-wise union operation. The returned sketch will be cut back to
+ * the smaller of the two k values if required.
+ *
+ * Nulls and empty sketches are ignored.
+ *
+ * @param sketchA The first argument
+ * @param sketchB The second argument
+ * @return the result ordered CompactSketch on the heap.
+ */
+ public CompactSketch union(final Sketch sketchA, final Sketch sketchB) {
+ return union(sketchA, sketchB, true, null);
+ }
+
+ /**
+ * This implements a stateless, pair-wise union operation. The returned sketch will be cut back to
+ * k if required, similar to the regular Union operation.
+ *
+ * Nulls and empty sketches are ignored.
+ *
+ * @param sketchA The first argument
+ * @param sketchB The second argument
+ * @param dstOrdered If true, the returned CompactSketch will be ordered.
+ * @param dstSeg If not null, the returned CompactSketch will be placed in this MemorySegment.
+ * @return the result CompactSketch.
+ */
+ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstOrdered,
+ MemorySegment dstSeg);
+
+ /**
+ * Perform a Union operation with this union and the given on-heap sketch of the Theta Family.
+ * This method is not valid for the older SetSketch, which was prior to Open Source (August, 2015).
+ *
+ * This method can be repeatedly called.
+ *
+ * Nulls and empty sketches are ignored.
+ *
+ * @param sketchIn The incoming sketch.
+ */
+ public abstract void union(Sketch sketchIn);
+
+ /**
+ * Perform a Union operation with this union and the given MemorySegment image of any sketch of the
+ * Theta Family. The input image may be from earlier versions of the Theta Compact Sketch,
+ * called the SetSketch (circa 2014), which was prior to Open Source and are compact and ordered.
+ *
+ * This method can be repeatedly called.
+ *
+ * Nulls and empty sketches are ignored.
+ *
+ * @param seg MemorySegment image of sketch to be merged
+ */
+ public abstract void union(MemorySegment seg);
+
+ /**
+ * Update this union with the given long data item.
+ *
+ * @param datum The given long datum.
+ */
+ public abstract void update(long datum);
+
+ /**
+ * Update this union with the given double (or float) data item.
+ * The double will be converted to a long using Double.doubleToLongBits(datum),
+ * which normalizes all NaN values to a single NaN representation.
+ * Plus and minus zero will be normalized to plus zero.
+ * Each of the special floating-point values NaN and +/- Infinity are treated as distinct.
+ *
+ * @param datum The given double datum.
+ */
+ public abstract void update(double datum);
+
+ /**
+ * Update this union with the given String data item.
+ * The string is converted to a byte array using UTF8 encoding.
+ * If the string is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this will not produce the same output hash values as the {@link #update(char[])}
+ * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
+ *
+ *
+ * Note: this is not a Sketch Union operation. This treats the given string as a data item.
+ *
+ * @param datum The given String.
+ */
+ public abstract void update(String datum);
+
+ /**
+ * Update this union with the given byte array item.
+ * If the byte array is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this is not a Sketch Union operation. This treats the given byte array as a data
+ * item.
+ *
+ * @param data The given byte array.
+ */
+ public abstract void update(byte[] data);
+
+ /**
+ * Update this union with the given ByteBuffer item.
+ * If the ByteBuffer is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this is not a Sketch Union operation. This treats the given ByteBuffer as a data
+ * item.
+ *
+ * @param data The given ByteBuffer.
+ */
+ public abstract void update(ByteBuffer data);
+
+ /**
+ * Update this union with the given integer array item.
+ * If the integer array is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this is not a Sketch Union operation. This treats the given integer array as a data
+ * item.
+ *
+ * @param data The given int array.
+ */
+ public abstract void update(int[] data);
+
+ /**
+ * Update this union with the given char array item.
+ * If the char array is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this will not produce the same output hash values as the {@link #update(String)}
+ * method but will be a little faster as it avoids the complexity of the UTF8 encoding.
+ *
+ * Note: this is not a Sketch Union operation. This treats the given char array as a data
+ * item.
+ *
+ * @param data The given char array.
+ */
+ public abstract void update(char[] data);
+
+ /**
+ * Update this union with the given long array item.
+ * If the long array is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this is not a Sketch Union operation. This treats the given long array as a data
+ * item.
+ *
+ * @param data The given long array.
+ */
+ public abstract void update(long[] data);
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java
new file mode 100644
index 000000000..a86365c7d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.Math.min;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.UNION_THETA_LONG;
+import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractUnionThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong;
+import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Shared code for the HeapUnion and DirectUnion implementations.
+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+final class UnionImpl extends Union {
+
+ /**
+ * Although the gadget object is initially an UpdateSketch, in the context of a Union it is used
+ * as a specialized buffer that happens to leverage much of the machinery of an UpdateSketch.
+ * However, in this context some of the key invariants of the sketch algorithm are intentionally
+ * violated as an optimization. As a result this object can not be considered as an UpdateSketch
+ * and should never be exported as an UpdateSketch. Its internal state is not necessarily
+ * finalized and may contain garbage. Also its internal concept of "nominal entries" or "k" can
+ * be meaningless. It is private for very good reasons.
+ */
+ private final UpdateSketch gadget_;
+ private final short expectedSeedHash_; //eliminates having to compute the seedHash on every union.
+ private long unionThetaLong_; //when on-heap, this is the only copy
+ private boolean unionEmpty_; //when on-heap, this is the only copy
+
+ private UnionImpl(final UpdateSketch gadget, final long seed) {
+ gadget_ = gadget;
+ expectedSeedHash_ = ThetaUtil.computeSeedHash(seed);
+ }
+
+ /**
+ * Construct a new Union SetOperation on the java heap.
+ * Called by SetOperationBuilder.
+ *
+ * @param lgNomLongs See lgNomLongs
+ * @param seed See seed
+ * @param p See Sampling Probability, p
+ * @param rf See Resize Factor
+ * @return instance of this sketch
+ */
+ static UnionImpl initNewHeapInstance(
+ final int lgNomLongs,
+ final long seed,
+ final float p,
+ final ResizeFactor rf) {
+ final UpdateSketch gadget = //create with UNION family
+ new HeapQuickSelectSketch(lgNomLongs, seed, p, rf, true);
+ final UnionImpl unionImpl = new UnionImpl(gadget, seed);
+ unionImpl.unionThetaLong_ = gadget.getThetaLong();
+ unionImpl.unionEmpty_ = gadget.isEmpty();
+ return unionImpl;
+ }
+
+ /**
+ * Construct a new Direct Union in the off-heap destination MemorySegment.
+ * Called by SetOperationBuilder.
+ *
+ * @param lgNomLongs See lgNomLongs.
+ * @param seed See seed
+ * @param p See Sampling Probability, p
+ * @param rf See Resize Factor
+ * @param dstSeg the given MemorySegment object destination.
+ * It will be cleared prior to use.
+ * @return this class
+ */
+ static UnionImpl initNewDirectInstance(
+ final int lgNomLongs,
+ final long seed,
+ final float p,
+ final ResizeFactor rf,
+ final MemorySegment dstSeg) {
+ final UpdateSketch gadget = //create with UNION family
+ new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, dstSeg, true);
+ final UnionImpl unionImpl = new UnionImpl(gadget, seed);
+ unionImpl.unionThetaLong_ = gadget.getThetaLong();
+ unionImpl.unionEmpty_ = gadget.isEmpty();
+ return unionImpl;
+ }
+
+ /**
+ * Heapify a Union from a MemorySegment Union object containing data.
+ * Called by SetOperation.
+ * @param srcSeg The source MemorySegment Union object.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See seed
+ * @return this class
+ */
+ static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) {
+ Family.UNION.checkFamilyID(extractFamilyID(srcSeg));
+ final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed);
+ final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed);
+ unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg);
+ unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg);
+ return unionImpl;
+ }
+
+ /**
+ * Fast-wrap a Union object around a Union MemorySegment object containing data.
+ * This does NO validity checking of the given MemorySegment.
+ * @param srcSeg The source MemorySegment object.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See seed
+ * @return this class
+ */
+ static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) {
+ Family.UNION.checkFamilyID(extractFamilyID(srcSeg));
+ final UpdateSketch gadget = DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed);
+ final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed);
+ unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg);
+ unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg);
+ return unionImpl;
+ }
+
+ /**
+ * Wrap a Union object around a Union MemorySegment object containing data.
+ * Called by SetOperation.
+ * @param srcSeg The source MemorySegment object.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See seed
+ * @return this class
+ */
+ static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) {
+ Family.UNION.checkFamilyID(extractFamilyID(srcSeg));
+ final UpdateSketch gadget = DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed);
+ final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed);
+ unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg);
+ unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg);
+ return unionImpl;
+ }
+
+ @Override
+ public int getCurrentBytes() {
+ return gadget_.getCurrentBytes();
+ }
+
+ @Override
+ public int getMaxUnionBytes() {
+ final int lgK = gadget_.getLgNomLongs();
+ return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3);
+ }
+
+ @Override
+ public CompactSketch getResult() {
+ return getResult(true, null);
+ }
+
+ @Override
+ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) {
+ final int gadgetCurCount = gadget_.getRetainedEntries(true);
+ final int k = 1 << gadget_.getLgNomLongs();
+ final long[] gadgetCacheCopy =
+ gadget_.hasMemorySegment() ? gadget_.getCache() : gadget_.getCache().clone();
+
+ //Pull back to k
+ final long curGadgetThetaLong = gadget_.getThetaLong();
+ final long adjGadgetThetaLong = gadgetCurCount > k
+ ? selectExcludingZeros(gadgetCacheCopy, gadgetCurCount, k + 1) : curGadgetThetaLong;
+
+ //Finalize Theta and curCount
+ final long unionThetaLong = gadget_.hasMemorySegment()
+ ? gadget_.getMemorySegment().get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG)
+ : unionThetaLong_;
+
+ final long minThetaLong = min(min(curGadgetThetaLong, adjGadgetThetaLong), unionThetaLong);
+ final int curCountOut = minThetaLong < curGadgetThetaLong
+ ? HashOperations.count(gadgetCacheCopy, minThetaLong)
+ : gadgetCurCount;
+
+ //Compact the cache
+ final long[] compactCacheOut =
+ CompactOperations.compactCache(gadgetCacheCopy, curCountOut, minThetaLong, dstOrdered);
+ final boolean empty = gadget_.isEmpty() && unionEmpty_;
+ final short seedHash = gadget_.getSeedHash();
+ return CompactOperations.componentsToCompact(
+ minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstSeg, compactCacheOut);
+ }
+
+ @Override
+ public boolean hasMemorySegment() {
+ return gadget_ instanceof DirectQuickSelectSketchR
+ ? gadget_.hasMemorySegment() : false;
+ }
+
+ @Override
+ public boolean isDirect() {
+ return gadget_ instanceof DirectQuickSelectSketchR
+ ? gadget_.isDirect() : false;
+ }
+
+ @Override
+ public void reset() {
+ gadget_.reset();
+ unionThetaLong_ = gadget_.getThetaLong();
+ unionEmpty_ = gadget_.isEmpty();
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ final byte[] gadgetByteArr = gadget_.toByteArray();
+ final MemorySegment seg = MemorySegment.ofArray(gadgetByteArr);
+ insertUnionThetaLong(seg, unionThetaLong_);
+ if (gadget_.isEmpty() != unionEmpty_) {
+ clearEmpty(seg);
+ unionEmpty_ = false;
+ }
+ return gadgetByteArr;
+ }
+
+ @Override //Stateless Union
+ public CompactSketch union(final Sketch sketchA, final Sketch sketchB, final boolean dstOrdered,
+ final MemorySegment dstSeg) {
+ reset();
+ union(sketchA);
+ union(sketchB);
+ final CompactSketch csk = getResult(dstOrdered, dstSeg);
+ reset();
+ return csk;
+ }
+
+ @Override
+ public void union(final Sketch sketchIn) {
+ //UNION Empty Rule: AND the empty states.
+
+ if (sketchIn == null || sketchIn.isEmpty()) {
+ //null and empty is interpreted as (Theta = 1.0, count = 0, empty = T). Nothing changes
+ return;
+ }
+ //sketchIn is valid and not empty
+ ThetaUtil.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash());
+ if (sketchIn instanceof SingleItemSketch) {
+ gadget_.hashUpdate(sketchIn.getCache()[0]);
+ return;
+ }
+ Sketch.checkSketchAndMemoryFlags(sketchIn);
+
+ unionThetaLong_ = min(min(unionThetaLong_, sketchIn.getThetaLong()), gadget_.getThetaLong()); //Theta rule
+ unionEmpty_ = false;
+ final boolean isOrdered = sketchIn.isOrdered();
+ final HashIterator it = sketchIn.iterator();
+ while (it.next()) {
+ final long hash = it.get();
+ if (hash < unionThetaLong_ && hash < gadget_.getThetaLong()) {
+ gadget_.hashUpdate(hash); // backdoor update, hash function is bypassed
+ } else {
+ if (isOrdered) { break; }
+ }
+ }
+ unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong()); //Theta rule with gadget
+ if (gadget_.hasMemorySegment()) {
+ final MemorySegment wseg = gadget_.getMemorySegment();
+ PreambleUtil.insertUnionThetaLong(wseg, unionThetaLong_);
+ PreambleUtil.clearEmpty(wseg);
+ }
+ }
+
+ @Override
+ public void union(final MemorySegment seg) {
+ if (seg != null) {
+ union(Sketch.wrap(seg));
+ }
+ }
+
+ @Override
+ public void update(final long datum) {
+ gadget_.update(datum);
+ }
+
+ @Override
+ public void update(final double datum) {
+ gadget_.update(datum);
+ }
+
+ @Override
+ public void update(final String datum) {
+ gadget_.update(datum);
+ }
+
+ @Override
+ public void update(final byte[] data) {
+ gadget_.update(data);
+ }
+
+ @Override
+ public void update(final ByteBuffer data) {
+ gadget_.update(data);
+ }
+
+ @Override
+ public void update(final char[] data) {
+ gadget_.update(data);
+ }
+
+ @Override
+ public void update(final int[] data) {
+ gadget_.update(data);
+ }
+
+ @Override
+ public void update(final long[] data) {
+ gadget_.update(data);
+ }
+
+ //Restricted
+
+ @Override
+ long[] getCache() {
+ return gadget_.getCache();
+ }
+
+ @Override
+ int getRetainedEntries() {
+ return gadget_.getRetainedEntries(true);
+ }
+
+ @Override
+ short getSeedHash() {
+ return gadget_.getSeedHash();
+ }
+
+ @Override
+ long getThetaLong() {
+ return min(unionThetaLong_, gadget_.getThetaLong());
+ }
+
+ @Override
+ boolean isEmpty() {
+ return gadget_.isEmpty() && unionEmpty_;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java b/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java
new file mode 100644
index 000000000..6c12ca7c2
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+/**
+ * See Update Return State
+ *
+ * @author Lee Rhodes
+ */
+public enum UpdateReturnState {
+
+ /**
+ * The hash was accepted into the sketch and the retained count was incremented.
+ */
+ InsertedCountIncremented, //all UpdateSketches
+
+ /**
+ * The hash was accepted into the sketch, the retained count was incremented.
+ * The current cache was out of room and resized larger based on the Resize Factor.
+ */
+ InsertedCountIncrementedResized, //used by HeapQuickSelectSketch
+
+ /**
+ * The hash was accepted into the sketch, the retained count was incremented.
+ * The current cache was out of room and at maximum size, so the cache was rebuilt.
+ */
+ InsertedCountIncrementedRebuilt, //used by HeapQuickSelectSketch
+
+ /**
+ * The hash was accepted into the sketch and the retained count was not incremented.
+ */
+ InsertedCountNotIncremented, //used by enhancedHashInsert for Alpha
+
+ /**
+ * The hash was inserted into the local concurrent buffer,
+ * but has not yet been propagated to the concurrent shared sketch.
+ */
+ ConcurrentBufferInserted, //used by ConcurrentHeapThetaBuffer
+
+ /**
+ * The hash has been propagated to the concurrent shared sketch.
+ * This does not reflect the action taken by the shared sketch.
+ */
+ ConcurrentPropagated, //used by ConcurrentHeapThetaBuffer
+
+ /**
+ * The hash was rejected as a duplicate.
+ */
+ RejectedDuplicate, //all UpdateSketches hashUpdate(), enhancedHashInsert
+
+ /**
+ * The update was rejected because the input datum was null or empty.
+ */
+ RejectedNullOrEmpty, //UpdateSketch.update(arr[])
+
+ /**
+ * The hash was rejected because the value was negative, zero or
+ * greater than theta.
+ */
+ RejectedOverTheta; //all UpdateSketches.hashUpdate()
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java
new file mode 100644
index 000000000..ee5f93ea2
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+import static org.apache.datasketches.common.Util.checkBounds;
+import static org.apache.datasketches.hash.MurmurHash3.hash;
+import static org.apache.datasketches.theta2.CompactOperations.componentsToCompact;
+import static org.apache.datasketches.theta2.PreambleUtil.BIG_ENDIAN_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.checkMemorySeedHash;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor;
+import static org.apache.datasketches.theta2.PreambleUtil.extractP;
+import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta2.PreambleUtil.getMemBytes;
+import static org.apache.datasketches.theta2.UpdateReturnState.RejectedNullOrEmpty;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * The parent class for the Update Sketch families, such as QuickSelect and Alpha.
+ * The primary task of an Update Sketch is to consider datums presented via the update() methods
+ * for inclusion in its internal cache. This is the sketch building process.
+ *
+ * @author Lee Rhodes
+ */
+public abstract class UpdateSketch extends Sketch {
+
+ UpdateSketch() {}
+
+ /**
+ * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto
+ * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as writable, direct objects can be wrapped. This method assumes the
+ * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}.
+ * Default Update Seed.
+ * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash.
+ * It must have a size of at least 24 bytes.
+ * @return an UpdateSketch backed by the given MemorySegment
+ */
+ public static UpdateSketch wrap(final MemorySegment srcWSeg) {
+ return wrap(srcWSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap takes the sketch image in MemorySegment and refers to it directly. There is no data copying onto
+ * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
+ * been explicitly stored as writable direct objects can be wrapped.
+ * An attempt to "wrap" earlier version sketches will result in a "heapified", normal
+ * Java Heap version of the sketch where all data will be copied to the heap.
+ * @param srcWSeg an image of a writable sketch where the image seed hash matches the given seed hash.
+ * It must have a size of at least 24 bytes.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * Compact sketches store a 16-bit hash of the seed, but not the seed itself.
+ * @return an UpdateSketch backed by the given MemorySegment
+ */
+ public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expectedSeed) {
+ Objects.requireNonNull(srcWSeg, "Source MemorySegment must not be null");
+ checkBounds(0, 24, srcWSeg.byteSize()); //need min 24 bytes
+ final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+ final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
+ final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+ final Family family = Family.idToFamily(familyID);
+ if (family != Family.QUICKSELECT) {
+ throw new SketchesArgumentException(
+ "A " + family + " sketch cannot be wrapped as an UpdateSketch.");
+ }
+ if ((serVer == 3) && (preLongs == 3)) {
+ return DirectQuickSelectSketch.writableWrap(srcWSeg, expectedSeed);
+ } else {
+ throw new SketchesArgumentException(
+ "Corrupted: An UpdateSketch image must have SerVer = 3 and preLongs = 3");
+ }
+ }
+
+ /**
+ * Instantiates an on-heap UpdateSketch from a MemorySegment. This method assumes the
+ * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}.
+ * @param srcSeg the given MemorySegment with a sketch image.
+ * It must have a size of at least 24 bytes.
+ * @return an UpdateSketch
+ */
+ public static UpdateSketch heapify(final MemorySegment srcSeg) {
+ return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Instantiates an on-heap UpdateSketch from a MemorySegment.
+ * @param srcSeg the given MemorySegment.
+ * It must have a size of at least 24 bytes.
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * See Update Hash Seed.
+ * @return an UpdateSketch
+ */
+ public static UpdateSketch heapify(final MemorySegment srcSeg, final long expectedSeed) {
+ Objects.requireNonNull(srcSeg, "Source Memory must not be null");
+ checkBounds(0, 24, srcSeg.byteSize()); //need min 24 bytes
+ final Family family = Family.idToFamily(srcSeg.get(JAVA_BYTE, FAMILY_BYTE));
+ if (family.equals(Family.ALPHA)) {
+ return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed);
+ }
+ return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed);
+ }
+
+ //Sketch interface
+
+ @Override
+ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) {
+ return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(),
+ false, false, dstOrdered, dstWSeg, getCache());
+ }
+
+ @Override
+ public int getCompactBytes() {
+ final int preLongs = getCompactPreambleLongs();
+ final int dataLongs = getRetainedEntries(true);
+ return (preLongs + dataLongs) << 3;
+ }
+
+ @Override
+ int getCurrentDataLongs() {
+ return 1 << getLgArrLongs();
+ }
+
+ @Override
+ public boolean isCompact() {
+ return false;
+ }
+
+ @Override
+ public boolean isOrdered() {
+ return false;
+ }
+
+ //UpdateSketch interface
+
+ /**
+ * Returns a new builder
+ * @return a new builder
+ */
+ public static final UpdateSketchBuilder builder() {
+ return new UpdateSketchBuilder();
+ }
+
+ /**
+ * Returns the configured ResizeFactor
+ * @return the configured ResizeFactor
+ */
+ public abstract ResizeFactor getResizeFactor();
+
+ /**
+ * Gets the configured sampling probability, p.
+ * See Sampling Probability, p
+ * @return the sampling probability, p
+ */
+ abstract float getP();
+
+ /**
+ * Gets the configured seed
+ * @return the configured seed
+ */
+ abstract long getSeed();
+
+ /**
+ * Resets this sketch back to a virgin empty state.
+ */
+ public abstract void reset();
+
+ /**
+ * Rebuilds the hash table to remove dirty values or to reduce the size
+ * to nominal entries.
+ * @return this sketch
+ */
+ public abstract UpdateSketch rebuild();
+
+ /**
+ * Present this sketch with a long.
+ *
+ * @param datum The given long datum.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final long datum) {
+ final long[] data = { datum };
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given double (or float) datum.
+ * The double will be converted to a long using Double.doubleToLongBits(datum),
+ * which normalizes all NaN values to a single NaN representation.
+ * Plus and minus zero will be normalized to plus zero.
+ * The special floating-point values NaN and +/- Infinity are treated as distinct.
+ *
+ * @param datum The given double datum.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final double datum) {
+ final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0
+ final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN & +/- infinity forms
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given String.
+ * The string is converted to a byte array using UTF8 encoding.
+ * If the string is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this will not produce the same output hash values as the {@link #update(char[])}
+ * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
+ *
+ *
+ * @param datum The given String.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final String datum) {
+ if ((datum == null) || datum.isEmpty()) {
+ return RejectedNullOrEmpty;
+ }
+ final byte[] data = datum.getBytes(UTF_8);
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given byte array.
+ * If the byte array is null or empty no update attempt is made and the method returns.
+ *
+ * @param data The given byte array.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final byte[] data) {
+ if ((data == null) || (data.length == 0)) {
+ return RejectedNullOrEmpty;
+ }
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given ByteBuffer.
+ * If the ByteBuffer is null or empty, no update attempt is made and the method returns.
+ *
+ * @param buffer the input ByteBuffer
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final ByteBuffer buffer) {
+ if (buffer == null || buffer.hasRemaining() == false) {
+ return RejectedNullOrEmpty;
+ }
+ return hashUpdate(hash(buffer, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given char array.
+ * If the char array is null or empty no update attempt is made and the method returns.
+ *
+ * Note: this will not produce the same output hash values as the {@link #update(String)}
+ * method but will be a little faster as it avoids the complexity of the UTF8 encoding.
+ *
+ * @param data The given char array.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final char[] data) {
+ if ((data == null) || (data.length == 0)) {
+ return RejectedNullOrEmpty;
+ }
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given integer array.
+ * If the integer array is null or empty no update attempt is made and the method returns.
+ *
+ * @param data The given int array.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final int[] data) {
+ if ((data == null) || (data.length == 0)) {
+ return RejectedNullOrEmpty;
+ }
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ /**
+ * Present this sketch with the given long array.
+ * If the long array is null or empty no update attempt is made and the method returns.
+ *
+ * @param data The given long array.
+ * @return
+ * See Update Return State
+ */
+ public UpdateReturnState update(final long[] data) {
+ if ((data == null) || (data.length == 0)) {
+ return RejectedNullOrEmpty;
+ }
+ return hashUpdate(hash(data, getSeed())[0] >>> 1);
+ }
+
+ //restricted methods
+
+ /**
+ * All potential updates converge here.
+ *
+ * Don't ever call this unless you really know what you are doing!
+ *
+ * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored.
+ * A negative hash value will throw an exception.
+ * @return See Update Return State
+ */
+ abstract UpdateReturnState hashUpdate(long hash);
+
+ /**
+ * Gets the Log base 2 of the current size of the internal cache
+ * @return the Log base 2 of the current size of the internal cache
+ */
+ abstract int getLgArrLongs();
+
+ /**
+ * Gets the Log base 2 of the configured nominal entries
+ * @return the Log base 2 of the configured nominal entries
+ */
+ public abstract int getLgNomLongs();
+
+ /**
+ * Returns true if the internal cache contains "dirty" values that are greater than or equal
+ * to thetaLong.
+ * @return true if the internal cache is dirty.
+ */
+ abstract boolean isDirty();
+
+ /**
+ * Returns true if numEntries (curCount) is greater than the hashTableThreshold.
+ * @param numEntries the given number of entries (or current count).
+ * @return true if numEntries (curCount) is greater than the hashTableThreshold.
+ */
+ abstract boolean isOutOfSpace(int numEntries);
+
+ static void checkUnionQuickSelectFamily(final MemorySegment seg, final int preambleLongs,
+ final int lgNomLongs) {
+ //Check Family
+ final int familyID = extractFamilyID(seg); //byte 2
+ final Family family = Family.idToFamily(familyID);
+ if (family.equals(Family.UNION)) {
+ if (preambleLongs != Family.UNION.getMinPreLongs()) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs);
+ }
+ }
+ else if (family.equals(Family.QUICKSELECT)) {
+ if (preambleLongs != Family.QUICKSELECT.getMinPreLongs()) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs);
+ }
+ } else {
+ throw new SketchesArgumentException(
+ "Possible corruption: Invalid Family: " + family.toString());
+ }
+
+ //Check lgNomLongs
+ if (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Current Memory lgNomLongs < min required size: "
+ + lgNomLongs + " < " + ThetaUtil.MIN_LG_NOM_LONGS);
+ }
+ }
+
+ static void checkMemIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs,
+ final int lgNomLongs, final int lgArrLongs) {
+
+ //Check SerVer
+ final int serVer = extractSerVer(srcSeg); //byte 1
+ if (serVer != SER_VER) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Invalid Serialization Version: " + serVer);
+ }
+
+ //Check flags
+ final int flags = extractFlags(srcSeg); //byte 5
+ final int flagsMask =
+ ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
+ if ((flags & flagsMask) > 0) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
+ }
+
+ //Check seed hashes
+ final short seedHash = checkMemorySeedHash(srcSeg, expectedSeed); //byte 6,7
+ ThetaUtil.checkSeedHashes(seedHash, ThetaUtil.computeSeedHash(expectedSeed));
+
+ //Check mem capacity, lgArrLongs
+ final long curCapBytes = srcSeg.byteSize();
+ final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs);
+ if (curCapBytes < minReqBytes) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Current Memory size < min required size: "
+ + curCapBytes + " < " + minReqBytes);
+ }
+ //check Theta, p
+ final float p = extractP(srcSeg); //bytes 12-15
+ final long thetaLong = extractThetaLong(srcSeg); //bytes 16-23
+ final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE;
+ //if (lgArrLongs <= lgNomLongs) the sketch is still resizing, thus theta cannot be < p.
+ if ((lgArrLongs <= lgNomLongs) && (theta < p) ) {
+ throw new SketchesArgumentException(
+ "Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. "
+ + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
+ }
+ }
+
+ /**
+ * This checks to see if the memory RF factor was set correctly as early versions may not
+ * have set it.
+ * @param srcSeg the source MemorySegment
+ * @param lgNomLongs the current lgNomLongs
+ * @param lgArrLongs the current lgArrLongs
+ * @return true if the memory RF factor is incorrect and the caller can either
+ * correct it or throw an error.
+ */
+ static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgNomLongs,
+ final int lgArrLongs) {
+ final int lgT = lgNomLongs + 1;
+ final int lgA = lgArrLongs;
+ final int lgR = extractLgResizeFactor(srcSeg);
+ if (lgR == 0) { return lgA != lgT; }
+ return !(((lgT - lgA) % lgR) == 0);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java
new file mode 100644
index 000000000..e8353888f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java
@@ -0,0 +1,493 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.Util.LS;
+import static org.apache.datasketches.common.Util.TAB;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * For building a new UpdateSketch, including the concurrent shared and local variants.
+ *
+ * @author Lee Rhodes
+ */
+public class UpdateSketchBuilder {
+  private int bLgNomLongs;
+  private long bSeed;
+  private ResizeFactor bRF;
+  private Family bFam;
+  private float bP;
+
+  //Fields for concurrent theta sketch
+  private int bNumPoolThreads;
+  private int bLocalLgNomLongs;
+  private boolean bPropagateOrderedCompact;
+  private double bMaxConcurrencyError;
+  private int bMaxNumLocalThreads;
+
+  /**
+   * Constructor for building a new UpdateSketch. The default configuration is
+   * <ul>
+   * <li>Nominal Entries: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}</li>
+   * <li>Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}</li>
+   * <li>Input Sampling Probability: 1.0</li>
+   * <li>Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}</li>
+   * <li>Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}.
+   * For direct sketches, which are targeted for native memory off the Java heap, this value will
+   * be fixed at either {@link ResizeFactor#X1} or {@link ResizeFactor#X2}.</li>
+   * <li>MemoryRequestServer (Direct only): {@link org.apache.datasketches.memory.DefaultMemoryRequestServer}.</li>
+   * </ul>
+   * Parameters unique to the concurrent sketches only:
+   * <ul>
+   * <li>Log-base2 of local Nominal Entries: 4 (16 local Nominal Entries)</li>
+   * <li>Concurrent NumPoolThreads: the current ConcurrentPropagationService.NUM_POOL_THREADS (nominally 3)</li>
+   * <li>Concurrent PropagateOrderedCompact: true</li>
+   * <li>Concurrent MaxConcurrencyError: 0</li>
+   * <li>Concurrent MaxNumLocalThreads: 1</li>
+   * </ul>
+   */
+  public UpdateSketchBuilder() {
+    bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES);
+    bSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+    bP = 1.0F;
+    bRF = ResizeFactor.X8;
+    bFam = Family.QUICKSELECT;
+    // Default values for concurrent sketch
+    bNumPoolThreads = ConcurrentPropagationService.NUM_POOL_THREADS;
+    bLocalLgNomLongs = 4; //default is smallest legal QS sketch
+    bPropagateOrderedCompact = true;
+    bMaxConcurrencyError = 0;
+    bMaxNumLocalThreads = 1;
+  }
+
+  /**
+   * Sets the Nominal Entries for this sketch.
+   * This value is also used for building a shared concurrent sketch.
+   * The minimum value is 16 (2^4) and the maximum value is 67,108,864 (2^26).
+   * Be aware that sketches as large as this maximum value may not have been
+   * thoroughly tested or characterized for performance.
+   *
+   * @param nomEntries Nominal Entries
+   * This will become the ceiling power of 2 if the given value is not.
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setNominalEntries(final int nomEntries) {
+    bLgNomLongs = ThetaUtil.checkNomLongs(nomEntries);
+    return this;
+  }
+
+  /**
+   * Sets the Nominal Entries for this sketch, and for a shared concurrent sketch, from the
+   * log_base2 value. Sketches near the maximum may not be thoroughly characterized for performance.
+   * @param lgNomEntries the Log Nominal Entries: minimum 4, maximum 26, else an exception is thrown.
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setLogNominalEntries(final int lgNomEntries) {
+    if ((lgNomEntries > ThetaUtil.MAX_LG_NOM_LONGS) || (lgNomEntries < ThetaUtil.MIN_LG_NOM_LONGS)) {
+      throw new SketchesArgumentException( //checked first: a shift distance >= 32 or < 0 would wrap
+          "Log Nominal Entries must be >= 4 and <= 26: " + lgNomEntries);
+    }
+    bLgNomLongs = ThetaUtil.checkNomLongs(1 << lgNomEntries);
+    return this;
+  }
+
+  /**
+   * Returns Log-base 2 Nominal Entries
+   * @return Log-base 2 Nominal Entries
+   */
+  public int getLgNominalEntries() {
+    return bLgNomLongs;
+  }
+
+  /**
+   * Sets the Nominal Entries for the concurrent local sketch. The minimum value is 16 and the
+   * maximum value is 67,108,864, which is 2^26. Be aware that sketches as large as this maximum
+   * value have not been thoroughly tested or characterized for performance.
+   *
+   * @param nomEntries Nominal Entries
+   * This will become the ceiling power of 2 if it is not.
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setLocalNominalEntries(final int nomEntries) {
+    final int lgNom = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries));
+    if ((lgNom > ThetaUtil.MAX_LG_NOM_LONGS) || (lgNom < ThetaUtil.MIN_LG_NOM_LONGS)) {
+      throw new SketchesArgumentException(
+          "Nominal Entries must be >= 16 and <= 67108864: " + nomEntries);
+    }
+    bLocalLgNomLongs = lgNom; //set only once validated, a rejected value never reaches the builder
+    return this;
+  }
+
+  /**
+   * Alternative method of setting the Nominal Entries for a local concurrent sketch from the
+   * log_base2 value.
+   * The minimum value is 4 and the maximum value is 26.
+   * Be aware that sketches as large as this maximum
+   * value have not been thoroughly tested or characterized for performance.
+   *
+   * @param lgNomEntries the Log Nominal Entries for a concurrent local sketch
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setLocalLogNominalEntries(final int lgNomEntries) {
+    if ((lgNomEntries > ThetaUtil.MAX_LG_NOM_LONGS) || (lgNomEntries < ThetaUtil.MIN_LG_NOM_LONGS)) {
+      throw new SketchesArgumentException(
+          "Log Nominal Entries must be >= 4 and <= 26: " + lgNomEntries);
+    }
+    bLocalLgNomLongs = lgNomEntries; //set only once validated
+    return this;
+  }
+
+  /**
+   * Returns Log-base 2 Nominal Entries for the concurrent local sketch
+   * @return Log-base 2 Nominal Entries for the concurrent local sketch
+   */
+  public int getLocalLgNominalEntries() {
+    return bLocalLgNomLongs;
+  }
+
+  /**
+   * Sets the long seed value that is required by the hashing function.
+   * @param seed the seed for the hashing function
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setSeed(final long seed) {
+    bSeed = seed;
+    return this;
+  }
+
+  /**
+   * Returns the seed
+   * @return the seed
+   */
+  public long getSeed() {
+    return bSeed;
+  }
+
+  /**
+   * Sets the upfront uniform sampling probability, p
+   * @param p the sampling probability, which must be &gt; 0 and &le; 1.0. NaN is rejected.
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setP(final float p) {
+    if (!((p > 0.0) && (p <= 1.0))) { //negated form so that NaN is rejected as well
+      throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p);
+    }
+    bP = p;
+    return this;
+  }
+
+  /**
+   * Returns the pre-sampling probability p
+   * @return the pre-sampling probability p
+   */
+  public float getP() {
+    return bP;
+  }
+
+  /**
+   * Sets the cache Resize Factor.
+   * @param rf the Resize Factor
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setResizeFactor(final ResizeFactor rf) {
+    bRF = rf;
+    return this;
+  }
+
+  /**
+   * Returns the Resize Factor
+   * @return the Resize Factor
+   */
+  public ResizeFactor getResizeFactor() {
+    return bRF;
+  }
+
+  /**
+   * Set the Family.
+   * @param family the family for this builder
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setFamily(final Family family) {
+    bFam = family;
+    return this;
+  }
+
+  /**
+   * Returns the Family
+   * @return the Family
+   */
+  public Family getFamily() {
+    return bFam;
+  }
+
+  /**
+   * Sets the number of pool threads used for background propagation in the concurrent sketches.
+   * @param numPoolThreads the given number of pool threads
+   */
+  public void setNumPoolThreads(final int numPoolThreads) {
+    bNumPoolThreads = numPoolThreads;
+  }
+
+  /**
+   * Gets the number of background pool threads used for propagation in the concurrent sketches.
+   * @return the number of background pool threads
+   */
+  public int getNumPoolThreads() {
+    return bNumPoolThreads;
+  }
+
+  /**
+   * Sets the Propagate Ordered Compact flag to the given value. Used with concurrent sketches.
+   *
+   * @param prop the given value
+   * @return this UpdateSketchBuilder
+   */
+  public UpdateSketchBuilder setPropagateOrderedCompact(final boolean prop) {
+    bPropagateOrderedCompact = prop;
+    return this;
+  }
+
+  /**
+   * Gets the Propagate Ordered Compact flag used with concurrent sketches.
+   * @return the Propagate Ordered Compact flag
+   */
+  public boolean getPropagateOrderedCompact() {
+    return bPropagateOrderedCompact;
+  }
+
+  /**
+   * Sets the Maximum Concurrency Error.
+   * @param maxConcurrencyError the given Maximum Concurrency Error.
+   */
+  public void setMaxConcurrencyError(final double maxConcurrencyError) {
+    bMaxConcurrencyError = maxConcurrencyError;
+  }
+
+  /**
+   * Gets the Maximum Concurrency Error
+   * @return the Maximum Concurrency Error
+   */
+  public double getMaxConcurrencyError() {
+    return bMaxConcurrencyError;
+  }
+
+  /**
+   * Sets the Maximum Number of Local Threads.
+   * This is used to set the size of the local concurrent buffers.
+   * @param maxNumLocalThreads the given Maximum Number of Local Threads
+   */
+  public void setMaxNumLocalThreads(final int maxNumLocalThreads) {
+    bMaxNumLocalThreads = maxNumLocalThreads;
+  }
+
+  /**
+   * Gets the Maximum Number of Local Threads.
+   * @return the Maximum Number of Local Threads.
+   */
+  public int getMaxNumLocalThreads() {
+    return bMaxNumLocalThreads;
+  }
+
+  // BUILD FUNCTIONS
+
+  /**
+   * Returns an on-heap UpdateSketch with the current configuration of this Builder.
+   * @return an UpdateSketch
+   */
+  public UpdateSketch build() {
+    return build(null);
+  }
+
+  /**
+   * Returns an UpdateSketch with the current configuration of this Builder
+   * with the specified backing destination MemorySegment store.
+   * Note: this cannot be used with the Alpha Family of sketches.
+   * @param dstSeg The destination MemorySegment. If null, the sketch is built on the Java heap.
+   * @return an UpdateSketch
+   */
+  public UpdateSketch build(final MemorySegment dstSeg) {
+    UpdateSketch sketch = null;
+    switch (bFam) {
+      case ALPHA: {
+        if (dstSeg == null) {
+          sketch = HeapAlphaSketch.newHeapInstance(bLgNomLongs, bSeed, bP, bRF);
+        }
+        else {
+          throw new SketchesArgumentException("AlphaSketch cannot be made Direct to Memory.");
+        }
+        break;
+      }
+      case QUICKSELECT: {
+        if (dstSeg == null) {
+          sketch = new HeapQuickSelectSketch(bLgNomLongs, bSeed, bP, bRF, false);
+        }
+        else {
+          sketch = new DirectQuickSelectSketch(
+              bLgNomLongs, bSeed, bP, bRF, dstSeg, false);
+        }
+        break;
+      }
+      default: {
+        throw new SketchesArgumentException(
+            "Given Family cannot be built as a Theta Sketch: " + bFam.toString());
+      }
+    }
+    return sketch;
+  }
+
+  /**
+   * Returns an on-heap concurrent shared UpdateSketch with the current configuration of the
+   * Builder.
+   *
+   * <p>The parameters unique to the shared concurrent sketch are:
+   * <ul>
+   * <li>Number of Pool Threads (default is 3)</li>
+   * <li>Maximum Concurrency Error</li>
+   * </ul>
+   *
+   * <p>Key parameters that are in common with other Theta sketches:
+   * <ul>
+   * <li>Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)</li>
+   * </ul>
+   *
+   * @return an on-heap concurrent UpdateSketch with the current configuration of the Builder.
+   */
+  public UpdateSketch buildShared() {
+    return buildShared(null);
+  }
+
+  /**
+   * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current
+   * configuration of the Builder and the given destination MemorySegment. If the destination
+   * MemorySegment is null, this defaults to an on-heap concurrent shared UpdateSketch.
+   *
+   * <p>The parameters unique to the shared concurrent sketch are:
+   * <ul>
+   * <li>Number of Pool Threads (default is 3)</li>
+   * <li>Maximum Concurrency Error</li>
+   * </ul>
+   *
+   * <p>Key parameters that are in common with other Theta sketches:
+   * <ul>
+   * <li>Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)</li>
+   * <li>Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)</li>
+   * </ul>
+   *
+   * @param dstSeg the given MemorySegment for Direct, otherwise null.
+   * @return a concurrent UpdateSketch with the current configuration of the Builder
+   * and the given destination MemorySegment.
+   */
+  @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD",
+      justification = "Harmless in Builder, fix later")
+  public UpdateSketch buildShared(final MemorySegment dstSeg) {
+    ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; //static, also the default of new builders
+    if (dstSeg == null) {
+      return new ConcurrentHeapQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError);
+    } else {
+      return new ConcurrentDirectQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError, dstSeg);
+    }
+  }
+
+  /**
+   * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current
+   * configuration of the Builder, the data from the given sketch, and the given destination
+   * MemorySegment. If the destination MemorySegment is null, this defaults to an on-heap
+   * concurrent shared UpdateSketch.
+   *
+   * <p>The parameters unique to the shared concurrent sketch are:
+   * <ul>
+   * <li>Number of Pool Threads (default is 3)</li>
+   * <li>Maximum Concurrency Error</li>
+   * </ul>
+   *
+   * <p>Key parameters that are in common with other Theta sketches:
+   * <ul>
+   * <li>Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)</li>
+   * <li>Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)</li>
+   * </ul>
+   *
+   * @param sketch a given UpdateSketch from which the data is used to initialize the returned
+   * shared sketch.
+   * @param dstSeg the given MemorySegment for Direct, otherwise null.
+   * @return a concurrent UpdateSketch with the current configuration of the Builder
+   * and the given destination MemorySegment.
+   */
+  @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD",
+      justification = "Harmless in Builder, fix later")
+  public UpdateSketch buildSharedFromSketch(final UpdateSketch sketch, final MemorySegment dstSeg) {
+    ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; //static, also the default of new builders
+    if (dstSeg == null) {
+      return new ConcurrentHeapQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError);
+    } else {
+      return new ConcurrentDirectQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError, dstSeg);
+    }
+  }
+
+  /**
+   * Returns a local, on-heap, concurrent UpdateSketch to be used as a per-thread local buffer
+   * along with the given concurrent shared UpdateSketch and the current configuration of this
+   * Builder.
+   *
+   * <p>The parameters unique to the local concurrent sketch are:
+   * <ul>
+   * <li>Local Nominal Entries or Local Log Nominal Entries</li>
+   * <li>Propagate Ordered Compact flag</li>
+   * </ul>
+   *
+   * @param shared the concurrent shared sketch to be accessed via the concurrent local sketch.
+   * @return an UpdateSketch to be used as a per-thread local buffer.
+   */
+  public UpdateSketch buildLocal(final UpdateSketch shared) {
+    if (!(shared instanceof ConcurrentSharedThetaSketch)) { //instanceof is also false for null
+      throw new SketchesStateException("The concurrent shared sketch must be built first.");
+    }
+    return new ConcurrentHeapThetaBuffer(bLocalLgNomLongs, bSeed,
+        (ConcurrentSharedThetaSketch) shared, bPropagateOrderedCompact, bMaxNumLocalThreads);
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("UpdateSketchBuilder configuration:").append(LS);
+    sb.append("LgK:").append(TAB).append(bLgNomLongs).append(LS);
+    sb.append("K:").append(TAB).append(1 << bLgNomLongs).append(LS);
+    sb.append("LgLocalK:").append(TAB).append(bLocalLgNomLongs).append(LS);
+    sb.append("LocalK:").append(TAB).append(1 << bLocalLgNomLongs).append(LS);
+    sb.append("Seed:").append(TAB).append(bSeed).append(LS);
+    sb.append("p:").append(TAB).append(bP).append(LS);
+    sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS);
+    sb.append("Family:").append(TAB).append(bFam).append(LS);
+    sb.append("Propagate Ordered Compact").append(TAB).append(bPropagateOrderedCompact).append(LS);
+    sb.append("NumPoolThreads").append(TAB).append(bNumPoolThreads).append(LS);
+    sb.append("MaxConcurrencyError").append(TAB).append(bMaxConcurrencyError).append(LS);
+    sb.append("MaxNumLocalThreads").append(TAB).append(bMaxNumLocalThreads).append(LS);
+    return sb.toString();
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java
new file mode 100644
index 000000000..1558c49e7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits;
+import static org.apache.datasketches.theta2.PreambleUtil.ENTRY_BITS_BYTE_V4;
+import static org.apache.datasketches.theta2.PreambleUtil.NUM_ENTRIES_BYTES_BYTE_V4;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Read-only wrapper around a serialized compact compressed sketch. It is not empty, not a single item.
+ *
+ * <p>This sketch can only be associated with a Serialization Version 4 format binary image.
+ */
+class WrappedCompactCompressedSketch extends WrappedCompactSketch {
+
+  /**
+   * Constructs this sketch over the given bytes.
+   * @param bytes the serialized compact compressed sketch image.
+   */
+  WrappedCompactCompressedSketch(final byte[] bytes) {
+    super(bytes);
+  }
+
+  /**
+   * Wraps the given bytes, which must be a SerVer 4 compressed CompactSketch image.
+   * @param bytes representation of serialized compressed compact sketch.
+   * @param seedHash The update seedHash, which is checked against the seedHash stored in bytes.
+   * @return a new sketch wrapping the given bytes
+   */
+  static WrappedCompactCompressedSketch wrapInstance(final byte[] bytes, final short seedHash) {
+    final short storedSeedHash = ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT);
+    ThetaUtil.checkSeedHashes(storedSeedHash, seedHash);
+    return new WrappedCompactCompressedSketch(bytes);
+  }
+
+  //Sketch Overrides
+
+  @Override
+  public int getCurrentBytes() {
+    final int preambleBytes = bytes_[PREAMBLE_LONGS_BYTE] * Long.BYTES;
+    final int bitsPerEntry = bytes_[ENTRY_BITS_BYTE_V4];
+    final int countBytes = bytes_[NUM_ENTRIES_BYTES_BYTE_V4];
+    return preambleBytes + countBytes + wholeBytesToHoldBits(getRetainedEntries() * bitsPerEntry);
+  }
+
+  private static final int START_PACKED_DATA_EXACT_MODE = 8;
+  private static final int START_PACKED_DATA_ESTIMATION_MODE = 16;
+
+  @Override
+  public int getRetainedEntries(final boolean valid) { //compact is always valid
+    // The entry count is stored least significant byte first with variable length:
+    // its most significant all-zero bytes are omitted, and
+    // one preamble byte records how many bytes were actually stored.
+    final boolean hasTheta = bytes_[PREAMBLE_LONGS_BYTE] > 1; // then the second long has theta
+    final int countBytes = bytes_[NUM_ENTRIES_BYTES_BYTE_V4];
+    final int start = hasTheta ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE;
+    int count = 0;
+    for (int i = 0, shift = 0; i < countBytes; i++, shift += Byte.SIZE) {
+      count |= Byte.toUnsignedInt(bytes_[start + i]) << shift;
+    }
+    return count;
+  }
+
+  @Override
+  public long getThetaLong() {
+    if (bytes_[PREAMBLE_LONGS_BYTE] > 1) { return ByteArrayUtil.getLongLE(bytes_, 8); }
+    return Long.MAX_VALUE;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return false;
+  }
+
+  @Override
+  public boolean isOrdered() {
+    return true;
+  }
+
+  @Override
+  public HashIterator iterator() {
+    final boolean hasTheta = bytes_[PREAMBLE_LONGS_BYTE] > 1;
+    final int countStart = hasTheta ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE;
+    // offset just past the variable-length entry count that getRetainedEntries() decodes
+    final int packedStart = countStart + bytes_[NUM_ENTRIES_BYTES_BYTE_V4];
+    final byte entryBits = bytes_[ENTRY_BITS_BYTE_V4];
+    return new BytesCompactCompressedHashIterator(
+        bytes_, packedStart, entryBits, getRetainedEntries());
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java
new file mode 100644
index 000000000..08939ee41
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.ByteArrayUtil.getIntLE;
+import static org.apache.datasketches.common.ByteArrayUtil.getLongLE;
+import static org.apache.datasketches.common.ByteArrayUtil.getShortLE;
+import static org.apache.datasketches.theta2.CompactOperations.memoryToCompact;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT;
+import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item.
+ *
+ * <p>This sketch can only be associated with a Serialization Version 3 format binary image.
+ */
+class WrappedCompactSketch extends CompactSketch {
+ final byte[] bytes_; //the wrapped image; the caller's array is retained, not copied
+
+ /**
+ * Construct this sketch with the given bytes. The array is retained, not copied.
+ * @param bytes containing serialized compact sketch.
+ */
+ WrappedCompactSketch(final byte[] bytes) {
+ bytes_ = bytes;
+ }
+
+ /**
+ * Wraps the given byte array, which must be a SerVer 3 CompactSketch image.
+ * @param bytes representation of serialized compact sketch.
+ * @param seedHash The update seedHash.
+ * It is checked against the seedHash read from the given bytes.
+ * @return a new WrappedCompactSketch that wraps the given bytes
+ */
+ static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHash) {
+ ThetaUtil.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash);
+ return new WrappedCompactSketch(bytes);
+ }
+
+ //Sketch Overrides
+
+ @Override
+ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) {
+ return memoryToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg);
+ }
+
+ @Override
+ public int getCurrentBytes() {
+ final int preLongs = bytes_[PreambleUtil.PREAMBLE_LONGS_BYTE];
+ final int numEntries = (preLongs == 1) ? 0 : getIntLE(bytes_, RETAINED_ENTRIES_INT); //not read when preLongs == 1
+ return (preLongs + numEntries) << 3; //preamble longs plus entries, times 8
+ }
+
+ @Override
+ public int getRetainedEntries(final boolean valid) { //compact is always valid
+ final int preLongs = bytes_[PREAMBLE_LONGS_BYTE];
+ return (preLongs == 1) ? 0 : getIntLE(bytes_, RETAINED_ENTRIES_INT);
+ }
+
+ @Override
+ public long getThetaLong() {
+ final int preLongs = bytes_[PREAMBLE_LONGS_BYTE];
+ return (preLongs > 2) ? getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE; //theta is read only when preLongs > 2
+ }
+
+ @Override
+ public boolean hasMemorySegment() {
+ return false;
+ }
+
+ @Override
+ public boolean isDirect() {
+ return false;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0;
+ }
+
+ @Override
+ public boolean isOrdered() {
+ return (bytes_[FLAGS_BYTE] & ORDERED_FLAG_MASK) > 0;
+ }
+
+ @Override
+ public HashIterator iterator() {
+ return new BytesCompactHashIterator(
+ bytes_,
+ bytes_[PREAMBLE_LONGS_BYTE] << 3,
+ getRetainedEntries()
+ );
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ return Arrays.copyOf(bytes_, getCurrentBytes()); //a copy, sized by getCurrentBytes()
+ }
+
+ //restricted methods
+
+ @Override
+ long[] getCache() {
+ final long[] cache = new long[getRetainedEntries()]; //filled below in iterator order
+ int i = 0;
+ final HashIterator it = iterator();
+ while (it.next()) {
+ cache[i++] = it.get();
+ }
+ return cache;
+ }
+
+ @Override
+ int getCompactPreambleLongs() {
+ return bytes_[PREAMBLE_LONGS_BYTE];
+ }
+
+ @Override
+ int getCurrentPreambleLongs() {
+ return bytes_[PREAMBLE_LONGS_BYTE];
+ }
+
+ @Override
+ MemorySegment getMemorySegment() {
+ return null;
+ }
+
+ @Override
+ short getSeedHash() {
+ return getShortLE(bytes_, SEED_HASH_SHORT);
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/package-info.java b/src/main/java/org/apache/datasketches/theta2/package-info.java
new file mode 100644
index 000000000..71c333bb5
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/package-info.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * The theta package contains the basic sketch classes that are members of the
+ * Theta Sketch Framework.
+ *
+ * There is a separate Tuple package for many of the sketches that are derived from the
+ * same algorithms defined in the Theta Sketch Framework paper.
+ */
+package org.apache.datasketches.theta2;
diff --git a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java
index f6b22cb50..2cdb99a0a 100644
--- a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java
+++ b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java
@@ -19,9 +19,12 @@
package org.apache.datasketches.thetacommon;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
import static java.lang.Math.max;
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SketchesStateException;
import org.apache.datasketches.memory.Memory;
@@ -284,6 +287,107 @@ public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int
throw new SketchesArgumentException("Key not found and no empty slot in table!");
}
+ //With MemorySegment
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment.
+   * Returns the index if found, -1 if not found. The input MemorySegment may be read only.
+   *
+   * @param seg The MemorySegment containing the hash table to search.
+   * The hash table portion must be a power of 2 in size.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * @param hash The hash value to search for. Must not be zero.
+   * @param segOffsetBytes offset in the MemorySegment where the hashTable starts
+   * @return Current probe index if found, -1 if not found.
+   * @throws SketchesArgumentException if the given hash is zero.
+   */
+  public static int hashSearchMemory(final MemorySegment seg, final int lgArrLongs, final long hash,
+      final int segOffsetBytes) {
+    if (hash == 0) {
+      throw new SketchesArgumentException("Given hash must not be zero: " + hash);
+    }
+    final int arrayMask = (1 << lgArrLongs) - 1;
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+    final int loopIndex = curProbe;
+    do {
+      final long curProbeOffsetBytes = ((long) curProbe << 3) + segOffsetBytes; //long: no int overflow
+      final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes);
+      if (curArrayHash == EMPTY) { return -1; }
+      else if (curArrayHash == hash) { return curProbe; }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    return -1;
+  }
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment.
+   * This method assumes that the input hash is not a duplicate.
+   * Useful for rebuilding tables to avoid unnecessary comparisons.
+   * Returns the index of insertion, which is always positive or zero.
+   * Throws an exception if table has no empty slot.
+   *
+   * @param wseg The writable MemorySegment that contains the hashTable to insert into.
+   * The size of the hashTable portion must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * @param hash value that must not be zero and will be inserted into the array into an empty slot.
+   * A zero hash is not detected by this method.
+   * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts
+   * @return index of insertion. Always positive or zero.
+   */
+  public static int hashInsertOnlyMemory(final MemorySegment wseg, final int lgArrLongs,
+      final long hash, final int memOffsetBytes) {
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+    // search for an empty slot only; duplicates are not checked
+    final int loopIndex = curProbe;
+    do {
+      final long curProbeOffsetBytes = ((long) curProbe << 3) + memOffsetBytes; //long: no int overflow
+      final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes);
+      if (curArrayHash == EMPTY) {
+        wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash);
+        return curProbe;
+      }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    throw new SketchesArgumentException("No empty slot in table!");
+  }
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts
+   * values directly into a writable MemorySegment.
+   * Returns index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
+   * Throws an exception if the value is not found and table has no empty slot.
+   *
+   * @param wseg The writable MemorySegment that contains the hashTable to insert into.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * @param hash The hash value to be potentially inserted into an empty slot only if it is not
+   * a duplicate of any other hash value in the table. It must not be zero.
+   * A zero hash is not detected by this method.
+   * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts
+   * @return index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
+   */
+  public static int hashSearchOrInsertMemory(final MemorySegment wseg, final int lgArrLongs,
+      final long hash, final int memOffsetBytes) {
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+    // search for duplicate or zero
+    final int loopIndex = curProbe;
+    do {
+      final long curProbeOffsetBytes = ((long) curProbe << 3) + memOffsetBytes; //long: no int overflow
+      final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes);
+      if (curArrayHash == EMPTY) {
+        wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash);
+        return ~curProbe;
+      } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate
+      // curArrayHash is not a duplicate and not zero, continue searching
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    throw new SketchesArgumentException("Key not found and no empty slot in table!");
+  }
+
//Other related methods
/**
From 543e866b53ecd2c5462b10c3a3ed576547a1c676 Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Tue, 10 Jun 2025 11:30:15 -0700
Subject: [PATCH 10/25] Partial progress on Theta tests.
---
.../common/MemorySegmentStatus.java | 58 ++
.../org/apache/datasketches/common/Util.java | 131 ++-
.../apache/datasketches/theta2/AnotBimpl.java | 9 +
.../datasketches/theta2/CompactSketch.java | 15 +
.../theta2/ConcurrentHeapThetaBuffer.java | 10 -
.../theta2/ConcurrentSharedThetaSketch.java | 30 -
.../theta2/DirectCompactSketch.java | 11 +-
.../theta2/DirectQuickSelectSketchR.java | 10 +-
.../theta2/EmptyCompactSketch.java | 6 +-
.../datasketches/theta2/HeapAlphaSketch.java | 5 -
.../theta2/HeapCompactSketch.java | 5 -
.../theta2/HeapQuickSelectSketch.java | 5 -
.../datasketches/theta2/IntersectionImpl.java | 29 +-
.../MemoryCompactCompressedHashIterator.java | 23 +-
.../datasketches/theta2/PreambleUtil.java | 30 +-
.../datasketches/theta2/SetOperation.java | 45 +-
.../datasketches/theta2/SingleItemSketch.java | 5 -
.../apache/datasketches/theta2/Sketch.java | 42 +-
.../apache/datasketches/theta2/Sketches.java | 407 ++++++++
.../apache/datasketches/theta2/UnionImpl.java | 41 +-
.../datasketches/theta2/UpdateSketch.java | 15 +
.../theta2/WrappedCompactSketch.java | 15 -
.../datasketches/theta2/AnotBimplTest.java | 333 +++++++
.../theta2/BackwardConversions.java | 238 +++++
.../datasketches/theta2/BitPackingTest.java | 166 ++++
.../theta2/CompactSketchTest.java | 674 +++++++++++++
.../CornerCaseThetaSetOperationsTest.java | 518 ++++++++++
.../theta2/DirectIntersectionTest.java | 769 ++++++++++++++
.../theta2/DirectQuickSelectSketchTest.java | 936 ++++++++++++++++++
.../datasketches/theta2/DirectUnionTest.java | 827 ++++++++++++++++
.../apache/datasketches/theta2/EmptyTest.java | 169 ++++
.../datasketches/theta2/ExamplesTest.java | 124 +++
.../theta2/ForwardCompatibilityTest.java | 219 ++++
.../theta2/HeapAlphaSketchTest.java | 696 +++++++++++++
.../theta2/HeapIntersectionTest.java | 534 ++++++++++
.../theta2/HeapQuickSelectSketchTest.java | 642 ++++++++++++
.../datasketches/theta2/HeapUnionTest.java | 669 +++++++++++++
.../theta2/HeapifyWrapSerVer1and2Test.java | 609 ++++++++++++
.../datasketches/theta2/IteratorTest.java | 133 +++
.../theta2/SingleItemSketchTest.java | 377 +++++++
.../datasketches/theta2/SketchTest.java | 440 ++++++++
.../datasketches/theta2/UnionImplTest.java | 320 ++++++
.../datasketches/theta2/UpdateSketchTest.java | 237 +++++
43 files changed, 10347 insertions(+), 230 deletions(-)
create mode 100644 src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java
create mode 100644 src/main/java/org/apache/datasketches/theta2/Sketches.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/BitPackingTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/CornerCaseThetaSetOperationsTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/EmptyTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/ExamplesTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/IteratorTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/SketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/UnionImplTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java
diff --git a/src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java b/src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java
new file mode 100644
index 000000000..889e8c0b8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.common;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Methods for inquiring the status of a backing MemorySegment.
+ */
+public interface MemorySegmentStatus {
+
+ /**
+ * Returns true if this object's internal data is backed by a MemorySegment,
+ * which may be on-heap or off-heap.
+ * @return true if this object's internal data is backed by a MemorySegment.
+ */
+ boolean hasMemorySegment();
+
+ /**
+ * Returns true if this object's internal data is backed by an off-heap (direct or native) MemorySegment.
+ * @return true if this object's internal data is backed by an off-heap (direct or native) MemorySegment.
+ */
+ boolean isDirect();
+
+ /**
+ * Returns true if the backing MemorySegment of this object refers to the same MemorySegment of that.
+ * They can either have the same off-heap memory location and size, or refer to the same on-heap array object.
+ *
+ * If both segments are off-heap, they both must have the same starting address and the same size.
+ *
+ * For on-heap segments, both segments must be based on or derived from the same array object and neither segment
+ * can be read-only.
+ *
+ * Returns false if the given MemorySegment is null.
+ *
+ * @param that The given MemorySegment.
+ * @return true if the backing MemorySegment of this object hierarchy refers to the same MemorySegment of that.
+ */
+ boolean isSameResource(final MemorySegment that);
+
+}
diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java
index 969cdc389..11615a39a 100644
--- a/src/main/java/org/apache/datasketches/common/Util.java
+++ b/src/main/java/org/apache/datasketches/common/Util.java
@@ -28,7 +28,6 @@
import java.lang.foreign.MemorySegment;
import java.util.Comparator;
-import java.util.Objects;
/**
* Common utility functions.
@@ -802,6 +801,80 @@ public static boolean le(final Object item1, final Object item2, final Compa
//MemorySegment related
+ /**
+ * Clears all bytes of this MemorySegment to zero.
+ * @param seg the given MemorySegment
+ */
+ public static void clear(final MemorySegment seg) {
+ seg.fill((byte)0);
+ }
+
+ /**
+ * Clears a portion of this MemorySegment to zero.
+ * @param seg the given MemorySegment
+ * @param offsetBytes offset bytes relative to this MemorySegment start
+ * @param lengthBytes the length in bytes
+ */
+ public static void clear(final MemorySegment seg, final long offsetBytes, final long lengthBytes) {
+ final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes);
+ slice.fill((byte)0);
+ }
+
+ /**
+ * Clears the bits defined by the bitMask
+ * @param seg the given MemorySegment
+ * @param offsetBytes offset bytes relative to this MemorySegment start.
+ * @param bitMask the bits set to one will be cleared
+ */
+ public static void clearBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) {
+ final byte b = seg.get(JAVA_BYTE, offsetBytes);
+ seg.set(JAVA_BYTE, offsetBytes, (byte)(b & ~bitMask));
+ }
+
+ /**
+ * Returns true if both segments have the same contents and the same length.
+ * @param seg1 the given MemorySegment #1
+ * @param seg2 the given MemorySegment #2
+ * @return true if both segments have the same contents and the same length.
+ */
+ public static boolean equalContents(final MemorySegment seg1, final MemorySegment seg2) {
+ if (seg1.byteSize() != seg2.byteSize()) { return false; }
+ return equalContents(seg1, 0, seg2, 0, seg1.byteSize());
+ }
+
+ /**
+ * Returns true if both segments have the same content for the specified region.
+ * @param seg1 the given MemorySegment #1
+ * @param seg1offsetBytes the starting offset for MemorySegment #1 in bytes.
+ * @param seg2 the given MemorySegment #2
+ * @param seg2offsetBytes the starting offset for MemorySegment #2 in bytes.
+ * @param lengthBytes the length of the region to be compared, in bytes.
+ * @return true, if both segments have the same content for the specified region.
+ */
+ public static boolean equalContents(
+ final MemorySegment seg1,
+ final long seg1offsetBytes,
+ final MemorySegment seg2,
+ final long seg2offsetBytes,
+ final long lengthBytes) {
+ final boolean sameSeg = seg1.equals(seg2) && (seg1.byteSize() == seg2.byteSize());
+ if (sameSeg && (seg1offsetBytes == seg2offsetBytes)) { return true; } //same region of the same segment
+ return MemorySegment.mismatch(seg1, seg1offsetBytes, seg1offsetBytes + lengthBytes,
+ seg2, seg2offsetBytes, seg2offsetBytes + lengthBytes) == -1;
+ }
+
+ /**
+ * Fills a portion of this MemorySegment with the given byte value.
+ * @param seg the given MemorySegment
+ * @param offsetBytes offset bytes relative to this MemorySegment start
+ * @param lengthBytes the length in bytes
+ * @param value the given byte value
+ */
+ public static void fill(final MemorySegment seg, final long offsetBytes, final long lengthBytes, final byte value) {
+ final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes);
+ slice.fill(value);
+ }
+
/**
* Returns true if the two given MemorySegments refer to the same backing resource,
* which is either an off-heap memory location and size, or the same on-heap array object.
@@ -811,13 +884,14 @@ public static boolean le(final Object item1, final Object item2, final Compa
* For on-heap segments, both segments must be based on or derived from the same array object and neither segment
* can be read-only.
*
+ * Returns false if either argument is null.
+ *
* @param seg1 The first given MemorySegment
* @param seg2 The second given MemorySegment
* @return true if both MemorySegments are determined to be the same backing memory.
*/
public static boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
- Objects.requireNonNull(seg1, "seg1 must not be null.");
- Objects.requireNonNull(seg2, "seg2 must not be null.");
+ if ((seg1 == null) || (seg2 == null)) { return false; }
if (!seg1.scope().isAlive() || !seg2.scope().isAlive()) {
throw new IllegalArgumentException("Both arguments must be alive.");
}
@@ -842,59 +916,16 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme
* If the requested capacity is not divisible by eight, the returned size
* will be rolled up to the next multiple of eight.
*
- * @param capacityBytes The new capacity being requested.
+ * @param capacityBytes The new capacity being requested. It must not be negative.
* @return a new MemorySegment with the requested capacity.
*/
public static MemorySegment newHeapSegment(final int capacityBytes) {
- if (capacityBytes < 0) {
- throw new IllegalArgumentException("Requested capacity must be positive.");
- }
- final long[] array = ((capacityBytes * 0x7) == 0) ? new long[capacityBytes >>> 3] : new long[(capacityBytes >>> 3) + 1];
+ final long[] array = ((capacityBytes & 0x7) == 0)
+ ? new long[capacityBytes >>> 3]
+ : new long[(capacityBytes >>> 3) + 1];
return MemorySegment.ofArray(array);
}
- /**
- * Clears all bytes of this MemorySegment to zero.
- * @param seg the given MemorySegment
- */
- public static void clear(final MemorySegment seg) {
- seg.fill((byte)0);
- }
-
- /**
- * Clears a portion of this MemorySegment to zero.
- * @param seg the given MemorySegment
- * @param offsetBytes offset bytes relative to this MemorySegment start
- * @param lengthBytes the length in bytes
- */
- public static void clear(final MemorySegment seg, final long offsetBytes, final long lengthBytes) {
- final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes);
- slice.fill((byte)0);
- }
-
- /**
- * Fills a portion of this Memory region to the given byte value.
- * @param seg the given MemorySegment
- * @param offsetBytes offset bytes relative to this Memory start
- * @param lengthBytes the length in bytes
- * @param value the given byte value
- */
- public static void fill(final MemorySegment seg, final long offsetBytes, final long lengthBytes, final byte value) {
- final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes);
- slice.fill(value);
- }
-
- /**
- * Clears the bits defined by the bitMask
- * @param seg the given MemorySegment
- * @param offsetBytes offset bytes relative to this Memory start.
- * @param bitMask the bits set to one will be cleared
- */
- public static void clearBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) {
- final byte b = seg.get(JAVA_BYTE, offsetBytes);
- seg.set(JAVA_BYTE, offsetBytes, (byte)(b & ~bitMask));
- }
-
/**
* Sets the bits defined by the bitMask
* @param seg the given MemorySegment
diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java
index 4931bb680..21872ecd6 100644
--- a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java
+++ b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java
@@ -233,6 +233,15 @@ long getThetaLong() {
return thetaLong_;
}
+ @Override
+ public boolean hasMemorySegment() { return false; }
+
+ @Override
+ public boolean isDirect() { return false; }
+
+ @Override
+ public boolean isSameResource( final MemorySegment that) { return false; }
+
@Override
boolean isEmpty() {
return empty_;
diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java
index 0498eed34..6ee764678 100644
--- a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java
@@ -343,11 +343,26 @@ public Family getFamily() {
return Family.COMPACT;
}
+ @Override
+ public boolean hasMemorySegment() {
+ return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).hasMemorySegment());
+ }
+
@Override
public boolean isCompact() {
return true;
}
+ @Override
+ public boolean isDirect() {
+ return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isDirect());
+ }
+
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isSameResource(that));
+ }
+
@Override
public double getEstimate() {
return Sketch.estimate(getThetaLong(), getRetainedEntries());
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
index ab1d41a65..c93ed892b 100644
--- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
@@ -147,16 +147,6 @@ public double getUpperBound(final int numStdDev) {
return shared.getUpperBound(numStdDev);
}
- @Override
- public boolean hasMemorySegment() {
- return false;
- }
-
- @Override
- public boolean isDirect() {
- return false;
- }
-
@Override
public boolean isEmpty() {
return shared.isEmpty();
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
index 5bf147049..40746c3e6 100644
--- a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
@@ -22,8 +22,6 @@
import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.datasketches.common.Util;
-
/**
* An internal interface to define the API of a concurrent shared theta sketch.
* It reflects all data processed by a single or multiple update threads, and can serve queries at
@@ -140,34 +138,6 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s
double getUpperBound(int numStdDev);
- /**
- * Returns true if this object's internal data is backed by a Memory object,
- * which may be on-heap or off-heap.
- * @return true if this object's internal data is backed by a Memory object.
- */
- boolean hasMemorySegment();
-
- /**
- * Returns true if this object's internal data is backed by direct (off-heap) Memory.
- * @return true if this object's internal data is backed by direct (off-heap) Memory.
- */
- boolean isDirect();
-
- /**
- * Returns true if the two given MemorySegments refer to the same backing resource,
- * which is either an off-heap memory location and size, or the same on-heap array object.
- *
- * This is a convenient delegate of
- * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}
- *
- * @param seg1 The first given MemorySegment
- * @param seg2 The second given MemorySegment
- * @return true if both MemorySegments are determined to be the same backing memory.
- */
- default boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
- return Util.isSameResource(seg1, seg2);
- }
-
boolean isEmpty();
boolean isEstimationMode();
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java
index 188f2cd73..ef0e4b604 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java
@@ -33,6 +33,7 @@
import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
@@ -102,12 +103,12 @@ public long getThetaLong() {
@Override
public boolean hasMemorySegment() {
- return seg_ != null;
+ return seg_ != null && seg_.scope().isAlive();
}
@Override
public boolean isDirect() {
- return hasMemorySegment() ? seg_.isNative() : false;
+ return hasMemorySegment() && seg_.isNative();
}
@Override
@@ -123,6 +124,12 @@ public boolean isOrdered() {
return (extractFlags(seg_) & ORDERED_FLAG_MASK) > 0;
}
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return hasMemorySegment() && Util.isSameResource(seg_, that);
+
+ }
+
@Override
public HashIterator iterator() {
return new MemoryHashIterator(seg_, getRetainedEntries(true), getThetaLong());
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
index 33d371554..c0db75b16 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
@@ -47,6 +47,7 @@
import org.apache.datasketches.common.ResizeFactor;
import org.apache.datasketches.common.SketchesReadOnlyException;
import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
@@ -147,12 +148,12 @@ public long getThetaLong() {
@Override
public boolean hasMemorySegment() {
- return wseg_ != null;
+ return wseg_ != null && wseg_.scope().isAlive();
}
@Override
public boolean isDirect() {
- return hasMemorySegment() ? wseg_.isNative() : false;
+ return hasMemorySegment() && wseg_.isNative();
}
@Override
@@ -160,6 +161,11 @@ public boolean isEmpty() {
return PreambleUtil.isEmptyFlag(wseg_);
}
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return hasMemorySegment() && Util.isSameResource(wseg_, that);
+ }
+
@Override
public HashIterator iterator() {
return new MemoryHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong());
diff --git a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
index c9c6dd609..c4679374b 100644
--- a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
@@ -23,6 +23,7 @@
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.SketchesArgumentException;
/**
@@ -134,11 +135,6 @@ int getCurrentPreambleLongs() {
return 1;
}
- @Override
- MemorySegment getMemorySegment() {
- return null;
- }
-
@Override
short getSeedHash() {
return 0;
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
index 5bc11d712..a3b7bd145 100644
--- a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
@@ -293,11 +293,6 @@ int getCurrentPreambleLongs() {
return Family.ALPHA.getMinPreLongs();
}
- @Override
- MemorySegment getMemorySegment() {
- return null;
- }
-
@Override
long[] getCache() {
return cache_;
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
index 065213191..6cffd9818 100644
--- a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
@@ -128,11 +128,6 @@ int getCompactPreambleLongs() {
return preLongs_;
}
- @Override
- MemorySegment getMemorySegment() {
- return null;
- }
-
@Override
short getSeedHash() {
return seedHash_;
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java
index 3096e5e1a..082259c21 100644
--- a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java
@@ -230,11 +230,6 @@ int getLgArrLongs() {
return lgArrLongs_;
}
- @Override
- MemorySegment getMemorySegment() {
- return null;
- }
-
@Override
UpdateReturnState hashUpdate(final long hash) {
HashOperations.checkHashCorruption(hash);
diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
index ba7bcd8ac..92ca096c3 100644
--- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
@@ -65,6 +65,7 @@
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SketchesReadOnlyException;
import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
@@ -93,9 +94,9 @@ class IntersectionImpl extends Intersection {
/**
* Constructor: Sets the class finals and computes, sets and checks the seedHash.
- * @param wseg Can be either a Source(e.g. wrap) or Destination (new Direct) MemorySegment.
+ * @param wseg Can be either a Source(e.g. wrap) or Destination (new offHeap) MemorySegment.
* @param seed Used to validate incoming sketch arguments.
- * @param dstMemFlag The given MemorySegment is a Destination (new Direct) MemorySegment.
+ * @param dstMemFlag The given MemorySegment is a Destination (new offHeap) MemorySegment.
* @param readOnly True if MemorySegment is to be treated as read only.
*/
protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstMemFlag,
@@ -103,7 +104,7 @@ protected IntersectionImpl(final MemorySegment wseg, final long seed, final bool
readOnly_ = readOnly;
if (wseg != null) {
wseg_ = wseg;
- if (dstMemFlag) { //DstMem: compute & store seedHash, no seedhash checking
+ if (dstMemFlag) { //DstMem: compute & store seedHash, no seedHash checking
checkMinSizeMemory(wseg);
maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wseg) : 0; //Only Off Heap
seedHash_ = ThetaUtil.computeSeedHash(seed);
@@ -275,17 +276,17 @@ else if (curCount_ < 0 && sketchInEntries > 0) {
final int priorLgArrLongs = lgArrLongs_; //prior only used in error message
lgArrLongs_ = requiredLgArrLongs;
- if (wseg_ != null) { //Off heap, check if current dstMem is large enough
+ if (wseg_ != null) { //Off heap, check if current dstSeg is large enough
insertCurCount(wseg_, curCount_);
insertLgArrLongs(wseg_, lgArrLongs_);
if (requiredLgArrLongs <= maxLgArrLongs_) {
wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0);
}
- else { //not enough space in dstMem
+ else { //not enough space in dstSeg
final int requiredBytes = (8 << requiredLgArrLongs) + 24;
final int givenBytes = (8 << priorLgArrLongs) + 24;
throw new SketchesArgumentException(
- "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes);
+ "Insufficient internal MemorySegment space: " + requiredBytes + " > " + givenBytes);
}
}
else { //On the heap, allocate a HT
@@ -305,6 +306,9 @@ else if (curCount_ > 0 && sketchInEntries > 0) {
}
}
+ @Override
+ MemorySegment getMemorySegment() { return wseg_; }
+
@Override
public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) {
if (curCount_ < 0) {
@@ -327,7 +331,7 @@ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dst
if (wseg_ != null) {
final int htLen = 1 << lgArrLongs_;
hashTable = new long[htLen];
- MemorySegment.copy(dstSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
+ MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
} else {
hashTable = hashTable_;
}
@@ -341,7 +345,7 @@ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dst
@Override
public boolean hasMemorySegment() {
- return wseg_ != null;
+ return wseg_ != null && wseg_.scope().isAlive();
}
@Override
@@ -351,7 +355,12 @@ public boolean hasResult() {
@Override
public boolean isDirect() {
- return hasMemorySegment() ? wseg_.isNative() : false;
+ return hasMemorySegment() && wseg_.isNative();
+ }
+
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return hasMemorySegment() && Util.isSameResource(wseg_, that);
}
@Override
@@ -412,7 +421,7 @@ long[] getCache() {
if (wseg_ == null) {
return hashTable_ != null ? hashTable_ : new long[0];
}
- //Direct
+ //offHeap
final int arrLongs = 1 << lgArrLongs_;
final long[] outArr = new long[arrLongs];
MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs);
diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
index 96ccb41e8..31aa6ff92 100644
--- a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
@@ -24,10 +24,13 @@
import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.MemorySegmentStatus;
+import org.apache.datasketches.common.Util;
+
/*
* This is to uncompress serial version 4 sketch incrementally
*/
-class MemoryCompactCompressedHashIterator implements HashIterator {
+class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus {
private MemorySegment seg;
private int offset;
private int entryBits;
@@ -44,8 +47,7 @@ class MemoryCompactCompressedHashIterator implements HashIterator {
final MemorySegment srcSeg,
final int offset,
final int entryBits,
- final int numEntries
- ) {
+ final int numEntries) {
this.seg = srcSeg;
this.offset = offset;
this.entryBits = entryBits;
@@ -64,6 +66,21 @@ public long get() {
return buffer[index & 7];
}
+ @Override
+ public boolean hasMemorySegment() {
+ return seg != null && seg.scope().isAlive();
+ }
+
+ @Override
+ public boolean isDirect() {
+ return hasMemorySegment() && seg.isNative();
+ }
+
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return hasMemorySegment() && Util.isSameResource(seg, that);
+ }
+
@Override
public boolean next() {
if (++index == numEntries) { return false; }
diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
index 55035a456..564446ed4 100644
--- a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
+++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
@@ -20,10 +20,10 @@
package org.apache.datasketches.theta2;
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
-import static java.lang.foreign.ValueLayout.JAVA_FLOAT;
-import static java.lang.foreign.ValueLayout.JAVA_INT;
-import static java.lang.foreign.ValueLayout.JAVA_LONG;
-import static java.lang.foreign.ValueLayout.JAVA_SHORT;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
import static org.apache.datasketches.common.Util.LS;
import static org.apache.datasketches.common.Util.zeroPad;
@@ -387,23 +387,23 @@ static int extractFlagsV1(final MemorySegment seg) {
}
static int extractSeedHash(final MemorySegment seg) {
- return seg.get(JAVA_SHORT, SEED_HASH_SHORT) & 0XFFFF;
+ return seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF;
}
static int extractCurCount(final MemorySegment seg) {
- return seg.get(JAVA_INT, RETAINED_ENTRIES_INT);
+ return seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
}
static float extractP(final MemorySegment seg) {
- return seg.get(JAVA_FLOAT, P_FLOAT);
+ return seg.get(JAVA_FLOAT_UNALIGNED, P_FLOAT);
}
static long extractThetaLong(final MemorySegment seg) {
- return seg.get(JAVA_LONG, THETA_LONG);
+ return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
}
static long extractUnionThetaLong(final MemorySegment seg) {
- return seg.get(JAVA_LONG, UNION_THETA_LONG);
+ return seg.get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG);
}
static int extractEntryBitsV4(final MemorySegment seg) {
@@ -415,7 +415,7 @@ static int extractNumEntriesBytesV4(final MemorySegment seg) {
}
static long extractThetaLongV4(final MemorySegment seg) {
- return seg.get(JAVA_LONG, THETA_LONG_V4);
+ return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG_V4);
}
/**
@@ -462,23 +462,23 @@ static void insertFlags(final MemorySegment seg, final int flags) {
}
static void insertSeedHash(final MemorySegment seg, final int seedHash) {
- seg.set(JAVA_SHORT, SEED_HASH_SHORT, (short) seedHash);
+ seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash);
}
static void insertCurCount(final MemorySegment seg, final int curCount) {
- seg.set(JAVA_INT, RETAINED_ENTRIES_INT, curCount);
+ seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount);
}
static void insertP(final MemorySegment seg, final float p) {
- seg.set(JAVA_FLOAT, P_FLOAT, p);
+ seg.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, p);
}
static void insertThetaLong(final MemorySegment seg, final long thetaLong) {
- seg.set(JAVA_LONG, THETA_LONG, thetaLong);
+ seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong);
}
static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) {
- seg.set(JAVA_LONG, UNION_THETA_LONG, unionThetaLong);
+ seg.set(JAVA_LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong);
}
static void setEmpty(final MemorySegment seg) {
diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java
index 5c959cde6..55dda17b0 100644
--- a/src/main/java/org/apache/datasketches/theta2/SetOperation.java
+++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java
@@ -28,8 +28,9 @@
import java.lang.foreign.MemorySegment;
import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.MemorySegmentStatus;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.common.Util;
+//import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
@@ -37,7 +38,7 @@
*
* @author Lee Rhodes
*/
-public abstract class SetOperation {
+public abstract class SetOperation implements MemorySegmentStatus {
static final int CONST_PREAMBLE_LONGS = 3;
/**
@@ -140,7 +141,7 @@ public static SetOperation wrap(final MemorySegment srcSeg, final long expectedS
return UnionImpl.wrapInstance(srcSeg, expectedSeed);
}
case INTERSECTION : {
- return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, true);
+ return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() );
}
default:
throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString());
@@ -198,6 +199,12 @@ public static int getMaxAnotBResultBytes(final int nomEntries) {
*/
abstract long[] getCache();
+ /**
+ * Returns the backing MemorySegment object if it exists, otherwise null.
+ * @return the backing MemorySegment object if it exists, otherwise null.
+ */
+ MemorySegment getMemorySegment() { return null; }
+
/**
* Gets the current count of retained entries.
* This is only useful during stateful operations.
@@ -221,18 +228,11 @@ public static int getMaxAnotBResultBytes(final int nomEntries) {
*/
abstract long getThetaLong();
- /**
- * Returns true if this object's internal data is backed by a Memory object,
- * which may be on-heap or off-heap.
- * @return true if this object's internal data is backed by a Memory object.
- */
- public boolean hasMemorySegment() { return false; }
+ @Override
+ public abstract boolean hasMemorySegment();
- /**
- * Returns true if this object's internal data is backed by an off-heap MemorySegment.
- * @return true if this object's internal data is backed by an off-heap MemorySegment.
- */
- public boolean isDirect() { return false; }
+ @Override
+ public abstract boolean isDirect();
/**
* Returns true if this set operator is empty.
@@ -242,18 +242,7 @@ public static int getMaxAnotBResultBytes(final int nomEntries) {
*/
abstract boolean isEmpty();
- /**
- * Returns true if the two given MemorySegments refer to the same backing resource,
- * which is either an off-heap memory location and size, or the same on-heap array object.
- *
- * This is a convenient delegate of
- * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}
- *
- * @param seg1 The first given MemorySegment
- * @param seg2 The second given MemorySegment
- * @return true if both MemorySegments are determined to be the same backing memory.
- */
- public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
- return Util.isSameResource(seg1, seg2);
- }
+ @Override
+ public abstract boolean isSameResource(final MemorySegment seg);
+
}
diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java
index 2659df84b..222eef877 100644
--- a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java
@@ -379,11 +379,6 @@ int getCurrentPreambleLongs() {
return 1;
}
- @Override
- MemorySegment getMemorySegment() {
- return null;
- }
-
@Override
short getSeedHash() {
return (short) (pre0_ >>> 48);
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java
index 3c5650a91..82661aa27 100644
--- a/src/main/java/org/apache/datasketches/theta2/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java
@@ -35,8 +35,8 @@
import java.lang.foreign.MemorySegment;
import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.MemorySegmentStatus;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.BinomialBoundsN;
import org.apache.datasketches.thetacommon.ThetaUtil;
@@ -46,7 +46,7 @@
*
* @author Lee Rhodes
*/
-public abstract class Sketch {
+public abstract class Sketch implements MemorySegmentStatus {
Sketch() {}
@@ -382,25 +382,12 @@ public double getUpperBound(final int numStdDev) {
: getRetainedEntries(true);
}
- /**
- * Returns true if this object's internal data is backed by a MemorySegment object,
- * which may be on-heap or off-heap.
- * @return true if this object's internal data is backed by a MemorySegment object.
- */
- public boolean hasMemorySegment() { return false; }
-
/**
* Returns true if this sketch is in compact form.
* @return true if this sketch is in compact form.
*/
public abstract boolean isCompact();
- /**
- * Returns true if this object's internal data is backed by an off-heap MemorySegment.
- * @return true if this object's internal data is backed by an off-heap MemorySegment.
- */
- public boolean isDirect() { return false; }
-
/**
* See Empty
* @return true if empty.
@@ -423,19 +410,21 @@ public boolean isEstimationMode() {
public abstract boolean isOrdered();
/**
- * Returns true if the two given MemorySegments refer to the same backing resource,
- * which is either an off-heap memory location and size, or the same on-heap array object.
+ * Returns true if the backing MemorySegment of this object refers to the same MemorySegment of that.
+ * They can either have the same off-heap memory location and size, or refer to the same on-heap array object.
+ *
+ * If both segments are off-heap, they both must have the same starting address and the same size.
+ *
+ * For on-heap segments, both segments must be based on or derived from the same array object and neither segment
+ * can be read-only.
*
- * This is a convenient delegate of
- * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}
+ * Returns false if either segment is null.
*
- * @param seg1 The first given MemorySegment
- * @param seg2 The second given MemorySegment
- * @return true if both MemorySegments are determined to be the same backing memory.
+ * @param that The given MemorySegment.
+ * @return true if the backing MemorySegment of this object hierarchy refers to the same MemorySegment of that.
*/
- public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) {
- return Util.isSameResource(seg1, seg2);
- }
+ @Override
+ public abstract boolean isSameResource(final MemorySegment that);
/**
* Returns a HashIterator that can be used to iterate over the retained hash values of the
@@ -601,9 +590,10 @@ public static String toString(final MemorySegment mem) {
/**
* Returns the backing MemorySegment object if it exists, otherwise null.
+ * This is overridden where relevant.
* @return the backing MemorySegment object if it exists, otherwise null.
*/
- abstract MemorySegment getMemorySegment();
+ MemorySegment getMemorySegment() { return null; }
/**
* Gets the 16-bit seed hash
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketches.java b/src/main/java/org/apache/datasketches/theta2/Sketches.java
new file mode 100644
index 000000000..cbcfac7d0
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/Sketches.java
@@ -0,0 +1,407 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * This class brings together the common sketch and set operation creation methods and
+ * the public static methods into one place.
+ *
+ * @author Lee Rhodes
+ */
+public final class Sketches {
+
+ private Sketches() {}
+
+ /**
+ * Gets the unique count estimate from a valid MemorySegment image of a Sketch.
+ * @param srcSeg the source MemorySegment
+ * @return the sketch's best estimate of the cardinality of the input stream.
+ */
+ public static double getEstimate(final MemorySegment srcSeg) {
+ checkIfValidThetaSketch(srcSeg);
+ return Sketch.estimate(getThetaLong(srcSeg), getRetainedEntries(srcSeg));
+ }
+
+ /**
+ * Gets the approximate lower error bound from a valid MemorySegment image of a Sketch
+ * given the specified number of Standard Deviations.
+ * This will return getEstimate() if isEmpty() is true.
+ *
+ * @param numStdDev
+ * See Number of Standard Deviations
+ * @param srcSeg the source MemorySegment
+ * @return the lower bound.
+ */
+ public static double getLowerBound(final int numStdDev, final MemorySegment srcSeg) {
+ return Sketch.lowerBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg));
+ }
+
+ /**
+ * Ref: {@link SetOperation#getMaxAnotBResultBytes(int)}.
+ * Returns the maximum number of bytes for the returned CompactSketch, given the maximum
+ * value of nomEntries of the first sketch A of AnotB.
+ * @param maxNomEntries the given value
+ * @return the maximum number of bytes.
+ */
+ public static int getMaxAnotBResultBytes(final int maxNomEntries) {
+ return SetOperation.getMaxAnotBResultBytes(maxNomEntries);
+ }
+
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch with the given
+ * number of actual entries.
+ * @param numberOfEntries the actual number of retained entries stored in the sketch.
+ * @return the maximum number of storage bytes required for a CompactSketch with the given number
+ * of retained entries.
+ */
+ public static int getMaxCompactSketchBytes(final int numberOfEntries) {
+ return Sketch.getMaxCompactSketchBytes(numberOfEntries);
+ }
+
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch given the configured
+ * log_base2 of the number of nominal entries, which is a power of 2.
+ * @param lgNomEntries Nominal Entries
+ * @return the maximum number of storage bytes required for a CompactSketch with the given
+ * lgNomEntries.
+ * @see Sketch#getCompactSketchMaxBytes(int)
+ */
+ public static int getCompactSketchMaxBytes(final int lgNomEntries) {
+ return Sketch.getCompactSketchMaxBytes(lgNomEntries);
+ }
+
+ /**
+ * Ref: {@link SetOperation#getMaxIntersectionBytes(int)}
+ * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries}
+ * @return Ref: {@link SetOperation#getMaxIntersectionBytes(int)}
+ */
+ public static int getMaxIntersectionBytes(final int nomEntries) {
+ return SetOperation.getMaxIntersectionBytes(nomEntries);
+ }
+
+ /**
+ * Ref: {@link SetOperation#getMaxUnionBytes(int)}
+ * @param nomEntries Ref: {@link SetOperation#getMaxUnionBytes(int)}, {@code nomEntries}
+ * @return Ref: {@link SetOperation#getMaxUnionBytes(int)}
+ */
+ public static int getMaxUnionBytes(final int nomEntries) {
+ return SetOperation.getMaxUnionBytes(nomEntries);
+ }
+
+ /**
+ * Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}
+ * @param nomEntries Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}, {@code nomEntries}
+ * @return Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}
+ */
+ public static int getMaxUpdateSketchBytes(final int nomEntries) {
+ return Sketch.getMaxUpdateSketchBytes(nomEntries);
+ }
+
+ /**
+ * Ref: {@link Sketch#getSerializationVersion(MemorySegment)}
+ * @param srcSeg Ref: {@link Sketch#getSerializationVersion(MemorySegment)}, {@code srcSeg}
+ * @return Ref: {@link Sketch#getSerializationVersion(MemorySegment)}
+ */
+ public static int getSerializationVersion(final MemorySegment srcSeg) {
+ return Sketch.getSerializationVersion(srcSeg);
+ }
+
+ /**
+ * Gets the approximate upper error bound from a valid MemorySegment image of a Sketch
+ * given the specified number of Standard Deviations.
+ * This will return getEstimate() if isEmpty() is true.
+ *
+ * @param numStdDev
+ * See Number of Standard Deviations
+ * @param srcSeg the source MemorySegment
+ * @return the upper bound.
+ */
+ public static double getUpperBound(final int numStdDev, final MemorySegment srcSeg) {
+ return Sketch.upperBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg));
+ }
+
+ //Heapify Operations
+
+ /**
+ * Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)}
+ * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)}, {@code srcSeg}
+ * @return {@link CompactSketch CompactSketch}
+ */
+ public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg) {
+ return CompactSketch.heapify(srcSeg);
+ }
+
+ /**
+ * Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}
+ * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, {@code srcSeg}
+ * @param expectedSeed Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)},
+ * {@code expectedSeed}
+ * @return {@link CompactSketch CompactSketch}
+ */
+ public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg, final long expectedSeed) {
+ return CompactSketch.heapify(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)}
+ * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)}, {@code srcSeg}
+ * @return {@link CompactSketch CompactSketch}
+ */
+ public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg) {
+ return CompactSketch.wrap(srcSeg);
+ }
+
+ /**
+ * Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}
+ * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, {@code srcSeg}
+ * @param expectedSeed Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)},
+ * {@code expectedSeed}
+ * @return {@link CompactSketch CompactSketch}
+ */
+ public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg, final long expectedSeed) {
+ return CompactSketch.wrap(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)}
+ * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)}, {@code srcSeg}
+ * @return {@link SetOperation SetOperation}
+ */
+ public static SetOperation heapifySetOperation(final MemorySegment srcSeg) {
+ return SetOperation.heapify(srcSeg);
+ }
+
+ /**
+ * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}
+ * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)},
+ * {@code srcSeg}
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)},
+ * {@code expectedSeed}
+ * @return {@link SetOperation SetOperation}
+ */
+ public static SetOperation heapifySetOperation(final MemorySegment srcSeg, final long expectedSeed) {
+ return SetOperation.heapify(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)}
+ * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)}, {@code srcSeg}
+ * @return {@link Sketch Sketch}
+ */
+ public static Sketch heapifySketch(final MemorySegment srcSeg) {
+ return Sketch.heapify(srcSeg);
+ }
+
+ /**
+ * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}
+ * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code srcSeg}
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code expectedSeed}
+ * @return {@link Sketch Sketch}
+ */
+ public static Sketch heapifySketch(final MemorySegment srcSeg, final long expectedSeed) {
+ return Sketch.heapify(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)}
+ * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)}, {@code srcSeg}
+ * @return {@link UpdateSketch UpdateSketch}
+ */
+ public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg) {
+ return UpdateSketch.heapify(srcSeg);
+ }
+
+ /**
+ * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}
+ * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)},
+ * {@code srcSeg}
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)},
+ * {@code expectedSeed}
+ * @return {@link UpdateSketch UpdateSketch}
+ */
+ public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) {
+ return UpdateSketch.heapify(srcSeg, expectedSeed);
+ }
+
+ //Builders
+
+ /**
+ * Ref: {@link SetOperationBuilder SetOperationBuilder}
+ * @return {@link SetOperationBuilder SetOperationBuilder}
+ */
+ public static SetOperationBuilder setOperationBuilder() {
+ return new SetOperationBuilder();
+ }
+
+ /**
+ * Ref: {@link UpdateSketchBuilder UpdateSketchBuilder}
+ * @return {@link UpdateSketchBuilder UpdateSketchBuilder}
+ */
+ public static UpdateSketchBuilder updateSketchBuilder() {
+ return new UpdateSketchBuilder();
+ }
+
+ //Wrap operations
+
+ /**
+ * Convenience method, calls {@link SetOperation#wrap(MemorySegment)} and casts the result to an Intersection
+ * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg}
+ * @return an Intersection backed by the given MemorySegment.
+ */
+ public static Intersection wrapIntersection(final MemorySegment srcSeg) {
+ return (Intersection) SetOperation.wrap(srcSeg);
+ }
+
+ /**
+ * Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)}
+ * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)}, {@code srcSeg}
+ * @return {@link SetOperation SetOperation}
+ */
+ public static SetOperation wrapSetOperation(final MemorySegment srcSeg) {
+ return wrapSetOperation(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}
+ * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code srcSeg}
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)},
+ * {@code expectedSeed}
+ * @return {@link SetOperation SetOperation}
+ */
+ public static SetOperation wrapSetOperation(final MemorySegment srcSeg, final long expectedSeed) {
+ return SetOperation.wrap(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)}
+ * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)}, {@code srcSeg}
+ * @return {@link Sketch Sketch}
+ */
+ public static Sketch wrapSketch(final MemorySegment srcSeg) {
+ return Sketch.wrap(srcSeg);
+ }
+
+ /**
+ * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}
+ * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code srcSeg}
+ * @param expectedSeed the expectedSeed used to validate the given MemorySegment image.
+ * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code expectedSeed}
+ * @return {@link Sketch Sketch}
+ */
+ public static Sketch wrapSketch(final MemorySegment srcSeg, final long expectedSeed) {
+ return Sketch.wrap(srcSeg, expectedSeed);
+ }
+
+ /**
+ * Convenience method, calls {@link SetOperation#wrap(MemorySegment)} and casts the result to a Union
+ * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg}
+ * @return a Union backed by the given MemorySegment.
+ */
+ public static Union wrapUnion(final MemorySegment srcSeg) {
+ return (Union) SetOperation.wrap(srcSeg);
+ }
+
+ /**
+ * Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)}
+ * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)}, {@code srcSeg}
+ * @return {@link UpdateSketch UpdateSketch}
+ */
+ public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg) {
+ return wrapUpdateSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}
+ * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code srcSeg}
+ * @param expectedSeed the seed used to validate the given MemorySegment image.
+ * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code expectedSeed}
+ * @return {@link UpdateSketch UpdateSketch}
+ */
+ public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) {
+ return UpdateSketch.wrap(srcSeg, expectedSeed);
+ }
+
+ //Restricted static methods
+
+ static void checkIfValidThetaSketch(final MemorySegment srcSeg) {
+ final int fam = srcSeg.get(JAVA_BYTE, FAMILY_BYTE);
+ if (!Sketch.isValidSketchID(fam)) {
+ throw new SketchesArgumentException("Source Memory not a valid Sketch. Family: "
+ + Family.idToFamily(fam).toString());
+ }
+ }
+
+ static boolean getEmpty(final MemorySegment srcSeg) {
+ final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE);
+ if (serVer == 1) {
+ return ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (getRetainedEntries(srcSeg) == 0));
+ }
+ return (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3
+ }
+
+ static int getPreambleLongs(final MemorySegment srcSeg) {
+ return srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //for SerVer 1,2,3
+ }
+
+ static int getRetainedEntries(final MemorySegment srcSeg) {
+ final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE);
+ if (serVer == 1) {
+ final int entries = srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+ if ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (entries == 0)) {
+ return 0;
+ }
+ return entries;
+ }
+ //SerVer 2 or 3
+ final int preLongs = getPreambleLongs(srcSeg);
+ final boolean empty = (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3
+ if (preLongs == 1) {
+ return empty ? 0 : 1;
+ }
+ //preLongs > 1
+ return srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); //for SerVer 1,2,3
+ }
+
+ static long getThetaLong(final MemorySegment srcSeg) {
+ final int preLongs = getPreambleLongs(srcSeg);
+ return (preLongs < 3) ? Long.MAX_VALUE : srcSeg.get(JAVA_LONG_UNALIGNED, THETA_LONG); //for SerVer 1,2,3
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java
index a86365c7d..f3bdbe2f4 100644
--- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java
@@ -30,6 +30,7 @@
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
+import java.util.Objects;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.ResizeFactor;
@@ -87,7 +88,7 @@ static UnionImpl initNewHeapInstance(
}
/**
- * Construct a new Direct Union in the off-heap destination MemorySegment.
+ * Construct a new Direct Union in the destination MemorySegment.
* Called by SetOperationBuilder.
*
* @param lgNomLongs See lgNomLongs.
@@ -121,11 +122,12 @@ static UnionImpl initNewDirectInstance(
* @return this class
*/
static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) {
- Family.UNION.checkFamilyID(extractFamilyID(srcSeg));
- final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed);
+ final MemorySegment srcSegRO = srcSeg.asReadOnly();
+ Family.UNION.checkFamilyID(extractFamilyID(srcSegRO));
+ final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSegRO, expectedSeed);
final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed);
- unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg);
- unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg);
+ unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSegRO);
+ unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSegRO);
return unionImpl;
}
@@ -139,7 +141,9 @@ static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expected
*/
static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) {
Family.UNION.checkFamilyID(extractFamilyID(srcSeg));
- final UpdateSketch gadget = DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed);
+ final UpdateSketch gadget = srcSeg.isReadOnly()
+ ? DirectQuickSelectSketchR.fastReadOnlyWrap(srcSeg, expectedSeed)
+ : DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed);
final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed);
unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg);
unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg);
@@ -156,7 +160,9 @@ static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) {
*/
static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) {
Family.UNION.checkFamilyID(extractFamilyID(srcSeg));
- final UpdateSketch gadget = DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed);
+ final UpdateSketch gadget = srcSeg.isReadOnly()
+ ? DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed)
+ : DirectQuickSelectSketch.writableWrap(srcSeg, expectedSeed);
final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed);
unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg);
unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg);
@@ -174,6 +180,11 @@ public int getMaxUnionBytes() {
return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3);
}
+ @Override
+ MemorySegment getMemorySegment() {
+ return hasMemorySegment() ? gadget_.getMemorySegment() : null;
+ }
+
@Override
public CompactSketch getResult() {
return getResult(true, null);
@@ -212,14 +223,17 @@ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dst
@Override
public boolean hasMemorySegment() {
- return gadget_ instanceof DirectQuickSelectSketchR
- ? gadget_.hasMemorySegment() : false;
+ return gadget_.hasMemorySegment();
}
@Override
public boolean isDirect() {
- return gadget_ instanceof DirectQuickSelectSketchR
- ? gadget_.isDirect() : false;
+ return gadget_.isDirect();
+ }
+
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return gadget_.isSameResource(that);
}
@Override
@@ -290,9 +304,8 @@ public void union(final Sketch sketchIn) {
@Override
public void union(final MemorySegment seg) {
- if (seg != null) {
- union(Sketch.wrap(seg));
- }
+ Objects.requireNonNull(seg, "MemorySegment must be non-null");
+ union(Sketch.wrap(seg.asReadOnly()));
}
@Override
diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java
index ee5f93ea2..8a8dc5fd3 100644
--- a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java
@@ -158,16 +158,31 @@ int getCurrentDataLongs() {
return 1 << getLgArrLongs();
}
+ @Override
+ public boolean hasMemorySegment() {
+ return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).hasMemorySegment());
+ }
+
@Override
public boolean isCompact() {
return false;
}
+ @Override
+ public boolean isDirect() {
+ return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isDirect());
+ }
+
@Override
public boolean isOrdered() {
return false;
}
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isSameResource(that));
+ }
+
//UpdateSketch interface
/**
diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java
index 08939ee41..09a20b19e 100644
--- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java
@@ -90,16 +90,6 @@ public long getThetaLong() {
return (preLongs > 2) ? getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE;
}
- @Override
- public boolean hasMemorySegment() {
- return false;
- }
-
- @Override
- public boolean isDirect() {
- return false;
- }
-
@Override
public boolean isEmpty() {
return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0;
@@ -147,11 +137,6 @@ int getCurrentPreambleLongs() {
return bytes_[PREAMBLE_LONGS_BYTE];
}
- @Override
- MemorySegment getMemorySegment() {
- return null;
- }
-
@Override
short getSeedHash() {
return getShortLE(bytes_, SEED_HASH_SHORT);
diff --git a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java
new file mode 100644
index 000000000..a192e9875
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class AnotBimplTest {
+
+ @Test
+ public void checkExactAnotB_AvalidNoOverlap() {
+ final int k = 512;
+
+ final UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).build();
+ final UpdateSketch usk2 = UpdateSketch.builder().setNominalEntries(k).build();
+
+ for (int i=0; iV1 dates from roughly Aug 2014 to about May 2015.
+ * The library at that time had an early Theta sketch with set operations based on ByteBuffer,
+ * the Alpha sketch, and an early HLL sketch. It also had an early adaptor for Pig.
+ * It also had code for the even earlier CountUniqueSketch (for backward compatibility),
+ * which was the bucket sketch based on Giroire.
+ *
+ * Serialization Version 1:
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || | Flags | LgResize | LgArr | lgNom | SkType | SerVer | MD_LONGS |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 || | ------------CurCount-------------- |
+ *
+ * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ * 2 || --------------------------THETA_LONG------------------------------ |
+ *
+ * || | 24 |
+ * 3 || ----------------------Start of Long Array------------------------ |
+ *
+ *
+ *
+ * - The serialization for V1 was always to a compact form (no hash table spaces).
+ * - MD_LONGS (Metadata Longs, now Preamble Longs) was always 3.
+ * - SerVer is always 1.
+ * - The SkType had three values: 1,2,3 for Alpha, QuickSelect, and SetSketch,
+ * respectively.
+ * - Bytes lgNom and lgArr were only used by the QS and Alpha sketches.
+ * - V1 LgResize (2 bits) was only relevant to the Alpha and QS sketches.
+ * - The flags byte is in byte 6 (moved to 5 in V2).
+ * - The only flag bits are BE(bit0)=0, and Read-Only(bit1)=1. Read-only was only set for the
+ * SetSketch.
+ * - There is no seedHash.
+ * - There is no concept of p-sampling so bytes 12-15 of Pre1 are empty.
+ * - The determination of empty is when both curCount=0 and thetaLong = Long.MAX_VALUE.
+ *
+ *
+ * @param skV3 a SerVer3, ordered CompactSketch
+ * @return a SerVer1 SetSketch as MemorySegment object.
+ */
+  public static MemorySegment convertSerVer3toSerVer1(final CompactSketch skV3) {
+    //Reject anything that is not a compact, ordered sketch without a MemorySegment
+    if (!skV3.isCompact() || !skV3.isOrdered() || skV3.hasMemorySegment()) {
+      throw new SketchesArgumentException("Invalid input sketch.");
+    }
+
+    //Allocate the V1 image: 3 preamble longs followed by one long per retained entry
+    final int numEntries = skV3.getRetainedEntries(true);
+    final int totalBytes = (3 + numEntries) << 3;
+    final MemorySegment seg = MemorySegment.ofArray(new byte[totalBytes]);
+
+    //Preamble long 0
+    seg.set(JAVA_BYTE, 0, (byte) 3); //preLongs
+    seg.set(JAVA_BYTE, 1, (byte) 1); //SerVer
+    seg.set(JAVA_BYTE, 2, (byte) 3); //SkType = SetSketch (compact)
+    seg.set(JAVA_BYTE, 6, (byte) 2); //Flags: ReadOnly set, BigEndian clear
+    //Preamble long 1
+    seg.set(JAVA_INT_UNALIGNED, 8, numEntries);
+    //Preamble long 2
+    seg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong());
+    //Hash values start at byte 24; nothing is copied for a sketch with no entries
+    if (numEntries > 0) {
+      MemorySegment.copy(skV3.getCache(), 0, seg, JAVA_LONG_UNALIGNED, 24, numEntries);
+    }
+    return seg;
+  }
+
+ /**
+ * Converts a SerVer3 ordered, heap CompactSketch to a SerVer2 ordered, SetSketch in MemorySegment.
+ * This is exclusively for testing purposes.
+ *
+ * V2 is short-lived and dates from roughly Mid May 2015 to about June 1st, 2015.
+ * (V3 was created about June 15th in preparation for OpenSource in July.)
+ * The Theta sketch had evolved but still based on ByteBuffer. There was an UpdateSketch,
+ * the Alpha sketch, and the early HLL sketch. It also had an early adaptor for Pig.
+ *
+ *
+ *
+ * Serialization Version 2:
+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * 0 || Seed Hash | Flags | lgArr | lgNom | SkType | SerVer | MD_LONGS + RR |
+ *
+ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ * 1 || --------------p-------------- | ---------Retained Entries Count-------- |
+ *
+ * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ * 2 || --------------------------THETA_LONG----------------------------------- |
+ *
+ * || | 24 |
+ * 3 || ----------Start of Long Array, could be at 2 or 3 -------------------- |
+ *
+ *
+ *
+ * - The serialization for V2 was always to a compact form (no hash table spaces).
+ * - MD_LONGS low 6 bits: 1 (Empty), 2 (Exact), 3 (Estimating).
+ * - SerVer is always 2.
+ * - The SkType had 4 values: 1,2,3,4; see below.
+ * - Bytes lgNom and lgArr were only used by the QS and Alpha sketches.
+ * - V2 LgResize is the top 2 bits of byte 0. Only relevant to the Alpha and QS sketches.
+ * - The flags byte is in byte 5.
+ * - The flag bits are specified below.
+ * - There is a seedHash in bytes 6-7.
+ * - p-sampling is bytes 12-15 of Pre1.
+ * - The determination of empty is based on the sketch field empty_.
+ *
+ *
+ * // Metadata byte Addresses
+ * private static final int METADATA_LONGS_BYTE = 0; //low 6 bits
+ * private static final int LG_RESIZE_RATIO_BYTE = 0; //upper 2 bits
+ * private static final int SER_VER_BYTE = 1;
+ * private static final int SKETCH_TYPE_BYTE = 2;
+ * private static final int LG_NOM_LONGS_BYTE = 3;
+ * private static final int LG_ARR_LONGS_BYTE = 4;
+ * private static final int FLAGS_BYTE = 5;
+ * private static final int SEED_HASH_SHORT = 6; //byte 6,7
+ * private static final int RETAINED_ENTRIES_COUNT_INT = 8; //4 byte aligned
+ * private static final int P_FLOAT = 12; //4 byte aligned
+ * private static final int THETA_LONG = 16; //8-byte aligned
+ * //Backward compatibility
+ * private static final int FLAGS_BYTE_V1 = 6;
+ * private static final int LG_RESIZE_RATIO_BYTE_V1 = 5;
+ *
+ * // Constant Values
+ * static final int SER_VER = 2;
+ * static final int ALPHA_SKETCH = 1; //SKETCH_TYPE_BYTE
+ * static final int QUICK_SELECT_SKETCH = 2;
+ * static final int SET_SKETCH = 3;
+ * static final int BUFFERED_QUICK_SELECT_SKETCH = 4;
+ * static final String[] SKETCH_TYPE_STR =
+ * { "None", "AlphaSketch", "QuickSelectSketch", "SetSketch", "BufferedQuickSelectSketch" };
+ *
+ * // flag bit masks
+ * static final int BIG_ENDIAN_FLAG_MASK = 1;
+ * static final int READ_ONLY_FLAG_MASK = 2;
+ * static final int EMPTY_FLAG_MASK = 4;
+ * static final int NO_REBUILD_FLAG_MASK = 8;
+ * static final int UNORDERED_FLAG_MASK = 16;
+ *
+ *
+ * @param skV3 a SerVer3, ordered CompactSketch
+ * @param seed used for checking the seed hash (if one exists).
+ * @return a SerVer2 SetSketch as MemorySegment object.
+ */
+  public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) {
+    final short seedHash = ThetaUtil.computeSeedHash(seed);
+
+    //Empty sketch: a single preamble long and nothing else
+    if (skV3 instanceof EmptyCompactSketch) {
+      final MemorySegment emptySeg = MemorySegment.ofArray(new long[1]);
+      emptySeg.set(JAVA_BYTE, 0, (byte) 1); //preLongs
+      emptySeg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
+      emptySeg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
+      final byte emptyFlags = (byte) 0xE; //NoRebuild, Empty, ReadOnly, LE
+      emptySeg.set(JAVA_BYTE, 5, emptyFlags);
+      emptySeg.set(JAVA_SHORT_UNALIGNED, 6, seedHash);
+      return emptySeg;
+    }
+
+    //Single item: two preamble longs followed by the one hash value
+    if (skV3 instanceof final SingleItemSketch sis) {
+      final MemorySegment singleSeg = MemorySegment.ofArray(new long[3]);
+      singleSeg.set(JAVA_BYTE, 0, (byte) 2); //preLongs
+      singleSeg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
+      singleSeg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
+      final byte singleFlags = (byte) 0xA; //NoRebuild, notEmpty, ReadOnly, LE
+      singleSeg.set(JAVA_BYTE, 5, singleFlags);
+      singleSeg.set(JAVA_SHORT_UNALIGNED, 6, seedHash);
+      singleSeg.set(JAVA_INT_UNALIGNED, 8, 1); //retained entries count
+      singleSeg.set(JAVA_LONG_UNALIGNED, 16, sis.getCache()[0]);
+      return singleSeg;
+    }
+
+    //General CompactSketch: preLongs preamble longs followed by the retained hash values
+    final int preLongs = skV3.getCompactPreambleLongs();
+    final int numEntries = skV3.getRetainedEntries(true);
+    final int unorderedBit = skV3.isOrdered() ? 0 : 16; //16 = unordered flag bit
+    final byte flags = (byte) (0xA | unorderedBit); //Unordered (if set), NoRebuild, notEmpty, ReadOnly, LE
+    final MemorySegment seg = Util.newHeapSegment((preLongs + numEntries) << 3);
+    seg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs
+    seg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
+    seg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
+    seg.set(JAVA_BYTE, 5, flags);
+    seg.set(JAVA_SHORT_UNALIGNED, 6, seedHash);
+    seg.set(JAVA_INT_UNALIGNED, 8, numEntries);
+    if (preLongs == 3) { //theta is written only when there are 3 preamble longs
+      seg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong());
+    }
+    final long[] hashArr = skV3.getCache();
+    MemorySegment.copy(hashArr, 0, seg, JAVA_LONG_UNALIGNED, preLongs << 3, numEntries);
+    return seg;
+  }
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java b/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java
new file mode 100644
index 000000000..d6a68bbd5
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.testng.Assert.assertEquals;
+
+import org.apache.datasketches.common.Util;
+import org.testng.annotations.Test;
+
+public class BitPackingTest {
+  //For every bit width from 1 to 63: generate pseudo-random values masked to that width,
+  //pack, unpack, and check that the round trip reproduces the input.
+  private static final boolean enablePrinting = false; //true: print widths and a hex dump of packed bytes
+
+  @Test
+  public void packUnpackBits() { //round trip: packBits() -> unpackBits()
+    long value = 0xaa55aa55aa55aa55L; // arbitrary starting value
+    for (int n = 0; n < 10000; n++) {
+      for (int bits = 1; bits <= 63; bits++) {
+        final long mask = (1L << bits) - 1; //must be a long shift: an int shift distance wraps at 32
+        final long[] input = new long[8];
+        for (int i = 0; i < 8; ++i) {
+          input[i] = value & mask;
+          value += Util.INVERSE_GOLDEN_U64;
+        }
+        //pack the 8 values back to back, tracking the byte offset and the bit offset within that byte
+        final byte[] bytes = new byte[8 * Long.BYTES];
+        int bitOffset = 0;
+        int bufOffset = 0;
+        for (int i = 0; i < 8; ++i) {
+          BitPacking.packBits(input[i], bits, bytes, bufOffset, bitOffset);
+          bufOffset += (bitOffset + bits) >>> 3;
+          bitOffset = (bitOffset + bits) & 7;
+        }
+        //unpack from the start using the same offset arithmetic
+        final long[] output = new long[8];
+        bitOffset = 0;
+        bufOffset = 0;
+        for (int i = 0; i < 8; ++i) {
+          BitPacking.unpackBits(output, i, bits, bytes, bufOffset, bitOffset);
+          bufOffset += (bitOffset + bits) >>> 3;
+          bitOffset = (bitOffset + bits) & 7;
+        }
+
+        for (int i = 0; i < 8; ++i) {
+          assertEquals(output[i], input[i]);
+        }
+      }
+    }
+  }
+
+  @Test
+  public void packUnpackBlocks() { //round trip: packBitsBlock8() -> unpackBitsBlock8()
+    long value = 0xaa55aa55aa55aa55L; // arbitrary starting value
+    for (int n = 0; n < 10000; n++) {
+      for (int bits = 1; bits <= 63; bits++) {
+        if (enablePrinting) { System.out.println("bits " + bits); }
+        final long mask = (1L << bits) - 1;
+        final long[] input = new long[8];
+        for (int i = 0; i < 8; ++i) {
+          input[i] = value & mask;
+          value += Util.INVERSE_GOLDEN_U64;
+        }
+
+        final byte[] bytes = new byte[8 * Long.BYTES];
+        BitPacking.packBitsBlock8(input, 0, bytes, 0, bits);
+        if (enablePrinting) { hexDump(bytes); }
+
+        final long[] output = new long[8];
+        BitPacking.unpackBitsBlock8(output, 0, bytes, 0, bits);
+
+        for (int i = 0; i < 8; ++i) {
+          if (enablePrinting) { System.out.println("checking value " + i); }
+          assertEquals(output[i], input[i]);
+        }
+      }
+    }
+  }
+
+  @Test
+  public void packBitsUnpackBlocks() { //cross check: packBits() -> unpackBitsBlock8()
+    long value = 0; // arbitrary starting value
+    for (int n = 0; n < 10000; n++) {
+      for (int bits = 1; bits <= 63; bits++) {
+        final long mask = (1L << bits) - 1; //must be a long shift: an int shift distance wraps at 32
+        final long[] input = new long[8];
+        for (int i = 0; i < 8; ++i) {
+          input[i] = value & mask;
+          value += Util.INVERSE_GOLDEN_U64;
+        }
+        //pack one value at a time
+        final byte[] bytes = new byte[8 * Long.BYTES];
+        int bitOffset = 0;
+        int bufOffset = 0;
+        for (int i = 0; i < 8; ++i) {
+          BitPacking.packBits(input[i], bits, bytes, bufOffset, bitOffset);
+          bufOffset += (bitOffset + bits) >>> 3;
+          bitOffset = (bitOffset + bits) & 7;
+        }
+        //unpack all 8 values with the block routine
+        final long[] output = new long[8];
+        BitPacking.unpackBitsBlock8(output, 0, bytes, 0, bits);
+
+        for (int i = 0; i < 8; ++i) {
+          assertEquals(output[i], input[i]);
+        }
+      }
+    }
+  }
+
+  @Test
+  public void packBlocksUnpackBits() { //cross check: packBitsBlock8() -> unpackBits()
+    long value = 123L; // arbitrary starting value
+    for (int n = 0; n < 10000; n++) {
+      for (int bits = 1; bits <= 63; bits++) {
+        final long mask = (1L << bits) - 1; //must be a long shift: an int shift distance wraps at 32
+        final long[] input = new long[8];
+        for (int i = 0; i < 8; ++i) {
+          input[i] = value & mask;
+          value += Util.INVERSE_GOLDEN_U64;
+        }
+        //pack all 8 values with the block routine
+        final byte[] bytes = new byte[8 * Long.BYTES];
+        BitPacking.packBitsBlock8(input, 0, bytes, 0, bits);
+        //unpack one value at a time
+        final long[] output = new long[8];
+        int bitOffset = 0;
+        int bufOffset = 0;
+        for (int i = 0; i < 8; ++i) {
+          BitPacking.unpackBits(output, i, bits, bytes, bufOffset, bitOffset);
+          bufOffset += (bitOffset + bits) >>> 3;
+          bitOffset = (bitOffset + bits) & 7;
+        }
+
+        for (int i = 0; i < 8; ++i) {
+          assertEquals(output[i], input[i]);
+        }
+      }
+    }
+  }
+
+  void hexDump(final byte[] bytes) { //prints every byte as two hex digits plus a space, then a newline
+    for (final byte b : bytes) {
+      System.out.printf("%02x ", b);
+    }
+    System.out.println();
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java
new file mode 100644
index 000000000..6ad5e8cdc
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java
@@ -0,0 +1,674 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.testng.annotations.Test;
+
+import java.lang.foreign.Arena;
+
+/**
+ * @author Lee Rhodes
+ */
+public class CompactSketchTest {
+
+  @Test
+  public void checkHeapifyWrap() {
+    final int nomEntries = 4096;
+    final int fourK = 4 * nomEntries;
+    checkHeapifyWrap(nomEntries, 0, true);
+    checkHeapifyWrap(nomEntries, 1, true);
+    checkHeapifyWrap(nomEntries, 1, false);
+    checkHeapifyWrap(nomEntries, nomEntries, true);  //exact, ordered
+    checkHeapifyWrap(nomEntries, nomEntries, false); //exact, not ordered
+    checkHeapifyWrap(nomEntries, fourK, true);  //estimating, ordered
+    checkHeapifyWrap(nomEntries, fourK, false); //estimating, not ordered
+  }
+
+ //test combinations of compact ordered/not ordered and heap/direct
+ public void checkHeapifyWrap(int k, int u, boolean ordered) {
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build();
+
+ for (int i=0; i>> 1 GT_MIDP
+ * 4611686018427387904 Theta for p = 0.5f = MIDP
+ *
+ * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V
+ * 922337217429372928 Theta for p = 0.1f = LOWP
+ * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V
+ */
+
+ private static final long GT_MIDP_V = 3L;
+ private static final float MIDP = 0.5f;
+
+ private static final long GT_LOWP_V = 6L;
+ private static final float LOWP = 0.1f;
+ private static final long LT_LOWP_V = 4L;
+
+ private static final double LOWP_THETA = LOWP;
+
+ private enum SkType { // triples below are {theta, retained entries, empty}
+ EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05, p and value are not used
+ EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value
+ ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify p, value
+ DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify p, value
+ }
+
+ //=================================
+
+  @Test
+  public void emptyEmpty() {
+    final UpdateSketch skA = getSketch(SkType.EMPTY, 0, 0);
+    final UpdateSketch skB = getSketch(SkType.EMPTY, 0, 0);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected union: theta, retained entries, empty
+        1.0,
+        0,
+        true);
+  }
+
+  @Test
+  public void emptyExact() {
+    final UpdateSketch skA = getSketch(SkType.EMPTY, 0, 0);
+    final UpdateSketch skB = getSketch(SkType.EXACT, 0, GT_MIDP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected union: theta, retained entries, empty
+        1.0,
+        1,
+        false);
+  }
+
+  @Test
+  public void emptyDegenerate() {
+    final UpdateSketch skA = getSketch(SkType.EMPTY, 0, 0);
+    final UpdateSketch skB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        0,
+        false);
+  }
+
+  @Test
+  public void emptyEstimation() {
+    final UpdateSketch skA = getSketch(SkType.EMPTY, 0, 0);
+    final UpdateSketch skB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+ //=================================
+
+  @Test
+  public void exactEmpty() {
+    final UpdateSketch skA = getSketch(SkType.EXACT, 0, GT_MIDP_V);
+    final UpdateSketch skB = getSketch(SkType.EMPTY, 0, 0);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        1.0,
+        1,
+        false,
+        //expected union: theta, retained entries, empty
+        1.0,
+        1,
+        false);
+  }
+
+  @Test
+  public void exactExact() {
+    final UpdateSketch skA = getSketch(SkType.EXACT, 0, GT_MIDP_V);
+    final UpdateSketch skB = getSketch(SkType.EXACT, 0, GT_MIDP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        1,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected union: theta, retained entries, empty
+        1.0,
+        1,
+        false);
+  }
+
+  @Test
+  public void exactDegenerate() {
+    final UpdateSketch skA = getSketch(SkType.EXACT, 0, LT_LOWP_V);
+    final UpdateSketch skB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        1,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+  @Test
+  public void exactEstimation() {
+    final UpdateSketch skA = getSketch(SkType.EXACT, 0, LT_LOWP_V);
+    final UpdateSketch skB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        1,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+ //=================================
+
+  @Test
+  public void estimationEmpty() {
+    final UpdateSketch skA = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V);
+    final UpdateSketch skB = getSketch(SkType.EMPTY, 0, 0);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        1,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+  @Test
+  public void estimationExact() {
+    final UpdateSketch skA = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V);
+    final UpdateSketch skB = getSketch(SkType.EXACT, 0, LT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        1,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+  @Test
+  public void estimationDegenerate() {
+    final UpdateSketch skA = getSketch(SkType.ESTIMATION, MIDP, LT_LOWP_V);
+    final UpdateSketch skB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        1,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+  @Test
+  public void estimationEstimation() {
+    final UpdateSketch skA = getSketch(SkType.ESTIMATION, MIDP, LT_LOWP_V);
+    final UpdateSketch skB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        1,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+ //=================================
+
+  @Test
+  public void degenerateEmpty() {
+    final UpdateSketch skA = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0
+    final UpdateSketch skB = getSketch(SkType.EMPTY, 0, 0);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        1.0,
+        0,
+        true,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        0,
+        false);
+  }
+
+  @Test
+  public void degenerateExact() {
+    final UpdateSketch skA = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0
+    final UpdateSketch skB = getSketch(SkType.EXACT, 0, LT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+  @Test
+  public void degenerateDegenerate() {
+    final UpdateSketch skA = getSketch(SkType.DEGENERATE, MIDP, GT_MIDP_V); //entries = 0
+    final UpdateSketch skB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        0,
+        false);
+  }
+
+  @Test
+  public void degenerateEstimation() {
+    final UpdateSketch skA = getSketch(SkType.DEGENERATE, MIDP, GT_MIDP_V); //entries = 0
+    final UpdateSketch skB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V);
+
+    checks(skA, skB,
+        //expected intersection: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected A-not-B: theta, retained entries, empty
+        LOWP_THETA,
+        0,
+        false,
+        //expected union: theta, retained entries, empty
+        LOWP,
+        1,
+        false);
+  }
+
+ //=================================
+ //=================================
+
+ private static void checks(
+ UpdateSketch thetaA,
+ UpdateSketch thetaB,
+ double expectedIntersectTheta,
+ int expectedIntersectCount,
+ boolean expectedIntersectEmpty,
+ double expectedAnotbTheta,
+ int expectedAnotbCount,
+ boolean expectedAnotbEmpty,
+ double expectedUnionTheta,
+ int expectedUnionCount,
+ boolean expectedUnionEmpty) {
+ CompactSketch csk;
+ Intersection inter = SetOperation.builder().buildIntersection();
+ AnotB anotb = SetOperation.builder().buildANotB();
+ Union union = new SetOperationBuilder().buildUnion();
+
+ //Intersection Stateless Theta, Theta Updatable
+ csk = inter.intersect(thetaA, thetaB);
+ checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount,
+ expectedIntersectEmpty);
+ //Intersection Stateless Theta, Theta Compact
+ csk = inter.intersect(thetaA.compact(), thetaB.compact());
+ checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount,
+ expectedIntersectEmpty);
+
+ //AnotB Stateless Theta, Theta Updatable
+ csk = anotb.aNotB(thetaA, thetaB);
+ checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty);
+ //AnotB Stateless Theta, Theta Compact
+ csk = anotb.aNotB(thetaA.compact(), thetaB.compact());
+ checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty);
+
+ //AnotB Stateful Theta, Theta Updatable
+ anotb.setA(thetaA);
+ anotb.notB(thetaB);
+ csk = anotb.getResult(true);
+ checkResult("AnotB Stateful Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty);
+ //AnotB Stateful Theta, Theta Compact
+ anotb.setA(thetaA.compact());
+ anotb.notB(thetaB.compact());
+ csk = anotb.getResult(true);
+ checkResult("AnotB Stateful Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty);
+
+ //Union Stateful Theta, Theta Updatable
+ union.union(thetaA);
+ union.union(thetaB);
+ csk = union.getResult();
+ union.reset();
+ checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty);
+ //Union Stateful Theta, Theta Compact
+ union.union(thetaA.compact());
+ union.union(thetaB.compact());
+ csk = union.getResult();
+ union.reset();
+ checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty);
+
+ }
+
+ private static void checkResult(
+ String comment,
+ CompactSketch csk,
+ double expectedTheta,
+ int expectedEntries,
+ boolean expectedEmpty) {
+ double actualTheta = csk.getTheta();
+ int actualEntries = csk.getRetainedEntries();
+ boolean actualEmpty = csk.isEmpty();
+
+ boolean thetaOk = actualTheta == expectedTheta;
+ boolean entriesOk = actualEntries == expectedEntries;
+ boolean emptyOk = actualEmpty == expectedEmpty;
+ if (!thetaOk || !entriesOk || !emptyOk) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(comment + ": ");
+ if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); }
+ if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); }
+ if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); }
+ throw new IllegalArgumentException(sb.toString());
+ }
+ }
+
+ private static UpdateSketch getSketch(SkType skType, float p, long value) {
+ UpdateSketchBuilder bldr = UpdateSketch.builder();
+ bldr.setLogNominalEntries(4);
+ UpdateSketch sk;
+ switch(skType) {
+ case EMPTY: { // { 1.0, 0, T} p and value are not used
+ sk = bldr.build();
+ break;
+ }
+ case EXACT: { // { 1.0, >0, F} p is not used
+ sk = bldr.build();
+ sk.update(value);
+ break;
+ }
+ case ESTIMATION: { // {<1.0, >0, F}
+ bldr.setP(p);
+ sk = bldr.build();
+ sk.update(value);
+ break;
+ }
+ case DEGENERATE: { // {<1.0, 0, F}
+ bldr.setP(p);
+ sk = bldr.build();
+ sk.update(value);
+ break;
+ }
+
+ default: { return null; } // should not happen
+ }
+ return sk;
+ }
+
+// private static void println(Object o) {
+// System.out.println(o.toString());
+// }
+//
+// @Test
+// public void printHash() {
+// long seed = DEFAULT_UPDATE_SEED;
+// long v = 6;
+// long hash = (hash(v, seed)[0]) >>> 1;
+// println(v + ", " + hash);
+// }
+//
+// @Test
+// public void printPAsLong() {
+// float p = 0.5f;
+// println("p = " + p + ", " + (long)(Long.MAX_VALUE * p));
+// }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java
new file mode 100644
index 000000000..c27c3b085
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java
@@ -0,0 +1,769 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.apache.datasketches.theta2.SetOperation.CONST_PREAMBLE_LONGS;
+import static org.apache.datasketches.theta2.SetOperation.getMaxIntersectionBytes;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesReadOnlyException;
+import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class DirectIntersectionTest {
+ private static final int PREBYTES = CONST_PREAMBLE_LONGS << 3; //24
+
+ @Test
+ public void checkExactIntersectionNoOverlap() {
+ final int lgK = 9;
+ final int k = 1< k);
+ println("Est: "+est);
+ }
+
+ @SuppressWarnings("unused")
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkOverflow() {
+ final int lgK = 9; //512
+ final int k = 1< k);
+ println("Est: "+est);
+
+ final byte[] byteArray = inter.toByteArray();
+ final MemorySegment seg = MemorySegment.ofArray(byteArray);
+ final Intersection inter2 = (Intersection) SetOperation.heapify(seg);
+ comp2 = inter2.getResult(false, null);
+ est2 = comp2.getEstimate();
+ println("Est2: "+est2);
+ }
+
+ /**
+ * Shows that an update with 7 is rejected when p = 0.5. Other tests involving P rely on this.
+ */
+ @Test
+ public void checkPreject() {
+ final UpdateSketch sk = UpdateSketch.builder().setP((float) .5).build();
+ sk.update(7); // NOTE(review): rejection presumably means hash(7) is not below theta (0.5), contrary to the old "< 0.5" wording -- confirm
+ assertEquals(sk.getRetainedEntries(), 0); // nothing retained, so the update was rejected
+ }
+
+ @Test
+ public void checkWrapVirginEmpty() {
+ final int lgK = 5;
+ final int k = 1 << lgK;
+ Intersection inter1, inter2;
+ UpdateSketch sk1;
+ // Backing storage: a segment over a fresh byte[] sized by getMaxIntersectionBytes(k).
+ final int segBytes = getMaxIntersectionBytes(k);
+ MemorySegment iMem = MemorySegment.ofArray(new byte[segBytes]);
+
+ inter1 = SetOperation.builder().buildIntersection(iMem); //virgin, built over iMem (a byte[]-backed segment)
+ inter2 = Sketches.wrapIntersection(iMem); //virgin, wraps the same iMem as inter1
+ //both in virgin state, empty = false
+ //note: both inter1 and inter2 are tied to the same MemorySegment,
+ // so an intersect to one also affects the other. Don't do what I do!
+ assertFalse(inter1.hasResult());
+ assertFalse(inter2.hasResult());
+
+ //This constructs a sketch with 0 entries and theta < 1.0
+ sk1 = UpdateSketch.builder().setP((float) .5).setNominalEntries(k).build();
+ sk1.update(7); //will be rejected by P, see checkPreject()
+
+ //A virgin intersection (empty = false) intersected with a not-empty zero cache sketch
+ //remains empty = false!
+ inter1.intersect(sk1);
+ assertFalse(inter1.isEmpty());
+ assertTrue(inter1.hasResult());
+ //note that inter2 is not independent: it observes the intersect() made through inter1
+ assertFalse(inter2.isEmpty());
+ assertTrue(inter2.hasResult());
+
+ //test the path via toByteArray, now in a different state
+ iMem = MemorySegment.ofArray(inter1.toByteArray());
+ inter2 = Sketches.wrapIntersection(iMem);
+ assertTrue(inter2.hasResult()); //still true
+
+ //test the compaction path: the result has no retained entries yet is not flagged empty
+ final CompactSketch comp = inter2.getResult(true, null);
+ assertEquals(comp.getRetainedEntries(false), 0);
+ assertFalse(comp.isEmpty());
+ }
+
+ @Test
+ public void checkWrapNullEmpty2() {
+ final int lgK = 5;
+ final int k = 1< k);
+ println("Est: "+est);
+
+ final byte[] segArr3 = inter2.toByteArray();
+ final MemorySegment srcMem2 = MemorySegment.ofArray(segArr3);
+ inter3 = Sketches.wrapIntersection(srcMem2);
+ resultComp2 = inter3.getResult(false, null);
+ est2 = resultComp2.getEstimate();
+ println("Est2: "+est2);
+
+ inter.reset();
+ inter2.reset();
+ inter3.reset();
+ }
+
+  @Test
+  public void checkDefaultMinSize() {
+    // 32 * 8 bytes plus PREBYTES: initialization over this segment is expected to succeed.
+    final byte[] backing = new byte[(32 * Long.BYTES) + PREBYTES];
+    IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, MemorySegment.ofArray(backing));
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkExceptionMinSize() {
+    // Only 16 * 8 bytes plus PREBYTES: initialization over this segment is expected to throw.
+    final byte[] backing = new byte[(16 * Long.BYTES) + PREBYTES];
+    IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, MemorySegment.ofArray(backing));
+  }
+
+  @Test
+  public void checkGetResult() {
+    // Intersecting with a sketch that was never updated must give a result
+    // whose compact form occupies 8 bytes.
+    final int nomEntries = 1024;
+    final MemorySegment dstSeg = MemorySegment.ofArray(new byte[getMaxIntersectionBytes(nomEntries)]);
+    final Intersection intersection = Sketches.setOperationBuilder().buildIntersection(dstSeg);
+
+    final UpdateSketch neverUpdated = Sketches.updateSketchBuilder().build();
+    intersection.intersect(neverUpdated);
+
+    final CompactSketch result = intersection.getResult();
+    assertEquals(result.getCompactBytes(), 8);
+  }
+
+  @Test
+  public void checkFamily() {
+    // Cheap trick: initialize directly through IntersectionImpl, then query its family.
+    final byte[] backing = new byte[(16 * 16) + PREBYTES];
+    final MemorySegment target = MemorySegment.ofArray(backing);
+    final IntersectionImpl direct = IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, target);
+    assertEquals(direct.getFamily(), Family.INTERSECTION);
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkExceptions1() {
+    final byte[] backing = new byte[(16 * 16) + PREBYTES];
+    final MemorySegment corrupted = MemorySegment.ofArray(backing);
+    IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, corrupted);
+    // Corrupt SerVer (overwrite it with 2); wrapping the segment is then expected to throw.
+    corrupted.set(JAVA_BYTE, PreambleUtil.SER_VER_BYTE, (byte) 2);
+    IntersectionImpl.wrapInstance(corrupted, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkExceptions2() {
+    final byte[] backing = new byte[(16 * 16) + PREBYTES];
+    final MemorySegment corrupted = MemorySegment.ofArray(backing);
+    IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, corrupted);
+    // Per the original note the segment now holds a non-empty intersection. Set the empty
+    // flag and write a retained-entries count of 2; the wrap below is then expected to throw.
+    Util.setBits(corrupted, PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK);
+    corrupted.set(JAVA_INT_UNALIGNED, PreambleUtil.RETAINED_ENTRIES_INT, 2);
+    IntersectionImpl.wrapInstance(corrupted, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+  }
+
+ //Check Alex's bug intersecting 2 direct full sketches with only overlap of 2
+ //
+ @Test
+ public void checkOverlappedDirect() {
+ final int k = 1 << 4;
+ final int segBytes = 2*k*16 +PREBYTES; //plenty of room
+ final UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ final UpdateSketch sk2 = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i=0; i k);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+  @Test
+  public void checkSamplingMode() {
+    int k = 4096;
+    float p = (float)0.5;
+    // Sampling probability p = 0.5, then k distinct updates into a sketch built over wseg.
+    try (Arena arena = Arena.ofConfined()) {
+      MemorySegment wseg = makeNativeMemorySegment(k, arena);
+
+      UpdateSketch usk = UpdateSketch.builder().setP(p).setNominalEntries(k).build(wseg);
+      DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks
+
+      for (int i = 0; i < k; i++ ) { usk.update(i); }
+
+      double p2 = sk1.getP();
+      double theta = sk1.getTheta();
+      assertTrue(theta <= p2); // theta must not exceed the configured p
+
+      double est = usk.getEstimate();
+      assertEquals(est, k, k *.05); // (actual, expected, delta): estimate within 5% of k
+      double ub = usk.getUpperBound(1);
+      assertTrue(ub > est); // bounds must strictly bracket the estimate
+      double lb = usk.getLowerBound(1);
+      assertTrue(lb < est);
+    } catch (final Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+ @Test
+ public void checkErrorBounds() {
+ int k = 512;
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment wseg = makeNativeMemorySegment(k, arena);
+ // Sketch under test is built over wseg (presumably allocated from the arena above).
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg);
+
+ //Exact mode: only k distinct values, so both bounds must equal the estimate
+ for (int i = 0; i < k; i++ ) { usk.update(i); }
+
+ double est = usk.getEstimate();
+ double lb = usk.getLowerBound(2);
+ double ub = usk.getUpperBound(2);
+ assertEquals(est, ub, 0.0);
+ assertEquals(est, lb, 0.0);
+
+ //Est mode: continue up to 100*k distinct values, each offered twice
+ int u = 100*k;
+ for (int i = k; i < u; i++ ) {
+ usk.update(i);
+ usk.update(i); //test duplicate rejection
+ }
+ est = usk.getEstimate();
+ lb = usk.getLowerBound(2);
+ ub = usk.getUpperBound(2);
+ assertTrue(est <= ub); // the bounds must bracket the estimate (non-strict)
+ assertTrue(est >= lb);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ //Empty Tests
+ @Test
+ public void checkEmptyAndP() {
+ //virgin, p = 1.0
+ int k = 1024;
+ float p = (float)1.0;
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment wseg = makeNativeMemorySegment(k, arena);
+
+ UpdateSketch usk = UpdateSketch.builder().setP(p).setNominalEntries(k).build(wseg);
+ DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks
+
+ assertTrue(usk.isEmpty());
+ usk.update(1); // with p = 1.0 the value is retained (checked on the next line)
+ assertEquals(sk1.getRetainedEntries(true), 1);
+ assertFalse(usk.isEmpty());
+
+ //virgin, p = .001
+ p = (float)0.001;
+ byte[] segArr2 = new byte[(int) wseg.byteSize()]; // second sketch: byte[]-backed segment of the same size as wseg
+ MemorySegment seg2 = MemorySegment.ofArray(segArr2);
+ UpdateSketch usk2 = UpdateSketch.builder().setP(p).setNominalEntries(k).build(seg2);
+ sk1 = (DirectQuickSelectSketch)usk2; // sk1 now refers to the second sketch
+
+ assertTrue(usk2.isEmpty());
+ usk2.update(1); //will be rejected
+ assertEquals(sk1.getRetainedEntries(true), 0);
+ assertFalse(usk2.isEmpty()); // a rejected update still leaves the sketch not-empty
+ double est = usk2.getEstimate();
+ //println("Est: "+est);
+ assertEquals(est, 0.0, 0.0); //because curCount = 0
+ double ub = usk2.getUpperBound(2); //huge because theta is tiny!
+ //println("UB: "+ub);
+ assertTrue(ub > 0.0);
+ double lb = usk2.getLowerBound(2);
+ assertTrue(lb <= est);
+ //println("LB: "+lb);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+  @Test
+  public void checkUpperAndLowerBounds() {
+    // After 2k distinct updates, the bounds for argument 1 must strictly bracket the estimate.
+    final int nomEntries = 512;
+    final int numUpdates = 2 * nomEntries;
+    try (Arena confined = Arena.ofConfined()) {
+      final MemorySegment nativeSeg = makeNativeMemorySegment(nomEntries, confined);
+      final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(nomEntries).build(nativeSeg);
+
+      int next = 0;
+      while (next < numUpdates) { sketch.update(next++); }
+      final double estimate = sketch.getEstimate();
+      final double upper = sketch.getUpperBound(1);
+      final double lower = sketch.getLowerBound(1);
+      assertTrue(upper > estimate);
+      assertTrue(lower < estimate);
+    } catch (final Exception ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+ @Test
+ public void checkRebuild() {
+ int k = 512;
+ int u = 4*k;
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment wseg = makeNativeMemorySegment(k, arena);
+
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg);
+ DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks
+
+ assertTrue(usk.isEmpty());
+ // Offer 4k distinct values so that more than k entries are retained before rebuilding.
+ for (int i = 0; i< u; i++) { usk.update(i); }
+
+ assertFalse(usk.isEmpty());
+ assertTrue(usk.getEstimate() > 0.0);
+ assertTrue(sk1.getRetainedEntries(false) > k);
+ // The first rebuild must bring the retained count down to exactly k.
+ sk1.rebuild();
+ assertEquals(sk1.getRetainedEntries(false), k);
+ assertEquals(sk1.getRetainedEntries(true), k);
+ sk1.rebuild(); // a second rebuild must leave the count unchanged
+ assertEquals(sk1.getRetainedEntries(false), k);
+ assertEquals(sk1.getRetainedEntries(true), k);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void checkResetAndStartingSubMultiple() {
+ int k = 512;
+ int u = 4*k;
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment wseg = makeNativeMemorySegment(k, arena);
+
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg);
+ DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks
+
+ assertTrue(usk.isEmpty());
+ // Offer 4k distinct values: more than k entries retained and theta below its maximum.
+ for (int i = 0; i< u; i++) { usk.update(i); }
+
+ assertFalse(usk.isEmpty());
+ assertTrue(sk1.getRetainedEntries(false) > k);
+ assertTrue(sk1.getThetaLong() < Long.MAX_VALUE);
+ // reset() must return the sketch to empty: no entries, zero estimate, theta at Long.MAX_VALUE.
+ sk1.reset();
+ assertTrue(usk.isEmpty());
+ assertEquals(sk1.getRetainedEntries(false), 0);
+ assertEquals(usk.getEstimate(), 0.0, 0.0);
+ assertEquals(sk1.getThetaLong(), Long.MAX_VALUE);
+ // After the reset the sketch still exposes its segment and reports itself as not ordered.
+ assertNotNull(sk1.getMemorySegment());
+ assertFalse(sk1.isOrdered());
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+  @Test
+  public void checkExactModeMemoryArr() {
+    // Feed exactly 4096 distinct values with k = 4096: estimate and retained count must both be 4096.
+    final int nomEntries = 4096;
+    final int numUpdates = 4096;
+    try (Arena confined = Arena.ofConfined()) {
+      final MemorySegment nativeSeg = makeNativeMemorySegment(nomEntries, confined);
+      final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(nomEntries).build(nativeSeg);
+      final DirectQuickSelectSketch direct = (DirectQuickSelectSketch) sketch; // for internal checks
+      assertTrue(sketch.isEmpty());
+
+      int next = 0;
+      while (next < numUpdates) { sketch.update(next++); }
+      assertEquals(sketch.getEstimate(), numUpdates, 0.0);
+      assertEquals(direct.getRetainedEntries(false), numUpdates);
+    } catch (final Exception ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  @Test
+  public void checkEstModeMemoryArr() {
+    // Feed 2k distinct values: the estimate must land within 5% of 2k and more than
+    // k entries must be retained.
+    final int nomEntries = 4096;
+    final int numUpdates = 2 * nomEntries;
+    try (Arena confined = Arena.ofConfined()) {
+      final MemorySegment nativeSeg = makeNativeMemorySegment(nomEntries, confined);
+      final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(nomEntries).build(nativeSeg);
+      final DirectQuickSelectSketch direct = (DirectQuickSelectSketch) sketch; // for internal checks
+      assertTrue(sketch.isEmpty());
+
+      int next = 0;
+      while (next < numUpdates) { sketch.update(next++); }
+      assertEquals(sketch.getEstimate(), numUpdates, numUpdates * .05);
+      assertTrue(direct.getRetainedEntries(false) > nomEntries);
+    } catch (final Exception ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+ @Test
+ public void checkEstModeNativeMemory() {
+ int k = 4096;
+ int u = 2*k;
+ int segCapacity = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3); // 16 bytes per nominal entry plus 8 bytes per preamble long
+ // NOTE(review): sibling tests pass k to makeNativeMemorySegment; this one passes a byte capacity -- confirm which the helper expects
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment wseg = makeNativeMemorySegment(segCapacity, arena);
+
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg);
+ DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks
+ assertTrue(usk.isEmpty());
+ // Offer 2k distinct values, then require the estimate within 5% of 2k and more than k retained entries.
+ for (int i = 0; i< u; i++) { usk.update(i); }
+ double est = usk.getEstimate();
+ println(""+est);
+ assertEquals(usk.getEstimate(), u, u*.05);
+ assertTrue(sk1.getRetainedEntries(false) > k);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void checkConstructReconstructFromMemory() {
+ int k = 4096;
+ int u = 2*k;
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment wseg = makeNativeMemorySegment(k, arena);
+
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg);
+ assertTrue(usk.isEmpty());
+
+ for (int i = 0; i< u; i++) { usk.update(i); } //force estimation
+ // Reference values taken from the original sketch.
+ double est1 = usk.getEstimate();
+ int count1 = usk.getRetainedEntries(false);
+ assertEquals(est1, u, u*.05);
+ assertTrue(count1 >= k);
+
+ byte[] serArr;
+ double est2;
+ int count2;
+
+ serArr = usk.toByteArray();
+ // Wrap the serialized bytes in a byte[]-backed segment.
+ MemorySegment seg2 = MemorySegment.ofArray(serArr);
+
+ //reconstruct by wrapping the serialized image as an UpdateSketch
+ UpdateSketch usk2 = Sketches.wrapUpdateSketch(seg2);
+
+ est2 = usk2.getEstimate();
+ count2 = usk2.getRetainedEntries(false);
+ // The wrapped sketch must report exactly the same count and estimate.
+ assertEquals(count2, count1);
+ assertEquals(est2, est1, 0.0);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+  @Test(expectedExceptions = SketchesReadOnlyException.class)
+  public void updateAfterReadOnlyWrap() {
+    final byte[] image = UpdateSketch.builder().build().toByteArray();
+    final UpdateSketch wrapped = (UpdateSketch) Sketch.wrap(MemorySegment.ofArray(image));
+    wrapped.update(0); // a sketch obtained through Sketch.wrap is expected to refuse updates
+  }
+
+  @Test public void updateAfterWritableWrap() {
+    // Counterpart of updateAfterReadOnlyWrap(): after UpdateSketch.wrap no exception is expected.
+    final byte[] image = UpdateSketch.builder().build().toByteArray();
+    UpdateSketch.wrap(MemorySegment.ofArray(image)).update(0);
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkNegativeHashes() {
+    // A negative value handed to hashUpdate is expected to be rejected with an exception.
+    final UpdateSketch quickSelect = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(512).build();
+    quickSelect.hashUpdate(-1L);
+  }
+
+ @Test
+ public void checkConstructorSrcMemCorruptions() {
+ int k = 1024; //lgNomLongs = 10
+ int u = k; //exact mode, lgArrLongs = 11
+
+ int bytes = Sketches.getMaxUpdateSketchBytes(k);
+ byte[] arr1 = new byte[bytes];
+ MemorySegment seg1 = MemorySegment.ofArray(arr1);
+ ResizeFactor rf = ResizeFactor.X1; //0
+ UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg1);
+ for (int i=0; i>> 1); //corrupt theta and
+ seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs
+ try {
+ usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED);
+ fail("Expected SketchesArgumentException");
+ } catch (SketchesArgumentException e) {
+ //pass
+ }
+ seg1.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE); //fix theta and
+ seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 11); //fix lgArrLongs
+ byte badFlags = (byte) (BIG_ENDIAN_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK);
+ seg1.set(JAVA_BYTE, FLAGS_BYTE, badFlags);
+ try {
+ usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED);
+ fail("Expected SketchesArgumentException");
+ } catch (SketchesArgumentException e) {
+ //pass
+ }
+
+ byte[] arr2 = Arrays.copyOfRange(arr1, 0, bytes-1); //corrupt length
+ MemorySegment seg2 = MemorySegment.ofArray(arr2);
+ try {
+ usk2 = DirectQuickSelectSketch.writableWrap(seg2, ThetaUtil.DEFAULT_UPDATE_SEED);
+ fail("Expected SketchesArgumentException");
+ } catch (SketchesArgumentException e) {
+ //pass
+ }
+ }
+
+ @Test
+ public void checkCorruptRFWithInsufficientArray() {
+ int k = 1024; //lgNomLongs = 10
+ // Build a sketch with RF = X8 over a segment of getMaxUpdateSketchBytes(k) bytes and give it one update.
+ int bytes = Sketches.getMaxUpdateSketchBytes(k);
+ byte[] arr = new byte[bytes];
+ MemorySegment seg = MemorySegment.ofArray(arr);
+ ResizeFactor rf = ResizeFactor.X8; // 3
+ UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg);
+ usk.update(0);
+ // Overwrite the stored lg resize factor with 0, then wrap the same segment again.
+ insertLgResizeFactor(seg, 0); // corrupt RF: X1
+ UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ assertEquals(dqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2
+ }
+
+  @Test
+  public void checkFamilyAndRF() {
+    // Built with builder defaults over a segment: family must be QUICKSELECT, resize factor X8.
+    final byte[] backing = new byte[(16 * 16) + 24];
+    final UpdateSketch built = Sketches.updateSketchBuilder().setNominalEntries(16).build(MemorySegment.ofArray(backing));
+    assertEquals(built.getFamily(), Family.QUICKSELECT);
+    assertEquals(built.getResizeFactor(), ResizeFactor.X8);
+  }
+
+ //checks Alex's bug where lgArrLongs > lgNomLongs +1.
+ @Test
+ public void checkResizeInBigMem() {
+ int k = 1 << 14;
+ int u = 1 << 20;
+ MemorySegment seg = MemorySegment.ofArray(new byte[(8*k*16) +24]);
+ UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg);
+ for (int i=0; i 98663.0);
+ assertTrue(est < 101530.0);
+ }
+
+ @Test
+ public void checkForDruidBug2() { //update union with just sketch memory reference
+ final int k = 16384;
+ final UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build();
+ for (int i = 0; i < 100000; i++) {
+ usk.update(Integer.toString(i));
+ }
+ usk.rebuild(); //optional but created the symptom
+ final MemorySegment memIn = MemorySegment.ofArray(new byte[usk.getCompactBytes()]);
+ usk.compact(true, memIn); //side effect of loading the memIn
+
+ //create empty target union over a byte[]-backed MemorySegment
+ final MemorySegment mem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]);
+ final Union union1 = SetOperation.builder().setNominalEntries(k).buildUnion(mem);
+ // Feed the union the segment holding the compacted sketch, not a Sketch object.
+ union1.union(memIn);
+
+ final CompactSketch csk = union1.getResult();
+ // The result must be in estimation mode with exactly k retained entries and an estimate near 100000.
+ assertTrue(csk.getTheta() < 0.2);
+ assertEquals(csk.getRetainedEntries(true), 16384);
+ final double est = csk.getEstimate();
+ assertTrue(est > 98663.0);
+ assertTrue(est < 101530.0);
+ }
+
+  @Test
+  public void printlnTest() {
+    println("PRINTING: " + getClass().getName()); // exercises println(String) with this class's name
+  }
+
+ /** Debug print hook: does nothing while the print statement in the body stays commented out.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //Disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/EmptyTest.java b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java
new file mode 100644
index 000000000..c5492cf34
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+import org.testng.annotations.Test;
+
+
+/**
+ * Empty essentially means that the sketch has never seen data.
+ * These tests check isEmpty() on the results of intersections, AnotB and compaction.
+ * @author Lee Rhodes
+ */
+public class EmptyTest {
+
+ @Test
+ public void checkEmpty() {
+ final UpdateSketch sk1 = Sketches.updateSketchBuilder().build();
+ final UpdateSketch sk2 = Sketches.updateSketchBuilder().build();
+ final Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+
+ final int u = 100;
+ for (int i = 0; i < u; i++) { //disjoint
+ sk1.update(i);
+ sk2.update(i + u);
+ }
+ inter.intersect(sk1);
+ inter.intersect(sk2);
+
+ final CompactSketch csk = inter.getResult();
+ //The intersection of two disjoint, exact-mode sketches is empty, T == 1.0.
+ println(csk.toString());
+ assertTrue(csk.isEmpty());
+
+ final AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
+ final CompactSketch csk2 = aNotB.aNotB(csk, sk1);
+ //The AnotB of an empty, T == 1.0 sketch with another exact-mode sketch is empty, T == 1.0
+ assertTrue(csk2.isEmpty());
+ }
+
+ @Test
+ public void checkNotEmpty() {
+ final UpdateSketch sk1 = Sketches.updateSketchBuilder().build();
+ final UpdateSketch sk2 = Sketches.updateSketchBuilder().build();
+ final Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+
+ final int u = 10000; //estimating
+ for (int i = 0; i < u; i++) { //disjoint
+ sk1.update(i);
+ sk2.update(i + u);
+ }
+ inter.intersect(sk1);
+ inter.intersect(sk2);
+
+ final CompactSketch csk = inter.getResult();
+ println(csk.toString());
+ //The intersection of two disjoint, est-mode sketches is not-empty, T < 1.0.
+ assertFalse(csk.isEmpty());
+
+ AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
+ final CompactSketch csk2 = aNotB.aNotB(csk, sk1); //csk: not-empty (asserted above), T < 1.0; sk1: est-mode sketch
+ println(csk2.toString());
+ //The AnotB of a not-empty, T < 1.0 sketch with another est-mode sketch is not-empty.
+ assertFalse(csk2.isEmpty());
+
+ final UpdateSketch sk3 = Sketches.updateSketchBuilder().build();
+ aNotB = Sketches.setOperationBuilder().buildANotB();
+ final CompactSketch csk3 = aNotB.aNotB(sk3, sk1); //empty, T == 1.0; with est-mode sketch
+ println(csk3.toString());
+ //The AnotB of an empty, T == 1.0 sketch with another est-mode sketch is empty. NOTE(review): the old text also said "T < 1.0"; theta is not asserted here -- confirm
+ assertTrue(csk3.isEmpty());
+ }
+
+ @Test
+ public void checkPsampling() {
+ final UpdateSketch sk1 = Sketches.updateSketchBuilder().setP(.5F).build();
+ assertTrue(sk1.isEmpty());
+ //An empty P-sampling sketch where T < 1.0 and has never seen data is also empty
+ // and will have a full preamble of 24 bytes. But when compacted, theta returns to 1.0, so
+ // it will be stored as only 8 bytes.
+ assertEquals(sk1.compact().toByteArray().length, 8);
+ }
+
+ //These 3 tests reproduce a failure mode where an "old" empty sketch of 8 bytes without
+ // its empty-flag bit set is read. Each test passes if no exception is thrown.
+ @Test
+ public void checkBackwardCompatibility1() {
+ final int k = 16;
+ final int bytes = Sketches.getMaxUnionBytes(k); //288
+ final Union union = SetOperation.builder().buildUnion(MemorySegment.ofArray(new byte[bytes]));
+ final MemorySegment mem = badEmptySk();
+ final Sketch wsk = Sketches.wrapSketch(mem);
+ union.union(wsk); //union has memory
+ }
+
+ @Test
+ public void checkBackwardCompatibility2() {
+ final Union union = SetOperation.builder().setNominalEntries(16).buildUnion();
+ final MemorySegment mem = badEmptySk();
+ final Sketch wsk = Sketches.wrapSketch(mem);
+ union.union(wsk); //heap union
+ }
+
+ @Test
+ public void checkBackwardCompatibility3() {
+ final MemorySegment mem = badEmptySk();
+ Sketches.heapifySketch(mem); // heapify path for the same old-format image
+ }
+
+ @Test
+ public void checkEmptyToCompact() {
+ final UpdateSketch sk1 = Sketches.updateSketchBuilder().build();
+ final CompactSketch csk = sk1.compact();
+ assertTrue(csk instanceof EmptyCompactSketch);
+ final CompactSketch csk2 = csk.compact(); // compacting an already compact empty sketch
+ assertTrue(csk2 instanceof EmptyCompactSketch);
+ final CompactSketch csk3 = csk.compact(true, MemorySegment.ofArray(new byte[8])); // compact into an 8-byte segment
+ assertTrue(csk3 instanceof DirectCompactSketch);
+ assertEquals(csk2.getCurrentPreambleLongs(), 1);
+ }
+
+
+ //SerVer 2 had an empty sketch where preLongs = 1, but empty bit was not set.
+ private static MemorySegment badEmptySk() {
+ final long preLongs = 1;
+ final long serVer = 2;
+ final long family = 3; //compact
+ final long flags = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK;
+ final long seedHash = 0x93CC;
+ final long badEmptySk = seedHash << 48 | flags << 40
+ | family << 16 | serVer << 8 | preLongs;
+ final MemorySegment wmem = MemorySegment.ofArray(new byte[8]); // the whole image is one 8-byte long
+ wmem.set(JAVA_LONG_UNALIGNED, 0, badEmptySk);
+ return wmem;
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java b/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java
new file mode 100644
index 000000000..faba72701
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import org.testng.annotations.Test;
+
+/** Usage examples for theta sketches and their set operations; results are checked with TestNG assertions.
+ * @author Lee Rhodes
+ */
+public class ExamplesTest {
+
+  @Test
+  public void simpleCountingSketch() {
+    final int k = 4096;
+    final int u = 1000000;
+
+    final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build();
+    for (int i = 0; i < u; i++) {
+      sketch.update(i);
+    }
+
+    println(sketch.toString()); // a sample of this summary is reproduced in the comment below
+  }
+  /*
+### HeapQuickSelectSketch SUMMARY:
+ Nominal Entries (k) : 4096
+ Estimate : 1002714.745231455
+ Upper Bound, 95% conf : 1027777.3354974985
+ Lower Bound, 95% conf : 978261.4472857157
+ p : 1.0
+ Theta (double) : 0.00654223948655085
+ Theta (long) : 60341508738660257
+ Theta (long, hex : 00d66048519437a1
+ EstMode? : true
+ Empty? : false
+ Resize Factor : 8
+ Array Size Entries : 8192
+ Retained Entries : 6560
+ Update Seed : 9001
+ Seed Hash : ffff93cc
+### END SKETCH SUMMARY
+  */
+
+  @Test
+  public void theta2dot0Examples() {
+    //Load source sketches: A = 1..1000, B = 251..1250 (750 values in common)
+    final UpdateSketchBuilder bldr = UpdateSketch.builder();
+    final UpdateSketch skA = bldr.build();
+    final UpdateSketch skB = bldr.build();
+    for (int i = 1; i <= 1000; i++) {
+      skA.update(i);
+      skB.update(i + 250);
+    }
+
+    //Union Stateless:
+    Union union = SetOperation.builder().buildUnion();
+    CompactSketch csk = union.union(skA, skB);
+    org.testng.Assert.assertEquals(csk.getEstimate(), 1250.0, 0.0); // unlike the assert keyword, runs without -ea
+
+    //Union Stateful:
+    union = SetOperation.builder().buildUnion();
+    union.union(skA); //first call
+    union.union(skB); //2nd through nth calls
+    //...
+    csk = union.getResult();
+    org.testng.Assert.assertEquals(csk.getEstimate(), 1250.0, 0.0);
+
+    //Intersection Stateless:
+    Intersection inter = SetOperation.builder().buildIntersection();
+    csk = inter.intersect(skA, skB);
+    org.testng.Assert.assertEquals(csk.getEstimate(), 750.0, 0.0);
+
+    //Intersection Stateful:
+    inter = SetOperation.builder().buildIntersection();
+    inter.intersect(skA); //first call
+    inter.intersect(skB); //2nd through nth calls
+    //...
+    csk = inter.getResult();
+    org.testng.Assert.assertEquals(csk.getEstimate(), 750.0, 0.0);
+
+    //AnotB Stateless:
+    AnotB diff = SetOperation.builder().buildANotB();
+    csk = diff.aNotB(skA, skB);
+    org.testng.Assert.assertEquals(csk.getEstimate(), 250.0, 0.0);
+
+    //AnotB Stateful:
+    diff = SetOperation.builder().buildANotB();
+    diff.setA(skA); //first call
+    diff.notB(skB); //2nd through nth calls
+    //...
+    csk = diff.getResult(true);
+    org.testng.Assert.assertEquals(csk.getEstimate(), 250.0, 0.0);
+  }
+
+  @Test
+  public void printlnTest() {
+    println("PRINTING: "+this.getClass().getName());
+  }
+
+  /**
+   * @param s value to print
+   */
+  static void println(final String s) {
+    //System.out.println(s); //enable/disable here
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java
new file mode 100644
index 000000000..f04b01829
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1;
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/** Verifies that SerVer 1 and SerVer 2 images still heapify into current theta2 sketches.
+ * @author Lee Rhodes
+ */
+public class ForwardCompatibilityTest {
+
+ @Test
+ public void checkSerVer1_Empty() {
+ CompactSketch csk = EmptyCompactSketch.getInstance();
+ MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly(); //SerVer1 image of the empty sketch
+ Sketch sketch = Sketch.heapify(srcSeg);
+ assertEquals(sketch.isEmpty(), true);
+ assertEquals(sketch.isEstimationMode(), false);
+ assertEquals(sketch.isDirect(), false);
+ assertEquals(sketch.hasMemorySegment(), false);
+ assertEquals(sketch.isCompact(), true);
+ assertEquals(sketch.isOrdered(), true);
+ assertTrue(sketch instanceof EmptyCompactSketch);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkSerVer1_badPrelongs() {
+ CompactSketch csk = EmptyCompactSketch.getInstance();
+
+ MemorySegment srcWseg = convertSerVer3toSerVer1(csk);
+ //corrupt byte 0 of the writable image so that heapify rejects its preLongs
+ srcWseg.set(JAVA_BYTE, 0, (byte) 1);
+ Sketch.heapify(srcWseg); //throws because bad preLongs
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkSerVer1_tooSmall() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().build();
+ usk.update(1);
+ usk.update(2);
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly();
+ MemorySegment srcSeg2 = srcSeg.asSlice(0, srcSeg.byteSize() - 8); //drop the last 8 bytes
+ Sketch.heapify(srcSeg2); //throws because too small
+ }
+
+
+ @Test
+ public void checkSerVer1_1Value() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().build();
+ usk.update(1);
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly();
+ Sketch sketch = Sketch.heapify(srcSeg);
+ assertEquals(sketch.isEmpty(), false);
+ assertEquals(sketch.isEstimationMode(), false);
+ assertEquals(sketch.isDirect(), false);
+ assertEquals(sketch.hasMemorySegment(), false);
+ assertEquals(sketch.isCompact(), true);
+ assertEquals(sketch.isOrdered(), true);
+ assertEquals(sketch.getEstimate(), 1.0);
+ assertTrue(sketch instanceof SingleItemSketch);
+ }
+
+ @Test
+ public void checkSerVer2_1PreLong_Empty() {
+ CompactSketch csk = EmptyCompactSketch.getInstance();
+ MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly();
+ Sketch sketch = Sketch.heapify(srcSeg);
+ assertEquals(sketch.isEmpty(), true);
+ assertEquals(sketch.isEstimationMode(), false);
+ assertEquals(sketch.isDirect(), false);
+ assertEquals(sketch.hasMemorySegment(), false);
+ assertEquals(sketch.isCompact(), true);
+ assertEquals(sketch.isOrdered(), true);
+ assertTrue(sketch instanceof EmptyCompactSketch);
+ }
+
+ @Test
+ public void checkSerVer2_2PreLongs_Empty() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build();
+ for (int i = 0; i < 2; i++) { usk.update(i); } //exact mode
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly();
+ //copy the first 16 bytes into a writable segment so that fields can be overwritten
+ MemorySegment srcWseg = MemorySegment.ofArray(new byte[16]);
+ MemorySegment.copy(srcSeg, 0, srcWseg, 0, 16);
+ PreambleUtil.setEmpty(srcWseg); //Force
+ assertTrue(PreambleUtil.isEmptyFlag(srcWseg));
+ srcWseg.set(JAVA_INT_UNALIGNED, 8, 0); //corrupt curCount = 0
+
+ Sketch sketch = Sketch.heapify(srcWseg);
+ assertTrue(sketch instanceof EmptyCompactSketch);
+ }
+
+ @Test
+ public void checkSerVer2_3PreLongs_Empty() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build();
+ for (int i = 0; i < 32; i++) { usk.update(i); } //est mode
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly();
+ //copy the first 24 bytes into a writable segment so that fields can be overwritten
+ MemorySegment srcWseg = MemorySegment.ofArray(new byte[24]);
+ MemorySegment.copy(srcSeg, 0, srcWseg, 0, 24);
+ PreambleUtil.setEmpty(srcWseg); //Force
+ assertTrue(PreambleUtil.isEmptyFlag(srcWseg));
+ srcWseg.set(JAVA_INT_UNALIGNED, 8, 0); //corrupt curCount = 0
+ srcWseg.set(JAVA_LONG_UNALIGNED, 16, Long.MAX_VALUE); //corrupt to make it look empty
+
+ Sketch sketch = Sketch.heapify(srcWseg); //now serVer=3, EmptyCompactSketch
+ assertTrue(sketch instanceof EmptyCompactSketch);
+ }
+
+ @Test
+ public void checkSerVer2_2PreLongs_1Value() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build();
+ usk.update(1); //exact mode
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly();
+
+ Sketch sketch = Sketch.heapify(srcSeg);
+ assertEquals(sketch.isEmpty(), false);
+ assertEquals(sketch.isEstimationMode(), false);
+ assertEquals(sketch.isDirect(), false);
+ assertEquals(sketch.hasMemorySegment(), false);
+ assertEquals(sketch.isCompact(), true);
+ assertEquals(sketch.isOrdered(), true);
+ assertTrue(sketch instanceof SingleItemSketch);
+ }
+
+ @Test
+ public void checkSerVer2_3PreLongs_1Value() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build();
+ for (int i = 0; i < 32; i++) { usk.update(i); } //est mode
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly();
+ //copy the first 32 bytes into a writable segment so that fields can be overwritten
+ MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]);
+ MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32);
+ srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1
+ srcWseg.set(JAVA_LONG_UNALIGNED, 16, Long.MAX_VALUE); //corrupt theta to make it look exact
+ long[] cache = csk.getCache();
+ srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value
+
+ Sketch sketch = Sketch.heapify(srcWseg);
+ assertEquals(sketch.isEmpty(), false);
+ assertEquals(sketch.isEstimationMode(), false);
+ assertEquals(sketch.isDirect(), false);
+ assertEquals(sketch.hasMemorySegment(), false);
+ assertEquals(sketch.isCompact(), true);
+ assertEquals(sketch.isOrdered(), true);
+ assertTrue(sketch instanceof SingleItemSketch);
+ }
+
+ @Test
+ public void checkSerVer2_3PreLongs_1Value_ThLessthan1() {
+ UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build();
+ for (int i = 0; i < 32; i++) { usk.update(i); } //est mode
+ CompactSketch csk = usk.compact(true, null);
+ MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly();
+ //copy the first 32 bytes into a writable segment so that fields can be overwritten
+ MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]);
+ MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32);
+ srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1
+ //the long at offset 16 is deliberately NOT set to Long.MAX_VALUE here, so estimation mode is kept
+ long[] cache = csk.getCache();
+ srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value
+
+ Sketch sketch = Sketch.heapify(srcWseg);
+ assertEquals(sketch.isEmpty(), false);
+ assertEquals(sketch.isEstimationMode(), true);
+ assertEquals(sketch.isDirect(), false);
+ assertEquals(sketch.hasMemorySegment(), false);
+ assertEquals(sketch.isCompact(), true);
+ assertEquals(sketch.isOrdered(), true);
+ assertTrue(sketch instanceof HeapCompactSketch);
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /** Debug print hook; the print statement below is commented out, so calls produce no output.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java
new file mode 100644
index 000000000..47420ae0e
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java
@@ -0,0 +1,696 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.common.Family.ALPHA;
+import static org.apache.datasketches.common.ResizeFactor.X1;
+import static org.apache.datasketches.common.ResizeFactor.X2;
+import static org.apache.datasketches.common.ResizeFactor.X8;
+import static org.apache.datasketches.common.Util.clear;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG;
+import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class HeapAlphaSketchTest {
+ private Family fam_ = ALPHA; //sketch family handed to the UpdateSketch builders in the tests below
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadSerVer() {
+ final int nomEntries = 512;
+ final int uniques = nomEntries; //u == k: the estimate is asserted to be exact below
+ final long updateSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setSeed(updateSeed)
+ .setNominalEntries(nomEntries).build();
+ final HeapAlphaSketch alpha = (HeapAlphaSketch) sketch; //same object, concrete type
+
+ assertTrue(sketch.isEmpty());
+
+ for (int item = 0; item < uniques; item++) {
+ alpha.update(item);
+ }
+
+ assertFalse(sketch.isEmpty());
+ assertEquals(sketch.getEstimate(), uniques, 0.0);
+ assertEquals(alpha.getRetainedEntries(false), uniques);
+
+ //serialize, then zero the SerVer byte so that heapify must reject the image
+ final MemorySegment seg = MemorySegment.ofArray(sketch.toByteArray());
+ seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0);
+
+ Sketch.heapify(seg, updateSeed);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkConstructorKtooSmall() {
+ final int rejectedK = 256; //build() is expected to throw for this value
+ UpdateSketch.builder().setFamily(fam_).setNominalEntries(rejectedK).build();
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkAlphaIncompatibleWithMem() {
+ final MemorySegment dstSeg = MemorySegment.ofArray(new byte[(512 * 16) + 24]);
+ UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(512).build(dstSeg); //expected to throw
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSketchID_UpdateSketch() {
+ final int nomEntries = 512;
+ final int uniques = nomEntries;
+ final long updateSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setSeed(updateSeed)
+ .setNominalEntries(nomEntries).build();
+ final HeapAlphaSketch alpha = (HeapAlphaSketch) sketch; //same object, concrete type
+ assertTrue(sketch.isEmpty());
+
+ for (int item = 0; item < uniques; item++) {
+ sketch.update(item);
+ }
+
+ assertFalse(sketch.isEmpty());
+ assertEquals(sketch.getEstimate(), uniques, 0.0);
+ assertEquals(alpha.getRetainedEntries(false), uniques);
+ final byte[] image = sketch.toByteArray();
+ final MemorySegment seg = MemorySegment.ofArray(image);
+ seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //zero the family ID byte
+
+ //heapifying the corrupted segment must fail
+ Sketch.heapify(seg, updateSeed);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifySeedConflict() {
+ final int nomEntries = 512;
+ final long buildSeed = 1021;
+ final long heapifySeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setSeed(buildSeed)
+ .setNominalEntries(nomEntries).build();
+ final byte[] image = sketch.toByteArray();
+ final MemorySegment roSeg = MemorySegment.ofArray(image).asReadOnly();
+ Sketch.heapify(roSeg, heapifySeed); //seed differs from the one used to build the sketch
+ }
+
+ @Test
+ public void checkHeapifyByteArrayExact() {
+ int k = 512;
+ int u = k;
+ long seed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed)
+ .setNominalEntries(k).build();
+
+ for (int i=0; i k);
+ }
+
+ @Test
+ public void checkSamplingMode() {
+ int k = 4096;
+ int u = k;
+ float p = 0.5f; //value handed to the builder's setP
+
+ UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setP(p)
+ .setNominalEntries(k).build();
+ HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks
+
+ for (int i = 0; i < u; i++ ) {
+ usk.update(i);
+ }
+
+ double p2 = sk1.getP();
+ double theta = sk1.getTheta();
+ assertTrue(theta <= p2); //theta must not exceed the configured p
+
+ double est = usk.getEstimate();
+ double kdbl = k;
+ assertEquals(est, kdbl, kdbl*.05); //(actual, expected, delta): estimate within 5% of k
+ double ub = usk.getUpperBound(1);
+ assertTrue(ub > est);
+ double lb = usk.getLowerBound(1);
+ assertTrue(lb < est);
+ }
+
+ @Test
+ public void checkErrorBounds() {
+ final int nomEntries = 512;
+
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X1)
+ .setNominalEntries(nomEntries).build();
+
+ //Phase 1: exactly k distinct items; both bounds must equal the estimate
+ for (int item = 0; item < nomEntries; item++) {
+ sketch.update(item);
+ }
+
+ double estimate = sketch.getEstimate();
+ double lower = sketch.getLowerBound(2);
+ double upper = sketch.getUpperBound(2);
+ assertEquals(estimate, upper, 0.0);
+ assertEquals(estimate, lower, 0.0);
+
+ //Phase 2: continue up to 10*k distinct items, offering each one twice
+ final int totalUniques = 10 * nomEntries;
+ for (int item = nomEntries; item < totalUniques; item++) {
+ sketch.update(item);
+ sketch.update(item); //duplicate of the previous update
+ }
+ estimate = sketch.getEstimate();
+ lower = sketch.getLowerBound(2);
+ upper = sketch.getUpperBound(2);
+ assertTrue(upper >= estimate);
+ assertTrue(lower <= estimate);
+ }
+
+ //Empty Tests
+ @Test
+ public void checkEmptyAndP() {
+ //Case 1: new sketch built without setP (original note: p = 1.0)
+ final int nomEntries = 1024;
+
+ final UpdateSketch fullSk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(nomEntries).build();
+ final HeapAlphaSketch fullAlpha = (HeapAlphaSketch) fullSk; //same object, concrete type
+
+ assertTrue(fullSk.isEmpty());
+ fullSk.update(1);
+ assertEquals(fullAlpha.getRetainedEntries(true), 1);
+ assertFalse(fullSk.isEmpty());
+
+ //Case 2: new sketch built with p = .001
+ final UpdateSketch sampledSk = UpdateSketch.builder().setFamily(fam_).setP((float)0.001)
+ .setNominalEntries(nomEntries).build();
+ final HeapAlphaSketch sampledAlpha = (HeapAlphaSketch) sampledSk;
+ assertTrue(sampledSk.isEmpty());
+ sampledSk.update(1); //not retained, see the count assertion below
+ assertEquals(sampledAlpha.getRetainedEntries(true), 0);
+ assertFalse(sampledSk.isEmpty()); //no longer empty even though nothing was retained
+ final double estimate = sampledSk.getEstimate();
+ //the estimate is zero because the retained count is zero
+ assertEquals(estimate, 0.0, 0.0);
+ final double upper = sampledSk.getUpperBound(2);
+ //the upper bound is nevertheless positive
+ assertTrue(upper > 0.0);
+ final double lower = sampledSk.getLowerBound(2);
+ assertTrue(lower <= estimate);
+ //the lower bound may not exceed the estimate
+ }
+
+ @Test
+ public void checkUpperAndLowerBounds() {
+ final int nomEntries = 512;
+ final int uniques = 2 * nomEntries; //twice as many distinct items as k
+
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X2)
+ .setNominalEntries(nomEntries).build();
+
+ for (int item = 0; item < uniques; item++) {
+ sketch.update(item);
+ }
+
+ final double estimate = sketch.getEstimate();
+ final double upper = sketch.getUpperBound(1);
+ final double lower = sketch.getLowerBound(1);
+ assertTrue(estimate < upper); //the bounds must strictly bracket the estimate
+ assertTrue(estimate > lower);
+ }
+
+ @Test
+ public void checkRebuild() {
+ final int nomEntries = 512;
+ final int uniques = 4 * nomEntries;
+
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setNominalEntries(nomEntries).build();
+ final HeapAlphaSketch alpha = (HeapAlphaSketch) sketch; //same object, concrete type
+
+ assertTrue(sketch.isEmpty());
+
+ for (int item = 0; item < uniques; item++) {
+ sketch.update(item);
+ }
+
+ assertFalse(sketch.isEmpty());
+ assertTrue(sketch.getEstimate() > 0.0);
+ assertNotEquals(alpha.getRetainedEntries(false), alpha.getRetainedEntries(true)); //counts differ before rebuild
+
+ alpha.rebuild();
+ assertEquals(alpha.getRetainedEntries(false), alpha.getRetainedEntries(true)); //counts agree afterwards
+ alpha.rebuild(); //a second rebuild keeps them in agreement
+ assertEquals(alpha.getRetainedEntries(false), alpha.getRetainedEntries(true));
+ }
+
+ @Test
+ public void checkResetAndStartingSubMultiple() {
+ int k = 1024;
+ int u = 4*k;
+
+ UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X8)
+ .setNominalEntries(k).build();
+ HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks
+
+ assertTrue(usk.isEmpty());
+
+ for (int i=0; i k);
+ println("Est: "+est);
+ }
+
+ @Test
+ public void checkHeapifyAndWrap() {
+ final int lgK = 9;
+ final int k = 1< k); // in general it might be exactly k, but in this case must be greater
+ }
+
+ @Test
+ public void checkSamplingMode() {
+ int k = 4096;
+ int u = k;
+ float p = 0.5f; //value handed to the builder's setP
+
+ UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setP(p).setNominalEntries(k).build();
+ HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks
+
+ for (int i = 0; i < u; i++ ) {
+ usk.update(i);
+ }
+
+ double p2 = sk1.getP();
+ double theta = sk1.getTheta();
+ assertTrue(theta <= p2); //theta must not exceed the configured p
+
+ double est = usk.getEstimate();
+ double kdbl = k;
+ assertEquals(est, kdbl, kdbl*.05); //(actual, expected, delta): estimate within 5% of k
+ double ub = usk.getUpperBound(1);
+ assertTrue(ub > est);
+ double lb = usk.getLowerBound(1);
+ assertTrue(lb < est);
+ }
+
+ @Test
+ public void checkErrorBounds() {
+ final int nomEntries = 512;
+
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X1).setNominalEntries(nomEntries).build();
+
+ //Phase 1: exactly k distinct items; both bounds must equal the estimate
+ for (int item = 0; item < nomEntries; item++) {
+ sketch.update(item);
+ }
+
+ double estimate = sketch.getEstimate();
+ double lower = sketch.getLowerBound(2);
+ double upper = sketch.getUpperBound(2);
+ assertEquals(estimate, upper, 0.0);
+ assertEquals(estimate, lower, 0.0);
+
+ //Phase 2: continue up to 10*k distinct items, offering each one twice
+ final int totalUniques = 10 * nomEntries;
+ for (int item = nomEntries; item < totalUniques; item++) {
+ sketch.update(item);
+ sketch.update(item); //duplicate of the previous update
+ }
+ estimate = sketch.getEstimate();
+ lower = sketch.getLowerBound(2);
+ upper = sketch.getUpperBound(2);
+ assertTrue(upper >= estimate);
+ assertTrue(lower <= estimate);
+ }
+
+ //Empty Tests
+ @Test
+ public void checkEmptyAndP() {
+ //Case 1: new sketch built without setP (original note: p = 1.0)
+ final int nomEntries = 1024;
+
+ final UpdateSketch fullSk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(nomEntries).build();
+ final HeapQuickSelectSketch fullQs = (HeapQuickSelectSketch) fullSk; //same object, concrete type
+
+ assertTrue(fullSk.isEmpty());
+ fullSk.update(1);
+ assertEquals(fullQs.getRetainedEntries(true), 1);
+ assertFalse(fullSk.isEmpty());
+
+ //Case 2: new sketch built with p = .001
+ final UpdateSketch sampledSk = UpdateSketch.builder().setFamily(fam_).setP((float)0.001).setNominalEntries(nomEntries).build();
+ final HeapQuickSelectSketch sampledQs = (HeapQuickSelectSketch) sampledSk;
+ assertTrue(sampledSk.isEmpty());
+ sampledSk.update(1); //not retained, see the count assertion below
+ assertEquals(sampledQs.getRetainedEntries(true), 0);
+ assertFalse(sampledSk.isEmpty()); //no longer empty even though nothing was retained
+ final double estimate = sampledSk.getEstimate();
+ //the estimate is zero because the retained count is zero
+ assertEquals(estimate, 0.0, 0.0);
+ final double upper = sampledSk.getUpperBound(2);
+ //the upper bound is nevertheless positive
+ assertTrue(upper > 0.0);
+ final double lower = sampledSk.getLowerBound(2);
+ assertTrue(lower <= estimate);
+ //the lower bound may not exceed the estimate
+ }
+
+ @Test
+ public void checkUpperAndLowerBounds() {
+ final int nomEntries = 512;
+ final int uniques = 2 * nomEntries; //twice as many distinct items as k
+
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X2).setNominalEntries(nomEntries).build();
+
+ for (int item = 0; item < uniques; item++) {
+ sketch.update(item);
+ }
+
+ final double estimate = sketch.getEstimate();
+ final double upper = sketch.getUpperBound(1);
+ final double lower = sketch.getLowerBound(1);
+ assertTrue(estimate < upper); //the bounds must strictly bracket the estimate
+ assertTrue(estimate > lower);
+ }
+
+ @Test
+ public void checkRebuild() {
+ final int nomEntries = 16;
+ final int uniques = 4 * nomEntries;
+
+ final UpdateSketch sketch = UpdateSketch.builder().setFamily(fam_).setNominalEntries(nomEntries).build();
+ final HeapQuickSelectSketch quickSel = (HeapQuickSelectSketch) sketch; //same object, concrete type
+
+ assertTrue(sketch.isEmpty());
+
+ for (int item = 0; item < uniques; item++) {
+ sketch.update(item);
+ }
+
+ assertFalse(sketch.isEmpty());
+ assertTrue(sketch.getEstimate() > 0.0);
+ assertTrue(quickSel.getRetainedEntries(false) > nomEntries); //more than k retained before rebuild
+
+ quickSel.rebuild();
+ assertEquals(quickSel.getRetainedEntries(false), nomEntries); //rebuild leaves exactly k
+ assertEquals(quickSel.getRetainedEntries(true), nomEntries);
+ quickSel.rebuild(); //a second rebuild changes nothing
+ assertEquals(quickSel.getRetainedEntries(false), nomEntries);
+ assertEquals(quickSel.getRetainedEntries(true), nomEntries);
+ }
+
+ @Test
+ public void checkResetAndStartingSubMultiple() {
+ int k = 1024;
+ int u = 4*k;
+
+ UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X8).setNominalEntries(k).build();
+ HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks
+
+ assertTrue(usk.isEmpty());
+
+ for (int i=0; i>> 1;
+ SingleItemSketch sis = SingleItemSketch.create(i);
+ long halfMax = Long.MAX_VALUE >> 1;
+ int count = sis.getCountLessThanThetaLong(halfMax);
+ assertEquals(count, (h < halfMax) ? 1 : 0);
+ }
+ }
+
+ @Test
+ public void checkSerDe() {
+ final SingleItemSketch original = SingleItemSketch.create(1);
+ final byte[] image = original.toByteArray();
+ final MemorySegment seg = MemorySegment.ofArray(image);
+ final short seedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED);
+ final SingleItemSketch firstCopy = SingleItemSketch.heapify(seg, seedHash);
+ assertEquals(firstCopy.getEstimate(), 1.0);
+
+ final SingleItemSketch secondCopy = SingleItemSketch.heapify(seg, seedHash); //same image heapified again
+ assertEquals(secondCopy.getEstimate(), 1.0);
+
+ final Union unionOp = Sketches.setOperationBuilder().buildUnion();
+ unionOp.union(original);
+ unionOp.union(firstCopy);
+ unionOp.union(secondCopy);
+ final CompactSketch merged = unionOp.getResult();
+ assertTrue(merged instanceof SingleItemSketch); //three copies of one item still yield a single item
+ assertEquals(unionOp.getResult().getEstimate(), 1.0);
+ }
+
+ @Test
+ public void checkRestricted() {
+ final SingleItemSketch single = SingleItemSketch.create(1);
+ assertNull(single.getMemorySegment()); //no backing segment is reported
+ assertEquals(single.getCompactPreambleLongs(), 1);
+ }
+
+ @Test
+ public void unionWrapped() {
+ final Sketch single = SingleItemSketch.create(1);
+ final Union unionOp = Sketches.setOperationBuilder().buildUnion();
+ final MemorySegment image = MemorySegment.ofArray(single.toByteArray());
+ unionOp.union(image); //feed the serialized image rather than the sketch object
+ assertEquals(unionOp.getResult().getEstimate(), 1, 0);
+ }
+
+ @Test
+ public void buildAndCompact() {
+ UpdateSketch updSk;
+ CompactSketch compacted;
+ int capacity;
+ //Sketch built without a MemorySegment
+ updSk = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ updSk.update(1);
+ compacted = updSk.compact(true, null);
+ assertTrue(compacted instanceof SingleItemSketch);
+ compacted = updSk.compact(false, null);
+ assertTrue(compacted instanceof SingleItemSketch);
+
+ //Sketch built into a MemorySegment
+ capacity = Sketches.getMaxUpdateSketchBytes(32);
+ MemorySegment seg = MemorySegment.ofArray(new byte[capacity]);
+ updSk = Sketches.updateSketchBuilder().setNominalEntries(32).build(seg);
+ updSk.update(1);
+ compacted = updSk.compact(true, null);
+ assertTrue(compacted instanceof SingleItemSketch);
+ compacted = updSk.compact(false, null);
+ assertTrue(compacted instanceof SingleItemSketch);
+ //Compacting into a destination MemorySegment
+ capacity = Sketches.getMaxCompactSketchBytes(1);
+ seg = MemorySegment.ofArray(new byte[capacity]);
+ compacted = updSk.compact(true, seg);
+ assertTrue(compacted.isOrdered());
+ compacted = updSk.compact(false, seg);
+ assertTrue(compacted.isOrdered()); //ordered even with a false first argument
+ }
+
+ @Test
+ public void intersection() {
+ UpdateSketch twoItems, oneItem;
+ CompactSketch result;
+ int capacity;
+ //Intersection created without a MemorySegment
+ twoItems = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ oneItem = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ twoItems.update(1);
+ twoItems.update(2);
+ oneItem.update(1); //the only value common to both sketches
+ Intersection intersectionOp = Sketches.setOperationBuilder().buildIntersection();
+ intersectionOp.intersect(twoItems);
+ intersectionOp.intersect(oneItem);
+ result = intersectionOp.getResult(true, null);
+ assertTrue(result instanceof SingleItemSketch);
+
+ //Intersection created over a MemorySegment
+ capacity = Sketches.getMaxIntersectionBytes(32);
+ MemorySegment seg = MemorySegment.ofArray(new byte[capacity]);
+ intersectionOp = Sketches.setOperationBuilder().buildIntersection(seg);
+ intersectionOp.intersect(twoItems);
+ intersectionOp.intersect(oneItem);
+ result = intersectionOp.getResult(true, null);
+ assertTrue(result instanceof SingleItemSketch);
+ result = intersectionOp.getResult(false, null);
+ assertTrue(result instanceof SingleItemSketch);
+ }
+
+ @Test
+ public void union() {
+ UpdateSketch first, second;
+ CompactSketch result;
+ int capacity;
+ //Union created without a MemorySegment
+ first = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ second = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ first.update(1);
+ second.update(1); //same single value in both sketches
+ Union unionOp = Sketches.setOperationBuilder().buildUnion();
+ unionOp.union(first);
+ unionOp.union(second);
+ result = unionOp.getResult(true, null);
+ assertTrue(result instanceof SingleItemSketch);
+
+ //Union created over a MemorySegment
+ capacity = Sketches.getMaxUnionBytes(32);
+ MemorySegment seg = MemorySegment.ofArray(new byte[capacity]);
+ unionOp = Sketches.setOperationBuilder().buildUnion(seg);
+ unionOp.union(first);
+ unionOp.union(second);
+ result = unionOp.getResult(true, null);
+ assertTrue(result instanceof SingleItemSketch);
+ result = unionOp.getResult(false, null);
+ assertTrue(result instanceof SingleItemSketch);
+ }
+
+ @Test
+ public void aNotB() {
+ UpdateSketch skA, skB;
+ CompactSketch result;
+ //AnotB created without a MemorySegment
+ skA = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ skB = Sketches.updateSketchBuilder().setNominalEntries(32).build();
+ skA.update(1);
+ skB.update(2); //differs from the value in A
+ AnotB aNotBOp = Sketches.setOperationBuilder().buildANotB();
+ aNotBOp.setA(skA);
+ aNotBOp.notB(skB);
+ result = aNotBOp.getResult(true, null, true);
+ assertTrue(result instanceof SingleItemSketch);
+ //not AnotB off-heap form
+ }
+
+ @Test
+ public void checkHeapifyInstance() {
+ final UpdateSketch first = new UpdateSketchBuilder().build();
+ first.update(1);
+ final UpdateSketch second = new UpdateSketchBuilder().build();
+ second.update(1);
+ final Intersection intersectionOp = Sketches.setOperationBuilder().buildIntersection();
+ intersectionOp.intersect(first);
+ intersectionOp.intersect(second);
+ final MemorySegment dstSeg = MemorySegment.ofArray(new byte[16]);
+ final CompactSketch written = intersectionOp.getResult(false, dstSeg); //dstSeg is passed as the destination
+ assertTrue(written.isOrdered());
+ final Sketch heapified = Sketches.heapifySketch(dstSeg); //heapify from that same segment
+ assertTrue(heapified instanceof SingleItemSketch);
+ println(heapified.toString(true, true, 1, true));
+ }
+
+ @Test
+ public void checkSingleItemBadFlags() {
+ final short seedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED);
+ final UpdateSketch source = new UpdateSketchBuilder().build();
+ source.update(1);
+ final MemorySegment image = MemorySegment.ofArray(new byte[16]);
+ source.compact(true, image);
+ image.set(JAVA_BYTE, 5, (byte) 0); //zero the flags byte at offset 5
+ try {
+ SingleItemSketch.heapify(image, seedHash); //must reject the zeroed flags
+ fail();
+ } catch (SketchesArgumentException expected) { }
+ }
+
+ @Test
+ public void checkDirectUnionSingleItem2() {
+ final Sketch withoutFlag = Sketch.wrap(siSkWoutSiFlag24Bytes());
+ assertEquals(withoutFlag.getEstimate(), 1.0, 0.0);
+ //the image built without the flag is read as one item
+ final Sketch withFlag = Sketch.wrap(siSkWithSiFlag24Bytes());
+ assertEquals(withFlag.getEstimate(), 1.0, 0.0);
+ //the image built with the flag is read the same way
+ }
+
+ @Test
+ public void checkSingleItemCompact() {
+ final UpdateSketch source = new UpdateSketchBuilder().build();
+ source.update(1);
+ final CompactSketch single = source.compact();
+ assertTrue(single instanceof SingleItemSketch);
+ final CompactSketch recompacted = single.compact();
+ assertEquals(single, recompacted);
+ final CompactSketch segBacked = single.compact(true, MemorySegment.ofArray(new byte[16]));
+ assertTrue(segBacked instanceof DirectCompactSketch); //compacting into a segment yields a different class
+ assertEquals(recompacted.getCurrentPreambleLongs(), 1);
+ assertEquals(segBacked.getCurrentPreambleLongs(), 1);
+ }
+
+
+ static final long SiSkPre0WithSiFlag = 0x93cc3a0000030301L; //first long of the test image; differs from the next constant only by bit 0x20 (0x3a vs 0x1a)
+ static final long SiSkPre0WoutSiFlag = 0x93cc1a0000030301L; //same value with that one bit cleared
+ static final long Hash = 0x05a186bdcb7df915L; //written at byte offset 8 by both image builders below
+
+ static MemorySegment siSkWithSiFlag24Bytes() {
+ int cap = 24; //8 extra bytes
+ MemorySegment wmem = MemorySegment.ofArray(new byte[cap]);
+ wmem.set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WithSiFlag);
+ wmem.set(JAVA_LONG_UNALIGNED, 8, Hash);
+ return wmem;
+ }
+
+ static MemorySegment siSkWoutSiFlag24Bytes() {
+ int cap = 24; //8 extra bytes
+ MemorySegment wmem = MemorySegment.ofArray(new byte[cap]);
+ wmem.set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WoutSiFlag);
+ wmem.set(JAVA_LONG_UNALIGNED, 8, Hash);
+ return wmem;
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: " + getClass().getName()); //exercises the println helper of this class
+ }
+
+ /** Debug print hook; the print statement below is commented out, so calls produce no output.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/SketchTest.java b/src/test/java/org/apache/datasketches/theta2/SketchTest.java
new file mode 100644
index 000000000..686ad7ff9
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SketchTest.java
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.Family.ALPHA;
+import static org.apache.datasketches.common.Family.COMPACT;
+import static org.apache.datasketches.common.Family.QUICKSELECT;
+import static org.apache.datasketches.common.ResizeFactor.X1;
+import static org.apache.datasketches.common.ResizeFactor.X2;
+import static org.apache.datasketches.common.ResizeFactor.X4;
+import static org.apache.datasketches.common.ResizeFactor.X8;
+import static org.apache.datasketches.common.Util.isSameResource;
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1;
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2;
+import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK;
+import static org.apache.datasketches.theta2.Sketch.getMaxCompactSketchBytes;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+//import org.apache.datasketches.theta2.Sketches;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class SketchTest {
+
+  /** The maximum compact size for 10 entries is 10 longs of data plus the COMPACT family's maximum preamble. */
+  @Test
+  public void checkGetMaxBytesWithEntries() {
+    final int entries = 10;
+    final int dataBytes = entries * Long.BYTES;
+    final int maxPreambleBytes = Family.COMPACT.getMaxPreLongs() << 3;
+    assertEquals(getMaxCompactSketchBytes(entries), dataBytes + maxPreambleBytes);
+  }
+
+ @Test
+ public void checkGetCurrentBytes() {
+ int k = 64;
+ int lowQSPreLongs = Family.QUICKSELECT.getMinPreLongs();
+ int lowCompPreLongs = Family.COMPACT.getMinPreLongs();
+ UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build(); // QS Sketch
+ assertEquals(sketch.getCurrentPreambleLongs(), lowQSPreLongs);
+ assertEquals(sketch.getCompactPreambleLongs(), 1); //compact form
+ assertEquals(sketch.getCurrentDataLongs(), k*2);
+ assertEquals(sketch.getCurrentBytes(), (k*2*8) + (lowQSPreLongs << 3));
+ assertEquals(sketch.getCompactBytes(), lowCompPreLongs << 3);
+
+ CompactSketch compSk = sketch.compact(false, null);
+ assertEquals(compSk.getCompactBytes(), 8);
+ assertEquals(compSk.getCurrentBytes(), 8);
+ assertEquals(compSk.getCurrentDataLongs(), 0);
+
+ int compPreLongs = computeCompactPreLongs(sketch.isEmpty(), sketch.getRetainedEntries(true),
+ sketch.getThetaLong());
+ assertEquals(compPreLongs, 1);
+
+ for (int i=0; i 1) { assertEquals(maxCompBytes, 24 + (i * 8)); } //assumes maybe estimation mode
+ }
+ }
+
+  /**
+   * Checks that explicit builder settings are reflected by the built sketch, and that a
+   * builder used without any settings produces a sketch with the default values.
+   */
+  @Test
+  public void checkBuilder() {
+    final int nomEntries = 2048;
+    final int expectedLgK = Integer.numberOfTrailingZeros(nomEntries);
+    final long seed = 1021;
+    final float p = 0.5f;
+
+    UpdateSketch sketch = UpdateSketch.builder()
+        .setSeed(seed)
+        .setP(p)
+        .setResizeFactor(X4)
+        .setFamily(ALPHA)
+        .setNominalEntries(nomEntries)
+        .build();
+    String simpleName = sketch.getClass().getSimpleName();
+    assertEquals(simpleName, "HeapAlphaSketch");
+    assertEquals(sketch.getLgNomLongs(), expectedLgK);
+    assertEquals(sketch.getSeed(), seed);
+    assertEquals(sketch.getP(), p);
+
+    //check reset of defaults
+    sketch = UpdateSketch.builder().build();
+    simpleName = sketch.getClass().getSimpleName();
+    assertEquals(simpleName, "HeapQuickSelectSketch");
+    assertEquals(sketch.getLgNomLongs(), Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES));
+    assertEquals(sketch.getSeed(), ThetaUtil.DEFAULT_UPDATE_SEED);
+    assertEquals(sketch.getP(), 1.0f);
+    assertEquals(sketch.getResizeFactor(), X8);
+  }
+
+  /** Requesting 1000 nominal entries must yield a sketch that reports lgNomLongs of 10 (i.e. 1024). */
+  @Test
+  public void checkBuilderNonPowerOf2() {
+    final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(1000).build();
+    final int lgNomLongs = sketch.getLgNomLongs();
+    assertEquals(lgNomLongs, 10);
+  }
+
+  /** Building with a sampling probability of 1.5 must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkBuilderIllegalP() {
+    final float illegalP = 1.5f;
+    UpdateSketch.builder()
+        .setP(illegalP)
+        .build();
+  }
+
+  /**
+   * For each of X1, X2, X4 and X8 (log2 values 0 through 3) checks that getValue() equals
+   * 2^lg, that lg() equals the log2 value, and that ResizeFactor.getRF(lg) returns the same constant.
+   */
+  @Test
+  public void checkBuilderResizeFactor() {
+    final ResizeFactor[] factors = { X1, X2, X4, X8 };
+    for (int lg = 0; lg < factors.length; lg++) {
+      final ResizeFactor rf = factors[lg];
+      assertEquals(rf.getValue(), 1 << lg);
+      assertEquals(rf.lg(), lg);
+      assertEquals(ResizeFactor.getRF(lg), rf);
+    }
+  }
+
+  /** Passing the serialized image of an ALPHA-family sketch to Sketch.wrap must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkWrapBadFamily() {
+    final UpdateSketch alpha = UpdateSketch.builder()
+        .setFamily(Family.ALPHA)
+        .setNominalEntries(1024)
+        .build();
+    final MemorySegment image = MemorySegment.ofArray(alpha.toByteArray());
+    Sketch.wrap(image);
+  }
+
+  /** Asking the update-sketch builder for the INTERSECTION family must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkBadFamily() {
+    UpdateSketch.builder()
+        .setFamily(Family.INTERSECTION)
+        .setNominalEntries(1024)
+        .build();
+  }
+
+  /**
+   * The serialization version read from a freshly serialized update sketch must be 3,
+   * both via the static Sketch call and via a wrapped sketch instance.
+   */
+  @SuppressWarnings("static-access")
+  @Test
+  public void checkSerVer() {
+    final byte[] image = UpdateSketch.builder().setNominalEntries(1024).build().toByteArray();
+
+    final MemorySegment readSeg = MemorySegment.ofArray(image);
+    final int staticSerVer = Sketch.getSerializationVersion(readSeg);
+    assertEquals(staticSerVer, 3);
+
+    final MemorySegment wrapSeg = MemorySegment.ofArray(image);
+    final UpdateSketch wrapped = UpdateSketch.wrap(wrapSeg);
+    //static method invoked through an instance, hence the @SuppressWarnings above
+    final int instanceSerVer = wrapped.getSerializationVersion(wrapSeg);
+    assertEquals(instanceSerVer, 3);
+  }
+
+  /** Heapifying an ALPHA sketch image whose compact flag has been set must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkHeapifyAlphaCompactExcep() {
+    final Sketch alpha = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(512).build();
+    final MemorySegment image = MemorySegment.ofArray(alpha.toByteArray());
+    Util.setBits(image, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); //corrupt: set the compact flag
+    Sketch.heapify(image);
+  }
+
+  /** Heapifying a QUICKSELECT sketch image whose compact flag has been set must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkHeapifyQSCompactExcep() {
+    final Sketch quickSelect = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(512).build();
+    final MemorySegment image = MemorySegment.ofArray(quickSelect.toByteArray());
+    Util.setBits(image, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); //corrupt: set the compact flag
+    Sketch.heapify(image);
+  }
+
+  /** Heapifying a compact image whose compact flag has been cleared must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkHeapifyNotCompactExcep() {
+    final UpdateSketch quickSelect = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(512).build();
+    final int emptyCompactBytes = Sketch.getMaxCompactSketchBytes(0);
+    final MemorySegment image = MemorySegment.ofArray(new byte[emptyCompactBytes]);
+    quickSelect.compact(false, image);
+    Util.clearBits(image, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); //corrupt: clear the compact flag
+    Sketch.heapify(image);
+  }
+
+  /** Passing the serialized image of a Union to Sketch.heapify must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkHeapifyFamilyExcep() {
+    final Union union = SetOperation.builder().setNominalEntries(512).buildUnion();
+    final MemorySegment unionImage = MemorySegment.ofArray(union.toByteArray());
+    Sketch.heapify(unionImage); //improper use: the image is a Union, not a Sketch
+  }
+
+  /** Wrapping an ALPHA sketch image whose compact flag has been set must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkWrapAlphaCompactExcep() {
+    final Sketch alpha = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(512).build();
+    final MemorySegment image = MemorySegment.ofArray(alpha.toByteArray());
+    Util.setBits(image, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); //corrupt: set the compact flag
+    Sketch.wrap(image);
+  }
+
+  /** Wrapping a QUICKSELECT sketch image whose compact flag has been set must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkWrapQSCompactExcep() {
+    final Sketch quickSelect = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(512).build();
+    final MemorySegment image = MemorySegment.ofArray(quickSelect.toByteArray());
+    Util.setBits(image, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); //corrupt: set the compact flag
+    Sketch.wrap(image);
+  }
+
+  /** Wrapping a compact image whose compact flag has been cleared must throw SketchesArgumentException. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkWrapNotCompactExcep() {
+    final UpdateSketch quickSelect = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(512).build();
+    final int emptyCompactBytes = Sketch.getMaxCompactSketchBytes(0);
+    final MemorySegment image = MemorySegment.ofArray(new byte[emptyCompactBytes]);
+    quickSelect.compact(false, image);
+    Util.clearBits(image, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); //corrupt: clear the compact flag
+    Sketch.wrap(image);
+  }
+
+  /** ID 0 is not a valid sketch ID; the IDs of ALPHA, QUICKSELECT and COMPACT are. */
+  @Test
+  public void checkValidSketchID() {
+    assertFalse(Sketch.isValidSketchID(0));
+    final Family[] sketchFamilies = { ALPHA, QUICKSELECT, COMPACT };
+    for (final Family family : sketchFamilies) {
+      assertTrue(Sketch.isValidSketchID(family.getID()));
+    }
+  }
+
+  /**
+   * Converts a compact sketch to SerVer 1 and SerVer 2 images and wraps each one. The wrapped
+   * result must report that it is neither direct nor segment-backed, and must return the
+   * same estimate as the source sketch.
+   */
+  @Test
+  public void checkWrapToHeapifyConversion1() {
+    final int k = 512;
+    final UpdateSketch source = UpdateSketch.builder().setNominalEntries(k).build();
+    for (int value = 0; value < k; value++) {
+      source.update(value);
+    }
+    final double expectedEst = source.getEstimate();
+
+    final CompactSketch compact = source.compact();
+    assertEquals(compact.getEstimate(), expectedEst);
+
+    //NOTE(review): this wrap was previously annotated "fails", yet the assertions below
+    //require it to succeed - confirm which is intended.
+    final MemorySegment v1Image = convertSerVer3toSerVer1(compact);
+    final Sketch fromV1 = Sketch.wrap(v1Image);
+    assertFalse(fromV1.isDirect());
+    assertFalse(fromV1.hasMemorySegment());
+    assertEquals(expectedEst, fromV1.getEstimate(), 0.0);
+
+    final MemorySegment v2Image = convertSerVer3toSerVer2(compact, ThetaUtil.DEFAULT_UPDATE_SEED);
+    final Sketch fromV2 = Sketch.wrap(v2Image);
+    assertFalse(fromV2.isDirect());
+    assertFalse(fromV2.hasMemorySegment());
+    assertEquals(expectedEst, fromV2.getEstimate(), 0.0);
+  }
+
+  /**
+   * Checks resource identity: a sketch built on a segment reports that segment, compact
+   * sketches written into a destination segment report the destination, and a sketch built
+   * without a segment does not match the first segment.
+   */
+  @Test
+  public void checkIsSameResource() {
+    final int k = 16;
+    final MemorySegment updateSeg = MemorySegment.ofArray(new byte[(k * 16) + 24]); //280
+    final MemorySegment compactSeg = MemorySegment.ofArray(new byte[32]);
+
+    final UpdateSketch segSketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(updateSeg);
+    segSketch.update(1);
+    segSketch.update(2);
+    assertTrue(segSketch.isSameResource(updateSeg));
+
+    final DirectCompactSketch ordered = (DirectCompactSketch) segSketch.compact(true, compactSeg);
+    assertTrue(isSameResource(ordered.getMemorySegment(), compactSeg));
+    assertTrue(ordered.isOrdered());
+
+    //never create 2 sketches with the same memory, so don't do as I do :)
+    final DirectCompactSketch unordered = (DirectCompactSketch) segSketch.compact(false, compactSeg);
+    assertTrue(isSameResource(unordered.getMemorySegment(), compactSeg));
+    assertFalse(unordered.isOrdered());
+
+    final Sketch noSegSketch = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+    assertFalse(isSameResource(noSegSketch.getMemorySegment(), updateSeg));
+  }
+
+  /**
+   * After 2k updates and a rebuild, the number of retained hashes below the rebuilt theta
+   * (converted to its long form) must equal k.
+   */
+  @Test
+  public void checkCountLessThanTheta() {
+    final int k = 512;
+    final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build();
+    final int numUpdates = 2 * k;
+    for (int value = 0; value < numUpdates; value++) {
+      sketch.update(value);
+    }
+
+    final double rebuiltTheta = sketch.rebuild().getTheta();
+    final long rebuiltThetaLong = (long) (LONG_MAX_VALUE_AS_DOUBLE * rebuiltTheta);
+    final int countBelowTheta = sketch.getCountLessThanThetaLong(rebuiltThetaLong);
+    assertEquals(countBelowTheta, k);
+  }
+
+ private static MemorySegment createCompactSketchMemory(int k, int u) {
+ UpdateSketch usk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < u; i++) { usk.update(i); }
+ int bytes = Sketch.getMaxCompactSketchBytes(usk.getRetainedEntries(true));
+ MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ usk.compact(true, wseg);
+ return wseg;
+ }
+
+  /**
+   * A valid compact image wraps as a CompactSketch. Wrapping must throw
+   * SketchesArgumentException after each of three corruptions: compact flag cleared,
+   * read-only flag cleared, and serial version set to 5.
+   */
+  @Test
+  public void checkCompactFlagsOnWrap() {
+    final MemorySegment image = createCompactSketchMemory(16, 32);
+    assertTrue(Sketch.wrap(image) instanceof CompactSketch);
+    final int goodFlags = PreambleUtil.extractFlags(image);
+
+    //corrupt: compact flag cleared
+    PreambleUtil.insertFlags(image, goodFlags & ~COMPACT_FLAG_MASK);
+    try {
+      Sketch.wrap(image);
+      fail();
+    } catch (final SketchesArgumentException expected) { }
+
+    //corrupt: read-only flag cleared (derived from the original flags, so the compact flag is back)
+    PreambleUtil.insertFlags(image, goodFlags & ~READ_ONLY_FLAG_MASK);
+    try {
+      Sketch.wrap(image);
+      fail();
+    } catch (final SketchesArgumentException expected) { }
+
+    //repair the flags, then corrupt the serial version instead
+    PreambleUtil.insertFlags(image, goodFlags);
+    PreambleUtil.insertSerVer(image, 5);
+    try {
+      Sketch.wrap(image);
+      fail();
+    } catch (final SketchesArgumentException expected) { }
+  }
+
+  /**
+   * A valid compact image heapifies to a CompactSketch. Heapify must throw
+   * SketchesArgumentException when the read-only flag is cleared, and when given a
+   * 7-byte segment that only has its serial version set.
+   */
+  @Test
+  public void checkCompactSizeAndFlagsOnHeapify() {
+    final MemorySegment image = createCompactSketchMemory(16, 32);
+    assertTrue(Sketch.heapify(image) instanceof CompactSketch);
+    final int goodFlags = PreambleUtil.extractFlags(image);
+
+    //corrupt: read-only flag cleared
+    final int flagsNoReadOnly = goodFlags & ~READ_ONLY_FLAG_MASK;
+    PreambleUtil.insertFlags(image, flagsNoReadOnly);
+    try {
+      Sketch.heapify(image);
+      fail();
+    } catch (final SketchesArgumentException expected) { }
+
+    //segment of only 7 bytes
+    final MemorySegment tooSmall = MemorySegment.ofArray(new byte[7]);
+    PreambleUtil.insertSerVer(tooSmall, 3);
+    //PreambleUtil.insertFamilyID(tooSmall, 3);
+    try {
+      Sketch.heapify(tooSmall);
+      fail();
+    } catch (final SketchesArgumentException expected) { }
+  }
+
+  /** For an empty k=16 update sketch: compact bytes are 8, current bytes are 280, retained entries are 0. */
+  @Test
+  public void check2Methods() {
+    final Sketch empty = Sketches.updateSketchBuilder().setNominalEntries(16).build();
+    final int compactBytes = empty.getCompactBytes();
+    final int currentBytes = empty.getCurrentBytes();
+    assertEquals(compactBytes, 8);
+    assertEquals(currentBytes, 280); //32*8 + 24
+    assertEquals(empty.getRetainedEntries(), 0);
+  }
+
+  /** Exercises the local println hook with this test class's name. */
+  @Test
+  public void printlnTest() {
+    final String className = this.getClass().getName();
+    println("PRINTING: " + className);
+  }
+
+ /**
+ * Debug print hook for this test class. It does nothing while the statement in the body
+ * stays commented out; uncomment that statement to see the output.
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java
new file mode 100644
index 000000000..09daaad11
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java
@@ -0,0 +1,320 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.common.Util.isSameResource;
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1;
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.Arena;
+import java.nio.ByteOrder;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+public class UnionImplTest {
+
+  /** A new union built with lgK = 10 must report 288 current bytes and 16416 maximum union bytes. */
+  @Test
+  public void checkGetCurrentAndMaxBytes() {
+    final Union union = Sketches.setOperationBuilder()
+        .setLogNominalEntries(10)
+        .buildUnion();
+    final int currentBytes = union.getCurrentBytes();
+    final int maxBytes = union.getMaxUnionBytes();
+    assertEquals(currentBytes, 288);
+    assertEquals(maxBytes, 16416);
+  }
+
+ @Test
+ public void checkUpdateWithSketch() {
+ final int k = 16;
+ final MemorySegment mem = MemorySegment.ofArray(new byte[k*8 + 24]);
+ final MemorySegment mem2 = MemorySegment.ofArray(new byte[k*8 + 24]);
+ final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i=0; i 0) && !(((lgT - lgA) % lgR) == 0);
+ boolean rf0 = (lgR == 0) && (lgA != lgT);
+ assertTrue((lgRbad == rf0) || (lgRbad == rf123));
+ }
+ }
+ }
+ }
+
+
+  /**
+   * Produces an ordered compact image three ways: sketch.compact, memoryToCompact from the
+   * updatable image, and memoryToCompact from the first compact image. All three destination
+   * segments must have equal contents.
+   */
+  @SuppressWarnings("unused")
+  @Test
+  public void checkCompactOpsMemoryToCompact() {
+    final int lgK = 6;
+    final UpdateSketch source = Sketches.updateSketchBuilder().setLogNominalEntries(lgK).build();
+    final int limit = 1 << (lgK + 1);
+    for (int value = 2; value < limit; value++) {
+      source.update(value);
+    }
+
+    final int compactBytes = source.getCompactBytes();
+    final MemorySegment updatableImage = MemorySegment.ofArray(source.toByteArray());
+    final MemorySegment dst1 = MemorySegment.ofArray(new byte[compactBytes]);
+    final MemorySegment dst2 = MemorySegment.ofArray(new byte[compactBytes]);
+    final MemorySegment dst3 = MemorySegment.ofArray(new byte[compactBytes]);
+
+    final CompactSketch viaCompact = source.compact(true, dst1);
+    final CompactSketch viaUpdatableImage = CompactOperations.memoryToCompact(updatableImage, true, dst2);
+    final CompactSketch viaCompactImage = CompactOperations.memoryToCompact(dst1, true, dst3);
+
+    assertTrue(equalContents(dst1, dst2));
+    assertTrue(equalContents(dst1, dst3));
+  }
+
+  /** Exercises the local println hook with this test class's name. */
+  @Test
+  public void printlnTest() {
+    final String className = this.getClass().getName();
+    println("PRINTING: " + className);
+  }
+
+ /**
+ * Debug print hook for this test class. It does nothing while the statement in the body
+ * stays commented out; uncomment that statement to see the output.
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+}
From a3aa1419db766de9c56c154c76355687af59678d Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Tue, 10 Jun 2025 11:46:16 -0700
Subject: [PATCH 11/25] partial progress 2
---
.../theta2/PairwiseSetOperationsTest.java | 410 ++++++++++++++++++
.../datasketches/theta2/PreambleUtilTest.java | 231 ++++++++++
2 files changed, 641 insertions(+)
create mode 100644 src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java
diff --git a/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java b/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java
new file mode 100644
index 000000000..c0d9faeed
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.testng.annotations.Test;
+
+public class PairwiseSetOperationsTest {
+
+ // Intersection
+
+ @Test
+ public void checkIntersectionNoOverlap() {
+ int lgK = 9;
+ int k = 1< k);
+ println(quick1.toString());
+ println(PreambleUtil.preambleToString(mem));
+
+ final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]);
+ final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem);
+ union.union(quick1);
+ println(PreambleUtil.preambleToString(uMem));
+ }
+
+  /** Runs Sketch.toString(byte[]) on the compact image of a k=16 sketch that was updated with exactly k values. */
+  @Test
+  public void checkToStringWithPrelongsOf2() {
+    final int k = 16;
+    final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build();
+    for (int value = 0; value < k; value++) {
+      sketch.update(value);
+    }
+    final byte[] compactImage = sketch.compact().toByteArray();
+    final String description = Sketch.toString(compactImage);
+    println(description);
+  }
+
+  /**
+   * Sketch.toString must throw SketchesArgumentException for a 7-byte array, and for an
+   * 8-byte read-only segment whose first byte is 2.
+   */
+  @Test
+  public void checkPreambleToStringExceptions() {
+    final byte[] sevenBytes = new byte[7];
+    try { //7 bytes is less than one 8-byte long
+      Sketch.toString(sevenBytes);
+      fail("Did not throw SketchesArgumentException.");
+    } catch (final SketchesArgumentException expected) {
+      //expected
+    }
+
+    final byte[] eightBytes = new byte[8];
+    eightBytes[0] = (byte) 2; //needs min capacity of 16
+    final MemorySegment readOnlySeg = MemorySegment.ofArray(eightBytes).asReadOnly();
+    try { //check preLongs == 2 fails
+      Sketch.toString(readOnlySeg);
+      fail("Did not throw SketchesArgumentException.");
+    } catch (final SketchesArgumentException expected) {
+      //expected
+    }
+  }
+
+  /** computeSeedHash must throw SketchesArgumentException for seed 50541. */
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkBadSeedHashFromSeed() {
+    //In the first 64K values, 50541 produces a seedHash of 0.
+    final long seedWithZeroHash = 50541;
+    ThetaUtil.computeSeedHash(seedWithZeroHash);
+  }
+
+  /**
+   * Runs Sketch.toString on the compact image of one k=16 sketch at three stages:
+   * before any update, after one update, and after the values 1 through 32.
+   */
+  @Test
+  public void checkPreLongs() {
+    final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(16).build();
+
+    final byte[] emptyImage = sketch.compact(false, null).toByteArray();
+    println(Sketch.toString(emptyImage)); //PreLongs = 1
+
+    sketch.update(1);
+    final byte[] oneValueImage = sketch.compact(false, null).toByteArray();
+    println(Sketch.toString(oneValueImage)); //PreLongs = 2
+
+    for (int value = 2; value <= 32; value++) {
+      sketch.update(value);
+    }
+    final byte[] manyValuesImage = sketch.compact(false, null).toByteArray();
+    final MemorySegment readOnlySeg = MemorySegment.ofArray(manyValuesImage).asReadOnly();
+    println(Sketch.toString(readOnlySeg)); //PreLongs = 3
+  }
+
+ @Test
+ public void checkInsertsAndExtracts() { //round-trips each preamble field in turn: insert a value, extract and compare, then reset the field to zero
+ final byte[] arr = new byte[32];
+ final MemorySegment wmem = MemorySegment.ofArray(arr);
+
+ int v = 0; //each ++v below hands the next field a new, distinct non-zero value
+ insertPreLongs(wmem, ++v);
+ assertEquals(extractPreLongs(wmem), v);
+ insertPreLongs(wmem, 0);
+
+ insertLgResizeFactor(wmem, 3); //limited to 2 bits
+ assertEquals(extractLgResizeFactor(wmem), 3);
+ insertLgResizeFactor(wmem, 0);
+
+ insertSerVer(wmem, ++v);
+ assertEquals(extractSerVer(wmem), v);
+ insertSerVer(wmem, 0);
+
+ insertFamilyID(wmem, ++v);
+ assertEquals(extractFamilyID(wmem), v);
+ insertFamilyID(wmem, 0);
+
+ insertLgNomLongs(wmem, ++v);
+ assertEquals(extractLgNomLongs(wmem), v);
+ insertLgNomLongs(wmem, 0);
+
+ insertLgArrLongs(wmem, ++v);
+ assertEquals(extractLgArrLongs(wmem), v);
+ insertLgArrLongs(wmem, 0);
+
+ insertFlags(wmem, 3); //fixed value 3 so the V1 resize-ratio read below sees the same 2 bits
+ assertEquals(extractFlags(wmem), 3);
+ assertEquals(extractLgResizeRatioV1(wmem), 3); //also at byte 5, limited to 2 bits
+ insertFlags(wmem, 0);
+
+ insertSeedHash(wmem, ++v);
+ assertEquals(extractSeedHash(wmem), v);
+ assertEquals(extractFlagsV1(wmem), v); //also at byte 6
+ insertSeedHash(wmem, 0);
+
+ insertCurCount(wmem, ++v);
+ assertEquals(extractCurCount(wmem), v);
+ insertCurCount(wmem, 0);
+
+ insertP(wmem, (float) 1.0);
+ assertEquals(extractP(wmem), (float) 1.0);
+ insertP(wmem, (float) 0.0);
+
+ insertThetaLong(wmem, ++v);
+ assertEquals(extractThetaLong(wmem), v);
+ insertThetaLong(wmem, 0L);
+
+ insertUnionThetaLong(wmem, ++v);
+ assertEquals(extractUnionThetaLong(wmem), v);
+ insertUnionThetaLong(wmem, 0L);
+
+ setEmpty(wmem); //the empty flag is set and cleared through its own helpers
+ assertTrue(isEmptyFlag(wmem));
+
+ clearEmpty(wmem);
+ assertFalse(isEmptyFlag(wmem));
+ }
+
+  /** Exercises the local println hook with this test class's name. */
+  @Test
+  public void printlnTest() {
+    final String className = this.getClass().getName();
+    println("PRINTING: " + className);
+  }
+
+ /**
+ * Debug print hook for this test class. It does nothing while the statement in the body
+ * stays commented out; uncomment that statement to see the output.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
From 5db9cccf23c47061daa3e5436993c0c776aa685b Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Mon, 16 Jun 2025 14:09:28 -0700
Subject: [PATCH 12/25] Theta Rework: completed all classes and tests.
---
.../org/apache/datasketches/common/Util.java | 34 +-
.../theta2/ConcurrentHeapThetaBuffer.java | 16 +
.../theta2/ConcurrentSharedThetaSketch.java | 4 +-
.../theta2/DirectQuickSelectSketch.java | 2 +-
.../theta2/DirectQuickSelectSketchR.java | 3 +-
.../theta2/JaccardSimilarity.java | 179 +++++
.../apache/datasketches/theta2/Sketch.java | 4 +-
.../BoundsOnRatiosInThetaSketchedSets2.java | 121 +++
...ConcurrentDirectQuickSelectSketchTest.java | 4 +-
.../theta2/BackwardConversions.java | 2 +-
...ConcurrentDirectQuickSelectSketchTest.java | 718 +++++++++++++++++
.../ConcurrentHeapQuickSelectSketchTest.java | 745 ++++++++++++++++++
.../theta2/JaccardSimilarityTest.java | 248 ++++++
.../theta2/ReadOnlyMemoryTest.java | 211 +++++
.../datasketches/theta2/SetOperationTest.java | 438 ++++++++++
.../theta2/SetOpsCornerCasesTest.java | 501 ++++++++++++
.../datasketches/theta2/SketchesTest.java | 202 +++++
.../theta2/ThetaSketchCrossLanguageTest.java | 121 +++
...oundsOnRatiosInThetaSketchedSets2Test.java | 94 +++
19 files changed, 3626 insertions(+), 21 deletions(-)
create mode 100644 src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
create mode 100644 src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/SetOperationTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/SketchesTest.java
create mode 100644 src/test/java/org/apache/datasketches/theta2/ThetaSketchCrossLanguageTest.java
create mode 100644 src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java
diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java
index 11615a39a..4701ddf9e 100644
--- a/src/main/java/org/apache/datasketches/common/Util.java
+++ b/src/main/java/org/apache/datasketches/common/Util.java
@@ -909,21 +909,29 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme
}
/**
- * Request a new heap MemorySegment with the given capacityBytes.
+ * Requests a new heap MemorySegment with the given capacityBytes and either 8-byte or 1-byte alignment.
*
- * The returned MemorySegment will be constructed from a long[] array.
- * As a result, it will be on-heap and have a memory alignment of 8.
- * If the requested capacity is not divisible by eight, the returned size
- * will be rolled up to the next multiple of eight.
+ * If aligned is true, the returned MemorySegment will be constructed from a long[] array,
+ * and, as a result, it will have a memory alignment of 8 bytes.
+ * If the requested capacity is not exactly divisible by eight, the returned size
+ * will be rolled up to the next multiple of eight bytes.
*
- * @param capacityBytes The new capacity being requested. It must not be negative.
- * @return a new MemorySegment with the requested capacity.
- */
- public static MemorySegment newHeapSegment(final int capacityBytes) {
- final long[] array = ((capacityBytes & 0x7) == 0)
- ? new long[capacityBytes >>> 3]
- : new long[(capacityBytes >>> 3) + 1];
- return MemorySegment.ofArray(array);
+ * If aligned is false, the returned MemorySegment will be constructed from a byte[] array,
+ * and have a memory alignment of 1 byte.
+ *
+ * @param capacityBytes The new capacity being requested. It must not be negative and cannot exceed Integer.MAX_VALUE.
+ * @param aligned if true, the new heap segment will have an alignment of 8 bytes, otherwise the alignment will be 1 byte.
+ * @return a new MemorySegment with the requested capacity and alignment.
+ */
+  public static MemorySegment newHeapSegment(final int capacityBytes, final boolean aligned) {
+    // Fail fast on a negative capacity. Without this check the unsigned shift on the aligned path
+    // turns a negative value into a very large positive long[] length (e.g. -8 becomes 536,870,911
+    // longs) instead of reporting the bad argument.
+    if (capacityBytes < 0) {
+      throw new SketchesArgumentException("capacityBytes must not be negative: " + capacityBytes);
+    }
+    if (!aligned) {
+      // 1-byte alignment: a byte[] of exactly the requested size.
+      return MemorySegment.ofArray(new byte[capacityBytes]);
+    }
+    // 8-byte alignment: back the segment with a long[], rounding the byte count up to whole longs.
+    final int wholeLongs = capacityBytes >>> 3;
+    final int lenLongs = ((capacityBytes & 0x7) == 0) ? wholeLongs : wholeLongs + 1;
+    return MemorySegment.ofArray(new long[lenLongs]);
   }
/**
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
index c93ed892b..f8f5a0947 100644
--- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
@@ -23,6 +23,7 @@
import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentPropagated;
import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta;
+import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.datasketches.common.ResizeFactor;
@@ -147,6 +148,16 @@ public double getUpperBound(final int numStdDev) {
return shared.getUpperBound(numStdDev);
}
+ @Override
+ public boolean hasMemorySegment() {
+ return shared.hasMemorySegment(); //proxy: the answer comes from the shared sketch, not from this buffer
+ }
+
+ @Override
+ public boolean isDirect() {
+ return shared.isDirect(); //proxy: the answer comes from the shared sketch, not from this buffer
+ }
+
@Override
public boolean isEmpty() {
return shared.isEmpty();
@@ -157,6 +168,11 @@ public boolean isEstimationMode() {
return shared.isEstimationMode();
}
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return shared.isSameResource(that); //proxy: compares 'that' against the shared sketch's resource
+ }
+
//End of proxies
@Override
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
index 40746c3e6..5c89b3e68 100644
--- a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
@@ -22,6 +22,8 @@
import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.datasketches.common.MemorySegmentStatus;
+
/**
* An internal interface to define the API of a concurrent shared theta sketch.
* It reflects all data processed by a single or multiple update threads, and can serve queries at
@@ -29,7 +31,7 @@
*
* @author eshcar
*/
-interface ConcurrentSharedThetaSketch {
+interface ConcurrentSharedThetaSketch extends MemorySegmentStatus {
long NOT_SINGLE_HASH = -1L;
double MIN_ERROR = 0.0000001;
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
index 32ae0d14d..193385a1f 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
@@ -323,7 +323,7 @@ UpdateReturnState hashUpdate(final long hash) {
//}
//final MemorySegment newDstSeg = memReqSvr_.request(wseg_, reqBytes);
- final MemorySegment newDstSeg = newHeapSegment(reqBytes);
+ final MemorySegment newDstSeg = newHeapSegment(reqBytes, false);
moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong);
wseg_ = newDstSeg;
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
index c0db75b16..b7c47de47 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
@@ -223,8 +223,7 @@ long[] getCache() {
final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
final long[] cacheArr = new long[1 << lgArrLongs];
- final MemorySegment seg = MemorySegment.ofArray(cacheArr);
- MemorySegment.copy(wseg_, preambleLongs << 3, seg, 0, 8 << lgArrLongs);
+ MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, preambleLongs << 3, cacheArr, 0, 1 << lgArrLongs);
return cacheArr;
}
diff --git a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
new file mode 100644
index 000000000..624dcc3d7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA;
+import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA;
+import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Jaccard similarity of two Theta Sketches.
+ *
+ * @author Lee Rhodes
+ */
+public final class JaccardSimilarity {
+ private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB
+ private static final double[] ONES = {1.0, 1.0, 1.0};
+
+ /**
+ * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
+ * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each
+ * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are
+ * distinct from each other. A Jaccard of .95 means the overlap between the two
+ * populations is 95% of the union of the two populations.
+ *
+ *
+ * <p>Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+ * are 2^25 or 2^26, this method may produce unpredictable results.
+ *
+ * @param sketchA given sketch A
+ * @param sketchB given sketch B
+ * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index.
+ * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations.
+ */
+ public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
+ if (sketchA == sketchB) { return ONES.clone(); }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
+
+ final int countA = sketchA.getRetainedEntries(true);
+ final int countB = sketchB.getRetainedEntries(true);
+
+ //Create the Union
+ final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS;
+ final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS;
+ final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK);
+ final Union union =
+ SetOperation.builder().setNominalEntries(newK).buildUnion();
+ union.union(sketchA);
+ union.union(sketchB);
+ final Sketch unionAB = union.getResult(false, null);
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries(true);
+
+ //Check for identical data
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return ONES.clone();
+ }
+
+ //Create the Intersection
+ final Intersection inter = SetOperation.builder().buildIntersection();
+ inter.intersect(sketchA);
+ inter.intersect(sketchB);
+ inter.intersect(unionAB); //ensures that intersection is a subset of the union
+ final Sketch interABU = inter.getResult(false, null);
+
+ final double lb = getLowerBoundForBoverA(unionAB, interABU);
+ final double est = getEstimateOfBoverA(unionAB, interABU);
+ final double ub = getUpperBoundForBoverA(unionAB, interABU);
+ return new double[] {lb, est, ub};
+ }
+
+ /**
+ * Returns true if the two given sketches have exactly the same hash values and the same
+ * theta values. Thus, they are equivalent.
+ * @param sketchA the given sketch A
+ * @param sketchB the given sketch B
+ * @return true if the two given sketches have exactly the same hash values and the same
+ * theta values.
+ */
+ public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) {
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return false; }
+ if (sketchA == sketchB) { return true; }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
+
+ final int countA = sketchA.getRetainedEntries(true);
+ final int countB = sketchB.getRetainedEntries(true);
+
+ //Create the Union
+ final Union union =
+ SetOperation.builder().setNominalEntries(ceilingPowerOf2(countA + countB)).buildUnion();
+ union.union(sketchA);
+ union.union(sketchB);
+ final Sketch unionAB = union.getResult();
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries(true);
+
+ //Check for identical counts and thetas
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Tests similarity of a measured Sketch against an expected Sketch.
+ * Computes the lower bound of the Jaccard index JLB of the measured and
+ * expected sketches.
+ * If JLB ≥ threshold, then the sketches are considered to be
+ * similar with a confidence of 97.7%.
+ *
+ * @param measured the sketch to be tested
+ * @param expected the reference sketch that is considered to be correct.
+ * @param threshold a real value between zero and one.
+ * @return if true, the similarity of the two sketches is greater than or equal to the given
+ * threshold with at least 97.7% confidence.
+ */
+ public static boolean similarityTest(final Sketch measured, final Sketch expected,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioLB = jaccard(measured, expected)[0]; //choosing the lower bound
+ return jRatioLB >= threshold;
+ }
+
+ /**
+ * Tests dissimilarity of a measured Sketch against an expected Sketch.
+ * Computes the upper bound of the Jaccard index JUB of the measured and
+ * expected sketches.
+ * If JUB ≤ threshold, then the sketches are considered to be
+ * dissimilar with a confidence of 97.7%.
+ *
+ * @param measured the sketch to be tested
+ * @param expected the reference sketch that is considered to be correct.
+ * @param threshold a real value between zero and one.
+ * @return if true, the similarity of the two sketches is less than or equal to the given
+ * threshold with at least 97.7% confidence.
+ */
+ public static boolean dissimilarityTest(final Sketch measured, final Sketch expected,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioUB = jaccard(measured, expected)[2]; //choosing the upper bound
+ return jRatioUB <= threshold;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java
index 82661aa27..e98396842 100644
--- a/src/main/java/org/apache/datasketches/theta2/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java
@@ -222,7 +222,7 @@ public CompactSketch compact() {
*
*
A new CompactSketch object is created:
* - if dstMem != null
- * - if dstMem == null and this.hasMemory() == true
+ * - if dstMem == null and this.hasMemorySegment() == true
* - if dstMem == null and this has more than 1 item and this.isOrdered() == false
* and dstOrdered == true.
*
@@ -564,7 +564,7 @@ public static String toString(final MemorySegment mem) {
/**
* Gets the internal cache array. For on-heap sketches this will return a reference to the actual
- * cache array. For Memory-based sketches this returns a copy.
+ * cache array. For MemorySegment-based sketches this returns a copy.
* @return the internal cache array.
*/
abstract long[] getCache();
diff --git a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java
new file mode 100644
index 000000000..f8199cc4f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon;
+
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+
+import org.apache.datasketches.common.BoundsOnRatiosInSampledSets;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.theta2.Sketch;
+
+/**
+ * This class is used to compute the bounds on the estimate of the ratio B / A, where:
+ *
+ * - A is a Theta Sketch of population PopA.
+ * - B is a Theta Sketch of population PopB that is a subset of A,
+ * obtained by an intersection of A with some other Theta Sketch C,
+ * which acts like a predicate or selection clause.
+ * - The estimate of the ratio PopB/PopA is
+ * BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(A, B).
+ * - The Upper Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(A, B).
+ * - The Lower Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(A, B).
+ *
+ * Note: The theta of B cannot be greater than the theta of A.
+ * If B is formed as an intersection of A and some other set C,
+ * then the theta of B is guaranteed to be less than or equal to the theta of A.
+ *
+ * @author Kevin Lang
+ * @author Lee Rhodes
+ */
+public final class BoundsOnRatiosInThetaSketchedSets2 {
+
+ private BoundsOnRatiosInThetaSketchedSets2() {}
+
+ /**
+ * Gets the approximate lower bound for B over A based on a 95% confidence interval
+ * @param sketchA the sketch A
+ * @param sketchB the sketch B
+ * @return the approximate lower bound for B over A
+ */
+ public static double getLowerBoundForBoverA(final Sketch sketchA, final Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = (thetaLongB == thetaLongA)
+ ? sketchA.getRetainedEntries(true)
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f);
+ }
+
+ /**
+ * Gets the approximate upper bound for B over A based on a 95% confidence interval
+ * @param sketchA the sketch A
+ * @param sketchB the sketch B
+ * @return the approximate upper bound for B over A
+ */
+ public static double getUpperBoundForBoverA(final Sketch sketchA, final Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = (thetaLongB == thetaLongA)
+ ? sketchA.getRetainedEntries(true)
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 1.0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f);
+ }
+
+ /**
+ * Gets the estimate for B over A
+ * @param sketchA the sketch A
+ * @param sketchB the sketch B
+ * @return the estimate for B over A
+ */
+ public static double getEstimateOfBoverA(final Sketch sketchA, final Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = (thetaLongB == thetaLongA)
+ ? sketchA.getRetainedEntries(true)
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0.5; }
+
+ return (double) countB / (double) countA;
+ }
+
+ static void checkThetas(final long thetaLongA, final long thetaLongB) {
+ if (thetaLongB > thetaLongA) {
+ throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA.");
+ }
+ }
+}
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
index 6d6af7047..fe2b138ca 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
@@ -696,7 +696,9 @@ static void println(String s) {
}
private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) {
- assertEquals(local.hasMemory(), shared.hasMemory());
+ assertEquals(
+ local.hasMemory(),
+ shared.hasMemory());
assertEquals(local.isDirect(), shared.isDirect());
}
diff --git a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
index 0e1348684..bec67b219 100644
--- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
+++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
@@ -220,7 +220,7 @@ public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, fi
final int entries = skV3.getRetainedEntries(true);
final boolean unordered = !(skV3.isOrdered());
final byte flags = (byte) (0xA | (unordered ? 16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE
- wseg = Util.newHeapSegment((preLongs + entries) << 3);
+ wseg = Util.newHeapSegment((preLongs + entries) << 3, false);
wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs
wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java
new file mode 100644
index 000000000..7a7b89cef
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java
@@ -0,0 +1,718 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.waitForBgPropagationToComplete;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.SharedLocal;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author eshcar
+ */
+public class ConcurrentDirectQuickSelectSketchTest {
+ private static final long SEED = ThetaUtil.DEFAULT_UPDATE_SEED;
+
+ @Test
+ public void checkDirectCompactConversion() {
+ int lgK = 9;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ assertTrue(sl.shared instanceof ConcurrentDirectQuickSelectSketch);
+ assertTrue(sl.shared.compact().isCompact());
+ }
+
+ @Test
+ public void checkHeapifyMemoryEstimating() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = 2*k;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+
+ UpdateSketch shared = sl.shared; //off-heap
+ UpdateSketch local = sl.local;
+
+ for (int i=0; i k);
+
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ for (int i=0; i k);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer");
+ assertFalse(local.isDirect());
+ assertTrue(local.hasMemorySegment());
+
+ for (int i=0; i k);
+ }
+
+ @Test
+ public void checkErrorBounds() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ //Exact mode
+ for (int i = 0; i < k; i++ ) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ double est = local.getEstimate();
+ double lb = local.getLowerBound(2);
+ double ub = local.getUpperBound(2);
+ assertEquals(est, ub, 0.0);
+ assertEquals(est, lb, 0.0);
+
+ //Est mode
+ int u = 100*k;
+ for (int i = k; i < u; i++ ) {
+ local.update(i);
+ local.update(i); //test duplicate rejection
+ }
+ waitForBgPropagationToComplete(shared);
+ est = local.getEstimate();
+ lb = local.getLowerBound(2);
+ ub = local.getUpperBound(2);
+ assertTrue(est <= ub);
+ assertTrue(est >= lb);
+ }
+
+
+ @Test
+ public void checkUpperAndLowerBounds() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = 2*k;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ for (int i = 0; i < u; i++ ) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ double est = local.getEstimate();
+ double ub = local.getUpperBound(1);
+ double lb = local.getLowerBound(1);
+ assertTrue(ub > est);
+ assertTrue(lb < est);
+ }
+
+ @Test
+ public void checkRebuild() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = 4*k;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+
+ for (int i = 0; i< u; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertTrue(local.getEstimate() > 0.0);
+ assertTrue(shared.getRetainedEntries(false) >= k);
+
+ shared.rebuild();
+ assertEquals(shared.getRetainedEntries(false), k);
+ assertEquals(shared.getRetainedEntries(true), k);
+ local.rebuild();
+ assertEquals(shared.getRetainedEntries(false), k);
+ assertEquals(shared.getRetainedEntries(true), k);
+ }
+
+ @Test
+ public void checkResetAndStartingSubMultiple() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+
+ int u = 4*k;
+ for (int i = 0; i< u; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertTrue(shared.getRetainedEntries(false) >= k);
+ assertTrue(local.getThetaLong() < Long.MAX_VALUE);
+
+ shared.reset();
+ local.reset();
+ assertTrue(local.isEmpty());
+ assertEquals(shared.getRetainedEntries(false), 0);
+ assertEquals(local.getEstimate(), 0.0, 0.0);
+ assertEquals(local.getThetaLong(), Long.MAX_VALUE);
+ }
+
+ @Test
+ public void checkExactModeMemoryArr() {
+ int lgK = 12;
+ int k = 1 << lgK;
+ int u = k;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertTrue(local.isEmpty());
+
+ for (int i = 0; i< u; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ assertEquals(local.getEstimate(), u, 0.0);
+ assertEquals(shared.getRetainedEntries(false), u);
+ }
+
+ @Test
+ public void checkEstModeMemoryArr() {
+ int lgK = 12;
+ int k = 1 << lgK;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertTrue(local.isEmpty());
+
+ int u = 3*k;
+ for (int i = 0; i< u; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ double est = local.getEstimate();
+ assertTrue((est < (u * 1.05)) && (est > (u * 0.95)));
+ assertTrue(shared.getRetainedEntries(false) >= k);
+ }
+
+ @Test
+ public void checkEstModeNativeMemory() {
+ int lgK = 12;
+ int k = 1 << lgK;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertTrue(local.isEmpty());
+
+ int u = 3*k;
+ for (int i = 0; i< u; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+ double est = local.getEstimate();
+ assertTrue((est < (u * 1.05)) && (est > (u * 0.95)));
+ assertTrue(shared.getRetainedEntries(false) >= k);
+ }
+
+ @Test
+ public void checkConstructReconstructFromMemory() {
+ int lgK = 12;
+ int k = 1 << lgK;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+ int u = 3*k;
+
+ for (int i = 0; i< u; i++) { local.update(i); } //force estimation
+ waitForBgPropagationToComplete(shared);
+
+ double est1 = local.getEstimate();
+ int count1 = shared.getRetainedEntries(false);
+ assertTrue((est1 < (u * 1.05)) && (est1 > (u * 0.95)));
+ assertTrue(count1 >= k);
+
+ byte[] serArr;
+ double est2;
+
+ serArr = shared.toByteArray();
+ MemorySegment seg = MemorySegment.ofArray(serArr);
+ UpdateSketch recoveredShared = Sketches.wrapUpdateSketch(seg);
+
+ //reconstruct to Native/Direct
+ final int bytes = Sketch.getMaxUpdateSketchBytes(k);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg);
+ UpdateSketch local2 = sl.bldr.buildLocal(shared);
+ est2 = local2.getEstimate();
+
+ assertEquals(est2, est1, 0.0);
+ }
+
+ @Test
+ public void checkNullMemory() {
+ UpdateSketchBuilder bldr = new UpdateSketchBuilder();
+ final UpdateSketch sk = bldr.build();
+ for (int i = 0; i < 1000; i++) { sk.update(i); }
+ final UpdateSketch shared = bldr.buildSharedFromSketch(sk, null);
+ assertEquals(shared.getRetainedEntries(true), 1000);
+ assertFalse(shared.hasMemorySegment());
+ }
+
+ //checks Alex's bug where lgArrLongs > lgNomLongs +1.
+ @Test
+ public void checkResizeInBigMem() {
+ int lgK = 14;
+ int u = 1 << 20;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useSeg, true, 8); //seg is 8X larger than needed
+ UpdateSketch local = sl.local;
+
+ for (int i = 0; i < u; i++) { local.update(i); }
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkConstructorKtooSmall() {
+ int lgK = 3;
+ boolean useSeg = true;
+ new SharedLocal(lgK, lgK, useSeg);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkConstructorMemTooSmall() {
+ int lgK = 4;
+ int k = 1 << lgK;
+ MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]);
+ UpdateSketchBuilder bldr = new UpdateSketchBuilder();
+ bldr.setLogNominalEntries(lgK);
+ bldr.buildShared(wseg);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifyIllegalFamilyID_heapify() {
+ int lgK = 9;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte
+ //try to heapify the corrupted seg
+ Sketch.heapify(sl.wseg); //catch in Sketch.constructHeapSketch
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadLgNomLongs() {
+ int lgK = 4;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte
+ DirectQuickSelectSketch.writableWrap(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ @Test
+ public void checkBackgroundPropagation() {
+ int lgK = 4;
+ int k = 1 << lgK;
+ int u = 10*k;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertTrue(local.isEmpty());
+ ConcurrentHeapThetaBuffer sk1 = (ConcurrentHeapThetaBuffer)local; //for internal checks
+
+ int i = 0;
+ for (; i< k; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+ assertFalse(local.isEmpty());
+ assertTrue(local.getEstimate() > 0.0);
+ long theta1 = ((ConcurrentSharedThetaSketch)shared).getVolatileTheta();
+
+ for (; i< u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ long theta2 = ((ConcurrentSharedThetaSketch)shared).getVolatileTheta();
+ int entries = shared.getRetainedEntries(false);
+ assertTrue((entries > k) || (theta2 < theta1),
+ "entries="+entries+" k="+k+" theta1="+theta1+" theta2="+theta2);
+
+ shared.rebuild();
+ assertEquals(shared.getRetainedEntries(false), k);
+ assertEquals(shared.getRetainedEntries(true), k);
+ sk1.rebuild();
+ assertEquals(shared.getRetainedEntries(false), k);
+ assertEquals(shared.getRetainedEntries(true), k);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadSerVer() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertTrue(local.isEmpty());
+
+ for (int i = 0; i< k; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertEquals(local.getEstimate(), k, 0.0);
+ assertEquals(shared.getRetainedEntries(false), k);
+
+ sl.wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
+ Sketch.wrap(sl.wseg);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkWrapIllegalFamilyID_wrap() {
+ int lgK = 9;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ //try to wrap the corrupted seg
+ Sketch.wrap(sl.wseg); //catch in Sketch.constructDirectSketch
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkWrapIllegalFamilyID_direct() {
+ int lgK = 9;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ //try to wrap the corrupted seg
+ DirectQuickSelectSketch.writableWrap(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifySeedConflict() {
+ int lgK = 9;
+ long seed1 = 1021;
+ long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1);
+ UpdateSketch shared = sl.shared;
+
+ MemorySegment srcSeg = MemorySegment.ofArray(shared.toByteArray()).asReadOnly();
+ Sketch.heapify(srcSeg, seed2);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkCorruptLgNomLongs() {
+ int lgK = 4;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+
+ sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt
+ Sketch.heapify(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void checkIllegalHashUpdate() {
+ int lgK = 4;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ UpdateSketch shared = sl.shared;
+ shared.hashUpdate(1);
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+ private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) {
+ assertEquals(
+ local.hasMemorySegment(),
+ shared.hasMemorySegment());
+ assertEquals(local.isDirect(), shared.isDirect());
+ }
+
+ //Does not check hasMemorySegment(), isDirect()
+ private static void checkOtherProxyMethods(Sketch local, Sketch shared) {
+ assertEquals(local.getCompactBytes(), shared.getCompactBytes());
+ assertEquals(local.getCurrentBytes(), shared.getCurrentBytes());
+ assertEquals(local.getEstimate(), shared.getEstimate());
+ assertEquals(local.getLowerBound(2), shared.getLowerBound(2));
+ assertEquals(local.getUpperBound(2), shared.getUpperBound(2));
+ assertEquals(local.isEmpty(), shared.isEmpty());
+ assertEquals(local.isEstimationMode(), shared.isEstimationMode());
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java
new file mode 100644
index 000000000..4685639ec
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java
@@ -0,0 +1,745 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author eshcar
+ */
+public class ConcurrentHeapQuickSelectSketchTest {
+
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadSerVer() {
+ final int lgNomEntries = 9;
+ final int numUpdates = 1 << lgNomEntries; //u == k
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ final UpdateSketch sharedSk = pair.shared;
+ final UpdateSketch localSk = pair.local;
+
+ assertTrue(localSk.isEmpty());
+
+ int next = 0;
+ while (next < numUpdates) {
+ localSk.update(next++);
+ }
+ waitForBgPropagationToComplete(sharedSk);
+
+ assertFalse(localSk.isEmpty());
+ assertEquals(localSk.getEstimate(), numUpdates, 0.0);
+ assertEquals(sharedSk.getRetainedEntries(false), numUpdates);
+
+ //the uncorrupted image must heapify successfully first
+ final MemorySegment image = MemorySegment.ofArray(sharedSk.toByteArray());
+ final Sketch heapified = Sketch.heapify(image, pair.seed);
+ assertTrue(heapified instanceof HeapQuickSelectSketch); //Intentional promotion to Parent
+
+ image.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
+ Sketch.heapify(image, pair.seed); //expected to throw
+ }
+
+ @Test
+ public void checkPropagationNotOrdered() {
+ final int sharedLgK = 8;
+ final int localLgK = 4;
+ final int numUpdates = 200 * (1 << sharedLgK);
+ final SharedLocal pair = new SharedLocal(sharedLgK, localLgK, false, false); //useSeg, ordered
+ final UpdateSketch sharedSk = pair.shared;
+ final UpdateSketch localSk = pair.local;
+ assertEquals(pair.bldr.getLocalLgNominalEntries(), localLgK);
+ assertTrue(localSk.isEmpty());
+
+ int next = 0;
+ while (next < numUpdates) { localSk.update(next++); }
+
+ waitForBgPropagationToComplete(sharedSk);
+
+ assertFalse(localSk.isEmpty());
+ assertTrue(numUpdates >= sharedSk.getRetainedEntries(true));
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSketchID_UpdateSketch() {
+ final int lgNomEntries = 9;
+ final int numUpdates = 1 << lgNomEntries; //u == k
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ final UpdateSketch sharedSk = pair.shared;
+ final UpdateSketch localSk = pair.local;
+
+ assertTrue(localSk.isEmpty());
+ assertTrue(sharedSk instanceof ConcurrentHeapQuickSelectSketch);
+ int next = 0;
+ while (next < numUpdates) {
+ localSk.update(next++);
+ }
+ assertTrue(sharedSk.compact().isCompact());
+
+ assertFalse(localSk.isEmpty());
+ assertEquals(localSk.getEstimate(), numUpdates, 0.0);
+ assertEquals(sharedSk.getRetainedEntries(false), numUpdates);
+
+ final MemorySegment image = MemorySegment.ofArray(sharedSk.toByteArray());
+ image.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+
+ //heapifying the corrupted image is expected to throw
+ Sketch.heapify(image, pair.seed);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifySeedConflict() {
+ final int lgNomEntries = 9;
+ final long buildSeed = 1021L;
+ final long heapifySeed = ThetaUtil.DEFAULT_UPDATE_SEED; //not the seed used to build
+ final SharedLocal pair = new SharedLocal(lgNomEntries, lgNomEntries, buildSeed);
+ final MemorySegment image = MemorySegment.ofArray(pair.shared.toByteArray());
+ //heapifying with the other seed is expected to throw
+ Sketch.heapify(image, heapifySeed);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifyCorruptLgNomLongs() {
+ final SharedLocal pair = new SharedLocal(4); //lgK = 4
+ final MemorySegment image = MemorySegment.ofArray(pair.shared.toByteArray());
+
+ //overwrite the lgNomLongs preamble byte with 2
+ image.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 2); //corrupt
+ Sketch.heapify(image, ThetaUtil.DEFAULT_UPDATE_SEED); //expected to throw
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void checkIllegalHashUpdate() {
+ final SharedLocal pair = new SharedLocal(4); //lgK = 4
+ final UpdateSketch sharedSk = pair.shared;
+ sharedSk.hashUpdate(1); //expected to throw
+ }
+
+ @Test
+ public void checkHeapifyByteArrayExact() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = k;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ for (int i=0; i k);
+ // it could be exactly k, but in this case must be greater
+ }
+
+ @Test
+ public void checkErrorBounds() {
+ final int lgNomEntries = 9;
+ final int nomEntries = 1 << lgNomEntries;
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ final UpdateSketch localSk = pair.local;
+ final UpdateSketch sharedSk = pair.shared;
+
+ //Exact mode
+ //int limit = (int)ConcurrentSharedThetaSketch.computeExactLimit(lim, 0); //? ask Eshcar
+ int next = 0;
+ while (next < nomEntries) { localSk.update(next++); }
+
+ final double exactEst = localSk.getEstimate();
+ final double exactLb = localSk.getLowerBound(2);
+ final double exactUb = localSk.getUpperBound(2);
+ assertEquals(exactEst, exactUb, 0.0); //both bounds must equal the estimate
+ assertEquals(exactEst, exactLb, 0.0);
+
+ //Est mode
+ final int total = 2 * nomEntries;
+ for (; next < total; next++) {
+ localSk.update(next);
+ localSk.update(next); //test duplicate rejection
+ }
+ //wait before reading the estimate and bounds again
+ waitForBgPropagationToComplete(sharedSk);
+ final double est = localSk.getEstimate();
+ final double lb = localSk.getLowerBound(2);
+ final double ub = localSk.getUpperBound(2);
+ assertTrue(ub >= est);
+ assertTrue(lb <= est);
+ }
+
+ @Test
+ public void checkRebuild() {
+ final int lgNomEntries = 4;
+ final int nomEntries = 1 << lgNomEntries;
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ //must build shared first
+ final UpdateSketch sharedSk = pair.shared;
+ final UpdateSketch localSk = pair.local;
+
+ assertTrue(localSk.isEmpty());
+ final int threshold = ((ConcurrentHeapThetaBuffer) localSk).getHashTableThreshold();
+
+ int next = 0;
+ while (next < threshold) { localSk.update(next++); }
+
+ waitForBgPropagationToComplete(sharedSk);
+
+ assertFalse(localSk.isEmpty());
+ assertTrue(localSk.getEstimate() > 0.0);
+ assertTrue(sharedSk.getRetainedEntries(false) > nomEntries);
+
+ //rebuild twice; the retained count must be k after each pass
+ for (int pass = 0; pass < 2; pass++) {
+ sharedSk.rebuild();
+ assertEquals(sharedSk.getRetainedEntries(false), nomEntries);
+ assertEquals(sharedSk.getRetainedEntries(true), nomEntries);
+ }
+ }
+
+ @Test
+ public void checkBuilder() {
+ final int lgNomEntries = 4;
+ final UpdateSketchBuilder builder = new SharedLocal(lgNomEntries).bldr;
+ assertEquals(builder.getLocalLgNominalEntries(), lgNomEntries); //local lgK
+ assertEquals(builder.getLgNominalEntries(), lgNomEntries); //shared lgK
+ println(builder.toString());
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderSmallNominal() {
+ final int tooSmallLgK = 2;
+ new SharedLocal(tooSmallLgK); //construction is expected to throw
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkNegativeHashes() {
+ final SharedLocal pair = new SharedLocal(9); //lgK = 9
+ final UpdateSketch localSk = pair.local;
+ //negative hash: this call is expected to throw
+ localSk.hashUpdate(-1L);
+ }
+
+ @Test
+ public void checkResetAndStartingSubMultiple() {
+ final int lgNomEntries = 9;
+ final int nomEntries = 1 << lgNomEntries;
+ final int numUpdates = 3 * nomEntries;
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ final UpdateSketch sharedSk = pair.shared;
+ final UpdateSketch localSk = pair.local;
+
+ assertTrue(localSk.isEmpty());
+ int next = 0;
+ while (next < numUpdates) { localSk.update(next++); }
+ waitForBgPropagationToComplete(sharedSk);
+
+ assertFalse(localSk.isEmpty());
+ assertTrue(sharedSk.getRetainedEntries(false) >= nomEntries);
+ assertTrue(localSk.getThetaLong() < Long.MAX_VALUE); //theta is below its maximum
+
+ sharedSk.reset(); //shared first, then local
+ localSk.reset();
+ assertTrue(localSk.isEmpty());
+ assertEquals(sharedSk.getRetainedEntries(false), 0);
+ assertEquals(localSk.getEstimate(), 0.0, 0.0);
+ assertEquals(localSk.getThetaLong(), Long.MAX_VALUE);
+ }
+
+ @Test
+ public void checkDQStoCompactEmptyForms() { //empty shared sketch compacted unordered, then ordered
+ int lgK = 9;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch local = sl.local;
+ UpdateSketch shared = sl.shared;
+
+ //empty
+ local.toString(false, true, 0, false); //exercise toString
+ assertTrue(local instanceof ConcurrentHeapThetaBuffer);
+ double localEst = local.getEstimate(); //reference values taken from the empty local buffer
+ double localLB = local.getLowerBound(2);
+ double localUB = local.getUpperBound(2);
+ assertFalse(local.isEstimationMode());
+
+ int bytes = local.getCompactBytes();
+ assertEquals(bytes, 8);
+ byte[] segArr2 = new byte[bytes];
+ MemorySegment seg2 = MemorySegment.ofArray(segArr2);
+
+ CompactSketch csk2 = shared.compact(false, seg2); //unordered request
+ assertEquals(csk2.getEstimate(), localEst);
+ assertEquals(csk2.getLowerBound(2), localLB);
+ assertEquals(csk2.getUpperBound(2), localUB);
+ assertTrue(csk2.isEmpty());
+ assertFalse(csk2.isEstimationMode());
+ assertTrue(csk2.isOrdered()); //the empty result reports ordered even for an unordered request
+
+ CompactSketch csk3 = shared.compact(true, seg2); //ordered request into the same segment
+ csk3.toString(false, true, 0, false);
+ csk3.toString();
+ assertEquals(csk3.getEstimate(), localEst);
+ assertEquals(csk3.getLowerBound(2), localLB);
+ assertEquals(csk3.getUpperBound(2), localUB);
+ assertTrue(csk3.isEmpty());
+ assertFalse(csk3.isEstimationMode());
+ assertTrue(csk3.isOrdered()); //check csk3 here; this line previously re-checked csk2
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkMinReqBytes() {
+ final int lgNomEntries = 4;
+ final int numUpdates = 4 * (1 << lgNomEntries);
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ int next = 0;
+ while (next < numUpdates) { pair.local.update(next++); }
+ waitForBgPropagationToComplete(pair.shared);
+ final byte[] truncated = Arrays.copyOfRange(pair.shared.toByteArray(), 0, 24); //corrupt no. bytes
+ final MemorySegment image = MemorySegment.ofArray(truncated).asReadOnly();
+ Sketch.heapify(image); //only the first 24 bytes are present: expected to throw
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkThetaAndLgArrLongs() {
+ final int lgNomEntries = 4;
+ final int nomEntries = 1 << lgNomEntries;
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ int next = 0;
+ while (next < nomEntries) { pair.local.update(next++); }
+ waitForBgPropagationToComplete(pair.shared);
+ final MemorySegment image = MemorySegment.ofArray(pair.shared.toByteArray());
+ PreambleUtil.insertLgArrLongs(image, 4); //corrupt
+ PreambleUtil.insertThetaLong(image, Long.MAX_VALUE / 2); //corrupt
+ Sketch.heapify(image); //expected to throw on the altered preamble
+ }
+
+ @Test
+ public void checkFamily() {
+ //default SharedLocal; the local sketch must report the QUICKSELECT family
+ final UpdateSketch localSk = new SharedLocal().local;
+ assertEquals(localSk.getFamily(), Family.QUICKSELECT);
+ }
+
+ @Test
+ public void checkBackgroundPropagation() {
+ final int lgNomEntries = 4;
+ final int nomEntries = 1 << lgNomEntries;
+ final int total = 5 * nomEntries;
+ final SharedLocal pair = new SharedLocal(lgNomEntries);
+ assertTrue(pair.local.isEmpty());
+
+ int next = 0;
+ while (next < nomEntries) { pair.local.update(next++); } //exact
+ waitForBgPropagationToComplete(pair.shared);
+
+ assertFalse(pair.local.isEmpty());
+ assertTrue(pair.local.getEstimate() > 0.0);
+ final long thetaBefore = pair.sharedIf.getVolatileTheta();
+
+ while (next < total) { pair.local.update(next++); } //continue, make it estimating
+ waitForBgPropagationToComplete(pair.shared);
+
+ final long thetaAfter = pair.sharedIf.getVolatileTheta();
+ final int retained = pair.shared.getRetainedEntries(false);
+ assertTrue((retained > nomEntries) || (thetaAfter < thetaBefore), "entries= " + retained
+ + " k= " + nomEntries + " theta1= " + thetaBefore + " theta2= " + thetaAfter);
+
+ pair.shared.rebuild();
+ assertEquals(pair.shared.getRetainedEntries(false), nomEntries);
+ assertEquals(pair.shared.getRetainedEntries(true), nomEntries);
+ pair.local.rebuild(); //rebuild through the local sketch, then re-check the shared counts
+ assertEquals(pair.shared.getRetainedEntries(false), nomEntries);
+ assertEquals(pair.shared.getRetainedEntries(true), nomEntries);
+ }
+
+ @Test
+ public void checkBuilderExceptions() {
+ final UpdateSketchBuilder builder = new UpdateSketchBuilder();
+ try {
+ builder.setNominalEntries(8);
+ fail();
+ } catch (SketchesArgumentException expected) { /* expected */ }
+ try {
+ builder.setLocalNominalEntries(8);
+ fail();
+ } catch (SketchesArgumentException expected) { /* expected */ }
+ try {
+ builder.setLocalLogNominalEntries(3);
+ fail();
+ } catch (SketchesArgumentException expected) { /* expected */ }
+ builder.setNumPoolThreads(4); //each setter below must be readable via its getter
+ assertEquals(builder.getNumPoolThreads(), 4);
+ builder.setMaxConcurrencyError(0.04);
+ assertEquals(builder.getMaxConcurrencyError(), 0.04);
+ builder.setMaxNumLocalThreads(4);
+ assertEquals(builder.getMaxNumLocalThreads(), 4);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void checkToByteArray() {
+ final UpdateSketch localSk = new SharedLocal().local;
+ localSk.toByteArray(); //expected to throw for the local sketch
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: " + getClass().getName()); //exercises the local println hook
+ }
+
+ /**
+ * @param s value to print; ignored while the statement in the body stays commented out
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+ static class SharedLocal { //test fixture: one shared sketch plus one local sketch from the same builder
+ static final long DefaultSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ final UpdateSketch shared;
+ final ConcurrentSharedThetaSketch sharedIf; //same object as 'shared', viewed through the concurrent interface
+ final UpdateSketch local;
+ final int sharedLgK;
+ final int localLgK;
+ final long seed;
+ final MemorySegment wseg; //null unless useSeg was requested
+ final UpdateSketchBuilder bldr = new UpdateSketchBuilder();
+
+ SharedLocal() { //lgK 9 for both, default seed, no segment, ordered
+ this(9, 9, DefaultSeed, false, true, 1);
+ }
+
+ SharedLocal(int lgK) { //same lgK for shared and local
+ this(lgK, lgK, DefaultSeed, false, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK) { //default seed, no segment, ordered
+ this(sharedLgK, localLgK, DefaultSeed, false, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, long seed) { //custom seed, no segment, ordered
+ this(sharedLgK, localLgK, seed, false, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, boolean useSeg) { //default seed, ordered
+ this(sharedLgK, localLgK, DefaultSeed, useSeg, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, boolean useSeg, boolean ordered) { //default seed
+ this(sharedLgK, localLgK, DefaultSeed, useSeg, ordered, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, long seed, boolean useSeg, boolean ordered, int segMult) {
+ this.sharedLgK = sharedLgK;
+ this.localLgK = localLgK;
+ this.seed = seed;
+ //segment bytes = ((4 * 2^sharedLgK) * segMult + max preamble longs) * 8
+ final int tableLongs = (4 << sharedLgK) * segMult;
+ final int segBytes = (tableLongs + Family.QUICKSELECT.getMaxPreLongs()) << 3;
+ wseg = useSeg
+ ? MemorySegment.ofArray(new byte[segBytes])
+ : null;
+ bldr.setLogNominalEntries(sharedLgK);
+ bldr.setLocalLogNominalEntries(localLgK);
+ bldr.setPropagateOrderedCompact(ordered);
+ bldr.setSeed(this.seed);
+ shared = bldr.buildShared(wseg); //the shared sketch is built first; the local one is built from it
+ local = bldr.buildLocal(shared);
+ sharedIf = (ConcurrentSharedThetaSketch) shared;
+ }
+ }
+
+ static void waitForBgPropagationToComplete(UpdateSketch shared) { //shared must be a ConcurrentSharedThetaSketch
+ try {
+ Thread.sleep(10); //brief pause before waiting on the propagation service
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt(); //restore the interrupt status instead of swallowing it
+ }
+ ConcurrentSharedThetaSketch csts = (ConcurrentSharedThetaSketch)shared;
+ csts.awaitBgPropagationTermination();
+ ConcurrentPropagationService.resetExecutorService(Thread.currentThread().getId()); //keyed by this thread's id
+ csts.initBgPropagationService(); //re-initialize so the sketch can be used again after the wait
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java
new file mode 100644
index 000000000..5d0e42176
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.JaccardSimilarity.exactlyEqual;
+import static org.apache.datasketches.theta2.JaccardSimilarity.jaccard;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class JaccardSimilarityTest {
+
+ @Test
+ public void checkNullsEmpties() {
+ final int minK = 1 << 12;
+ final double minJaccard = 0.95;
+ println("Check nulls & empties, minK: " + minK + "\t Th: " + minJaccard);
+ //check both null
+ double[] bounds = jaccard(null, null);
+ boolean pass = bounds[1] > minJaccard;
+ println("null \t null:\t" + pass + "\t" + jaccardString(bounds));
+ assertFalse(pass);
+
+ pass = exactlyEqual(null, null);
+ assertFalse(pass);
+
+ final UpdateSketch actual = UpdateSketch.builder().setNominalEntries(minK).build();
+ final UpdateSketch reference = UpdateSketch.builder().setNominalEntries(minK).build();
+
+ //check both empty
+ bounds = jaccard(actual, reference);
+ pass = bounds[1] > minJaccard;
+ println("empty\tempty:\t" + pass + "\t" + jaccardString(bounds));
+ assertTrue(pass);
+
+ pass = exactlyEqual(actual, reference);
+ assertTrue(pass);
+
+ pass = exactlyEqual(actual, actual);
+ assertTrue(pass);
+
+ //adjust one
+ reference.update(1);
+ bounds = jaccard(actual, reference);
+ pass = bounds[1] > minJaccard;
+ println("empty\t 1:\t" + pass + "\t" + jaccardString(bounds));
+ assertFalse(pass);
+
+ pass = exactlyEqual(actual, reference);
+ assertFalse(pass);
+
+ println("");
+ }
+
+ @Test
+ public void checkExactMode() {
+ final int nomEntries = 1 << 12;
+ final int numUpdates = nomEntries;
+ final double minJaccard = 0.9999;
+ println("Exact Mode, minK: " + nomEntries + "\t Th: " + minJaccard);
+
+ final UpdateSketch actual = UpdateSketch.builder().setNominalEntries(nomEntries).build();
+ final UpdateSketch reference = UpdateSketch.builder().setNominalEntries(nomEntries).build();
+
+ for (int v = 0; v < (numUpdates - 1); v++) { //one short
+ actual.update(v);
+ reference.update(v);
+ }
+
+ double[] bounds = jaccard(actual, reference);
+ boolean pass = bounds[1] > minJaccard;
+ println(pass + "\t" + jaccardString(bounds));
+ assertTrue(pass);
+
+ pass = exactlyEqual(actual, reference);
+ assertTrue(pass);
+
+ actual.update(numUpdates - 1); //now exactly k entries
+ reference.update(numUpdates); //now exactly k entries but differs by one
+ bounds = jaccard(actual, reference);
+ pass = bounds[1] > minJaccard;
+ println(pass + "\t" + jaccardString(bounds));
+ assertFalse(pass);
+
+ pass = exactlyEqual(actual, reference);
+ assertFalse(pass);
+
+ println("");
+ }
+
+ @Test
+ public void checkEstMode() {
+ final int nomEntries = 1 << 12;
+ final int numUpdates = 1 << 20;
+ final double minJaccard = 0.9999;
+ println("Estimation Mode, minK: " + nomEntries + "\t Th: " + minJaccard);
+
+ final UpdateSketch actual = UpdateSketch.builder().setNominalEntries(nomEntries).build();
+ final UpdateSketch reference = UpdateSketch.builder().setNominalEntries(nomEntries).build();
+
+ for (int v = 0; v < numUpdates; v++) {
+ actual.update(v);
+ reference.update(v);
+ }
+
+ double[] bounds = jaccard(actual, reference);
+ boolean pass = bounds[1] > minJaccard;
+ println(pass + "\t" + jaccardString(bounds));
+ assertTrue(pass);
+
+ pass = exactlyEqual(actual, reference);
+ assertTrue(pass);
+
+ for (int v = numUpdates; v < (numUpdates + 50); v++) { //empirically determined
+ actual.update(v);
+ }
+
+ bounds = jaccard(actual, reference);
+ pass = bounds[1] >= minJaccard;
+ println(pass + "\t" + jaccardString(bounds));
+ assertFalse(pass);
+
+ pass = exactlyEqual(actual, reference);
+ assertFalse(pass);
+
+ println("");
+ }
+
+ /**
+ * Enable printing on this test and you will see that the distribution is pretty tight,
+ * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about
+ * +/- 1.56%.
+ */
+ @Test
+ public void checkSimilarity() {
+ final int minK = 1 << 12;
+ final int referenceCount = 1 << 20;
+ final int actualCount = (int) (referenceCount * 0.95);
+ final double minJaccard = 0.943;
+ println("Estimation Mode, minK: " + minK + "\t Th: " + minJaccard);
+
+ final UpdateSketch reference = UpdateSketch.builder().setNominalEntries(minK).build();
+ final UpdateSketch actual = UpdateSketch.builder().setNominalEntries(minK).build();
+
+ for (int v = 0; v < referenceCount; v++) {
+ reference.update(v);
+ }
+
+ for (int v = 0; v < actualCount; v++) {
+ actual.update(v);
+ }
+
+ final double[] bounds = jaccard(actual, reference);
+ boolean pass = JaccardSimilarity.similarityTest(actual, reference, minJaccard);
+ println(pass + "\t" + jaccardString(bounds));
+ assertTrue(pass);
+ //check identity case
+ pass = JaccardSimilarity.similarityTest(actual, actual, minJaccard);
+ assertTrue(pass);
+ }
+
+ /**
+ * Enable printing on this test and you will see that the distribution is much looser,
+ * about +/- 14%. This is due to the fact that intersections lose accuracy as the ratio of
+ * intersection to the union becomes a small number.
+ */
+ @Test
+ public void checkDissimilarity() {
+ final int minK = 1 << 12;
+ final int referenceCount = 1 << 20;
+ final int actualCount = (int) (referenceCount * 0.05);
+ final double maxJaccard = 0.061;
+ println("Estimation Mode, minK: " + minK + "\t Th: " + maxJaccard);
+
+ final UpdateSketch reference = UpdateSketch.builder().setNominalEntries(minK).build();
+ final UpdateSketch actual = UpdateSketch.builder().setNominalEntries(minK).build();
+
+ for (int v = 0; v < referenceCount; v++) {
+ reference.update(v);
+ }
+
+ for (int v = 0; v < actualCount; v++) {
+ actual.update(v);
+ }
+
+ final double[] bounds = jaccard(actual, reference);
+ final boolean pass = JaccardSimilarity.dissimilarityTest(actual, reference, maxJaccard);
+ println(pass + "\t" + jaccardString(bounds));
+ assertTrue(pass);
+ }
+
+ private static String jaccardString(double[] jResults) {
+ final double lower = jResults[0];
+ final double estimate = jResults[1];
+ final double upper = jResults[2];
+ return lower + "\t" + estimate + "\t" + upper + "\t" + ((lower/estimate) - 1.0) + "\t" + ((upper/estimate) - 1.0);
+ }
+
+ @Test
+ public void checkMinK() {
+ final UpdateSketch first = UpdateSketch.builder().build(); //4096
+ final UpdateSketch second = UpdateSketch.builder().build(); //4096
+ first.update(1);
+ second.update(1);
+ double[] bounds = jaccard(first, second);
+ println(bounds[0] + ", " + bounds[1] + ", " + bounds[2]);
+ for (int v = 1; v < 4096; v++) {
+ first.update(v);
+ second.update(v);
+ }
+ bounds = jaccard(first, second);
+ println(bounds[0] + ", " + bounds[1] + ", " + bounds[2]);
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: " + getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java
new file mode 100644
index 000000000..ab0ed1495
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.SketchesReadOnlyException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class ReadOnlyMemoryTest {
+
+ @Test
+ public void wrapAndTryUpdatingUpdateSketch() {
+ final UpdateSketch source = UpdateSketch.builder().build();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.toByteArray());
+ final UpdateSketch wrapped = (UpdateSketch) Sketch.wrap(roSeg);
+ assertEquals(wrapped.getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+
+ boolean rejected = false;
+ try {
+ wrapped.update(2);
+ } catch (SketchesReadOnlyException e) {
+ rejected = true;
+ }
+ Assert.assertTrue(rejected);
+ }
+
+ @Test
+ public void wrapCompactUnorderedSketch() {
+ final UpdateSketch source = UpdateSketch.builder().build();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.compact(false, null).toByteArray());
+ final Sketch wrapped = Sketch.wrap(roSeg);
+ assertEquals(wrapped.getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void wrapCompactOrderedSketch() {
+ final UpdateSketch source = UpdateSketch.builder().build();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.compact().toByteArray());
+ final Sketch wrapped = Sketch.wrap(roSeg);
+ assertEquals(wrapped.getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyUpdateSketch() {
+ final UpdateSketch source = UpdateSketch.builder().build();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.toByteArray());
+ // downcasting is not recommended, for testing only
+ final UpdateSketch heapified = (UpdateSketch) Sketch.heapify(roSeg);
+ heapified.update(2);
+ assertEquals(heapified.getEstimate(), 2.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyCompactUnorderedSketch() {
+ final UpdateSketch source = UpdateSketch.builder().build();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.compact(false, null).toByteArray());
+ final Sketch heapified = Sketch.heapify(roSeg);
+ assertEquals(heapified.getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyCompactOrderedSketch() {
+ final UpdateSketch source = UpdateSketch.builder().build();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.compact().toByteArray());
+ final Sketch heapified = Sketch.heapify(roSeg);
+ assertEquals(heapified.getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyUnion() {
+ final Union source = SetOperation.builder().buildUnion();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.toByteArray());
+ final Union heapified = (Union) SetOperation.heapify(roSeg);
+ heapified.update(2);
+ Assert.assertEquals(heapified.getResult().getEstimate(), 2.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void wrapAndTryUpdatingUnion() {
+ final Union source = SetOperation.builder().buildUnion();
+ source.update(1);
+ final MemorySegment roSeg = readOnlySegmentOf(source.toByteArray());
+
+ final Union viaSetOp = (Union) Sketches.wrapSetOperation(roSeg);
+ final Union viaUnion = Sketches.wrapUnion(roSeg);
+ Assert.assertEquals(viaSetOp.getResult().getEstimate(), 1.0);
+ Assert.assertEquals(viaUnion.getResult().getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+
+ try {
+ viaSetOp.update(2);
+ fail();
+ } catch (SketchesReadOnlyException e) {
+ //expected
+ }
+
+ try {
+ viaUnion.update(2);
+ fail();
+ } catch (SketchesReadOnlyException e) {
+ //expected
+ }
+ }
+
+ @Test
+ public void heapifyIntersection() {
+ final UpdateSketch first = UpdateSketch.builder().build();
+ first.update(1);
+ first.update(2);
+ final UpdateSketch second = UpdateSketch.builder().build();
+ second.update(2);
+ second.update(3);
+
+ final Intersection source = SetOperation.builder().buildIntersection();
+ source.intersect(first);
+ source.intersect(second);
+ final MemorySegment roSeg = readOnlySegmentOf(source.toByteArray());
+ final Intersection heapified = (Intersection) SetOperation.heapify(roSeg);
+ heapified.intersect(first);
+ Assert.assertEquals(heapified.getResult().getEstimate(), 1.0);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void wrapIntersection() {
+ final UpdateSketch first = UpdateSketch.builder().build();
+ first.update(1);
+ first.update(2);
+ final UpdateSketch second = UpdateSketch.builder().build();
+ second.update(2);
+ second.update(3);
+
+ final Intersection source = SetOperation.builder().buildIntersection();
+ source.intersect(first);
+ source.intersect(second);
+ final MemorySegment roSeg = readOnlySegmentOf(source.toByteArray());
+ final Intersection wrapped = (Intersection) SetOperation.wrap(roSeg);
+ Assert.assertEquals(wrapped.getResult().getEstimate(), 1.0);
+
+ boolean rejected = false;
+ try {
+ wrapped.intersect(first);
+ } catch (SketchesReadOnlyException e) {
+ rejected = true;
+ }
+ Assert.assertTrue(rejected);
+ assertTrue(roSeg.isReadOnly());
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: " + getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+ /**
+ * Wraps the given bytes in a read-only, native-order ByteBuffer and views it as a MemorySegment.
+ * @param bytes the serialized image
+ * @return a MemorySegment over the read-only buffer
+ */
+ private static MemorySegment readOnlySegmentOf(final byte[] bytes) {
+ final ByteBuffer roBuf = ByteBuffer.wrap(bytes).asReadOnlyBuffer().order(ByteOrder.nativeOrder());
+ return MemorySegment.ofBuffer(roBuf);
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java
new file mode 100644
index 000000000..02efffd75
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java
@@ -0,0 +1,438 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.ResizeFactor.X4;
+import static org.apache.datasketches.theta2.Sketch.getMaxUpdateSketchBytes;
+import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class SetOperationTest {
+
+ @Test
+ public void checkBuilder() {
+ final int k = 2048;
+ final long seed = 1021;
+
+ final UpdateSketch usk1 = UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build();
+ final UpdateSketch usk2 = UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build();
+
+ for (int i=0; i Next, we recover the Union SetOp and the 3 sketches and the space for the result. Then
+ * recompute the union using a Union of the same size as the input sketches, where the end result
+ * will be an estimate.
+ */
+ @Test
+ public void checkDirectUnionExample() {
+ //The first task is to compute how much direct memory we need and set the heap large enough.
+ //For the first trial, we will set the Union large enough for an exact result for THIS example.
+ final int sketchNomEntries = 1 << 14; //16K
+ int unionNomEntries = 1 << 15; //32K
+ //heapLayout[0..4] are the offsets of the union, the 3 sketches and the result; heapLayout[5] is the total size.
+ final int[] heapLayout = getHeapLayout(sketchNomEntries, unionNomEntries);
+
+ //This BB belongs to you and you always retain a link to it until you are completely
+ // done and then let java garbage collect it.
+ //I use a heap backing array, because for this example it is easier to peek into it and
+ // see what is going on.
+ final byte[] backingArr = new byte[heapLayout[5]];
+ final ByteBuffer heapBuf = ByteBuffer.wrap(backingArr).order(ByteOrder.nativeOrder());
+
+ // Attaches a MemorySegment object to the underlying memory of heapBuf.
+ // heapMem will have a Read/Write view of the complete backing memory of heapBuf (direct or not).
+ // Any R/W action from heapMem will be visible via heapBuf and vice versa.
+ //
+ // However, if you had created this MemorySegment directly in raw, off-heap "native" memory
+ // you would have the responsibility to close it when you are done.
+ // But, since it was allocated via BB, it closes it for you.
+ final MemorySegment heapMem = MemorySegment.ofBuffer(heapBuf);
+
+ double result = directUnionTrial1(heapMem, heapLayout, sketchNomEntries, unionNomEntries);
+ println("1st est: "+result);
+ //The three sketches built in directUnionTrial1 together cover the values [0, 2*sketchNomEntries).
+ final int expected = sketchNomEntries*2;
+ assertEquals(result, expected, 0.0); //est must be exact.
+
+ //For trial 2, we will use the same union space but use only part of it.
+ unionNomEntries = 1 << 14; //16K
+ result = directUnionTrial2(heapMem, heapLayout, sketchNomEntries, unionNomEntries);
+
+ //intentionally loose bounds
+ assertEquals(result, expected, expected*0.05);
+ println("2nd est: "+result);
+ println("Error %: "+(result/expected -1.0)*100);
+ }
+
+ @Test
+ //Walk-through of the three set operations (Union, Intersection, AnotB) on small sketches.
+ //Nothing is asserted; the estimates are only passed to println.
+ public void setOpsExample() {
+ println("Set Operations Example:");
+ final int k = 4096;
+ final UpdateSketch skA = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ final UpdateSketch skB = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ final UpdateSketch skC = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+
+ //skA = {1..10}, skB = {1..20}, skC = {6..15}
+ for (int i=1; i<=10; i++) { skA.update(i); }
+ for (int i=1; i<=20; i++) { skB.update(i); }
+ for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set
+
+ final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion();
+ union.union(skA);
+ union.union(skB);
+ // ... continue to iterate on the input sketches to union
+
+ final CompactSketch unionSk = union.getResult(); //the result union sketch
+ println("A U B : "+unionSk.getEstimate()); //the estimate of the union
+
+ //Intersection is similar
+
+ final Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ inter.intersect(unionSk);
+ inter.intersect(skC);
+ // ... continue to iterate on the input sketches to intersect
+
+ final CompactSketch interSk = inter.getResult(); //the result intersection sketch
+ println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection
+
+ //The AnotB operation is a little different as it is stateless:
+
+ final AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
+ final CompactSketch not = aNotB.aNotB(skA, skC);
+
+ println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation
+ }
+
+ @Test
+ public void checkIsSameResource() {
+   final int k = 16;
+   final byte[] backing = new byte[k*16 + 32]; //288 bytes
+   final MemorySegment wmem = MemorySegment.ofArray(backing);
+   final MemorySegment unrelatedMem = MemorySegment.ofArray(new byte[8]);
+
+   //A union built on wmem must report wmem, and only wmem, as its resource.
+   final Union unionOp = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(wmem);
+   assertTrue(unionOp.isSameResource(wmem));
+   assertFalse(unionOp.isSameResource(unrelatedMem));
+
+   //Same expectation for an intersection built on wmem.
+   final Intersection interOp = Sketches.setOperationBuilder().buildIntersection(wmem);
+   assertTrue(interOp.isSameResource(wmem));
+   assertFalse(interOp.isSameResource(unrelatedMem));
+
+   //The AnotB is built without a segment, so it must not match the unrelated one.
+   final AnotB aNotBOp = Sketches.setOperationBuilder().buildANotB();
+   assertFalse(aNotBOp.isSameResource(unrelatedMem));
+ }
+
+ @Test
+ public void printlnTest() {
+   //Exercises println(String) with this test class's name.
+   final String className = getClass().getName();
+   println("PRINTING: " + className);
+ }
+
+ /**
+ * Output hook for the tests in this class. The only statement in the body is commented out,
+ * so as written this method does nothing; uncomment it to see the test output.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+ /**
+  * Lays out one contiguous region holding, in order: the Union, sketch1, sketch2, sketch3 and
+  * the result sketch. The three sketches all get the same number of bytes.
+  * @param sketchNomEntries the configured nominal entries of the sketch
+  * @param unionNomEntries configured nominal entries of the union; also used to size the result
+  * @return six values: the byte offsets of Union, sketch1, sketch2, sketch3 and resultSketch,
+  * followed by the total number of bytes of the layout
+  */
+ private static int[] getHeapLayout(final int sketchNomEntries, final int unionNomEntries) {
+   final int unionBytes = SetOperation.getMaxUnionBytes(unionNomEntries);
+   final int sketchBytes = getMaxUpdateSketchBytes(sketchNomEntries);
+   final int resultBytes = Sketch.getMaxCompactSketchBytes(unionNomEntries);
+
+   final int sketch1Offset = unionBytes;
+   final int sketch2Offset = unionBytes + sketchBytes;
+   final int sketch3Offset = unionBytes + 2*sketchBytes;
+   final int resultOffset = unionBytes + 3*sketchBytes;
+   final int totalBytes = unionBytes + 3*sketchBytes + resultBytes;
+
+   //The Union sits at offset 0.
+   return new int[] {0, sketch1Offset, sketch2Offset, sketch3Offset, resultOffset, totalBytes};
+ }
+
+ //First trial: builds the union and the 3 sketches inside slices of heapMem, unions sketches 1 and 2,
+ // then re-wraps the union and sketch 3 from their slices, unions sketch 3 and returns the estimate
+ // of the result sketch written into the result slice.
+ //heapLayout is the offset array produced by getHeapLayout.
+ private static double directUnionTrial1(
+ final MemorySegment heapMem, final int[] heapLayout, final int sketchNomEntries, final int unionNomEntries) {
+
+ final int offset = heapLayout[0];
+ final int bytes = heapLayout[1] - offset;
+ final MemorySegment unionMem = heapMem.asSlice(offset, bytes);
+
+ Union union = SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionMem);
+
+ //Each slice runs from its own offset up to the next entry of heapLayout.
+ final MemorySegment sketch1mem = heapMem.asSlice(heapLayout[1], heapLayout[2]-heapLayout[1]);
+ final MemorySegment sketch2mem = heapMem.asSlice(heapLayout[2], heapLayout[3]-heapLayout[2]);
+ final MemorySegment sketch3mem = heapMem.asSlice(heapLayout[3], heapLayout[4]-heapLayout[3]);
+ final MemorySegment resultMem = heapMem.asSlice(heapLayout[4], heapLayout[5]-heapLayout[4]);
+
+ //Initialize the 3 sketches
+ final UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch1mem);
+ final UpdateSketch sk2 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch2mem);
+ final UpdateSketch sk3 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch3mem);
+
+ //This little trial has sk1 and sk3 distinct and sk2 overlaps both:
+ // sk1 = [0, n), sk2 = [n/2, n/2 + n), sk3 = [n, 2n), where n = sketchNomEntries.
+ //Build the sketches.
+ for (int i=0; i< sketchNomEntries; i++) {
+ sk1.update(i);
+ sk2.update(i + sketchNomEntries/2);
+ sk3.update(i + sketchNomEntries);
+ }
+
+ //confirm that each of these 3 sketches is exact.
+ assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0);
+ assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0);
+ assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0);
+
+ //Let's union the first 2 sketches
+ union.union(sk1);
+ union.union(sk2);
+
+ //Let's recover the union and the 3rd sketch
+ union = Sketches.wrapUnion(unionMem);
+ union.union(Sketch.wrap(sketch3mem));
+
+ final Sketch resSk = union.getResult(true, resultMem);
+ final double est = resSk.getEstimate();
+
+ return est;
+ }
+
+ //Second trial: re-wraps the 3 sketches already present in heapMem, clears the union slice,
+ // builds a fresh union there with unionNomEntries and returns the estimate of all 3 sketches unioned.
+ private static double directUnionTrial2(
+     final MemorySegment heapMem, final int[] heapLayout, final int sketchNomEntries, final int unionNomEntries) {
+
+   //Carve out the same five regions described by heapLayout.
+   final MemorySegment unionSlice = heapMem.asSlice(heapLayout[0], heapLayout[1]-heapLayout[0]);
+   final MemorySegment sk1Slice = heapMem.asSlice(heapLayout[1], heapLayout[2]-heapLayout[1]);
+   final MemorySegment sk2Slice = heapMem.asSlice(heapLayout[2], heapLayout[3]-heapLayout[2]);
+   final MemorySegment sk3Slice = heapMem.asSlice(heapLayout[3], heapLayout[4]-heapLayout[3]);
+   final MemorySegment resultSlice = heapMem.asSlice(heapLayout[4], heapLayout[5]-heapLayout[4]);
+
+   //Recover the 3 sketches from their slices.
+   final UpdateSketch first = (UpdateSketch) Sketch.wrap(sk1Slice);
+   final UpdateSketch second = (UpdateSketch) Sketch.wrap(sk2Slice);
+   final UpdateSketch third = (UpdateSketch) Sketch.wrap(sk3Slice);
+
+   //Each recovered sketch must still be exact.
+   assertEquals(first.getEstimate(), sketchNomEntries, 0.0);
+   assertEquals(second.getEstimate(), sketchNomEntries, 0.0);
+   assertEquals(third.getEstimate(), sketchNomEntries, 0.0);
+
+   //Wipe the union region, then build a new union of the requested size in the same space.
+   Util.clear(unionSlice);
+   final Union freshUnion =
+       SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionSlice);
+   freshUnion.union(first);
+   freshUnion.union(second);
+   freshUnion.union(third);
+
+   final Sketch resultSketch = freshUnion.getResult(true, resultSlice);
+   return resultSketch.getEstimate();
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java
new file mode 100644
index 000000000..6848c224e
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java
@@ -0,0 +1,501 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EMPTY;
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_HEAP;
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_MEMORY_UNORDERED;
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EXACT;
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.NULL;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Random;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class SetOpsCornerCasesTest {
+
+ /*******************************************/
+ Random rand = new Random(9001); //deterministic
+
+ @Test
+ public void checkSetOpsRandom() {
+   //1000 trials with k = 64; the ranges come from the seeded generator, so runs are repeatable.
+   for (int trial = 0; trial < 1000; trial++) {
+     final int hiA = rand.nextInt(128);        //skA is fed [0, hiA), hiA drawn from [0, 128)
+     final int loB = rand.nextInt(64);
+     final int hiB = loB + rand.nextInt(64);   //skB is fed [loB, hiB), i.e. up to 63 values
+     compareSetOpsRandom(64, 0, hiA, loB, hiB);
+   }
+ }
+
+ private static void compareSetOpsRandom(int k, int loA, int hiA, int loB, int hiB) {
+ UpdateSketch tskA = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ UpdateSketch tskB = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+
+ for (int i = loA; i < hiA; i++) { tskA.update(i); }
+ for (int i = loB; i < hiB; i++) { tskB.update(i); }
+
+ CompactSketch rcskStdU = doStdUnion(tskA, tskB, k, null);
+ CompactSketch rcskPwU = doPwUnion(tskA, tskB, k);
+ checkCornerCase(rcskPwU, rcskStdU);
+
+ CompactSketch rcskStdPairU = doStdPairUnion(tskA, tskB, k, null);
+ checkCornerCase(rcskStdPairU, rcskStdU);
+
+ CompactSketch rcskStdI = doStdIntersection(tskA, tskB, null);
+ CompactSketch rcskPwI = doPwIntersection(tskA, tskB);
+ checkCornerCase(rcskPwI, rcskStdI);
+
+ CompactSketch rcskStdPairI = doStdPairIntersection(tskA, tskB, null);
+ checkCornerCase(rcskStdPairI, rcskStdI);
+
+ CompactSketch rcskStdAnotB = doStdAnotB(tskA, tskB, null);
+ CompactSketch rcskPwAnotB = doPwAnotB(tskA, tskB);
+ checkCornerCase(rcskPwAnotB, rcskStdAnotB);
+
+ CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tskA, tskB, null);
+ checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB);
+ }
+
+ /*******************************************/
+
+ @Test
+ //Runs every (stateA, stateB) pair of State values through the heap and the MemorySegment checks.
+ //Pairs that contain NULL or EST_MEMORY_UNORDERED are skipped, so the unordered case is not tested.
+ public void compareCornerCases() {
+   final int k = 64;
+   for (final State stateA : State.values()) {
+     for (final State stateB : State.values()) {
+       final boolean hasUnordered = (stateA == EST_MEMORY_UNORDERED) || (stateB == EST_MEMORY_UNORDERED);
+       final boolean hasNull = (stateA == NULL) || (stateB == NULL);
+       if (hasUnordered || hasNull) { continue; }
+       cornerCaseChecks(stateA, stateB, k);
+       cornerCaseChecksMemory(stateA, stateB, k);
+     }
+   }
+ }
+
+// @Test
+// public void checkExactNullSpecificCase() {
+// cornerCaseChecksMemory(State.EXACT, State.NULL, 64);
+// }
+
+ //For the given pair of states, compares the call forms of Union, Intersection and AnotB twice:
+ // first with results on the heap (null destination), then with results requested into wseg.
+ //wseg is re-allocated per operation, sized by SetOperation.getMaxUnionBytes(k),
+ // getMaxIntersectionBytes(k) and getMaxAnotBResultBytes(k) respectively.
+ //NOTE(review): in each segment-backed pair both results are written into the SAME wseg. If the
+ // returned CompactSketch is a view over wseg rather than a copy, the second call overwrites what the
+ // first result reads and the comparison is vacuous. Confirm, or use a separate segment per result.
+ private static void cornerCaseChecksMemory(State stateA, State stateB, int k) {
+ println("StateA: " + stateA + ", StateB: " + stateB);
+ CompactSketch tcskA = generate(stateA, k);
+ CompactSketch tcskB = generate(stateB, k);
+
+ MemorySegment wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxUnionBytes(k)]);
+
+ CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null);
+ CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k);
+ checkCornerCase(rcskPwU, rcskStdU); //heap, heap
+
+ rcskStdU = doStdUnion(tcskA, tcskB, k, wseg);
+ CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, wseg);
+ checkCornerCase(rcskStdPairU, rcskStdU); //direct, direct
+
+ wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxIntersectionBytes(k)]);
+
+ CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null);
+ CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB);
+ checkCornerCase(rcskPwI, rcskStdI); //empty, empty
+
+ rcskStdI = doStdIntersection(tcskA, tcskB, wseg);
+ CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, wseg);
+ checkCornerCase(rcskStdPairI, rcskStdI); //empty, empty //direct, direct???
+
+ wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxAnotBResultBytes(k)]);
+
+ CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null);
+ CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB);
+ checkCornerCase(rcskPwAnotB, rcskStdAnotB); //heap, heap
+
+ rcskStdAnotB = doStdAnotB(tcskA, tcskB, wseg);
+ CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB, wseg);
+ checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); //direct, heap
+ }
+
+ private static void cornerCaseChecks(State stateA, State stateB, int k) {
+ println("StateA: " + stateA + ", StateB: " + stateB);
+ CompactSketch tcskA = generate(stateA, k);
+ CompactSketch tcskB = generate(stateB, k);
+
+ CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null);
+ CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k);
+ checkCornerCase(rcskPwU, rcskStdU);
+
+ CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, null);
+ checkCornerCase(rcskStdPairU, rcskStdU);
+
+ CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null);
+ CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB);
+ checkCornerCase(rcskPwI, rcskStdI);
+
+ CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, null);
+ checkCornerCase(rcskStdPairI, rcskStdI);
+
+ CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null);
+ CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB);
+ checkCornerCase(rcskPwAnotB, rcskStdAnotB);
+
+ CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB, null);
+ checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB);
+ }
+
+ private static CompactSketch doStdUnion(Sketch tskA, Sketch tskB, int k, MemorySegment wseg) {
+ Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion();
+ union.union(tskA);
+ union.union(tskB);
+ return union.getResult(true, wseg);
+ }
+
+ private static CompactSketch doStdPairUnion(Sketch tskA, Sketch tskB, int k, MemorySegment wseg) {
+ Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion();
+ return union.union(tskA, tskB, true, wseg);
+ }
+
+ private static CompactSketch doStdIntersection(Sketch tskA, Sketch tskB, MemorySegment wseg) {
+ Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ inter.intersect(tskA);
+ inter.intersect(tskB);
+ return inter.getResult(true, wseg);
+ }
+
+ private static CompactSketch doStdPairIntersection(Sketch tskA, Sketch tskB, MemorySegment wseg) {
+ Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ return inter.intersect(tskA, tskB, true, wseg);
+ }
+
+ private static CompactSketch doStdAnotB(Sketch tskA, Sketch tskB, MemorySegment wseg) {
+ AnotB anotb = Sketches.setOperationBuilder().buildANotB();
+ return anotb.aNotB(tskA, tskB, true, wseg);
+ }
+
+ private static CompactSketch doStdStatefulAnotB(Sketch tskA, Sketch tskB, MemorySegment wseg) {
+ AnotB anotb = Sketches.setOperationBuilder().buildANotB();
+ anotb.setA(tskA);
+ anotb.notB(tskB);
+ anotb.getResult(false);
+ return anotb.getResult(true, wseg, true);
+ }
+
+ private static CompactSketch doPwUnion(Sketch tskA, Sketch tskB, int k) {
+ CompactSketch tcskA, tcskB;
+ if (tskA == null) { tcskA = null; }
+ else { tcskA = (tskA instanceof CompactSketch) ? (CompactSketch) tskA : tskA.compact(); }
+ if (tskB == null) { tcskB = null; }
+ else { tcskB = (tskB instanceof CompactSketch) ? (CompactSketch) tskB : tskB.compact(); }
+ Union union = SetOperation.builder().setNominalEntries(k).buildUnion();
+ return union.union(tcskA, tcskB);
+ }
+
+ private static CompactSketch doPwIntersection(Sketch tskA, Sketch tskB) {
+ Intersection inter = SetOperation.builder().buildIntersection();
+ return inter.intersect(tskA, tskB);
+ }
+
+ private static CompactSketch doPwAnotB(Sketch tskA, Sketch tskB) {
+ AnotB aNotB = SetOperation.builder().buildANotB();
+ return aNotB.aNotB(tskA, tskB);
+ }
+
+
+ private static void checkCornerCase(Sketch rskA, Sketch rskB) {
+ double estA = rskA.getEstimate();
+ double estB = rskB.getEstimate();
+ boolean emptyA = rskA.isEmpty();
+ boolean emptyB = rskB.isEmpty();
+ long thetaLongA = rskA.getThetaLong();
+ long thetaLongB = rskB.getThetaLong();
+ int countA = rskA.getRetainedEntries(true);
+ int countB = rskB.getRetainedEntries(true);
+ Assert.assertEquals(estB, estA, 0.0);
+ Assert.assertEquals(emptyB, emptyA);
+ Assert.assertEquals(thetaLongB, thetaLongA);
+ Assert.assertEquals(countB, countA);
+ Assert.assertEquals(rskA.getClass().getSimpleName(), rskB.getClass().getSimpleName());
+ }
+
+ /*******************************************/
+
+ @Test
+ public void checkUnionNotOrdered() {
+   final int k = 64;
+   final CompactSketch nullSk = generate(NULL, k);
+   final CompactSketch emptySk = generate(EMPTY, k);
+   final CompactSketch orderedSk = generate(EST_HEAP, k);
+   final CompactSketch unorderedSk = generate(EST_MEMORY_UNORDERED, k);
+   final Union pwUnion = SetOperation.builder().setNominalEntries(k).buildUnion();
+
+   //No assertions: the test only requires that none of these pairings throws.
+   pwUnion.union(nullSk, unorderedSk);
+   pwUnion.union(emptySk, unorderedSk);
+   pwUnion.union(unorderedSk, nullSk);
+   pwUnion.union(unorderedSk, emptySk);
+   pwUnion.union(unorderedSk, orderedSk);
+   pwUnion.union(orderedSk, unorderedSk);
+ }
+
+ @Test
+ //Checks that Intersection, AnotB and Union each throw when one input was built with seed 123
+ // and the other came from generate(), which does not set a seed.
+ //Each try block must throw. Assert.fail() raises an AssertionError, which is an Error rather than
+ // an Exception, so the broad catch (Exception e) clauses do not hide a missing exception.
+ //NOTE(review): the catch clauses accept any Exception type; narrowing them to the specific
+ // seed-mismatch exception would make the test stricter - confirm which type the operations throw.
+ public void checkSeedHash() {
+ int k = 64;
+ UpdateSketch tmp1 = Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build();
+ tmp1.update(1);
+ tmp1.update(3);
+ CompactSketch skSmallSeed2A = tmp1.compact(true, null);
+
+ UpdateSketch tmp2 = Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build();
+ tmp2.update(1);
+ tmp2.update(2);
+ CompactSketch skSmallSeed2B = tmp2.compact(true, null);
+
+ //Sketches built by generate(), i.e. without an explicit seed.
+ CompactSketch skExact = generate(EXACT, k);
+ CompactSketch skHeap = generate(EST_HEAP, 2 * k);
+
+ Intersection inter = SetOperation.builder().buildIntersection();
+ AnotB aNotB = SetOperation.builder().buildANotB();
+ Union union = SetOperation.builder().setNominalEntries(k).buildUnion();
+
+ //Intersect
+ try {
+ inter.intersect(skExact, skSmallSeed2A);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ inter.intersect(skExact, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ inter.intersect(skSmallSeed2B, skExact);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ inter.intersect(skHeap, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ //A NOT B
+ try {
+ aNotB.aNotB(skExact, skSmallSeed2A);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ aNotB.aNotB(skExact, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ aNotB.aNotB(skSmallSeed2B, skExact);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ aNotB.aNotB(skHeap, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ //Union
+ try {
+ union.union(skExact, skSmallSeed2A);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ union.union(skExact, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ union.union(skSmallSeed2B, skExact);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ union.union(skHeap, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ }
+
+ @Test
+ public void checkPwUnionReduceToK() {
+   final int k = 16;
+   final CompactSketch nullSk = generate(NULL, k);
+   final CompactSketch emptySk = generate(EMPTY, k);
+   final CompactSketch estSk1 = generate(EST_HEAP, k);
+   final CompactSketch estSk2 = generate(EST_HEAP, k);
+   final Union pwUnion = SetOperation.builder().setNominalEntries(k).buildUnion();
+
+   //Whatever the estimating sketch is paired with, the result must retain exactly k entries.
+   final CompactSketch nullThenEst = pwUnion.union(nullSk, estSk1);
+   Assert.assertEquals(nullThenEst.getRetainedEntries(true), k);
+
+   final CompactSketch emptyThenEst = pwUnion.union(emptySk, estSk1);
+   Assert.assertEquals(emptyThenEst.getRetainedEntries(true), k);
+
+   final CompactSketch estThenNull = pwUnion.union(estSk1, nullSk);
+   Assert.assertEquals(estThenNull.getRetainedEntries(true), k);
+
+   final CompactSketch estThenEmpty = pwUnion.union(estSk1, emptySk);
+   Assert.assertEquals(estThenEmpty.getRetainedEntries(true), k);
+
+   final CompactSketch estThenEst = pwUnion.union(estSk1, estSk2);
+   Assert.assertEquals(estThenEst.getRetainedEntries(true), k);
+ }
+
+ @Test
+ public void printlnTest() {
+   //Exercises println(String) with this test class's name.
+   final String className = getClass().getName();
+   println("PRINTING: " + className);
+ }
+
+ /**
+ * Output hook for the tests in this class. The only statement in the body is commented out,
+ * so as written this method does nothing; uncomment it to see the test output.
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+ @Test
+ //Verifies that generate(State, int) produces a sketch with the expected properties for every State:
+ // emptiness, estimation mode, retained-entry count, theta relative to Long.MAX_VALUE,
+ // isDirect, hasMemorySegment and ordering.
+ public void checkGenerator() {
+ int k = 16;
+ CompactSketch csk;
+
+ //NULL yields no sketch at all.
+ csk = generate(State.NULL, 0);
+ assertNull(csk);
+
+ csk = generate(State.EMPTY, k);
+ assertEquals(csk.isEmpty(), true);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), 0);
+ assertEquals(csk.getThetaLong(), Long.MAX_VALUE);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.SINGLE, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), 1);
+ assertEquals(csk.getThetaLong(), Long.MAX_VALUE);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.EXACT, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), k);
+ assertEquals(csk.getThetaLong(), Long.MAX_VALUE);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.EST_HEAP, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), true);
+ assertEquals(csk.getRetainedEntries(true) > k, true);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ //Theta below 1.0, zero entries, not empty.
+ csk = generate(State.THLT1_CNT0_FALSE, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), true);
+ assertEquals(csk.getRetainedEntries(true), 0);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ //Theta not below Long.MAX_VALUE, zero entries, empty.
+ csk = generate(State.THEQ1_CNT0_TRUE, k);
+ assertEquals(csk.isEmpty(), true);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), 0);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, false);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ //The only state expected to have a MemorySegment and to be unordered.
+ csk = generate(State.EST_MEMORY_UNORDERED, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), true);
+ assertEquals(csk.getRetainedEntries(true) > k, true);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), true);
+ assertEquals(csk.isOrdered(), false);
+ }
+
+ /** The sketch configurations that generate(State, int) can build; the tests iterate over these as corner cases. */
+ enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_MEMORY_UNORDERED}
+
+ /**
+ * Builds a CompactSketch in the requested corner-case state.
+ * @param state the configuration to produce
+ * @param k nominal entries passed to the update sketch builder
+ * @return the compacted sketch, or null for State.NULL
+ */
+ private static CompactSketch generate(State state, int k) {
+ UpdateSketch sk = null;
+ CompactSketch csk = null;
+
+ switch(state) {
+ case NULL : {
+ //already null
+ break;
+ }
+ case EMPTY : { //results in EmptyCompactSketch
+ csk = Sketches.updateSketchBuilder().setNominalEntries(k).build().compact(true, null);
+ break;
+ }
+ case SINGLE : { //results in SingleItemSketches most of the time
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ sk.update(1);
+ csk = sk.compact(true, null);
+ break;
+ }
+ case EXACT : { //exactly k distinct values
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < k; i++) {
+ sk.update(i);
+ }
+ csk = sk.compact(true, null);
+ break;
+ }
+ case EST_HEAP : { //4k distinct values, compacted ordered with no destination segment
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < (4 * k); i++) {
+ sk.update(i);
+ }
+ csk = sk.compact(true, null);
+ break;
+ }
+ case THLT1_CNT0_FALSE : {
+ sk = Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build();
+ sk.update(7); //above theta
+ assert(sk.getRetainedEntries(true) == 0); //only checked when the JVM runs with -ea
+ csk = sk.compact(true, null); //compact as {Th < 1.0, 0, F}
+ break;
+ }
+ case THEQ1_CNT0_TRUE : { //same builder settings as above, but never updated
+ sk = Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build();
+ assert(sk.getRetainedEntries(true) == 0); //only checked when the JVM runs with -ea
+ csk = sk.compact(true, null); //compact as {Th = 1.0, 0, T}
+ break;
+ }
+ case EST_MEMORY_UNORDERED : { //4k distinct values, compacted unordered into a byte-array segment
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < (4 * k); i++) {
+ sk.update(i);
+ }
+ int bytes = Sketch.getMaxCompactSketchBytes(sk.getRetainedEntries(true));
+ byte[] byteArr = new byte[bytes];
+ MemorySegment wseg = MemorySegment.ofArray(byteArr);
+ csk = sk.compact(false, wseg);
+ break;
+ }
+ }
+ return csk;
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java
new file mode 100644
index 000000000..277aae961
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1;
+import static org.apache.datasketches.theta2.Sketches.getCompactSketchMaxBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxCompactSketchBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxIntersectionBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxUnionBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxUpdateSketchBytes;
+import static org.apache.datasketches.theta2.Sketches.getSerializationVersion;
+import static org.apache.datasketches.theta2.Sketches.heapifySetOperation;
+import static org.apache.datasketches.theta2.Sketches.heapifySketch;
+import static org.apache.datasketches.theta2.Sketches.setOperationBuilder;
+import static org.apache.datasketches.theta2.Sketches.updateSketchBuilder;
+import static org.apache.datasketches.theta2.Sketches.wrapSetOperation;
+import static org.apache.datasketches.theta2.Sketches.wrapSketch;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class SketchesTest {
+
+ private static MemorySegment getCompactSketchMemory(final int k, final int from, final int to) {
+ final UpdateSketch sk1 = updateSketchBuilder().setNominalEntries(k).build();
+ for (int i=from; i previous);
+ previous = it.get();
+ }
+ }
+ }
+
+ @Test(groups = {CHECK_CPP_FILES})
+ public void deserializeFromCppCompressed() throws IOException {
+   //One serialized file per stream length, named theta_compressed_n<N>_cpp.sk under cppPath.
+   final int[] streamLengths = {10, 100, 1000, 10000, 100000, 1000000};
+   for (int idx = 0; idx < streamLengths.length; idx++) {
+     final int n = streamLengths[idx];
+     final String fileName = "theta_compressed_n" + n + "_cpp.sk";
+     final byte[] bytes = Files.readAllBytes(cppPath.resolve(fileName));
+     final CompactSketch sketch = CompactSketch.wrap(MemorySegment.ofArray(bytes));
+
+     //The sketch must be empty exactly when n is zero.
+     assertTrue(sketch.isEmpty() == (n == 0));
+     assertEquals(sketch.getEstimate(), n, n * 0.03);
+     assertTrue(sketch.isOrdered());
+
+     //Every hash must be below theta and strictly greater than the one before it.
+     final HashIterator it = sketch.iterator();
+     long previous = 0;
+     while (it.next()) {
+       assertTrue(it.get() < sketch.getThetaLong());
+       assertTrue(it.get() > previous);
+       previous = it.get();
+     }
+   }
+ }
+
+ @Test(groups = {CHECK_CPP_FILES})
+ public void deserializeFromCppNonEmptyNoEntries() throws IOException {
+   //The file must wrap into a sketch that is not empty yet retains zero entries.
+   final String fileName = "theta_non_empty_no_entries_cpp.sk";
+   final byte[] serialized = Files.readAllBytes(cppPath.resolve(fileName));
+   final MemorySegment seg = MemorySegment.ofArray(serialized);
+   final CompactSketch sketch = CompactSketch.wrap(seg);
+   assertFalse(sketch.isEmpty());
+   assertEquals(sketch.getRetainedEntries(), 0);
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java
new file mode 100644
index 000000000..88dd009c0
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.theta2.CompactSketch;
+import org.apache.datasketches.theta2.Intersection;
+import org.apache.datasketches.theta2.Sketches;
+import org.apache.datasketches.theta2.UpdateSketch;
+import org.testng.annotations.Test;
+
+public class BoundsOnRatiosInThetaSketchedSets2Test {
+ //Exercises the B/A ratio estimate and its lower/upper bounds from BoundsOnRatiosInThetaSketchedSets2.
+ @Test
+ public void checkNormalReturns() {
+ final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K
+ final UpdateSketch skC = Sketches.updateSketchBuilder().build();
+ final int uA = 10000; //skA is fed the values 0 .. uA-1
+ final int uC = 100000; //skC is fed the values uA/2 .. uA/2 + uC-1
+ for (int i = 0; i < uA; i++) { skA.update(i); }
+ for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } //offset makes skC overlap the upper half of skA's inputs
+ final Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ inter.intersect(skA);
+ inter.intersect(skC);
+ final CompactSketch skB = inter.getResult(); //skB = result of intersecting skA with skC
+
+ double est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB);
+ double lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB);
+ double ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB);
+ assertTrue(ub > est); //bounds must strictly bracket the estimate
+ assertTrue(est > lb);
+ assertEquals(est, 0.5, .03); //half of skA's inputs were also fed to skC
+ println("ub : " + ub);
+ println("est: " + est);
+ println("lb : " + lb);
+ skA.reset(); //skA is now empty
+ est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB); //results of this group are printed, not asserted
+ lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB);
+ ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB);
+ println("ub : " + ub);
+ println("est: " + est);
+ println("lb : " + lb);
+ skC.reset(); //Now both are empty
+ est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC); //results of this group are printed, not asserted
+ lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skC);
+ ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skC);
+ println("ub : " + ub);
+ println("est: " + est);
+ println("lb : " + lb);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkAbnormalReturns() {
+ final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K
+ final UpdateSketch skC = Sketches.updateSketchBuilder().build();
+ final int uA = 100000; //skA is fed the values 0 .. uA-1
+ final int uC = 10000; //skC is fed the values uA/2 .. uA/2 + uC-1
+ for (int i = 0; i < uA; i++) { skA.update(i); }
+ for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); }
+ BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC); //must throw (see annotation); presumably skC's theta exceeds skA's - TODO confirm
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: " + this.getClass().getName());
+ }
+
+ /** Print hook used by the tests above; currently a no-op because the output call below is commented out.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+}
From e0a9710ebde3030f6a40d1a134058c6e40250e6e Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Mon, 16 Jun 2025 16:54:17 -0700
Subject: [PATCH 13/25] Remove use of aligned heap segments. Consider in the
future.
---
src/main/java/org/apache/datasketches/common/Util.java | 4 ++--
.../datasketches/theta2/DirectQuickSelectSketch.java | 10 +---------
.../datasketches/theta2/BackwardConversions.java | 2 +-
3 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java
index 4701ddf9e..493ad7879 100644
--- a/src/main/java/org/apache/datasketches/common/Util.java
+++ b/src/main/java/org/apache/datasketches/common/Util.java
@@ -909,7 +909,7 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme
}
/**
- * Request a new heap MemorySegment with the given capacityBytes and 8-byte aligned or one byte aligned.
+ * Request a new heap MemorySegment with the given capacityBytes that is either 8-byte aligned or one-byte aligned.
*
* If aligned is true, the returned MemorySegment will be constructed from a long[] array,
* and, as a result, it will have a memory alignment of 8 bytes.
@@ -923,7 +923,7 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme
* @param aligned if true, the new heap segment will have an alignment of 8 bytes, otherwise the alignment will be 1 byte.
* @return a new MemorySegment with the requested capacity and alignment.
*/
- public static MemorySegment newHeapSegment(final int capacityBytes, final boolean aligned) {
+ public static MemorySegment alignedHeapSegment(final int capacityBytes, final boolean aligned) {
if (aligned) {
final int lenLongs = capacityBytes >>> 3;
final long[] array = ((capacityBytes & 0x7) == 0)
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
index 193385a1f..213dd7f4a 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
@@ -24,7 +24,6 @@
import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
-import static org.apache.datasketches.common.Util.newHeapSegment;
import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK;
import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE;
import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE;
@@ -316,14 +315,7 @@ UpdateReturnState hashUpdate(final long hash) {
tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1);
final int tgtArrBytes = 8 << tgtLgArrLongs;
final int reqBytes = tgtArrBytes + preBytes;
-
- //memReqSvr_ = (memReqSvr_ == null) ? wseg_.getMemoryRequestServer() : memReqSvr_;
- //if (memReqSvr_ == null) { //in case the MRS is not enabled or null.
- // throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand.");
- //}
- //final MemorySegment newDstSeg = memReqSvr_.request(wseg_, reqBytes);
-
- final MemorySegment newDstSeg = newHeapSegment(reqBytes, false);
+ final MemorySegment newDstSeg = MemorySegment.ofArray(new byte[reqBytes]);
moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong);
wseg_ = newDstSeg;
diff --git a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
index bec67b219..74aec9bb8 100644
--- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
+++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
@@ -220,7 +220,7 @@ public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, fi
final int entries = skV3.getRetainedEntries(true);
final boolean unordered = !(skV3.isOrdered());
final byte flags = (byte) (0xA | (unordered ? 16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE
- wseg = Util.newHeapSegment((preLongs + entries) << 3, false);
+ wseg = MemorySegment.ofArray(new byte[(preLongs + entries) << 3]);
wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs
wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
From 6b2d7ab2dd0fbb4e4e543c7c4c2f003ff81bed31 Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Wed, 18 Jun 2025 15:01:56 -0700
Subject: [PATCH 14/25] Make classes Final where possible
Make use of MemorySegmentStatus where required
---
.../apache/datasketches/theta2/BitPacking.java | 4 +++-
.../BytesCompactCompressedHashIterator.java | 2 +-
.../theta2/BytesCompactHashIterator.java | 2 +-
.../ConcurrentBackgroundThetaPropagation.java | 2 +-
.../theta2/DirectCompactCompressedSketch.java | 2 +-
.../theta2/ForwardCompatibility.java | 2 ++
.../theta2/HeapCompactHashIterator.java | 2 +-
.../datasketches/theta2/HeapCompactSketch.java | 2 +-
.../datasketches/theta2/HeapHashIterator.java | 2 +-
.../datasketches/theta2/IntersectionImpl.java | 2 +-
.../datasketches/theta2/JaccardSimilarity.java | 2 ++
.../MemoryCompactCompressedHashIterator.java | 2 +-
.../datasketches/theta2/MemoryHashIterator.java | 2 +-
.../theta2/SetOperationBuilder.java | 2 +-
.../org/apache/datasketches/theta2/Sketch.java | 17 -----------------
.../theta2/UpdateSketchBuilder.java | 2 +-
.../theta2/WrappedCompactCompressedSketch.java | 6 +++---
17 files changed, 22 insertions(+), 33 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/theta2/BitPacking.java b/src/main/java/org/apache/datasketches/theta2/BitPacking.java
index e2b6be2fd..66d5a245e 100644
--- a/src/main/java/org/apache/datasketches/theta2/BitPacking.java
+++ b/src/main/java/org/apache/datasketches/theta2/BitPacking.java
@@ -24,7 +24,9 @@
/**
* Used as part of Theta compression.
*/
-public class BitPacking {
+public final class BitPacking {
+
+ private BitPacking() { }
/**
* The bit packing operation
diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java
index 81a985922..6a2ddddd7 100644
--- a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java
@@ -22,7 +22,7 @@
/*
* This is to uncompress serial version 4 sketch incrementally
*/
-class BytesCompactCompressedHashIterator implements HashIterator {
+final class BytesCompactCompressedHashIterator implements HashIterator {
private byte[] bytes;
private int offset;
private int entryBits;
diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java
index 9a4754574..3586f54c4 100644
--- a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java
@@ -24,7 +24,7 @@
/*
* This is to iterate over serial version 3 sketch representation
*/
-class BytesCompactHashIterator implements HashIterator {
+final class BytesCompactHashIterator implements HashIterator {
final private byte[] bytes;
final private int offset;
final private int numEntries;
diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java
index 2d529c4ce..f578dc6a1 100644
--- a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java
+++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java
@@ -29,7 +29,7 @@
*
* @author eshcar
*/
-class ConcurrentBackgroundThetaPropagation implements Runnable {
+final class ConcurrentBackgroundThetaPropagation implements Runnable {
// Shared sketch to absorb the data
private final ConcurrentSharedThetaSketch sharedThetaSketch;
diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java
index 9be51c379..8ed907321 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java
@@ -40,7 +40,7 @@
* This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection.
*/
-class DirectCompactCompressedSketch extends DirectCompactSketch {
+final class DirectCompactCompressedSketch extends DirectCompactSketch {
/**
* Construct this sketch with the given MemorySegment.
* @param seg Read-only MemorySegment object.
diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
index 9791a7902..a6635653c 100644
--- a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
+++ b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
@@ -38,6 +38,8 @@
*/
final class ForwardCompatibility {
+ private ForwardCompatibility() { }
+
/**
* Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch.
* Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java
index b10ffcaaf..bd06f6ecd 100644
--- a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.theta2;
-class HeapCompactHashIterator implements HashIterator {
+final class HeapCompactHashIterator implements HashIterator {
private long[] cache;
private int index;
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
index 6cffd9818..1c0cbb0cc 100644
--- a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java
@@ -38,7 +38,7 @@
*
* @author Lee Rhodes
*/
-class HeapCompactSketch extends CompactSketch {
+final class HeapCompactSketch extends CompactSketch {
private final long thetaLong_; //computed
private final int curCount_;
private final int preLongs_; //computed
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java
index c2b098c25..29ae42a0e 100644
--- a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java
@@ -22,7 +22,7 @@
/**
* @author Lee Rhodes
*/
-class HeapHashIterator implements HashIterator {
+final class HeapHashIterator implements HashIterator {
private long[] cache;
private long thetaLong;
private int index;
diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
index 92ca096c3..74228b1fa 100644
--- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java
@@ -79,7 +79,7 @@
* @author Lee Rhodes
* @author Kevin Lang
*/
-class IntersectionImpl extends Intersection {
+final class IntersectionImpl extends Intersection {
protected final short seedHash_;
protected final boolean readOnly_; //True if this sketch is to be treated as read only
protected final MemorySegment wseg_;
diff --git a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
index 624dcc3d7..de5fff58c 100644
--- a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
+++ b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
@@ -37,6 +37,8 @@ public final class JaccardSimilarity {
private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB
private static final double[] ONES = {1.0, 1.0, 1.0};
+ private JaccardSimilarity() { }
+
/**
* Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
* J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each
diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
index 31aa6ff92..11d0168a0 100644
--- a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java
@@ -30,7 +30,7 @@
/*
* This is to uncompress serial version 4 sketch incrementally
*/
-class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus {
+final class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus {
private MemorySegment seg;
private int offset;
private int entryBits;
diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java
index eb2137afd..3022d59ff 100644
--- a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java
+++ b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java
@@ -26,7 +26,7 @@
/**
* @author Lee Rhodes
*/
-class MemoryHashIterator implements HashIterator {
+final class MemoryHashIterator implements HashIterator {
private MemorySegment seg;
private int arrLongs;
private long thetaLong;
diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java
index cf64326b8..5a05a7d1f 100644
--- a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java
+++ b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java
@@ -35,7 +35,7 @@
*
* @author Lee Rhodes
*/
-public class SetOperationBuilder {
+public final class SetOperationBuilder {
private int bLgNomLongs;
private long bSeed;
private ResizeFactor bRF;
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java
index e98396842..5d619c580 100644
--- a/src/main/java/org/apache/datasketches/theta2/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java
@@ -409,23 +409,6 @@ public boolean isEstimationMode() {
*/
public abstract boolean isOrdered();
- /**
- * Returns true if the backing MemorySegment of this object refers to the same MemorySegment of that.
- * They can either have the same off-heap memory location and size, or refer to the same on-heap array object.
- *
- * If both segment are off-heap, they both must have the same starting address and the same size.
- *
- * For on-heap segments, both segments must be based on or derived from the same array object and neither segment
- * can be read-only.
- *
- * Returns false if either argument is null;
- *
- * @param that The given MemorySegment.
- * @return true if the backing MemorySegment of this object hierarchy refers to the same MemorySegment of that.
- */
- @Override
- public abstract boolean isSameResource(final MemorySegment that);
-
/**
* Returns a HashIterator that can be used to iterate over the retained hash values of the
* Theta sketch.
diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java
index e8353888f..0326ceb06 100644
--- a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java
+++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java
@@ -37,7 +37,7 @@
*
* @author Lee Rhodes
*/
-public class UpdateSketchBuilder {
+public final class UpdateSketchBuilder {
private int bLgNomLongs;
private long bSeed;
private ResizeFactor bRF;
diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java
index 1558c49e7..3ba16c3fa 100644
--- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java
@@ -32,8 +32,8 @@
*
* This sketch can only be associated with a Serialization Version 4 format binary image.
*/
-class WrappedCompactCompressedSketch extends WrappedCompactSketch {
-
+final class WrappedCompactCompressedSketch extends WrappedCompactSketch {
+
/**
* Construct this sketch with the given bytes.
* @param bytes containing serialized compact compressed sketch.
@@ -66,7 +66,7 @@ public int getCurrentBytes() {
private static final int START_PACKED_DATA_EXACT_MODE = 8;
private static final int START_PACKED_DATA_ESTIMATION_MODE = 16;
-
+
@Override
public int getRetainedEntries(final boolean valid) { //compact is always valid
// number of entries is stored using variable length encoding
From 8fffa8c36c940a0fda82e212408e31dd525e12e3 Mon Sep 17 00:00:00 2001
From: Lee Rhodes
Date: Tue, 24 Jun 2025 15:46:49 -0700
Subject: [PATCH 15/25] All of Tuple2 converted to FFM
---
.../org/apache/datasketches/common/Util.java | 2 +-
.../BoundsOnRatiosInTupleSketchedSets2.java | 204 ++++++
.../org/apache/datasketches/tuple2/AnotB.java | 636 ++++++++++++++++++
.../datasketches/tuple2/CompactSketch.java | 256 +++++++
.../tuple2/DeserializeResult.java | 55 ++
.../apache/datasketches/tuple2/Filter.java | 76 +++
.../datasketches/tuple2/HashTables.java | 169 +++++
.../datasketches/tuple2/Intersection.java | 254 +++++++
.../tuple2/JaccardSimilarity.java | 370 ++++++++++
.../tuple2/QuickSelectSketch.java | 621 +++++++++++++++++
.../tuple2/SerializerDeserializer.java | 99 +++
.../apache/datasketches/tuple2/Sketch.java | 224 ++++++
.../apache/datasketches/tuple2/Sketches.java | 72 ++
.../apache/datasketches/tuple2/Summary.java | 46 ++
.../tuple2/SummaryDeserializer.java | 42 ++
.../datasketches/tuple2/SummaryFactory.java | 34 +
.../tuple2/SummarySetOperations.java | 56 ++
.../tuple2/TupleSketchIterator.java | 75 +++
.../org/apache/datasketches/tuple2/Union.java | 225 +++++++
.../datasketches/tuple2/UpdatableSketch.java | 190 ++++++
.../tuple2/UpdatableSketchBuilder.java | 107 +++
.../datasketches/tuple2/UpdatableSummary.java | 36 +
.../org/apache/datasketches/tuple2/Util.java | 172 +++++
.../tuple2/adouble/DoubleSketch.java | 85 +++
.../tuple2/adouble/DoubleSummary.java | 162 +++++
.../adouble/DoubleSummaryDeserializer.java | 38 ++
.../tuple2/adouble/DoubleSummaryFactory.java | 46 ++
.../adouble/DoubleSummarySetOperations.java | 83 +++
.../tuple2/adouble/package-info.java | 23 +
.../tuple2/aninteger/IntegerSketch.java | 86 +++
.../tuple2/aninteger/IntegerSummary.java | 162 +++++
.../aninteger/IntegerSummaryDeserializer.java | 38 ++
.../aninteger/IntegerSummaryFactory.java | 46 ++
.../IntegerSummarySetOperations.java | 67 ++
.../tuple2/aninteger/package-info.java | 23 +
.../arrayofdoubles/ArrayOfDoublesAnotB.java | 57 ++
.../ArrayOfDoublesAnotBImpl.java | 237 +++++++
.../ArrayOfDoublesCombiner.java | 35 +
.../ArrayOfDoublesCompactSketch.java | 64 ++
.../ArrayOfDoublesIntersection.java | 184 +++++
.../ArrayOfDoublesQuickSelectSketch.java | 196 ++++++
.../ArrayOfDoublesSetOperationBuilder.java | 137 ++++
.../arrayofdoubles/ArrayOfDoublesSketch.java | 290 ++++++++
.../ArrayOfDoublesSketchIterator.java | 50 ++
.../ArrayOfDoublesSketches.java | 145 ++++
.../arrayofdoubles/ArrayOfDoublesUnion.java | 207 ++++++
.../ArrayOfDoublesUpdatableSketch.java | 229 +++++++
.../ArrayOfDoublesUpdatableSketchBuilder.java | 131 ++++
.../DirectArrayOfDoublesCompactSketch.java | 288 ++++++++
.../DirectArrayOfDoublesIntersection.java | 52 ++
...DirectArrayOfDoublesQuickSelectSketch.java | 433 ++++++++++++
...irectArrayOfDoublesQuickSelectSketchR.java | 42 ++
.../DirectArrayOfDoublesSketchIterator.java | 83 +++
.../DirectArrayOfDoublesUnion.java | 92 +++
.../DirectArrayOfDoublesUnionR.java | 47 ++
.../tuple2/arrayofdoubles/HashTables.java | 130 ++++
.../HeapArrayOfDoublesCompactSketch.java | 233 +++++++
.../HeapArrayOfDoublesIntersection.java | 42 ++
.../HeapArrayOfDoublesQuickSelectSketch.java | 363 ++++++++++
.../HeapArrayOfDoublesSketchIterator.java | 65 ++
.../HeapArrayOfDoublesUnion.java | 73 ++
.../tuple2/arrayofdoubles/package-info.java | 24 +
.../datasketches/tuple2/package-info.java | 25 +
.../tuple2/strings/ArrayOfStringsSketch.java | 103 +++
.../tuple2/strings/ArrayOfStringsSummary.java | 185 +++++
.../ArrayOfStringsSummaryDeserializer.java | 51 ++
.../strings/ArrayOfStringsSummaryFactory.java | 35 +
.../ArrayOfStringsSummarySetOperations.java | 40 ++
.../tuple2/strings/package-info.java | 24 +
.../CompactSketchWithDoubleSummaryTest.java | 189 ++++++
.../datasketches/tuple2/IntegerSummary.java | 81 +++
.../tuple2/IntegerSummaryDeserializer.java | 31 +
.../tuple2/IntegerSummaryFactory.java | 32 +
.../tuple2/JaccardSimilarityTest.java | 457 +++++++++++++
.../apache/datasketches/tuple2/MiscTest.java | 95 +++
.../tuple2/ReadOnlyMemoryTest.java | 121 ++++
.../tuple2/SerializerDeserializerTest.java | 59 ++
.../tuple2/TupleCrossLanguageTest.java | 126 ++++
.../tuple2/TupleExamples2Test.java | 287 ++++++++
.../tuple2/TupleExamplesTest.java | 191 ++++++
.../tuple2/adouble/AdoubleAnotBTest.java | 299 ++++++++
.../adouble/AdoubleIntersectionTest.java | 305 +++++++++
.../tuple2/adouble/AdoubleTest.java | 421 ++++++++++++
.../tuple2/adouble/AdoubleUnionTest.java | 173 +++++
.../tuple2/adouble/FilterTest.java | 152 +++++
.../CornerCaseTupleSetOperationsTest.java | 630 +++++++++++++++++
.../tuple2/aninteger/EngagementTest.java | 143 ++++
.../tuple2/aninteger/IntegerSketchTest.java | 140 ++++
.../aninteger/MikhailsBugTupleTest.java | 74 ++
.../aninteger/ParameterLeakageTest.java | 180 +++++
.../AodSketchCrossLanguageTest.java | 118 ++++
.../ArrayOfDoublesAnotBTest.java | 323 +++++++++
.../ArrayOfDoublesCompactSketchTest.java | 139 ++++
.../ArrayOfDoublesIntersectionTest.java | 311 +++++++++
.../ArrayOfDoublesQuickSelectSketchTest.java | 164 +++++
.../ArrayOfDoublesUnionTest.java | 513 ++++++++++++++
...erCaseArrayOfDoublesSetOperationsTest.java | 581 ++++++++++++++++
...DirectArrayOfDoublesCompactSketchTest.java | 134 ++++
...ctArrayOfDoublesQuickSelectSketchTest.java | 281 ++++++++
.../HeapArrayOfDoublesCompactSketchTest.java | 137 ++++
...apArrayOfDoublesQuickSelectSketchTest.java | 244 +++++++
101 files changed, 16402 insertions(+), 1 deletion(-)
create mode 100644 src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/AnotB.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/CompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Filter.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/HashTables.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Intersection.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Sketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Sketches.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Summary.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Union.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/Util.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/package-info.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java
create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/package-info.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/MiscTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java
create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java
diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java
index 493ad7879..88b7ad7b7 100644
--- a/src/main/java/org/apache/datasketches/common/Util.java
+++ b/src/main/java/org/apache/datasketches/common/Util.java
@@ -937,7 +937,7 @@ public static MemorySegment alignedHeapSegment(final int capacityBytes, final bo
/**
* Sets the bits defined by the bitMask
* @param seg the given MemorySegment
- * @param offsetBytes offset bytes relative to this Memory start
+ * @param offsetBytes offset bytes relative to this MemorySegment start
* @param bitMask the bits set to one will be set
*/
public static void setBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) {
diff --git a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java
new file mode 100644
index 000000000..ea8a20828
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon;
+
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+
+import org.apache.datasketches.common.BoundsOnRatiosInSampledSets;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.Sketch;
+import org.apache.datasketches.tuple2.Summary;
+
+/**
+ * This class is used to compute the bounds on the estimate of the ratio B / A, where:
+ *
+ * - A is a Tuple Sketch of population PopA.
+ * - B is a Tuple or Theta Sketch of population PopB that is a subset of A,
+ * obtained by an intersection of A with some other Tuple or Theta Sketch C,
+ * which acts like a predicate or selection clause.
+ * - The estimate of the ratio PopB/PopA is
+ * BoundsOnRatiosInTupleSketchedSets2.getEstimateOfBoverA(A, B).
+ * - The Upper Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInTupleSketchedSets2.getUpperBoundForBoverA(A, B).
+ * - The Lower Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInTupleSketchedSets2.getLowerBoundForBoverA(A, B).
+ *
+ * Note: The theta of B cannot be greater than the theta of A.
+ * If B is formed as an intersection of A and some other set C,
+ * then the theta of B is guaranteed to be less than or equal to the theta of A.
+ *
+ * @author Kevin Lang
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+public final class BoundsOnRatiosInTupleSketchedSets2 {
+
+ private BoundsOnRatiosInTupleSketchedSets2() {}
+
+ /**
+ * Gets the approximate lower bound for B over A based on a 95% confidence interval
+ * @param sketchA the Tuple sketch A with summary type S
+ * @param sketchB the Tuple sketch B with summary type S
+ * @param Summary
+ * @return the approximate lower bound for B over A
+ */
+ public static double getLowerBoundForBoverA(
+ final Sketch sketchA,
+ final Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries();
+ final int countA = thetaLongB == thetaLongA
+ ? sketchA.getRetainedEntries()
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f);
+ }
+
+ /**
+ * Gets the approximate lower bound for B over A based on a 95% confidence interval
+ * @param sketchA the Tuple sketch A with summary type S
+ * @param sketchB the Theta sketch B
+ * @param Summary
+ * @return the approximate lower bound for B over A
+ */
+ public static double getLowerBoundForBoverA(
+ final Sketch sketchA,
+ final org.apache.datasketches.theta.Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries();
+ final int countA = thetaLongB == thetaLongA
+ ? sketchA.getRetainedEntries()
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f);
+ }
+
+ /**
+ * Gets the approximate upper bound for B over A based on a 95% confidence interval
+ * @param sketchA the Tuple sketch A with summary type S
+ * @param sketchB the Tuple sketch B with summary type S
+ * @param Summary
+ * @return the approximate upper bound for B over A
+ */
+ public static double getUpperBoundForBoverA(
+ final Sketch sketchA,
+ final Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries();
+ final int countA = thetaLongB == thetaLongA
+ ? sketchA.getRetainedEntries()
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 1.0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f);
+ }
+
+ /**
+ * Gets the approximate upper bound for B over A based on a 95% confidence interval
+ * @param sketchA the Tuple sketch A with summary type S
+ * @param sketchB the Theta sketch B
+ * @param Summary
+ * @return the approximate upper bound for B over A
+ */
+ public static double getUpperBoundForBoverA(
+ final Sketch sketchA,
+ final org.apache.datasketches.theta.Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = thetaLongB == thetaLongA
+ ? sketchA.getRetainedEntries()
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 1.0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f);
+ }
+
+ /**
+ * Gets the estimate for B over A
+ * @param sketchA the Tuple sketch A with summary type S
+ * @param sketchB the Tuple sketch B with summary type S
+ * @param Summary
+ * @return the estimate for B over A
+ */
+ public static double getEstimateOfBoverA(
+ final Sketch sketchA,
+ final Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries();
+ final int countA = thetaLongB == thetaLongA
+ ? sketchA.getRetainedEntries()
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0.5; }
+
+ return (double) countB / (double) countA;
+ }
+
+ /**
+ * Gets the estimate for B over A
+ * @param sketchA the Tuple sketch A with summary type S
+ * @param sketchB the Theta sketch B
+ * @param Summary
+ * @return the estimate for B over A
+ */
+ public static double getEstimateOfBoverA(
+ final Sketch sketchA,
+ final org.apache.datasketches.theta.Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = thetaLongB == thetaLongA
+ ? sketchA.getRetainedEntries()
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0.5; }
+
+ return (double) countB / (double) countA;
+ }
+
+ static void checkThetas(final long thetaLongA, final long thetaLongB) {
+ if (thetaLongB > thetaLongA) {
+ throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA.");
+ }
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/AnotB.java b/src/main/java/org/apache/datasketches/tuple2/AnotB.java
new file mode 100644
index 000000000..46ff084ae
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/AnotB.java
@@ -0,0 +1,636 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.exactLog2OfLong;
+import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.thetacommon.SetOperationCornerCases;
+import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction;
+import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Computes a set difference, A-AND-NOT-B, of two generic tuple sketches.
+ * This class includes both stateful and stateless operations.
+ *
+ * The stateful operation is as follows:
+ *
+ * AnotB anotb = new AnotB();
+ *
+ * anotb.setA(Sketch skA); //The first argument.
+ * anotb.notB(Sketch skB); //The second (subtraction) argument.
+ * anotb.notB(Sketch skC); // ...any number of additional subtractions...
+ * anotb.getResult(false); //Get an interim result.
+ * anotb.notB(Sketch skD); //Additional subtractions.
+ * anotb.getResult(true); //Final result and resets the AnotB operator.
+ *
+ *
+ * The stateless operation is as follows:
+ *
+ * AnotB anotb = new AnotB();
+ *
+ * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
+ *
+ *
+ * Calling the setA operation a second time essentially clears the internal state and loads
+ * the new sketch.
+ *
+ * The stateless and stateful operations are independent of each other.
+ *
+ * @param Type of Summary
+ *
+ * @author Lee Rhodes
+ */
+@SuppressFBWarnings(value = "DP_DO_INSIDE_DO_PRIVILEGED", justification = "Defer fix")
+public final class AnotB {
+ private boolean empty_ = true;
+ private long thetaLong_ = Long.MAX_VALUE;
+ private long[] hashArr_ = null; //always in compact form, not necessarily sorted
+ private S[] summaryArr_ = null; //always in compact form, not necessarily sorted
+ private int curCount_ = 0;
+
+ private static final Method GET_CACHE;
+
+ static {
+ try {
+ GET_CACHE = org.apache.datasketches.theta2.Sketch.class.getDeclaredMethod("getCache");
+ GET_CACHE.setAccessible(true);
+ } catch (final Exception e) {
+ throw new SketchesStateException("Could not reflect getCache(): " + e);
+ }
+ }
+
+ /**
+ * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the
+ * first argument A of A-AND-NOT-B. This overwrites the internal state of this
+ * AnotB operator with the contents of the given sketch.
+ * This sets the stage for multiple following notB steps.
+ *
+ * An input argument of null will throw an exception.
+ *
+ * Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
+ * That is distinctly different from the java null, which represents a nonexistent object.
+ * In most cases it is a programming error due to some object that was not properly initialized.
+ * With a null as the first argument, we cannot know what the user's intent is.
+ * Since it is very likely that a null is a programming error, we throw an exception.
+ *
+ * An empty input argument will set the internal state to empty.
+ *
+ * Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent,
+ * valid argument for B irrelevant, we must allow this and assume the user knows what they are
+ * doing.
+ *
+ * Performing {@link #getResult(boolean)} just after this step will return a compact form of
+ * the given argument.
+ *
+ * @param skA The incoming sketch for the first argument, A.
+ */
+ public void setA(final Sketch skA) {
+ if (skA == null) {
+ reset();
+ throw new SketchesArgumentException("The input argument A may not be null");
+ }
+
+ empty_ = skA.isEmpty();
+ thetaLong_ = skA.getThetaLong();
+ final DataArrays da = getCopyOfDataArraysTuple(skA);
+ summaryArr_ = da.summaryArr; //it may be null
+ hashArr_ = da.hashArr; //it may be null
+ curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
+ }
+
+ /**
+ * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the
+ * second (or n+1th) argument B of A-AND-NOT-B.
+ * Performs an AND NOT operation with the existing internal state of this AnotB operator.
+ *
+ * An input argument of null or empty is ignored.
+ *
+ * Rationale: A null for the second or following arguments is more tolerable because
+ * A NOT null is still A even if we don't know exactly what the null represents. It
+ * clearly does not have any content that overlaps with A. Also, because this can be part of
+ * a multistep operation with multiple notB steps, other following steps can still produce
+ * a valid result.
+ *
+ * Use {@link #getResult(boolean)} to obtain the result.
+ *
+ * @param skB The incoming Tuple sketch for the second (or following) argument B.
+ */
+ public void notB(final Sketch skB) {
+ if (skB == null) { return; } //ignore
+
+ final long thetaLongB = skB.getThetaLong();
+ final int countB = skB.getRetainedEntries();
+ final boolean emptyB = skB.isEmpty();
+
+ final int id =
+ SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB);
+ final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
+ final AnotbAction anotbAction = cCase.getAnotbAction();
+
+ switch (anotbAction) {
+ case EMPTY_1_0_T: {
+ reset();
+ break;
+ }
+ case DEGEN_MIN_0_F: {
+ reset();
+ thetaLong_ = min(thetaLong_, thetaLongB);
+ empty_ = false;
+ break;
+ }
+ case DEGEN_THA_0_F: {
+ empty_ = false;
+ curCount_ = 0;
+ //thetaLong_ is ok
+ break;
+ }
+ case TRIM_A: {
+ thetaLong_ = min(thetaLong_, thetaLongB);
+ final DataArrays da = trimAndCopyDataArrays(hashArr_, summaryArr_, thetaLong_, true);
+ hashArr_ = da.hashArr;
+ curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
+ summaryArr_ = da.summaryArr;
+ //empty_ = is whatever SkA is,
+ break;
+ }
+ case SKETCH_A: {
+ break; //result is already in A
+ }
+ case FULL_ANOTB: { //both A and B should have valid entries.
+ thetaLong_ = min(thetaLong_, thetaLongB);
+ final DataArrays daR = getCopyOfResultArraysTuple(thetaLong_, curCount_, hashArr_, summaryArr_, skB);
+ hashArr_ = daR.hashArr;
+ curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
+ summaryArr_ = daR.summaryArr;
+ //empty_ = is whatever SkA is,
+ }
+ //default: not possible
+ }
+ }
+
+ /**
+ * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the
+ * second (or n+1th) argument B of A-AND-NOT-B.
+ * Performs an AND NOT operation with the existing internal state of this AnotB operator.
+ * Calls to this method can be intermingled with calls to
+ * {@link #notB(Sketch)}.
+ *
+ * An input argument of null or empty is ignored.
+ *
+ * Rationale: A null for the second or following arguments is more tolerable because
+ * A NOT null is still A even if we don't know exactly what the null represents. It
+ * clearly does not have any content that overlaps with A. Also, because this can be part of
+ * a multistep operation with multiple notB steps, other following steps can still produce
+ * a valid result.
+ *
+ * Use {@link #getResult(boolean)} to obtain the result.
+ *
+ * @param skB The incoming Theta sketch for the second (or following) argument B.
+ */
+ public void notB(final org.apache.datasketches.theta2.Sketch skB) {
+ if (skB == null) { return; } //ignore
+
+ final long thetaLongB = skB.getThetaLong();
+ final int countB = skB.getRetainedEntries();
+ final boolean emptyB = skB.isEmpty();
+
+ final int id =
+ SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB);
+ final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
+ final AnotbAction anotbAction = cCase.getAnotbAction();
+
+ switch (anotbAction) {
+ case EMPTY_1_0_T: {
+ reset();
+ break;
+ }
+ case DEGEN_MIN_0_F: {
+ reset();
+ thetaLong_ = min(thetaLong_, thetaLongB);
+ empty_ = false;
+ break;
+ }
+ case DEGEN_THA_0_F: {
+ empty_ = false;
+ curCount_ = 0;
+ //thetaLong_ is ok
+ break;
+ }
+ case TRIM_A: {
+ thetaLong_ = min(thetaLong_, thetaLongB);
+ final DataArrays da = trimAndCopyDataArrays(hashArr_, summaryArr_,thetaLong_, true);
+ hashArr_ = da.hashArr;
+ curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
+ summaryArr_ = da.summaryArr;
+ break;
+ }
+ case SKETCH_A: {
+ break; //result is already in A
+ }
+ case FULL_ANOTB: { //both A and B should have valid entries.
+ thetaLong_ = min(thetaLong_, thetaLongB);
+ final DataArrays daB = getCopyOfResultArraysTheta(thetaLong_, curCount_, hashArr_, summaryArr_, skB);
+ hashArr_ = daB.hashArr;
+ curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
+ summaryArr_ = daB.summaryArr;
+ //empty_ = is whatever SkA is,
+ }
+ //default: not possible
+ }
+ }
+
+ /**
+ * Gets the result of the multistep, stateful operation AnotB that has been executed with calls
+ * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or
+ * {@link #notB(org.apache.datasketches.theta2.Sketch)}).
+ *
+ * @param reset If true, clears this operator to the empty state after this result is
+ * returned. Set this to false if you wish to obtain an intermediate result.
+ * @return the result of this operation as an unordered {@link CompactSketch}.
+ */
+ public CompactSketch getResult(final boolean reset) {
+ final CompactSketch result;
+ if (curCount_ == 0) {
+ result = new CompactSketch<>(null, null, thetaLong_, thetaLong_ == Long.MAX_VALUE);
+ } else {
+
+ result = new CompactSketch<>(hashArr_, Util.copySummaryArray(summaryArr_), thetaLong_, false);
+ }
+ if (reset) { reset(); }
+ return result;
+ }
+
+ /**
+ * Returns the A-and-not-B set operation on the two given Tuple sketches.
+ *
+ * This is a stateless operation and has no impact on the internal state of this operator.
+ * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)},
+ * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta2.Sketch)}, and
+ * {@link #getResult(boolean)} methods.
+ *
+ * If either argument is null an exception is thrown.
+ *
+ * Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
+ * That is distinctly different from the java null, which represents a nonexistent object.
+ * In most cases it is a programming error due to some object that was not properly initialized.
+ * With a null as the first argument, we cannot know what the user's intent is.
+ * With a null as the second argument, we can't ignore it as we must return a result and there is
+ * no following possible viable arguments for the second argument.
+ * Since it is very likely that a null is a programming error, we throw an exception.
+ *
+ * @param skA The incoming Tuple sketch for the first argument
+ * @param skB The incoming Tuple sketch for the second argument
+ * @param Type of Summary
+ * @return the result as an unordered {@link CompactSketch}
+ */
+ @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
+ justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase")
+ public static CompactSketch aNotB(
+ final Sketch skA,
+ final Sketch skB) {
+ if (skA == null || skB == null) {
+ throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
+ }
+
+ final long thetaLongA = skA.getThetaLong();
+ final int countA = skA.getRetainedEntries();
+ final boolean emptyA = skA.isEmpty();
+
+ final long thetaLongB = skB.getThetaLong();
+ final int countB = skB.getRetainedEntries();
+ final boolean emptyB = skB.isEmpty();
+
+ final int id =
+ SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
+ final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
+ final AnotbAction anotbAction = cCase.getAnotbAction();
+
+ CompactSketch result = null;
+
+ switch (anotbAction) {
+ case EMPTY_1_0_T: {
+ result = new CompactSketch<>(null, null, Long.MAX_VALUE, true);
+ break;
+ }
+ case DEGEN_MIN_0_F: {
+ final long thetaLong = min(thetaLongA, thetaLongB);
+ result = new CompactSketch<>(null, null, thetaLong, false);
+ break;
+ }
+ case DEGEN_THA_0_F: {
+ result = new CompactSketch<>(null, null, thetaLongA, false);
+ break;
+ }
+ case TRIM_A: {
+ final DataArrays daA = getCopyOfDataArraysTuple(skA);
+ final long[] hashArrA = daA.hashArr;
+ final S[] summaryArrA = daA.summaryArr;
+ final long minThetaLong = min(thetaLongA, thetaLongB);
+ final DataArrays da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false);
+ result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_);
+ break;
+ }
+ case SKETCH_A: {
+ final DataArrays daA = getCopyOfDataArraysTuple(skA);
+ result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_);
+ break;
+ }
+ case FULL_ANOTB: { //both A and B should have valid entries.
+ final DataArrays daA = getCopyOfDataArraysTuple(skA);
+ final long minThetaLong = min(thetaLongA, thetaLongB);
+ final DataArrays daR =
+ getCopyOfResultArraysTuple(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB);
+ final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length;
+ if (countR == 0) {
+ result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE);
+ } else {
+ result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false);
+ }
+ }
+ //default: not possible
+ }
+ return result;
+ }
+
+ /**
+ * Returns the A-and-not-B set operation on a Tuple sketch and a Theta sketch.
+ *
+ * This is a stateless operation and has no impact on the internal state of this operator.
+ * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)},
+ * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta2.Sketch)}, and
+ * {@link #getResult(boolean)} methods.
+ *
+ * If either argument is null an exception is thrown.
+ *
+ * Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
+ * That is distinctly different from the java null, which represents a nonexistent object.
+ * In most cases it is a programming error due to some object that was not properly initialized.
+ * With a null as the first argument, we cannot know what the user's intent is.
+ * With a null as the second argument, we can't ignore it as we must return a result and there is
+ * no following possible viable arguments for the second argument.
+ * Since it is very likely that a null is a programming error for either argument
+ * we throw an exception.
+ *
+ * @param skA The incoming Tuple sketch for the first argument
+ * @param skB The incoming Theta sketch for the second argument
+ * @param Type of Summary
+ * @return the result as an unordered {@link CompactSketch}
+ */
+ @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
+ justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase")
+ public static CompactSketch aNotB(
+ final Sketch skA,
+ final org.apache.datasketches.theta2.Sketch skB) {
+ if (skA == null || skB == null) {
+ throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
+ }
+
+ final long thetaLongA = skA.getThetaLong();
+ final int countA = skA.getRetainedEntries();
+ final boolean emptyA = skA.isEmpty();
+
+ final long thetaLongB = skB.getThetaLong();
+ final int countB = skB.getRetainedEntries();
+ final boolean emptyB = skB.isEmpty();
+
+ final int id =
+ SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
+ final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
+ final AnotbAction anotbAction = cCase.getAnotbAction();
+
+ CompactSketch result = null;
+
+ switch (anotbAction) {
+ case EMPTY_1_0_T: {
+ result = new CompactSketch<>(null, null, Long.MAX_VALUE, true);
+ break;
+ }
+ case DEGEN_MIN_0_F: {
+ final long thetaLong = min(thetaLongA, thetaLongB);
+ result = new CompactSketch<>(null, null, thetaLong, false);
+ break;
+ }
+ case DEGEN_THA_0_F: {
+ result = new CompactSketch<>(null, null, thetaLongA, false);
+ break;
+ }
+ case TRIM_A: {
+ final DataArrays daA = getCopyOfDataArraysTuple(skA);
+ final long[] hashArrA = daA.hashArr;
+ final S[] summaryArrA = daA.summaryArr;
+ final long minThetaLong = min(thetaLongA, thetaLongB);
+ final DataArrays da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false);
+ result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_);
+ break;
+ }
+ case SKETCH_A: {
+ final DataArrays daA = getCopyOfDataArraysTuple(skA);
+ result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_);
+ break;
+ }
+ case FULL_ANOTB: { //both A and B have valid entries.
+ final DataArrays daA = getCopyOfDataArraysTuple(skA);
+ final long minThetaLong = min(thetaLongA, thetaLongB);
+ @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
+ justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase")
+ final DataArrays daR =
+ getCopyOfResultArraysTheta(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB);
+ final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length;
+ if (countR == 0) {
+ result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE);
+ } else {
+ result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false);
+ }
+ }
+ //default: not possible
+ }
+ return result;
+ }
+
+ //restricted
+
+ /**
+ * Holder that pairs an array of hashes with an array of summaries so that both can be
+ * returned from a single helper call. The constructor assigns neither field; callers
+ * populate them (possibly with null) after construction.
+ */
+ static class DataArrays {
+ DataArrays() {}
+
+ //retained hash values, or null when there are no entries
+ long[] hashArr;
+ //summaries indexed in step with hashArr, or null when there are no entries
+ S[] summaryArr;
+ }
+
+ private static DataArrays getCopyOfDataArraysTuple(
+ final Sketch sk) {
+ final CompactSketch csk;
+ final DataArrays da = new DataArrays<>();
+ if (sk instanceof CompactSketch) {
+ csk = (CompactSketch) sk;
+ } else {
+ csk = ((QuickSelectSketch)sk).compact();
+ }
+ final int count = csk.getRetainedEntries();
+ if (count == 0) {
+ da.hashArr = null;
+ da.summaryArr = null;
+ } else {
+ da.hashArr = csk.getHashArr().clone(); //deep copy, may not be sorted
+ da.summaryArr = Util.copySummaryArray(csk.getSummaryArr());
+ }
+ return da;
+ }
+
+ /**
+ * Computes the A-not-B result arrays where both A and B are tuple sketches.
+ * Entries of A whose hash is zero, is not below minThetaLong, or is found in B are dropped;
+ * the summaries of the surviving entries are copied.
+ * @param minThetaLong exclusive upper bound for hashes that may be retained
+ * @param countA the number of leading entries of hashArrA and summaryArrA to examine
+ * @param hashArrA the hashes of sketch A
+ * @param summaryArrA the summaries of sketch A, indexed in step with hashArrA
+ * @param skB the tuple sketch B
+ * @return the non-matching hashes and copied summaries of A, trimmed to the number kept
+ */
+ @SuppressWarnings("unchecked")
+ //Both skA and skB must have entries (count > 0)
+ private static DataArrays getCopyOfResultArraysTuple(
+ final long minThetaLong,
+ final int countA,
+ final long[] hashArrA,
+ final S[] summaryArrA,
+ final Sketch skB) {
+ final DataArrays daR = new DataArrays<>();
+
+ //Rebuild/get hashtable of skB
+ final long[] hashTableB;
+
+ if (skB instanceof CompactSketch) {
+ //a compact sketch only has a packed hash array, so a searchable table is built from it
+ final CompactSketch cskB = (CompactSketch) skB;
+ final int countB = skB.getRetainedEntries();
+ hashTableB = convertToHashTable(cskB.getHashArr(), countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD);
+ } else {
+ //otherwise skB is cast to QuickSelectSketch and its existing hash table is used as is
+ final QuickSelectSketch qskB = (QuickSelectSketch) skB;
+ hashTableB = qskB.getHashTable();
+ }
+
+ //build temporary arrays of skA
+ final long[] tmpHashArrA = new long[countA];
+ final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA);
+
+ //search for non matches and build temp arrays
+ final int lgHTBLen = exactLog2OfLong(hashTableB.length);
+ int nonMatches = 0;
+ for (int i = 0; i < countA; i++) {
+ final long hash = hashArrA[i];
+ if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta
+ final int index = hashSearch(hashTableB, lgHTBLen, hash);
+ if (index == -1) { //not found in B, so this entry of A is kept
+ tmpHashArrA[nonMatches] = hash;
+ tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy();
+ nonMatches++;
+ }
+ }
+ }
+ //trim the temporary arrays down to the entries actually kept
+ daR.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
+ daR.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches);
+ return daR;
+ }
+
+ /**
+ * Computes the A-not-B result arrays where A is a tuple sketch and B is a theta sketch.
+ * Entries of A whose hash is zero, is not below minThetaLong, or is found in B are dropped;
+ * the summaries of the surviving entries are copied.
+ * @param minThetaLong exclusive upper bound for hashes that may be retained
+ * @param countA the number of leading entries of hashArrA and summaryArrA to examine
+ * @param hashArrA the hashes of sketch A
+ * @param summaryArrA the summaries of sketch A, indexed in step with hashArrA
+ * @param skB the theta sketch B
+ * @return the non-matching hashes and copied summaries of A, trimmed to the number kept
+ * @throws SketchesStateException if the reflective read of B's hash cache fails
+ */
+ @SuppressWarnings("unchecked")
+ private static DataArrays getCopyOfResultArraysTheta(
+ final long minThetaLong,
+ final int countA,
+ final long[] hashArrA,
+ final S[] summaryArrA,
+ final org.apache.datasketches.theta2.Sketch skB) {
+ final DataArrays daB = new DataArrays<>();
+
+ //Rebuild/get hashtable of skB
+ final long[] hashTableB; //read only
+
+ //NOTE(review): GET_CACHE is declared outside this section; presumably a reflective handle
+ // that exposes the theta sketch's internal hash array - confirm at its declaration.
+ final long[] hashCacheB;
+ try { hashCacheB = (long[])GET_CACHE.invoke(skB);
+ } catch (final Exception e) { throw new SketchesStateException("Reflection Exception " + e); }
+
+ if (skB instanceof org.apache.datasketches.theta2.CompactSketch) {
+ //a compact theta sketch is converted into a searchable hash table
+ final int countB = skB.getRetainedEntries(true);
+ hashTableB = convertToHashTable(hashCacheB, countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD);
+ } else {
+ //for any other theta sketch the cache itself is searched as the hash table
+ hashTableB = hashCacheB;
+ }
+
+ //build temporary result arrays of skA
+ final long[] tmpHashArrA = new long[countA];
+ final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA);
+
+ //search for non matches and build temp arrays
+ final int lgHTBLen = exactLog2OfLong(hashTableB.length);
+ int nonMatches = 0;
+ for (int i = 0; i < countA; i++) {
+ final long hash = hashArrA[i];
+ if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta
+ final int index = hashSearch(hashTableB, lgHTBLen, hash);
+ if (index == -1) { //not found
+ tmpHashArrA[nonMatches] = hash;
+ tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy();
+ nonMatches++;
+ }
+ }
+ }
+ //trim the arrays
+ daB.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
+ daB.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches);
+ return daB;
+ }
+
+ /**
+  * Filters the given arrays down to the entries whose hash is below minThetaLong.
+  *
+  * @param hashArr the input hashes; every element is examined
+  * @param summaryArr the input summaries, indexed in step with hashArr
+  * @param minThetaLong exclusive upper bound for retained hashes
+  * @param copy if true each retained summary is copied, otherwise the same instance is reused
+  * @return new arrays sized to the number of retained entries
+  */
+ @SuppressWarnings("unchecked")
+ private static DataArrays trimAndCopyDataArrays(
+     final long[] hashArr,
+     final S[] summaryArr,
+     final long minThetaLong,
+     final boolean copy) {
+   final int inputLen = hashArr.length;
+   final long[] keptHashes = new long[inputLen];
+   final S[] keptSummaries = Util.newSummaryArray(summaryArr, inputLen);
+   int kept = 0;
+   for (int idx = 0; idx < inputLen; idx++) {
+     final long hash = hashArr[idx];
+     if (hash >= minThetaLong) { continue; }
+     keptHashes[kept] = hash;
+     if (copy) {
+       keptSummaries[kept] = (S) summaryArr[idx].copy();
+     } else {
+       keptSummaries[kept] = summaryArr[idx];
+     }
+     kept++;
+   }
+   //drop the unused tail of the temporary arrays
+   final DataArrays trimmed = new DataArrays<>();
+   trimmed.hashArr = Arrays.copyOf(keptHashes, kept);
+   trimmed.summaryArr = Arrays.copyOf(keptSummaries, kept);
+   return trimmed;
+ }
+
+ /**
+  * Returns this operation to its initial empty state, dropping any retained hashes and summaries.
+  */
+ public void reset() {
+   hashArr_ = null;
+   summaryArr_ = null;
+   curCount_ = 0;
+   thetaLong_ = Long.MAX_VALUE;
+   empty_ = true;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java
new file mode 100644
index 000000000..36d7a9b5f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.thetacommon.HashOperations.count;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.reflect.Array;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * CompactSketches are never created directly. They are created as a result of
+ * the compact() method of an UpdatableSketch or as a result of the getResult()
+ * method of a set operation like Union, Intersection or AnotB. CompactSketch
+ * consists of a compact list (i.e. no intervening spaces) of hash values,
+ * corresponding list of Summaries, and a value for theta. The lists may or may
+ * not be ordered. CompactSketch is read-only.
+ *
+ * @param <S> type of Summary
+ */
+public final class CompactSketch extends Sketch {
+ private static final byte serialVersionWithSummaryClassNameUID = 1;
+ private static final byte serialVersionUIDLegacy = 2;
+ private static final byte serialVersionUID = 3;
+ private static final short defaultSeedHash = (short) 37836; // for compatibility with C++
+ private final long[] hashArr_;
+ private S[] summaryArr_;
+
+ private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
+
+ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED }
+
+ /**
+  * Builds a CompactSketch directly from already-compacted components. The given arrays are
+  * stored by reference, not copied.
+  * @param hashArr compacted hash array
+  * @param summaryArr compacted summary array
+  * @param thetaLong long value of theta
+  * @param empty empty flag
+  */
+ CompactSketch(final long[] hashArr, final S[] summaryArr, final long thetaLong, final boolean empty) {
+   super(thetaLong, empty, null);
+   hashArr_ = hashArr;
+   summaryArr_ = summaryArr;
+   empty_ = empty;
+   thetaLong_ = thetaLong;
+ }
+
+ /**
+ * This is to create an instance of a CompactSketch given a serialized form.
+ * Serial versions 1 and 2 use the legacy layout (all hashes followed by all summaries);
+ * version 3 uses the current layout (each hash immediately followed by its summary).
+ *
+ * <p>NOTE(review): multi-byte values are read with the JAVA_*_UNALIGNED layouts, which use the
+ * native byte order, whereas toByteArray() writes little-endian - confirm the intended
+ * behavior on big-endian hosts.</p>
+ *
+ * @param seg MemorySegment object with serialized CompactSketch
+ * @param deserializer the SummaryDeserializer
+ * @throws SketchesArgumentException if the serial version is newer than supported or, for the
+ * legacy layout, the stored byte-order flag does not match the native byte order
+ */
+ CompactSketch(final MemorySegment seg, final SummaryDeserializer deserializer) {
+ //placeholder theta/empty values; the real ones are assigned below from the serialized image
+ super(Long.MAX_VALUE, true, null);
+ int offset = 0;
+ final byte preambleLongs = seg.get(JAVA_BYTE, offset++);
+ final byte version = seg.get(JAVA_BYTE, offset++);
+ final byte familyId = seg.get(JAVA_BYTE, offset++);
+ SerializerDeserializer.validateFamily(familyId, preambleLongs);
+ if (version > serialVersionUID) {
+ throw new SketchesArgumentException(
+ "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version);
+ }
+ SerializerDeserializer
+ .validateType(seg.get(JAVA_BYTE, offset++), SerializerDeserializer.SketchType.CompactSketch);
+ if (version <= serialVersionUIDLegacy) { // legacy serial format
+ final byte flags = seg.get(JAVA_BYTE, offset++);
+ final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0;
+ if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+ throw new SketchesArgumentException("Byte order mismatch");
+ }
+ empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0;
+ final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0;
+ if (isThetaIncluded) {
+ thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset);
+ offset += Long.BYTES;
+ } else {
+ thetaLong_ = Long.MAX_VALUE;
+ }
+ final boolean hasEntries = (flags & 1 << FlagsLegacy.HAS_ENTRIES.ordinal()) > 0;
+ if (hasEntries) {
+ //version 1 additionally stores a summary class name: its length byte precedes the count
+ //and the name bytes follow the count; the name itself is skipped, not read
+ int classNameLength = 0;
+ if (version == serialVersionWithSummaryClassNameUID) {
+ classNameLength = seg.get(JAVA_BYTE, offset++);
+ }
+ final int count = seg.get(JAVA_INT_UNALIGNED, offset);
+ offset += Integer.BYTES;
+ if (version == serialVersionWithSummaryClassNameUID) {
+ offset += classNameLength;
+ }
+ hashArr_ = new long[count];
+
+ //legacy layout: the block of hashes comes first, then the block of summaries
+ for (int i = 0; i < count; i++) {
+ hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset);
+ offset += Long.BYTES;
+ }
+ for (int i = 0; i < count; i++) {
+ offset += readSummary(seg, offset, i, count, deserializer);
+ }
+ } else {
+ hashArr_ = new long[0];
+ summaryArr_ = null;
+ }
+ } else { // current serial format
+ offset++; //skip unused byte
+ final byte flags = seg.get(JAVA_BYTE, offset++);
+ offset += 2; //skip 2 unused bytes
+ empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
+ thetaLong_ = Long.MAX_VALUE;
+ int count = 0;
+ if (!empty_) {
+ if (preambleLongs == 1) {
+ //non-empty with a one-long preamble: exactly one entry and no stored count or theta
+ count = 1;
+ } else {
+ count = seg.get(JAVA_INT_UNALIGNED, offset);
+ offset += Integer.BYTES;
+ offset += 4; // unused
+ if (preambleLongs > 2) {
+ //theta is only stored when there is a third preamble long
+ thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset);
+ offset += Long.BYTES;
+ }
+ }
+ }
+ hashArr_ = new long[count];
+
+ //current layout: each hash is immediately followed by its serialized summary
+ for (int i = 0; i < count; i++) {
+ hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset);
+ offset += Long.BYTES;
+ offset += readSummary(seg, offset, i, count, deserializer);
+ }
+ }
+ }
+
+ /**
+  * Deserializes one summary starting at the given offset and stores it at index i of the
+  * summary array, allocating that array on first use from the runtime class of the summary.
+  *
+  * @param seg the segment holding the serialized sketch
+  * @param offset byte offset in seg at which the summary starts
+  * @param i index in the summary array to fill
+  * @param count length to give the summary array when it is first allocated
+  * @param deserializer the SummaryDeserializer
+  * @return the size reported by the deserializer for this summary, in bytes
+  */
+ @SuppressWarnings({"unchecked"})
+ private int readSummary(final MemorySegment seg, final int offset, final int i, final int count,
+     final SummaryDeserializer deserializer) {
+   final long remainingBytes = seg.byteSize() - offset;
+   final DeserializeResult parsed = deserializer.heapifySummary(seg.asSlice(offset, remainingBytes));
+   final S item = parsed.getObject();
+   final Class itemType = (Class) item.getClass();
+   if (summaryArr_ == null) {
+     summaryArr_ = (S[]) Array.newInstance(itemType, count);
+   }
+   summaryArr_[i] = item;
+   return parsed.getSize();
+ }
+
+ @Override
+ public CompactSketch compact() {
+ //this sketch is already compact, so the same instance is returned rather than a copy
+ return this;
+ }
+
+ //Returns the internal hash array itself (not a copy); it may be null.
+ long[] getHashArr() {
+ return hashArr_;
+ }
+
+ //Returns the internal summary array itself (not a copy); it may be null.
+ S[] getSummaryArr() {
+ return summaryArr_;
+ }
+
+ @Override
+ public int getRetainedEntries() {
+   //a null hash array means the sketch was built without entries
+   if (hashArr_ == null) {
+     return 0;
+   }
+   return hashArr_.length;
+ }
+
+ /**
+  * Delegates to HashOperations.count over the retained hashes with the given theta.
+  * A sketch built without entries has a null hash array and reports zero.
+  *
+  * @param thetaLong the theta value passed through to HashOperations.count
+  * @return the count computed from the retained hashes, or zero when there are none
+  */
+ @Override
+ public int getCountLessThanThetaLong(final long thetaLong) {
+   //hashArr_ may be null (see getRetainedEntries); guard so this does not throw
+   return hashArr_ == null ? 0 : count(hashArr_, thetaLong);
+ }
+
+ // Serializes this sketch in the current (serialVersionUID) format.
+ // The preamble is 1 long when the sketch is empty or holds a single item in exact mode,
+ // 3 longs in estimation mode (theta is stored), and 2 longs otherwise.
+ // The preamble is followed by one (hash, serialized summary) pair per retained entry.
+ // Layout of first 8 bytes:
+ // Long || Start Byte Adr:
+ // Adr:
+ // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ // 0 || seed hash | Flags | unused | SkType | FamID | SerVer | Preamble_Longs |
+ @Override
+ public byte[] toByteArray() {
+ final int count = getRetainedEntries();
+ final boolean isSingleItem = count == 1 && !isEstimationMode();
+ final int preambleLongs = isEmpty() || isSingleItem ? 1 : isEstimationMode() ? 3 : 2;
+
+ //serialize every summary first so the total output size is known before allocating
+ int summariesSizeBytes = 0;
+ final byte[][] summariesBytes = new byte[count][];
+ if (count > 0) {
+ for (int i = 0; i < count; i++) {
+ summariesBytes[i] = summaryArr_[i].toByteArray();
+ summariesSizeBytes += summariesBytes[i].length;
+ }
+ }
+
+ final int sizeBytes = Long.BYTES * preambleLongs + Long.BYTES * count + summariesSizeBytes;
+ final byte[] bytes = new byte[sizeBytes];
+ int offset = 0;
+ bytes[offset++] = (byte) preambleLongs;
+ bytes[offset++] = serialVersionUID;
+ bytes[offset++] = (byte) Family.TUPLE.getID();
+ bytes[offset++] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal();
+ offset++; // unused
+ //IS_COMPACT and IS_READ_ONLY are always set; IS_EMPTY only for an empty sketch
+ bytes[offset++] = (byte) (
+ (1 << Flags.IS_COMPACT.ordinal())
+ | (1 << Flags.IS_READ_ONLY.ordinal())
+ | (isEmpty() ? 1 << Flags.IS_EMPTY.ordinal() : 0)
+ );
+ ByteArrayUtil.putShortLE(bytes, offset, defaultSeedHash);
+ offset += Short.BYTES;
+ if (!isEmpty()) {
+ if (!isSingleItem) {
+ //count (and theta, in estimation mode) are omitted for the single-item form
+ ByteArrayUtil.putIntLE(bytes, offset, count);
+ offset += Integer.BYTES;
+ offset += 4; // unused
+ if (isEstimationMode()) {
+ ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
+ offset += Long.BYTES;
+ }
+ }
+ }
+ //entries are interleaved: each hash is immediately followed by its summary bytes
+ for (int i = 0; i < count; i++) {
+ ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]);
+ offset += Long.BYTES;
+ System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
+ offset += summariesBytes[i].length;
+ }
+ return bytes;
+ }
+
+ @Override
+ public TupleSketchIterator iterator() {
+ //the iterator is handed the internal arrays directly (no copy); either may be null
+ return new TupleSketchIterator<>(hashArr_, summaryArr_);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java b/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java
new file mode 100644
index 000000000..a5fc38c31
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * Returns an object and its size in bytes as a result of a deserialize operation
+ * @param Type of object
+ */
+public class DeserializeResult {
+ private final T object;
+ private final int size;
+
+ /**
+ * Creates an instance.
+ * @param object Deserialized object.
+ * @param size Deserialized size in bytes.
+ */
+ public DeserializeResult(final T object, final int size) {
+ this.object = object;
+ this.size = size;
+ }
+
+ /**
+ * Returns Deserialized object
+ * @return Deserialized object
+ */
+ public T getObject() {
+ return object;
+ }
+
+ /**
+ * Returns size in bytes occupied by the object in the serialized form
+ * @return size in bytes occupied by the object in the serialized form
+ */
+ public int getSize() {
+ return size;
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Filter.java b/src/main/java/org/apache/datasketches/tuple2/Filter.java
new file mode 100644
index 000000000..dbd61b576
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Filter.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.function.Predicate;
+
+/**
+ * Class for filtering entries from a {@link Sketch} given a {@link Summary}
+ *
+ * @param Summary type against which apply the {@link Predicate}
+ */
+public class Filter {
+ private final Predicate predicate;
+
+ /**
+ * Filter constructor with a {@link Predicate}
+ * @param predicate Predicate to use in this filter. If the Predicate returns False, the
+ * element is discarded. If the Predicate returns True, then the element is kept in the
+ * {@link Sketch}
+ */
+ public Filter(final Predicate predicate) {
+ this.predicate = predicate;
+ }
+
+ /**
+ * Filters elements on the provided {@link Sketch}
+ *
+ * @param sketchIn The sketch against which apply the {@link Predicate}
+ * @return A new Sketch with some of the entries filtered out based on the {@link Predicate}
+ */
+ @SuppressWarnings("unchecked")
+ public CompactSketch filter(final Sketch sketchIn) {
+ if (sketchIn == null) {
+ return new CompactSketch<>(null, null, Long.MAX_VALUE, true);
+ }
+ final long[] hashes = new long[sketchIn.getRetainedEntries()];
+ T[] summaries = null; // lazy init to get class from the first entry
+ int i = 0;
+ final TupleSketchIterator it = sketchIn.iterator();
+ while (it.next()) {
+ final T summary = it.getSummary();
+ if (predicate.test(summary)) {
+ hashes[i] = it.getHash();
+ if (summaries == null) {
+ summaries = (T[]) Array.newInstance(summary.getClass(), sketchIn.getRetainedEntries());
+ }
+ summaries[i++] = (T) summary.copy();
+ }
+ }
+ final boolean isEmpty = i == 0 && !sketchIn.isEstimationMode();
+ if (i == 0) {
+ return new CompactSketch<>(null, null, sketchIn.getThetaLong(), isEmpty);
+ }
+ return new CompactSketch<>(Arrays.copyOf(hashes, i), Arrays.copyOf(summaries, i), sketchIn.getThetaLong(), isEmpty);
+ }
+}
+
diff --git a/src/main/java/org/apache/datasketches/tuple2/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/HashTables.java
new file mode 100644
index 000000000..913c53196
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/HashTables.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.Math.ceil;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
+
+import java.lang.reflect.Array;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+@SuppressWarnings("unchecked")
+class HashTables {
+ long[] hashTable = null;
+ S[] summaryTable = null;
+ int lgTableSize = 0;
+ int numKeys = 0;
+
+ //Creates an instance with no tables; they are allocated by fromSketch(...) or fromArrays(...)
+ HashTables() { }
+
+ /**
+  * Loads this instance from the given tuple sketch, which must have valid entries.
+  * Each hash is inserted into a newly sized hash table and a copy of its summary is stored
+  * at the same index of the summary table. Any tables held before the call are replaced.
+  *
+  * @param sketch the tuple sketch to load from
+  */
+ void fromSketch(final Sketch sketch) {
+   numKeys = sketch.getRetainedEntries();
+   lgTableSize = getLgTableSize(numKeys);
+
+   hashTable = new long[1 << lgTableSize];
+   //discard any earlier summary table so it is re-allocated at the new size, as fromArrays does
+   summaryTable = null;
+   final TupleSketchIterator it = sketch.iterator();
+   while (it.next()) {
+     final long hash = it.getHash();
+     final int index = hashInsertOnly(hashTable, lgTableSize, hash);
+     final S mySummary = (S)it.getSummary().copy();
+     if (summaryTable == null) {
+       //lazy allocation: the array's component type is taken from the first summary seen
+       summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize);
+     }
+     summaryTable[index] = mySummary;
+   }
+ }
+
+ /**
+  * Loads this instance from the given theta sketch, which must have valid entries.
+  * Each hash is inserted into a newly sized hash table and a separate copy of the given
+  * summary is stored at the same index of the summary table. Any tables held before the
+  * call are replaced.
+  *
+  * @param sketch the theta sketch to load hashes from
+  * @param summary the summary that is copied for every hash
+  */
+ void fromSketch(final org.apache.datasketches.theta2.Sketch sketch, final S summary) {
+   numKeys = sketch.getRetainedEntries(true);
+   lgTableSize = getLgTableSize(numKeys);
+
+   hashTable = new long[1 << lgTableSize];
+   //discard any earlier summary table so it is re-allocated at the new size, as fromArrays does
+   summaryTable = null;
+   final org.apache.datasketches.theta2.HashIterator it = sketch.iterator();
+   while (it.next()) {
+     final long hash = it.get();
+     final int index = hashInsertOnly(hashTable, lgTableSize, hash);
+     final S mySummary = (S)summary.copy();
+     if (summaryTable == null) {
+       //lazy allocation: the array's component type is taken from the first summary copy
+       summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize);
+     }
+     summaryTable[index] = mySummary;
+   }
+ }
+
+ /**
+  * Rebuilds both tables from the first count entries of the given arrays. The summaries are
+  * stored by reference, not copied. With a count of zero the summary table is left null.
+  *
+  * @param hashArr the hashes to insert
+  * @param summaryArr the summaries, indexed in step with hashArr
+  * @param count the number of leading entries to load
+  */
+ private void fromArrays(final long[] hashArr, final S[] summaryArr, final int count) {
+   numKeys = count;
+   lgTableSize = getLgTableSize(count);
+   final int tableLen = 1 << lgTableSize;
+
+   summaryTable = null;
+   hashTable = new long[tableLen];
+   int i = 0;
+   while (i < count) {
+     final int slot = hashInsertOnly(hashTable, lgTableSize, hashArr[i]);
+     final S entry = summaryArr[i];
+     if (summaryTable == null) {
+       summaryTable = (S[]) Array.newInstance(entry.getClass(), tableLen);
+     }
+     summaryTable[slot] = entry;
+     i++;
+   }
+ }
+
+ /**
+  * For Tuple Sketches: intersects the entries held in this instance with the given tuple sketch.
+  * A hash of the given sketch is kept when it is below thetaLong and is found in this
+  * instance's hash table; the two summaries are combined with summarySetOps.intersection.
+  *
+  * @param nextTupleSketch the tuple sketch to match against this instance
+  * @param thetaLong exclusive upper bound for hashes that may match
+  * @param summarySetOps supplies the rule for combining two matching summaries
+  * @return a new HashTables holding only the matching entries
+  */
+ HashTables getIntersectHashTables(
+     final Sketch nextTupleSketch,
+     final long thetaLong,
+     final SummarySetOperations summarySetOps) {
+
+   //the number of matches cannot exceed the smaller of the two entry counts
+   final int capacity = min(numKeys, nextTupleSketch.getRetainedEntries());
+   final long[] matchedHashes = new long[capacity];
+   final S[] matchedSummaries = Util.newSummaryArray(summaryTable, capacity);
+   int matched = 0;
+
+   final TupleSketchIterator it = nextTupleSketch.iterator();
+   while (it.next()) {
+     final long hash = it.getHash();
+     if (hash < thetaLong) {
+       final int slot = hashSearch(hashTable, lgTableSize, hash);
+       if (slot >= 0) {
+         matchedHashes[matched] = hash;
+         matchedSummaries[matched] = summarySetOps.intersection(summaryTable[slot], it.getSummary());
+         matched++;
+       }
+     }
+   }
+   final HashTables result = new HashTables<>();
+   result.fromArrays(matchedHashes, matchedSummaries, matched);
+   return result;
+ }
+
+ /**
+  * For Theta Sketches: intersects the entries held in this instance with the given theta sketch.
+  * A hash of the given sketch is kept when it is below thetaLong and is found in this
+  * instance's hash table; the stored summary is combined with the given proxy summary using
+  * summarySetOps.intersection.
+  *
+  * @param nextThetaSketch the theta sketch to match against this instance
+  * @param thetaLong exclusive upper bound for hashes that may match
+  * @param summarySetOps supplies the rule for combining two matching summaries
+  * @param summary the proxy summary used for every hash of the theta sketch
+  * @return a new HashTables holding only the matching entries
+  */
+ HashTables getIntersectHashTables(
+     final org.apache.datasketches.theta2.Sketch nextThetaSketch,
+     final long thetaLong,
+     final SummarySetOperations summarySetOps,
+     final S summary) {
+
+   final Class proxyType = (Class) summary.getClass();
+
+   //the number of matches cannot exceed the smaller of the two entry counts
+   final int capacity = min(numKeys, nextThetaSketch.getRetainedEntries());
+   final long[] matchedHashes = new long[capacity];
+   final S[] matchedSummaries = (S[]) Array.newInstance(proxyType, capacity);
+   int matched = 0;
+
+   //walk the theta sketch and look each hash up in this instance's table
+   final org.apache.datasketches.theta2.HashIterator it = nextThetaSketch.iterator();
+   while (it.next()) {
+     final long hash = it.get();
+     if (hash < thetaLong) {
+       final int slot = hashSearch(hashTable, lgTableSize, hash);
+       if (slot >= 0) {
+         matchedHashes[matched] = hash;
+         matchedSummaries[matched] = summarySetOps.intersection(summaryTable[slot], summary);
+         matched++;
+       }
+     }
+   }
+   final HashTables result = new HashTables<>();
+   result.fromArrays(matchedHashes, matchedSummaries, matched);
+   return result;
+ }
+
+ //Drops both tables and zeroes the size fields, returning this instance to its initial state.
+ void clear() {
+   numKeys = 0;
+   lgTableSize = 0;
+   summaryTable = null;
+   hashTable = null;
+ }
+
+ //Returns log2 of the table size for the given entry count: the next power of two at or above
+ //count / 0.75, but never below 2^MIN_LG_NOM_LONGS.
+ static int getLgTableSize(final int count) {
+   final int minTableSize = 1 << ThetaUtil.MIN_LG_NOM_LONGS;
+   final int neededSize = ceilingPowerOf2((int) ceil(count / 0.75));
+   return Integer.numberOfTrailingZeros(max(neededSize, minTableSize));
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Intersection.java b/src/main/java/org/apache/datasketches/tuple2/Intersection.java
new file mode 100644
index 000000000..8731df0d4
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Intersection.java
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.Math.ceil;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+
+/**
+ * Computes an intersection of two or more generic tuple sketches or generic tuple sketches
+ * combined with theta sketches.
+ * A new instance represents the Universal Set. Because the Universal Set
+ * cannot be realized a getResult() on a new instance will produce an error.
+ * Every update() computes an intersection with the internal state, which will never
+ * grow larger and may be reduced to zero.
+ *
+ * @param <S> Type of Summary
+ */
+@SuppressWarnings("unchecked")
+public class Intersection {
+ private final SummarySetOperations summarySetOps_;
+ private boolean empty_;
+ private long thetaLong_;
+ private HashTables hashTables_;
+ private boolean firstCall_;
+
+ /**
+  * Creates a new Intersection instance, which starts out representing the Universal Set, with
+  * the rule for combining two summaries that intersect.
+  * @param summarySetOps instance of SummarySetOperations
+  */
+ public Intersection(final SummarySetOperations summarySetOps) {
+   summarySetOps_ = summarySetOps;
+   hashTables_ = new HashTables<>();
+   thetaLong_ = Long.MAX_VALUE;
+   firstCall_ = true;
+   empty_ = false; // the universal set is not empty
+ }
+
+ /**
+ * Perform a stateless intersect set operation on the two given tuple sketches and returns the
+ * result as an unordered CompactSketch on the heap.
+ * Any state accumulated by earlier stateful calls on this instance is discarded, and the
+ * instance is reset again before returning.
+ * @param tupleSketchA The first sketch argument. It must not be null.
+ * @param tupleSketchB The second sketch argument. It must not be null.
+ * @return an unordered CompactSketch on the heap
+ */
+ public CompactSketch intersect(
+ final Sketch tupleSketchA,
+ final Sketch tupleSketchB) {
+ reset(); //start from the Universal Set regardless of prior state
+ intersect(tupleSketchA);
+ intersect(tupleSketchB);
+ final CompactSketch csk = getResult();
+ reset(); //leave no state behind
+ return csk;
+ }
+
+ /**
+ * Perform a stateless intersect set operation on a tuple sketch and a theta sketch and returns the
+ * result as an unordered CompactSketch on the heap.
+ * Any state accumulated by earlier stateful calls on this instance is discarded, and the
+ * instance is reset again before returning.
+ * @param tupleSketch The first sketch argument. It must not be null.
+ * @param thetaSketch The second sketch argument. It must not be null.
+ * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+ * This must not be null.
+ * @return an unordered CompactSketch on the heap
+ */
+ public CompactSketch intersect(
+ final Sketch tupleSketch,
+ final org.apache.datasketches.theta2.Sketch
+ thetaSketch, final S summary) {
+ reset(); //start from the Universal Set regardless of prior state
+ intersect(tupleSketch);
+ intersect(thetaSketch, summary);
+ final CompactSketch csk = getResult();
+ reset(); //leave no state behind
+ return csk;
+ }
+
+ /**
+ * Performs a stateful intersection of the internal set with the given tupleSketch.
+ * @param tupleSketch input sketch to intersect with the internal state. It must not be null.
+ */
+ public void intersect(final Sketch tupleSketch) {
+ if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); }
+
+ final boolean firstCall = firstCall_;
+ firstCall_ = false;
+
+ // input sketch could be first or next call
+
+ final boolean emptyIn = tupleSketch.isEmpty();
+ if (empty_ || emptyIn) { //empty rule
+ //Whatever the current internal state, we make our local empty.
+ resetToEmpty();
+ return;
+ }
+
+ final long thetaLongIn = tupleSketch.getThetaLong();
+ thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule
+
+ if (tupleSketch.getRetainedEntries() == 0) {
+ hashTables_.clear();
+ return;
+ }
+ // input sketch will have valid entries > 0
+
+ if (firstCall) {
+ //Copy firstSketch data into local instance hashTables_
+ hashTables_.fromSketch(tupleSketch);
+ }
+
+ //Next Call
+ else {
+ if (hashTables_.numKeys == 0) { return; }
+ //process intersect with current hashTables
+ hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, summarySetOps_);
+ }
+ }
+
+ /**
+  * Performs a stateful intersection of the internal set with the given thetaSketch by combining entries
+  * using the hashes from the theta sketch and summary values from the given summary and rules
+  * from the summarySetOps defined by the Intersection constructor.
+  * @param thetaSketch input theta sketch to intersect with the internal state. It must not be null.
+  * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+  * It will be copied for each matching index. It must not be null.
+  * @throws SketchesArgumentException if thetaSketch or summary is null
+  */
+ public void intersect(final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) {
+   if (thetaSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); }
+   if (summary == null) { throw new SketchesArgumentException("Summary cannot be null."); }
+   final boolean firstCall = firstCall_;
+   firstCall_ = false;
+   // input sketch is not null, could be first or next call
+
+   final boolean emptyIn = thetaSketch.isEmpty();
+   if (empty_ || emptyIn) { //empty rule
+     //Whatever the current internal state, we make our local empty.
+     resetToEmpty();
+     return;
+   }
+
+   final long thetaLongIn = thetaSketch.getThetaLong();
+   thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule
+
+   final int countIn = thetaSketch.getRetainedEntries();
+   if (countIn == 0) {
+     hashTables_.clear();
+     return;
+   }
+   // input sketch will have valid entries > 0
+
+   if (firstCall) {
+     //Copy firstSketch data into local instance hashTables_
+     hashTables_.fromSketch(thetaSketch, summary);
+   }
+
+   //Next Call
+   else {
+     if (hashTables_.numKeys == 0) { return; }
+     //Filter by the combined theta (thetaLong_ <= thetaLongIn), as the tuple-sketch overload
+     //does, so that no retained hash can be at or above the theta reported by getResult().
+     hashTables_ = hashTables_.getIntersectHashTables(thetaSketch, thetaLong_, summarySetOps_, summary);
+   }
+ }
+
+ /**
+  * Gets the internal set as an unordered CompactSketch
+  * @return result of the intersections so far
+  * @throws SketchesStateException if no intersect has been performed since construction or reset
+  */
+ public CompactSketch getResult() {
+   if (firstCall_) {
+     throw new SketchesStateException(
+         "getResult() with no intervening intersections is not a legal result.");
+   }
+   final int expected = hashTables_.numKeys;
+   if (expected == 0) {
+     return new CompactSketch<>(null, null, thetaLong_, empty_);
+   }
+
+   final long[] table = hashTables_.hashTable;
+   final long[] outHashes = new long[expected];
+   final S[] outSummaries = Util.newSummaryArray(hashTables_.summaryTable, expected);
+
+   //pack the occupied table slots into dense arrays, copying each summary
+   int filled = 0;
+   for (int slot = 0; slot < table.length; slot++) {
+     final long hash = table[slot];
+     if (hash != 0 && hash <= thetaLong_) {
+       outHashes[filled] = hash;
+       outSummaries[filled] = (S) hashTables_.summaryTable[slot].copy();
+       filled++;
+     }
+   }
+   assert filled == expected;
+   return new CompactSketch<>(outHashes, outSummaries, thetaLong_, empty_);
+ }
+
+ /**
+ * Returns true if there is a valid intersection result available
+ * @return true if there is a valid intersection result available
+ */
+ public boolean hasResult() {
+ //firstCall_ is cleared by every intersect(...) call and set again by reset()
+ return !firstCall_;
+ }
+
+ /**
+ * Resets the internal set to the initial state, which represents the Universal Set.
+ * After this call getResult() throws until the next intersect(...) call.
+ */
+ public void reset() {
+ hardReset();
+ }
+
+ //Returns to the Universal Set state: not empty, theta at maximum, no entries, no result yet.
+ private void hardReset() {
+   hashTables_.clear();
+   thetaLong_ = Long.MAX_VALUE;
+   empty_ = false;
+   firstCall_ = true;
+ }
+
+ //Collapses the state to a valid empty result: empty, theta at maximum, no entries.
+ private void resetToEmpty() {
+   hashTables_.clear();
+   thetaLong_ = Long.MAX_VALUE;
+   empty_ = true;
+   firstCall_ = false;
+ }
+
+ //Returns log2 of the table size for the given entry count: the next power of two at or above
+ //count / 0.75, but never below 2^MIN_LG_NOM_LONGS.
+ static int getLgTableSize(final int count) {
+   final int minTableSize = 1 << ThetaUtil.MIN_LG_NOM_LONGS;
+   final int neededSize = ceilingPowerOf2((int) ceil(count / 0.75));
+   return Integer.numberOfTrailingZeros(max(neededSize, minTableSize));
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java
new file mode 100644
index 000000000..161478d86
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java
@@ -0,0 +1,370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getEstimateOfBoverA;
+import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getLowerBoundForBoverA;
+import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getUpperBoundForBoverA;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Jaccard similarity of two Tuple Sketches, or alternatively, of a Tuple and Theta Sketch.
+ *
+ * Note: only retained hash values are compared, and the Tuple summary values are not accounted for in the
+ * similarity measure.
+ *
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+public final class JaccardSimilarity {
+ private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB
+ private static final double[] ONES = {1.0, 1.0, 1.0};
+
+ /**
+ * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
+ * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each
+ * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are
+ * distinct from each other. A Jaccard of .95 means the overlap between the two
+ * populations is 95% of the union of the two populations.
+ *
+ * Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+ * are 2^25 or 2^26, this method may produce unpredictable results.
+ *
+ * @param sketchA The first argument, a Tuple sketch with summary type S
+ * @param sketchB The second argument, a Tuple sketch with summary type S
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param Summary
+ * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index.
+ * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations.
+ */
+ public static double[] jaccard(
+ final Sketch sketchA,
+ final Sketch sketchB,
+ final SummarySetOperations summarySetOps) {
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
+
+ final int countA = sketchA.getRetainedEntries();
+ final int countB = sketchB.getRetainedEntries();
+
+ //Create the Union
+ final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS;
+ final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS;
+ final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK);
+ final Union union = new Union<>(newK, summarySetOps);
+ union.union(sketchA);
+ union.union(sketchB);
+
+ final Sketch unionAB = union.getResult();
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries();
+
+ //Check for identical data
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return ONES.clone();
+ }
+
+ //Create the Intersection
+ final Intersection inter = new Intersection<>(summarySetOps);
+ inter.intersect(sketchA);
+ inter.intersect(sketchB);
+ inter.intersect(unionAB); //ensures that intersection is a subset of the union
+ final Sketch interABU = inter.getResult();
+
+ final double lb = getLowerBoundForBoverA(unionAB, interABU);
+ final double est = getEstimateOfBoverA(unionAB, interABU);
+ final double ub = getUpperBoundForBoverA(unionAB, interABU);
+ return new double[] {lb, est, ub};
+ }
+
+ /**
+ * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
+ * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each
+ * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are
+ * distinct from each other. A Jaccard of .95 means the overlap between the two
+ * populations is 95% of the union of the two populations.
+ *
+ *
Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+ * are 2^25 or 2^26, this method may produce unpredictable results.
+ *
+ * @param sketchA The first argument, a Tuple sketch with summary type S
+ * @param sketchB The second argument, a Theta sketch
+ * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+ * This may not be null.
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param Summary
+ * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index.
+ * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations.
+ */
+ public static double[] jaccard(
+ final Sketch sketchA,
+ final org.apache.datasketches.theta2.Sketch sketchB,
+ final S summary, final SummarySetOperations summarySetOps) {
+ // Null case checks
+ if (summary == null) {
+ throw new SketchesArgumentException("Summary cannot be null."); }
+
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
+
+ final int countA = sketchA.getRetainedEntries();
+ final int countB = sketchB.getRetainedEntries(true);
+
+ //Create the Union
+ final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS;
+ final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS;
+ final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK);
+ final Union union = new Union<>(newK, summarySetOps);
+ union.union(sketchA);
+ union.union(sketchB, summary);
+
+ final Sketch unionAB = union.getResult();
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries();
+
+ //Check for identical data
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return ONES.clone();
+ }
+
+ //Create the Intersection
+ final Intersection inter = new Intersection<>(summarySetOps);
+ inter.intersect(sketchA);
+ inter.intersect(sketchB, summary);
+ inter.intersect(unionAB); //ensures that intersection is a subset of the union
+ final Sketch interABU = inter.getResult();
+
+ final double lb = getLowerBoundForBoverA(unionAB, interABU);
+ final double est = getEstimateOfBoverA(unionAB, interABU);
+ final double ub = getUpperBoundForBoverA(unionAB, interABU);
+ return new double[] {lb, est, ub};
+ }
+
+ /**
+ * Returns true if the two given sketches have exactly the same hash values and the same
+ * theta values. Thus, they are equivalent.
+ * @param sketchA The first argument, a Tuple sketch with summary type S
+ * @param sketchB The second argument, a Tuple sketch with summary type S
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param Summary
+ * @return true if the two given sketches have exactly the same hash values and the same
+ * theta values.
+ */
+ public static boolean exactlyEqual(
+ final Sketch sketchA,
+ final Sketch sketchB,
+ final SummarySetOperations summarySetOps) {
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return false; }
+ if (sketchA == sketchB) { return true; }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
+
+ final int countA = sketchA.getRetainedEntries();
+ final int countB = sketchB.getRetainedEntries();
+
+ //Create the Union
+ final Union union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps);
+ union.union(sketchA);
+ union.union(sketchB);
+ final Sketch unionAB = union.getResult();
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries();
+
+ //Check for identical counts and thetas
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if the two given sketches have exactly the same hash values and the same
+ * theta values. Thus, they are equivalent.
+ * @param sketchA The first argument, a Tuple sketch with summary type S
+ * @param sketchB The second argument, a Theta sketch
+ * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+ * This may not be null.
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param Summary
+ * @return true if the two given sketches have exactly the same hash values and the same
+ * theta values.
+ */
+ public static boolean exactlyEqual(
+ final Sketch sketchA,
+ final org.apache.datasketches.theta2.Sketch sketchB,
+ final S summary, final SummarySetOperations summarySetOps) {
+ // Null case checks
+ if (summary == null) {
+ throw new SketchesArgumentException("Summary cannot be null."); }
+
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return false; }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
+
+ final int countA = sketchA.getRetainedEntries();
+ final int countB = sketchB.getRetainedEntries(true);
+
+ //Create the Union
+ final Union union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps);
+ union.union(sketchA);
+ union.union(sketchB, summary);
+ final Sketch unionAB = union.getResult();
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries();
+
+ //Check for identical counts and thetas
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Tests similarity of a measured Sketch against an expected Sketch.
+ * Computes the lower bound of the Jaccard index JLB of the measured and
+ * expected sketches.
+ * if JLB ≥ threshold, then the sketches are considered to be
+ * similar with a confidence of 97.7%.
+ *
+ * @param measured a Tuple sketch with summary type S to be tested
+ * @param expected the reference Tuple sketch with summary type S that is considered to be correct.
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param threshold a real value between zero and one.
+ * @param Summary
+ * @return if true, the similarity of the two sketches is greater than the given threshold
+ * with at least 97.7% confidence.
+ */
+ public static boolean similarityTest(
+ final Sketch measured, final Sketch expected,
+ final SummarySetOperations summarySetOps,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioLB = jaccard(measured, expected, summarySetOps)[0]; //choosing the lower bound
+ return jRatioLB >= threshold;
+ }
+
+ /**
+ * Tests similarity of a measured Sketch against an expected Sketch.
+ * Computes the lower bound of the Jaccard index JLB of the measured and
+ * expected sketches.
+ * if JLB ≥ threshold, then the sketches are considered to be
+ * similar with a confidence of 97.7%.
+ *
+ * @param measured a Tuple sketch with summary type S to be tested
+ * @param expected the reference Theta sketch that is considered to be correct.
+ * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+ * This may not be null.
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param threshold a real value between zero and one.
+ * @param Summary
+ * @return if true, the similarity of the two sketches is greater than the given threshold
+ * with at least 97.7% confidence.
+ */
+ public static boolean similarityTest(
+ final Sketch measured, final org.apache.datasketches.theta2.Sketch expected,
+ final S summary, final SummarySetOperations summarySetOps,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioLB = jaccard(measured, expected, summary, summarySetOps)[0]; //choosing the lower bound
+ return jRatioLB >= threshold;
+ }
+
+ /**
+ * Tests dissimilarity of a measured Sketch against an expected Sketch.
+ * Computes the upper bound of the Jaccard index JUB of the measured and
+ * expected sketches.
+ * if JUB ≤ threshold, then the sketches are considered to be
+ * dissimilar with a confidence of 97.7%.
+ *
+ * @param measured a Tuple sketch with summary type S to be tested
+ * @param expected the reference Theta sketch that is considered to be correct.
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param threshold a real value between zero and one.
+ * @param Summary
+ * @return if true, the dissimilarity of the two sketches is greater than the given threshold
+ * with at least 97.7% confidence.
+ */
+ public static boolean dissimilarityTest(
+ final Sketch measured, final Sketch expected,
+ final SummarySetOperations summarySetOps,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioUB = jaccard(measured, expected, summarySetOps)[2]; //choosing the upper bound
+ return jRatioUB <= threshold;
+ }
+
+ /**
+ * Tests dissimilarity of a measured Sketch against an expected Sketch.
+ * Computes the upper bound of the Jaccard index JUB of the measured and
+ * expected sketches.
+ * if JUB ≤ threshold, then the sketches are considered to be
+ * dissimilar with a confidence of 97.7%.
+ *
+ * @param measured a Tuple sketch with summary type S to be tested
+ * @param expected the reference Theta sketch that is considered to be correct.
+ * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+ * This may not be null.
+ * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+ * @param threshold a real value between zero and one.
+ * @param Summary
+ * @return if true, the dissimilarity of the two sketches is greater than the given threshold
+ * with at least 97.7% confidence.
+ */
+ public static boolean dissimilarityTest(
+ final Sketch measured, final org.apache.datasketches.theta2.Sketch expected,
+ final S summary, final SummarySetOperations summarySetOps,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioUB = jaccard(measured, expected, summary, summarySetOps)[2]; //choosing the upper bound
+ return jRatioUB <= threshold;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java
new file mode 100644
index 000000000..6823cec3d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java
@@ -0,0 +1,621 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.common.Util.checkBounds;
+import static org.apache.datasketches.common.Util.exactLog2OfLong;
+import static org.apache.datasketches.thetacommon.HashOperations.count;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.reflect.Array;
+import java.nio.ByteOrder;
+import java.util.Objects;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.thetacommon.QuickSelect;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * A generic tuple sketch using the QuickSelect algorithm.
+ *
+ * @param <S> type of Summary
+ */
+class QuickSelectSketch extends Sketch {
+ private static final byte serialVersionUID = 2; // written as byte 1 by toByteArray(); heapify rejects anything higher
+
+ private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } // ordinal() is the bit index in the serialized flags byte, so order is part of the format
+
+ private static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); // used when no resize factor is given
+ private final int nomEntries_; // nominal entries; new instances store ceilingPowerOf2 of the requested value
+ private final int lgResizeFactor_; // log2 of the resize factor
+ private final float samplingProbability_; // 1f when the caller did not request sampling
+ private int lgCurrentCapacity_; // log2 of hashTable_.length
+ private int retEntries_; // number of retained entries
+ private int rebuildThreshold_; // derived from hashTable_ and nomEntries_ via setRebuildThreshold
+ private long[] hashTable_; // hash slots; parallel to summaryTable_
+ S[] summaryTable_; // null until the first summary arrives; a null slot means no entry at that index
+
+ /**
+  * This is to create a new, empty instance of a QuickSelectSketch with default resize factor
+  * and a sampling probability of 1.
+  * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+  * or equal to the given value.
+  * @param summaryFactory An instance of a SummaryFactory.
+  */
+ QuickSelectSketch(
+     final int nomEntries,
+     final SummaryFactory<S> summaryFactory) {
+   this(nomEntries, DEFAULT_LG_RESIZE_FACTOR, summaryFactory);
+ }
+
+ /**
+  * This is to create a new, empty instance of a QuickSelectSketch with custom resize factor
+  * and a sampling probability of 1.
+  * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+  * or equal to the given value.
+  * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
+  * <ul>
+  * <li>0 - no resizing (max size allocated),</li>
+  * <li>1 - double internal hash table each time it reaches a threshold</li>
+  * <li>2 - grow four times</li>
+  * <li>3 - grow eight times (default)</li>
+  * </ul>
+  * @param summaryFactory An instance of a SummaryFactory.
+  */
+ QuickSelectSketch(
+     final int nomEntries,
+     final int lgResizeFactor,
+     final SummaryFactory<S> summaryFactory) {
+   this(nomEntries, lgResizeFactor, 1f, summaryFactory);
+ }
+
+ /**
+  * This is to create a new, empty instance of a QuickSelectSketch with custom resize factor
+  * and sampling probability. The initial hash table size is obtained from
+  * {@code Util.getStartingCapacity(nomEntries, lgResizeFactor)}.
+  * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+  * or equal to the given value.
+  * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
+  * <ul>
+  * <li>0 - no resizing (max size allocated),</li>
+  * <li>1 - double internal hash table each time it reaches a threshold</li>
+  * <li>2 - grow four times</li>
+  * <li>3 - grow eight times (default)</li>
+  * </ul>
+  * @param samplingProbability the given sampling probability
+  * @param summaryFactory An instance of a SummaryFactory.
+  */
+ QuickSelectSketch(
+     final int nomEntries,
+     final int lgResizeFactor,
+     final float samplingProbability,
+     final SummaryFactory<S> summaryFactory) {
+   this(
+     nomEntries,
+     lgResizeFactor,
+     samplingProbability,
+     summaryFactory,
+     Util.getStartingCapacity(nomEntries, lgResizeFactor)
+   );
+ }
+
+ /**
+  * Target constructor for above constructors for a new instance.
+  * The sketch starts empty, with theta set to {@code Long.MAX_VALUE * samplingProbability}.
+  * @param nomEntries Nominal number of entries. Stored as the nearest power of 2 greater than
+  * or equal to the given value.
+  * @param lgResizeFactor log2(resizeFactor)
+  * @param samplingProbability the given sampling probability
+  * @param summaryFactory An instance of a SummaryFactory.
+  * @param startingSize starting size of the sketch, i.e. the initial hash table length.
+  * Its trailing-zero count is stored as log2 of the capacity, so it is expected to be a
+  * power of 2.
+  */
+ private QuickSelectSketch(
+     final int nomEntries,
+     final int lgResizeFactor,
+     final float samplingProbability,
+     final SummaryFactory<S> summaryFactory,
+     final int startingSize) {
+   super(
+     (long) (Long.MAX_VALUE * (double) samplingProbability),
+     true,
+     summaryFactory);
+   nomEntries_ = ceilingPowerOf2(nomEntries);
+   lgResizeFactor_ = lgResizeFactor;
+   samplingProbability_ = samplingProbability;
+   lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingSize);
+   retEntries_ = 0;
+   hashTable_ = new long[startingSize]; //must be before setRebuildThreshold
+   rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_);
+   summaryTable_ = null; // wait for the first summary to call Array.newInstance()
+ }
+
+ /**
+  * Copy constructor. The hash table is cloned and the summary table is duplicated through
+  * {@code Util.copySummaryArray}; all scalar state is copied as is.
+  * @param sketch the QuickSelectSketch to be deep copied.
+  */
+ QuickSelectSketch(final QuickSelectSketch<S> sketch) {
+   super(
+     sketch.thetaLong_,
+     sketch.empty_,
+     sketch.summaryFactory_);
+   nomEntries_ = sketch.nomEntries_;
+   lgResizeFactor_ = sketch.lgResizeFactor_;
+   samplingProbability_ = sketch.samplingProbability_;
+   lgCurrentCapacity_ = sketch.lgCurrentCapacity_;
+   retEntries_ = sketch.retEntries_;
+   hashTable_ = sketch.hashTable_.clone();
+   rebuildThreshold_ = sketch.rebuildThreshold_;
+   summaryTable_ = Util.copySummaryArray(sketch.summaryTable_);
+ }
+
+ /**
+  * This is to create an instance of a QuickSelectSketch given a serialized form.
+  * All parsing and validation is delegated to a fresh {@code Validate} helper.
+  * @param seg MemorySegment object with serialized QuickSelectSketch
+  * @param deserializer the SummaryDeserializer
+  * @param summaryFactory the SummaryFactory
+  * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
+  * This capability will be removed in a future release.
+  * Heapifying a CompactSketch is not deprecated.
+  */
+ @Deprecated
+ QuickSelectSketch(
+     final MemorySegment seg,
+     final SummaryDeserializer<S> deserializer,
+     final SummaryFactory<S> summaryFactory) {
+   this(new Validate<>(), seg, deserializer, summaryFactory);
+ }
+
+ /*
+  * This private constructor is used to protect against "Finalizer attacks".
+  * The private static inner class Validate performs validation and deserialization
+  * from the input MemorySegment and may throw exceptions. In order to protect against the
+  * attack, we must perform this validation prior to the constructor's super reaches the
+  * Object class.
+  * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass.
+  * Using an empty final finalize() is not recommended and is deprecated as of Java9.
+  */
+ private QuickSelectSketch(
+     final Validate<S> val,
+     final MemorySegment seg,
+     final SummaryDeserializer<S> deserializer,
+     final SummaryFactory<S> summaryFactory) {
+   // Arguments are evaluated left to right, so val.validate(...) has already populated
+   // val.myEmpty (and every other val field) by the time it is read here.
+   super(val.validate(seg, deserializer), val.myEmpty, summaryFactory);
+   nomEntries_ = val.myNomEntries;
+   lgResizeFactor_ = val.myLgResizeFactor;
+   samplingProbability_ = val.mySamplingProbability;
+   lgCurrentCapacity_ = val.myLgCurrentCapacity;
+   retEntries_ = val.myRetEntries;
+   rebuildThreshold_ = val.myRebuildThreshold;
+   hashTable_ = val.myHashTable;
+   summaryTable_ = val.mySummaryTable;
+ }
+
+ private static final class Validate {
+ //super fields
+ long myThetaLong;
+ boolean myEmpty;
+ //this fields
+ int myNomEntries;
+ int myLgResizeFactor;
+ float mySamplingProbability;
+ int myLgCurrentCapacity;
+ int myRetEntries;
+ int myRebuildThreshold;
+ long[] myHashTable;
+ S[] mySummaryTable;
+
+ @SuppressWarnings("unchecked")
+ long validate(
+ final MemorySegment seg,
+ final SummaryDeserializer> deserializer) {
+ Objects.requireNonNull(seg, "SourceMemory must not be null.");
+ Objects.requireNonNull(deserializer, "Deserializer must not be null.");
+ checkBounds(0, 8, seg.byteSize());
+
+ int offset = 0;
+ final byte preambleLongs = seg.get(JAVA_BYTE, offset++); //byte 0 PreLongs
+ final byte version = seg.get(JAVA_BYTE, offset++); //byte 1 SerVer
+ final byte familyId = seg.get(JAVA_BYTE, offset++); //byte 2 FamID
+ SerializerDeserializer.validateFamily(familyId, preambleLongs);
+ if (version > serialVersionUID) {
+ throw new SketchesArgumentException(
+ "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: "
+ + version);
+ }
+ SerializerDeserializer.validateType(seg.get(JAVA_BYTE, offset++), //byte 3
+ SerializerDeserializer.SketchType.QuickSelectSketch);
+ final byte flags = seg.get(JAVA_BYTE, offset++); //byte 4
+ final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
+ if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+ throw new SketchesArgumentException("Endian byte order mismatch");
+ }
+ myNomEntries = 1 << seg.get(JAVA_BYTE, offset++); //byte 5
+ myLgCurrentCapacity = seg.get(JAVA_BYTE, offset++); //byte 6
+ myLgResizeFactor = seg.get(JAVA_BYTE, offset++); //byte 7
+
+ checkBounds(0, preambleLongs * 8L, seg.byteSize());
+ final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0;
+ mySamplingProbability = isInSamplingMode ? seg.get(JAVA_FLOAT_UNALIGNED, offset) : 1f; //bytes 8 - 11
+ if (isInSamplingMode) {
+ offset += Float.BYTES;
+ }
+
+ final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
+ if (isThetaIncluded) {
+ myThetaLong = seg.get(JAVA_LONG_UNALIGNED, offset);
+ offset += Long.BYTES;
+ } else {
+ myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability);
+ }
+
+ int count = 0;
+ final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
+ if (hasEntries) {
+ count = seg.get(JAVA_INT_UNALIGNED, offset);
+ offset += Integer.BYTES;
+ }
+ final int currentCapacity = 1 << myLgCurrentCapacity;
+ myHashTable = new long[currentCapacity];
+ for (int i = 0; i < count; i++) {
+ final long hash = seg.get(JAVA_LONG_UNALIGNED, offset);
+ offset += Long.BYTES;
+ final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset);
+ final DeserializeResult> summaryResult = deserializer.heapifySummary(segRegion);
+ final S summary = (S) summaryResult.getObject();
+ offset += summaryResult.getSize();
+ //in-place equivalent to insert(hash, summary):
+ final int index = HashOperations.hashInsertOnly(myHashTable, myLgCurrentCapacity, hash);
+ if (mySummaryTable == null) {
+ mySummaryTable = (S[]) Array.newInstance(summary.getClass(), myHashTable.length);
+ }
+ mySummaryTable[index] = summary;
+ myRetEntries++;
+ myEmpty = false;
+ }
+ myEmpty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
+ myRebuildThreshold = setRebuildThreshold(myHashTable, myNomEntries);
+ return myThetaLong;
+ }
+
+ } //end class Validate
+
+ /**
+  * Creates a deep copy of this sketch through the copy constructor.
+  * @return a deep copy of this sketch
+  */
+ QuickSelectSketch<S> copy() {
+   return new QuickSelectSketch<>(this);
+ }
+
+ long[] getHashTable() { // hands out the internal hash array itself, not a copy
+ return hashTable_;
+ }
+
+ @Override
+ public int getRetainedEntries() { // the entry count tracked in retEntries_
+ return retEntries_;
+ }
+
+ @Override
+ public int getCountLessThanThetaLong(final long thetaLong) {
+ return count(hashTable_, thetaLong); // delegates to HashOperations.count over the internal hash table; presumably counts retained hashes below thetaLong - confirm against HashOperations
+ }
+
+ S[] getSummaryTable() { // internal array, not a copy; null until the first summary has been stored
+ return summaryTable_;
+ }
+
+ /**
+ * Get configured nominal number of entries. For a newly created sketch this is the
+ * requested value rounded by ceilingPowerOf2.
+ * @return nominal number of entries
+ */
+ public int getNominalEntries() {
+ return nomEntries_;
+ }
+
+ /**
+ * Get log_base2 of Nominal Entries, computed with exactLog2OfLong.
+ * @return log_base2 of Nominal Entries
+ */
+ public int getLgK() {
+ return exactLog2OfLong(nomEntries_);
+ }
+
+ /**
+ * Get configured sampling probability. This is 1 when the sketch was created without an
+ * explicit sampling probability.
+ * @return sampling probability
+ */
+ public float getSamplingProbability() {
+ return samplingProbability_;
+ }
+
+ /**
+ * Get current capacity, i.e. 2 raised to the stored log2 of the current hash table size.
+ * @return current capacity
+ */
+ public int getCurrentCapacity() {
+ return 1 << lgCurrentCapacity_;
+ }
+
+ /**
+ * Get configured resize factor, looked up from the stored log2(resizeFactor).
+ * @return resize factor
+ */
+ public ResizeFactor getResizeFactor() {
+ return ResizeFactor.getRF(lgResizeFactor_);
+ }
+
+ /**
+  * Rebuilds the sketch, reducing the actual number of entries to the nominal number of
+  * entries. Does nothing when the retained count does not exceed the nominal count.
+  */
+ public void trim() {
+   final boolean withinNominal = retEntries_ <= nomEntries_;
+   if (withinNominal) { return; }
+   updateTheta();
+   // Resize to the current length: the table size is kept, only its contents are rebuilt.
+   resize(hashTable_.length);
+ }
+
+ /**
+  * Resets this sketch to an empty state: no retained entries, theta back at the initial
+  * value implied by the sampling probability, and a fresh hash table of the starting capacity.
+  */
+ public void reset() {
+   final int initialCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_);
+   final long initialThetaLong = (long) (Long.MAX_VALUE * (double) samplingProbability_);
+
+   empty_ = true;
+   retEntries_ = 0;
+   thetaLong_ = initialThetaLong;
+
+   lgCurrentCapacity_ = Integer.numberOfTrailingZeros(initialCapacity);
+   hashTable_ = new long[initialCapacity]; //must be before setRebuildThreshold
+   rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_);
+   summaryTable_ = null; // wait for the first summary to call Array.newInstance()
+ }
+
+ /**
+  * Converts the current state of the sketch into a compact sketch.
+  * Retained hashes and copies of their summaries are packed into dense arrays.
+  * @return compact sketch
+  */
+ @Override
+ @SuppressWarnings("unchecked")
+ public CompactSketch<S> compact() {
+   final int numEntries = getRetainedEntries();
+   if (numEntries == 0) {
+     // An empty sketch always reports the maximum theta; a non-empty sketch with no
+     // retained entries keeps its current theta.
+     if (empty_) { return new CompactSketch<>(null, null, Long.MAX_VALUE, true); }
+     return new CompactSketch<>(null, null, thetaLong_, false);
+   }
+   final long[] hashArr = new long[numEntries];
+   final S[] summaryArr = Util.newSummaryArray(summaryTable_, numEntries);
+   int i = 0;
+   for (int j = 0; j < hashTable_.length; j++) {
+     // A non-null summary marks an occupied slot.
+     if (summaryTable_[j] != null) {
+       hashArr[i] = hashTable_[j];
+       summaryArr[i] = (S) summaryTable_[j].copy();
+       i++;
+     }
+   }
+   return new CompactSketch<>(hashArr, summaryArr, thetaLong_, empty_);
+ }
+
+ // Layout of first 8 bytes:
+ // Long || Start Byte Adr:
+ // Adr:
+ //      ||    7 |     6 |     5 |     4 |      3 |     2 |      1 |              0 |
+ //  0   ||   RF | lgArr | lgNom | Flags | SkType | FamID | SerVer | Preamble_Longs |
+ /**
+  * This serializes an UpdatableSketch (QuickSelectSketch).
+  *
+  * <p>After the 8-byte header the image holds, each only when applicable: the sampling
+  * probability (float), theta (long), the entry count (int), and then one
+  * (hash, serialized summary) pair per retained entry.</p>
+  *
+  * <p>Theta is written whenever it is below the initial theta implied by the sampling
+  * probability, because that initial value is what heapify assumes when theta is absent.</p>
+  *
+  * @return serialized representation of an UpdatableSketch (QuickSelectSketch).
+  * @deprecated As of 3.0.0, serializing an UpdatableSketch is deprecated.
+  * This capability will be removed in a future release.
+  * Serializing a CompactSketch is not deprecated.
+  */
+ @Deprecated
+ @Override
+ public byte[] toByteArray() {
+   final boolean inSamplingMode = isInSamplingMode();
+   final boolean hasEntries = retEntries_ > 0;
+
+   // Serialize the summaries first so the total size is known before allocating the output.
+   byte[][] summariesBytes = null;
+   int summariesBytesLength = 0;
+   if (hasEntries) {
+     summariesBytes = new byte[retEntries_][];
+     int i = 0;
+     for (int j = 0; j < summaryTable_.length; j++) {
+       if (summaryTable_[j] != null) {
+         summariesBytes[i] = summaryTable_[j].toByteArray();
+         summariesBytesLength += summariesBytes[i].length;
+         i++;
+       }
+     }
+   }
+
+   // Compare theta against the initial theta in the same (long) domain. Comparing it
+   // directly against the probability, a value in (0,1], would effectively never be true
+   // and would drop the current theta of a sampling sketch from the image.
+   final long initialThetaLong = (long) (Long.MAX_VALUE * (double) samplingProbability_);
+   final boolean isThetaIncluded = inSamplingMode
+       ? thetaLong_ < initialThetaLong : thetaLong_ < Long.MAX_VALUE;
+
+   int sizeBytes =
+       Byte.BYTES // preamble longs
+     + Byte.BYTES // serial version
+     + Byte.BYTES // family
+     + Byte.BYTES // sketch type
+     + Byte.BYTES // flags
+     + Byte.BYTES // log2(nomEntries)
+     + Byte.BYTES // log2(currentCapacity)
+     + Byte.BYTES; // log2(resizeFactor)
+   if (inSamplingMode) {
+     sizeBytes += Float.BYTES; // samplingProbability
+   }
+   if (isThetaIncluded) {
+     sizeBytes += Long.BYTES;
+   }
+   if (hasEntries) {
+     sizeBytes += Integer.BYTES; // count
+   }
+   sizeBytes += Long.BYTES * retEntries_ + summariesBytesLength;
+
+   final byte[] bytes = new byte[sizeBytes];
+   int offset = 0;
+   bytes[offset++] = PREAMBLE_LONGS;
+   bytes[offset++] = serialVersionUID;
+   bytes[offset++] = (byte) Family.TUPLE.getID();
+   bytes[offset++] = (byte) SerializerDeserializer.SketchType.QuickSelectSketch.ordinal();
+   final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+   bytes[offset++] = (byte) (
+     (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
+     | (inSamplingMode ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0)
+     | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
+     | (hasEntries ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
+     | (isThetaIncluded ? 1 << Flags.IS_THETA_INCLUDED.ordinal() : 0)
+   );
+   bytes[offset++] = (byte) Integer.numberOfTrailingZeros(nomEntries_);
+   bytes[offset++] = (byte) lgCurrentCapacity_;
+   bytes[offset++] = (byte) lgResizeFactor_;
+   // Guarded by the same condition as the size computation and the flag above, so the
+   // written bytes always match the allocated size.
+   if (inSamplingMode) {
+     ByteArrayUtil.putFloatLE(bytes, offset, samplingProbability_);
+     offset += Float.BYTES;
+   }
+   if (isThetaIncluded) {
+     ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
+     offset += Long.BYTES;
+   }
+   if (hasEntries) {
+     ByteArrayUtil.putIntLE(bytes, offset, retEntries_);
+     offset += Integer.BYTES;
+     // Same slot order as the summary pass above, so summariesBytes[i] belongs to slot j.
+     int i = 0;
+     for (int j = 0; j < hashTable_.length; j++) {
+       if (summaryTable_[j] != null) {
+         ByteArrayUtil.putLongLE(bytes, offset, hashTable_[j]);
+         offset += Long.BYTES;
+         System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
+         offset += summariesBytes[i].length;
+         i++;
+       }
+     }
+   }
+   return bytes;
+ }
+
+ // non-public methods below
+
+  /**
+   * Special back-door insert used for merging.
+   *
+   * <p>Not sufficient by itself: the caller must also keep track of theta of the other sketch.
+   * The sketch is marked non-empty unconditionally; the entry itself is only stored when the hash
+   * is positive and below the current theta.</p>
+   *
+   * @param hash the hash of the incoming entry
+   * @param summary the incoming summary; it is copied, never stored directly
+   * @param summarySetOps supplies the union used when the hash is already present
+   */
+  @SuppressWarnings("unchecked")
+  void merge(final long hash, final S summary, final SummarySetOperations<S> summarySetOps) {
+    empty_ = false;
+    if (hash > 0 && hash < thetaLong_) {
+      final int index = findOrInsert(hash);
+      if (index < 0) {
+        insertSummary(~index, (S) summary.copy()); //did not find, so insert
+      } else {
+        insertSummary(index, summarySetOps.union(summaryTable_[index], (S) summary.copy()));
+      }
+      rebuildIfNeeded();
+    }
+  }
+
+  /**
+   * Tells whether this sketch samples its input.
+   * @return true if the sampling probability is below 1
+   */
+  boolean isInSamplingMode() {
+    final boolean sampling = samplingProbability_ < 1f;
+    return sampling;
+  }
+
+  /**
+   * Overwrites theta (expressed as a long) without touching the retained entries.
+   * @param theta the new theta value as a long
+   */
+  void setThetaLong(final long theta) {
+    this.thetaLong_ = theta;
+  }
+
+  /**
+   * Overwrites the empty flag of this sketch.
+   * @param value the new state of the empty flag
+   */
+  void setEmpty(final boolean value) {
+    this.empty_ = value;
+  }
+
+  /**
+   * Looks up the given hash in the hash table, inserting it if absent.
+   * A negative result from the search is treated as a new insertion, so the retained-entry
+   * count is incremented in that case.
+   * @param hash the hash to find or insert
+   * @return the value returned by {@code HashOperations.hashSearchOrInsert}
+   */
+  int findOrInsert(final long hash) {
+    final int slot = HashOperations.hashSearchOrInsert(hashTable_, lgCurrentCapacity_, hash);
+    final boolean newlyInserted = slot < 0;
+    if (newlyInserted) {
+      retEntries_++;
+    }
+    return slot;
+  }
+
+  /**
+   * Grows or rebuilds the tables once the retained-entry count exceeds the rebuild threshold.
+   * While the table is not larger than the nominal entries it is grown by the resize factor;
+   * otherwise theta is lowered and the table is rebuilt at its current size.
+   * @return true if a resize or rebuild took place
+   */
+  boolean rebuildIfNeeded() {
+    if (retEntries_ > rebuildThreshold_) {
+      final int tableLength = hashTable_.length;
+      if (tableLength <= nomEntries_) {
+        resize(tableLength * (1 << lgResizeFactor_));
+      } else {
+        updateTheta();
+        rebuild();
+      }
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Rebuilds the tables at their current size, keeping only the entries
+   * that the resize step retains.
+   */
+  void rebuild() {
+    final int unchangedSize = hashTable_.length;
+    resize(unchangedSize);
+  }
+
+  /**
+   * Inserts a hash with its summary, then counts the entry and clears the empty flag.
+   * @param hash the hash to insert
+   * @param summary the summary stored at the slot chosen for the hash
+   */
+  void insert(final long hash, final S summary) {
+    final int slot = HashOperations.hashInsertOnly(hashTable_, lgCurrentCapacity_, hash);
+    insertSummary(slot, summary);
+    empty_ = false;
+    retEntries_++;
+  }
+
+  /**
+   * Sets theta to the value selected by {@code QuickSelect.select} at pivot {@code nomEntries_}
+   * over the hashes of all occupied slots.
+   */
+  private void updateTheta() {
+    // The hash table is paired slot-by-slot with the summary table, so its layout must not be
+    // disturbed. The hashes are therefore copied out, compacting them at the same time.
+    // A whole-table clone combined with the selectExcludingZeros method might be an alternative;
+    // it is not clear whether that would be any faster.
+    final long[] compactHashes = new long[retEntries_];
+    int count = 0;
+    for (int slot = 0; slot < hashTable_.length; slot++) {
+      if (summaryTable_[slot] == null) { continue; }
+      compactHashes[count] = hashTable_[slot];
+      count++;
+    }
+    thetaLong_ = QuickSelect.select(compactHashes, 0, retEntries_ - 1, nomEntries_);
+  }
+
+  /**
+   * Replaces both tables with new ones of the given size and re-inserts every old entry that
+   * has a summary and a hash below the current theta. The rebuild threshold is recomputed last.
+   * @param newSize length of the new tables
+   */
+  private void resize(final int newSize) {
+    final long[] previousHashes = hashTable_;
+    final S[] previousSummaries = summaryTable_;
+    hashTable_ = new long[newSize];
+    summaryTable_ = Util.newSummaryArray(previousSummaries, newSize);
+    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize);
+    retEntries_ = 0; // insert() counts the surviving entries again
+    for (int slot = 0; slot < previousHashes.length; slot++) {
+      final S summary = previousSummaries[slot];
+      if (summary == null) { continue; }
+      final long hash = previousHashes[slot];
+      if (hash >= thetaLong_) { continue; }
+      insert(hash, summary);
+    }
+    rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_);
+  }
+
+  /**
+   * Computes the entry count above which the table must be resized or rebuilt.
+   * @param hashTable the current hash table
+   * @param nomEntries the nominal number of entries
+   * @return the table length scaled by the resize threshold while the table is not larger than
+   * nomEntries, otherwise scaled by the rebuild threshold
+   */
+  private static int setRebuildThreshold(final long[] hashTable, final int nomEntries) {
+    final int tableLength = hashTable.length;
+    if (tableLength <= nomEntries) {
+      return (int) (tableLength * ThetaUtil.RESIZE_THRESHOLD);
+    }
+    return (int) (tableLength * ThetaUtil.REBUILD_THRESHOLD);
+  }
+
+  /**
+   * Stores the given summary at the given slot. If no summary table exists yet, one is created
+   * with the runtime class of the given summary and the length of the hash table.
+   * @param index slot in the summary table
+   * @param summary the summary to store
+   */
+  @SuppressWarnings("unchecked")
+  protected void insertSummary(final int index, final S summary) {
+    if (summaryTable_ == null) {
+      final Class<?> summaryClass = summary.getClass();
+      summaryTable_ = (S[]) Array.newInstance(summaryClass, hashTable_.length);
+    }
+    summaryTable_[index] = summary;
+  }
+
+  /**
+   * Returns an iterator backed directly by this sketch's hash and summary tables
+   * (no copy is made).
+   * @return an iterator over the tables of this sketch
+   */
+  @Override
+  public TupleSketchIterator<S> iterator() {
+    return new TupleSketchIterator<>(hashTable_, summaryTable_);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java
new file mode 100644
index 000000000..cbe40ae4c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Multipurpose serializer-deserializer for a collection of sketches defined by the enum.
+ */
+public final class SerializerDeserializer {
+
+  /**
+   * Defines the sketch classes that this SerializerDeserializer can handle.
+   * The position of a constant in this enum is the value of the sketch type byte.
+   */
+  public static enum SketchType {
+    /** QuickSelectSketch */
+    QuickSelectSketch,
+    /** CompactSketch */
+    CompactSketch,
+    /** ArrayOfDoublesQuickSelectSketch */
+    ArrayOfDoublesQuickSelectSketch,
+    /** ArrayOfDoublesCompactSketch */
+    ArrayOfDoublesCompactSketch,
+    /** ArrayOfDoublesUnion */
+    ArrayOfDoublesUnion
+  }
+
+  /** Byte offset of the sketch type within a serialized image. */
+  static final int TYPE_BYTE_OFFSET = 3;
+
+  /**
+   * Validates the preamble-Longs value given the family ID
+   * @param familyId the given family ID
+   * @param preambleLongs the given preambleLongs value
+   * @throws SketchesArgumentException if the family is not TUPLE or if preambleLongs is outside
+   * the range allowed for the TUPLE family
+   */
+  public static void validateFamily(final byte familyId, final byte preambleLongs) {
+    final Family family = Family.idToFamily(familyId);
+    if (!family.equals(Family.TUPLE)) {
+      throw new SketchesArgumentException(
+          "Possible corruption: Invalid Family: " + family.toString());
+    }
+    final boolean tooSmall = preambleLongs < Family.TUPLE.getMinPreLongs();
+    if (tooSmall || preambleLongs > Family.TUPLE.getMaxPreLongs()) {
+      throw new SketchesArgumentException(
+          "Possible corruption: Invalid PreambleLongs value for family TUPLE: " + preambleLongs);
+    }
+  }
+
+  /**
+   * Validates the sketch type byte versus the expected value
+   * @param sketchTypeByte the given sketch type byte
+   * @param expectedType the expected value
+   * @throws SketchesArgumentException if the byte is not a valid type or differs from the
+   * expected type
+   */
+  public static void validateType(final byte sketchTypeByte, final SketchType expectedType) {
+    final SketchType actualType = getSketchType(sketchTypeByte);
+    if (actualType.equals(expectedType)) {
+      return;
+    }
+    throw new SketchesArgumentException("Sketch Type mismatch. Expected " + expectedType.name()
+        + ", got " + actualType.name());
+  }
+
+  /**
+   * Gets the sketch type byte from the given MemorySegment image
+   * @param seg the given MemorySegment image
+   * @return the SketchType
+   */
+  public static SketchType getSketchType(final MemorySegment seg) {
+    return getSketchType(seg.get(JAVA_BYTE, TYPE_BYTE_OFFSET));
+  }
+
+  /**
+   * Maps a sketch type byte to its enum constant.
+   * @param sketchTypeByte the given sketch type byte
+   * @return the SketchType at that position
+   * @throws SketchesArgumentException if the byte is negative or beyond the last constant
+   */
+  private static SketchType getSketchType(final byte sketchTypeByte) {
+    final SketchType[] knownTypes = SketchType.values();
+    final boolean inRange = (sketchTypeByte >= 0) && (sketchTypeByte < knownTypes.length);
+    if (!inRange) {
+      throw new SketchesArgumentException("Invalid Sketch Type " + sketchTypeByte);
+    }
+    return knownTypes[sketchTypeByte];
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketch.java b/src/main/java/org/apache/datasketches/tuple2/Sketch.java
new file mode 100644
index 000000000..20315b03d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Sketch.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static org.apache.datasketches.common.Util.LS;
+
+import org.apache.datasketches.thetacommon.BinomialBoundsN;
+
+/**
+ * This is an equivalent to org.apache.datasketches.theta2.Sketch with
+ * addition of a user-defined Summary object associated with every unique entry
+ * in the sketch.
+ * @param <S> Type of Summary
+ */
+public abstract class Sketch<S extends Summary> {
+
+  /** Value written as the preamble-longs byte of a serialized image. */
+  protected static final byte PREAMBLE_LONGS = 1;
+
+  long thetaLong_;
+  boolean empty_ = true;
+  protected SummaryFactory<S> summaryFactory_ = null;
+
+  /**
+   * Base constructor that records the given state.
+   * @param thetaLong theta expressed as a long
+   * @param empty the initial state of the empty flag
+   * @param summaryFactory the summary factory, which may be null
+   */
+  Sketch(final long thetaLong, final boolean empty, final SummaryFactory<S> summaryFactory) {
+    this.thetaLong_ = thetaLong;
+    this.empty_ = empty;
+    this.summaryFactory_ = summaryFactory;
+  }
+
+  /**
+   * Converts this sketch to a CompactSketch on the Java heap.
+   *
+   * <p>If this sketch is already in compact form this operation returns this.</p>
+   *
+   * @return this sketch as a CompactSketch on the Java heap.
+   */
+  public abstract CompactSketch<S> compact();
+
+  /**
+   * Estimates the cardinality of the set (number of unique values presented to the sketch)
+   * @return best estimate of the number of unique values
+   */
+  public double getEstimate() {
+    if (!isEstimationMode()) { return getRetainedEntries(); }
+    return getRetainedEntries() / getTheta();
+  }
+
+  /**
+   * Gets the approximate upper error bound given the specified number of Standard Deviations.
+   * This will return getEstimate() if isEmpty() is true.
+   *
+   * @param numStdDev
+   * See Number of Standard Deviations
+   * @return the upper bound.
+   */
+  public double getUpperBound(final int numStdDev) {
+    if (!isEstimationMode()) { return getRetainedEntries(); }
+    return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, empty_);
+  }
+
+  /**
+   * Gets the approximate lower error bound given the specified number of Standard Deviations.
+   * This will return getEstimate() if isEmpty() is true.
+   *
+   * @param numStdDev
+   * See Number of Standard Deviations
+   * @return the lower bound.
+   */
+  public double getLowerBound(final int numStdDev) {
+    if (!isEstimationMode()) { return getRetainedEntries(); }
+    return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, empty_);
+  }
+
+  /**
+   * Gets the estimate of the true distinct population of subset tuples represented by the count
+   * of entries in a subset of the total retained entries of the sketch.
+   * @param numSubsetEntries number of entries for a chosen subset of the sketch.
+   * @return the estimate of the true distinct population of subset tuples represented by the count
+   * of entries in a subset of the total retained entries of the sketch.
+   */
+  public double getEstimate(final int numSubsetEntries) {
+    if (!isEstimationMode()) { return numSubsetEntries; }
+    return numSubsetEntries / getTheta();
+  }
+
+  /**
+   * Gets the estimate of the lower bound of the true distinct population represented by the count
+   * of entries in a subset of the total retained entries of the sketch.
+   * @param numStdDev
+   * See Number of Standard Deviations
+   * @param numSubsetEntries number of entries for a chosen subset of the sketch.
+   * @return the estimate of the lower bound of the true distinct population represented by the count
+   * of entries in a subset of the total retained entries of the sketch.
+   */
+  public double getLowerBound(final int numStdDev, final int numSubsetEntries) {
+    if (!isEstimationMode()) { return numSubsetEntries; }
+    return BinomialBoundsN.getLowerBound(numSubsetEntries, getTheta(), numStdDev, isEmpty());
+  }
+
+  /**
+   * Gets the estimate of the upper bound of the true distinct population represented by the count
+   * of entries in a subset of the total retained entries of the sketch.
+   * @param numStdDev
+   * See Number of Standard Deviations
+   * @param numSubsetEntries number of entries for a chosen subset of the sketch.
+   * @return the estimate of the upper bound of the true distinct population represented by the count
+   * of entries in a subset of the total retained entries of the sketch.
+   */
+  public double getUpperBound(final int numStdDev, final int numSubsetEntries) {
+    if (!isEstimationMode()) { return numSubsetEntries; }
+    return BinomialBoundsN.getUpperBound(numSubsetEntries, getTheta(), numStdDev, isEmpty());
+  }
+
+  /**
+   * See Empty
+   * @return true if empty.
+   */
+  public boolean isEmpty() {
+    return empty_;
+  }
+
+  /**
+   * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode).
+   * This is true if {@code theta < 1.0} AND isEmpty() is false.
+   * @return true if the sketch is in estimation mode.
+   */
+  public boolean isEstimationMode() {
+    return thetaLong_ < Long.MAX_VALUE && !isEmpty();
+  }
+
+  /**
+   * Returns number of retained entries
+   * @return number of retained entries
+   */
+  public abstract int getRetainedEntries();
+
+  /**
+   * Gets the number of hash values less than the given theta expressed as a long.
+   * @param thetaLong the given theta as a long between zero and Long.MAX_VALUE.
+   * @return the number of hash values less than the given thetaLong.
+   */
+  public abstract int getCountLessThanThetaLong(final long thetaLong);
+
+  /**
+   * Gets the Summary Factory class of type S
+   * @return the Summary Factory class of type S
+   */
+  public SummaryFactory<S> getSummaryFactory() {
+    return summaryFactory_;
+  }
+
+  /**
+   * Gets the value of theta as a double between zero and one
+   * @return the value of theta as a double
+   */
+  public double getTheta() {
+    return getThetaLong() / (double) Long.MAX_VALUE;
+  }
+
+  /**
+   * This is to serialize a sketch instance to a byte array.
+   *
+   * <p>As of 3.0.0, serializing an UpdatableSketch is deprecated.
+   * This capability will be removed in a future release.
+   * Serializing a CompactSketch is not deprecated.</p>
+   * @return serialized representation of the sketch
+   */
+  public abstract byte[] toByteArray();
+
+  /**
+   * Returns a SketchIterator
+   * @return a SketchIterator
+   */
+  public abstract TupleSketchIterator<S> iterator();
+
+  /**
+   * Returns Theta as a long
+   * @return Theta as a long; Long.MAX_VALUE is reported while the sketch is empty
+   */
+  public long getThetaLong() {
+    return isEmpty() ? Long.MAX_VALUE : thetaLong_;
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS);
+    sb.append(" Estimate : ").append(getEstimate()).append(LS);
+    sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS);
+    sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS);
+    sb.append(" Theta (double) : ").append(this.getTheta()).append(LS);
+    sb.append(" Theta (long) : ").append(this.getThetaLong()).append(LS);
+    sb.append(" EstMode? : ").append(isEstimationMode()).append(LS);
+    sb.append(" Empty? : ").append(isEmpty()).append(LS);
+    sb.append(" Retained Entries : ").append(this.getRetainedEntries()).append(LS);
+    // Updatable sketches carry extra configuration that is worth reporting.
+    if (this instanceof UpdatableSketch) {
+      @SuppressWarnings("rawtypes")
+      final UpdatableSketch updatable = (UpdatableSketch) this;
+      sb.append(" Nominal Entries (k) : ").append(updatable.getNominalEntries()).append(LS);
+      sb.append(" Current Capacity : ").append(updatable.getCurrentCapacity()).append(LS);
+      sb.append(" Resize Factor : ").append(updatable.getResizeFactor().getValue()).append(LS);
+      sb.append(" Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS);
+    }
+    sb.append("### END SKETCH SUMMARY").append(LS);
+    return sb.toString();
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketches.java b/src/main/java/org/apache/datasketches/tuple2/Sketches.java
new file mode 100644
index 000000000..1fdc545dc
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Sketches.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Convenient static methods to instantiate generic tuple sketches.
+ */
+@SuppressWarnings("deprecation")
+public final class Sketches {
+
+  /**
+   * Creates an empty sketch.
+   * @param <S> Type of Summary
+   * @return an empty instance of Sketch
+   */
+  public static <S extends Summary> Sketch<S> createEmptySketch() {
+    return new CompactSketch<>(null, null, Long.MAX_VALUE, true);
+  }
+
+  /**
+   * Instantiate a Sketch from a given MemorySegment.
+   * The sketch type stored in the image decides whether a QuickSelectSketch or a CompactSketch
+   * is created.
+   * @param <S> Type of Summary
+   * @param seg MemorySegment object representing a Sketch
+   * @param deserializer instance of SummaryDeserializer
+   * @return Sketch created from its MemorySegment representation
+   */
+  public static <S extends Summary> Sketch<S> heapifySketch(
+      final MemorySegment seg,
+      final SummaryDeserializer<S> deserializer) {
+    final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg);
+    if (sketchType == SerializerDeserializer.SketchType.QuickSelectSketch) {
+      return new QuickSelectSketch<>(seg, deserializer, null);
+    }
+    return new CompactSketch<>(seg, deserializer);
+  }
+
+  /**
+   * Instantiate UpdatableSketch from a given MemorySegment
+   * @param <U> Type of update value
+   * @param <S> Type of Summary
+   * @param seg MemorySegment object representing a Sketch
+   * @param deserializer instance of SummaryDeserializer
+   * @param summaryFactory instance of SummaryFactory
+   * @return Sketch created from its MemorySegment representation
+   */
+  public static <U, S extends UpdatableSummary<U>> UpdatableSketch<U, S> heapifyUpdatableSketch(
+      final MemorySegment seg,
+      final SummaryDeserializer<S> deserializer,
+      final SummaryFactory<S> summaryFactory) {
+    return new UpdatableSketch<>(seg, deserializer, summaryFactory);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Summary.java b/src/main/java/org/apache/datasketches/tuple2/Summary.java
new file mode 100644
index 000000000..8fb028401
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Summary.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * Interface for user-defined Summary, which is associated with every hash in a tuple sketch
+ */
+public interface Summary {
+
+  /**
+   * Deep copy.
+   *
+   * <p>Caution: This must implement a deep copy.</p>
+   *
+   * @return deep copy of the Summary
+   */
+  public Summary copy();
+
+  /**
+   * This is to serialize a Summary instance to a byte array.
+   *
+   * <p>The user should encode in the byte array its total size, which is used during
+   * deserialization, especially if the Summary has variable sized elements.</p>
+   *
+   * @return serialized representation of the Summary
+   */
+  public byte[] toByteArray();
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java
new file mode 100644
index 000000000..93d9f54d9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Interface for deserializing user-defined Summary
+ * @param <S> type of Summary
+ */
+public interface SummaryDeserializer<S extends Summary> {
+
+  /**
+   * This is to create an instance of a Summary given a serialized representation.
+   * The user may assume that the start of the given MemorySegment is the correct place to start
+   * deserializing. However, the user must be able to determine the number of bytes required to
+   * deserialize the summary as the capacity of the given MemorySegment may
+   * include multiple such summaries and may be much larger than required for a single summary.
+   * @param seg MemorySegment object with serialized representation of a Summary
+   * @return DeserializedResult object, which contains a Summary object and number of bytes read
+   * from the MemorySegment
+   */
+  public DeserializeResult<S> heapifySummary(MemorySegment seg);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java
new file mode 100644
index 000000000..ea229a26c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * Interface for user-defined SummaryFactory
+ * @param <S> type of Summary
+ */
+public interface SummaryFactory<S extends Summary> {
+
+  /**
+   * Returns new instance of Summary
+   * @return new instance of Summary
+   */
+  public S newSummary();
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java
new file mode 100644
index 000000000..b0df75dae
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * This is to provide methods of producing unions and intersections of two Summary objects.
+ * @param <S> type of Summary
+ */
+public interface SummarySetOperations<S extends Summary> {
+
+  /**
+   * This is called by the union operator when both sketches have the same hash value.
+   *
+   * <p>Caution: Do not modify the input Summary objects. Also do not return them directly,
+   * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is
+   * important to create a new Summary object with the correct contents to be returned. Do not
+   * return null summaries.</p>
+   *
+   * @param a Summary from sketch A
+   * @param b Summary from sketch B
+   * @return union of Summary A and Summary B
+   */
+  public S union(S a, S b);
+
+  /**
+   * This is called by the intersection operator when both sketches have the same hash value.
+   *
+   * <p>Caution: Do not modify the input Summary objects. Also do not return them directly,
+   * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is
+   * important to create a new Summary object with the correct contents to be returned. Do not
+   * return null summaries.</p>
+   *
+   * @param a Summary from sketch A
+   * @param b Summary from sketch B
+   * @return intersection of Summary A and Summary B
+   */
+  public S intersection(S a, S b);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java
new file mode 100644
index 000000000..a4faa6018
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * Iterator over a generic tuple sketch
+ * @param <S> Type of Summary
+ */
+public class TupleSketchIterator<S extends Summary> {
+
+  private final long[] hashArrTbl_; //could be either hashArr or hashTable
+  private final S[] summaryArrTbl_; //could be either summaryArr or summaryTable
+  private int i_; // index of the current entry; -1 until next() is called for the first time
+
+  /**
+   * Creates an iterator over the given parallel arrays. The arrays are used directly, not copied.
+   * @param hashes the hashes, either compact or in hash-table form; may be null
+   * @param summaries the summaries at the same indices as their hashes
+   */
+  TupleSketchIterator(final long[] hashes, final S[] summaries) {
+    hashArrTbl_ = hashes;
+    summaryArrTbl_ = summaries;
+    i_ = -1;
+  }
+
+  /**
+   * Advancing the iterator and checking existence of the next entry
+   * is combined here for efficiency. This results in an undefined
+   * state of the iterator before the first call of this method.
+   * @return true if the next element exists
+   */
+  public boolean next() {
+    if (hashArrTbl_ == null) { return false; }
+    i_++;
+    // Skip slots whose hash is not positive; only positive hashes count as entries.
+    while (i_ < hashArrTbl_.length) {
+      if (hashArrTbl_[i_] > 0) { return true; }
+      i_++;
+    }
+    return false;
+  }
+
+  /**
+   * Gets the hash from the current entry in the sketch, which is a hash
+   * of the original key passed to update(). The original keys are not
+   * retained. Don't call this before calling next() for the first time
+   * or after getting false from next().
+   * @return hash from the current entry
+   */
+  public long getHash() {
+    return hashArrTbl_[i_];
+  }
+
+  /**
+   * Gets a Summary object from the current entry in the sketch.
+   * Don't call this before calling next() for the first time
+   * or after getting false from next().
+   * @return Summary object for the current entry (this is not a copy!)
+   */
+  public S getSummary() {
+    return summaryArrTbl_[i_];
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Union.java b/src/main/java/org/apache/datasketches/tuple2/Union.java
new file mode 100644
index 000000000..5945ad53c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Union.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.Math.min;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.QuickSelect;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Compute the union of two or more generic tuple sketches or generic tuple sketches combined with
+ * theta sketches. A new instance represents an empty set.
+ * @param <S> Type of Summary
+ */
+public class Union<S extends Summary> {
+  private final SummarySetOperations<S> summarySetOps_;
+  private QuickSelectSketch<S> qsk_;
+  private long unionThetaLong_; // need to maintain outside of the sketch
+  private boolean empty_;
+
+  /**
+   * Creates new Union instance with instructions on how to process two summaries that
+   * overlap. This will have the default nominal entries (K).
+   * @param summarySetOps instance of SummarySetOperations
+   */
+  public Union(final SummarySetOperations<S> summarySetOps) {
+    this(ThetaUtil.DEFAULT_NOMINAL_ENTRIES, summarySetOps);
+  }
+
+  /**
+   * Creates new Union instance.
+   * @param nomEntries nominal entries (K). Forced to the nearest power of 2 greater than
+   * given value.
+   * @param summarySetOps instance of SummarySetOperations
+   */
+  public Union(final int nomEntries, final SummarySetOperations<S> summarySetOps) {
+    summarySetOps_ = summarySetOps;
+    qsk_ = new QuickSelectSketch<>(nomEntries, null);
+    unionThetaLong_ = qsk_.getThetaLong();
+    empty_ = true;
+  }
+
+  /**
+   * Perform a stateless, pair-wise union operation between two tuple sketches.
+   * The returned sketch will be cut back to the smaller of the two k values if required.
+   * This operator is reset first, so the result of any earlier stateful unions is discarded.
+   * <p>Nulls and empty sketches are ignored.</p>
+   *
+   * @param tupleSketchA The first argument
+   * @param tupleSketchB The second argument
+   * @return the result CompactSketch, exactly as returned by getResult(true).
+   */
+  public CompactSketch<S> union(final Sketch<S> tupleSketchA, final Sketch<S> tupleSketchB) {
+    reset();
+    union(tupleSketchA);
+    union(tupleSketchB);
+    final CompactSketch<S> csk = getResult(true);
+    return csk;
+  }
+
+  /**
+   * Perform a stateless, pair-wise union operation between a tupleSketch and a thetaSketch.
+   * The returned sketch will be cut back to the smaller of the two k values if required.
+   * This operator is reset first, so the result of any earlier stateful unions is discarded.
+   * <p>Nulls and empty sketches are ignored.</p>
+   *
+   * @param tupleSketch The first argument
+   * @param thetaSketch The second argument
+   * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+   * This may not be null.
+   * @return the result CompactSketch, exactly as returned by getResult(true).
+   */
+  public CompactSketch<S> union(final Sketch<S> tupleSketch,
+      final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) {
+    reset();
+    union(tupleSketch);
+    union(thetaSketch, summary);
+    final CompactSketch<S> csk = getResult(true);
+    return csk;
+  }
+
+  /**
+   * Performs a stateful union of the internal set with the given tupleSketch.
+   * @param tupleSketch input tuple sketch to merge with the internal set.
+   * Nulls and empty sketches are ignored.
+   */
+  public void union(final Sketch<S> tupleSketch) {
+    if (tupleSketch == null || tupleSketch.isEmpty()) { return; }
+    empty_ = false;
+    unionThetaLong_ = min(tupleSketch.thetaLong_, unionThetaLong_);
+    final TupleSketchIterator<S> it = tupleSketch.iterator();
+    while (it.next()) {
+      qsk_.merge(it.getHash(), it.getSummary(), summarySetOps_);
+    }
+    unionThetaLong_ = min(unionThetaLong_, qsk_.thetaLong_); //pick up any change of theta in qsk_
+  }
+
+  /**
+   * Performs a stateful union of the internal set with the given thetaSketch by combining entries
+   * using the hashes from the theta sketch and summary values from the given summary.
+   * @param thetaSketch the given theta sketch input. If null or empty, it is ignored.
+   * @param summary the given proxy summary for the theta sketch, which doesn't have one. This may
+   * not be null.
+   */
+  public void union(final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) {
+    if (summary == null) { //checked before the sketch, so a null summary always throws
+      throw new SketchesArgumentException("Summary cannot be null."); }
+    if (thetaSketch == null || thetaSketch.isEmpty()) { return; }
+    empty_ = false;
+    final long thetaIn = thetaSketch.getThetaLong();
+    unionThetaLong_ = min(thetaIn, unionThetaLong_);
+    final org.apache.datasketches.theta2.HashIterator it = thetaSketch.iterator();
+    while (it.next()) {
+      qsk_.merge(it.get(), summary, summarySetOps_); //copies summary
+    }
+    unionThetaLong_ = min(unionThetaLong_, qsk_.thetaLong_); //pick up any change of theta in qsk_
+  }
+
+  /**
+   * Gets the result of a sequence of stateful union operations as an unordered CompactSketch
+   * @return result of the stateful unions so far. The state of this operation is not reset after the
+   * result is returned.
+   */
+  public CompactSketch<S> getResult() {
+    return getResult(false);
+  }
+
+  /**
+   * Gets the result of a sequence of stateful union operations as an unordered CompactSketch.
+   * @param reset If true, clears this operator to the empty state after this result is
+   * returned. Set this to false if you wish to obtain an intermediate result.
+   * @return result of the stateful union
+   */
+  @SuppressWarnings("unchecked")
+  public CompactSketch<S> getResult(final boolean reset) {
+    final CompactSketch<S> result;
+    if (empty_) {
+      result = qsk_.compact();
+    } else if (unionThetaLong_ >= qsk_.thetaLong_ && qsk_.getRetainedEntries() <= qsk_.getNominalEntries()) {
+      //unionThetaLong_ >= qsk_.thetaLong_ means we can ignore unionThetaLong_. We don't need to rebuild.
+      //qsk_.getRetainedEntries() <= qsk_.getNominalEntries() means we don't need to pull back to k.
+      result = qsk_.compact();
+    } else {
+      final long tmpThetaLong = min(unionThetaLong_, qsk_.thetaLong_);
+
+      //count the number of valid hashes in because Alpha can have dirty values
+      int numHashesIn = 0;
+      TupleSketchIterator<S> it = qsk_.iterator();
+      while (it.next()) { //counts valid hashes
+        if (it.getHash() < tmpThetaLong) { numHashesIn++; }
+      }
+
+      if (numHashesIn == 0) {
+        //numHashes == 0 && empty == false means Theta < 1.0
+        //Therefore, this is a degenerate sketch: theta < 1.0, count = 0, empty = false
+        result = new CompactSketch<>(null, null, tmpThetaLong, empty_);
+      }
+
+      else {
+        //we know: empty == false, count > 0
+        final int numHashesOut;
+        final long thetaLongOut;
+        if (numHashesIn > qsk_.getNominalEntries()) {
+          //we need to trim hashes and need a new thetaLong
+          final long[] tmpHashArr = new long[numHashesIn]; // temporary, order will be destroyed by quick select
+          it = qsk_.iterator();
+          int i = 0;
+          while (it.next()) {
+            final long hash = it.getHash();
+            if (hash < tmpThetaLong) { tmpHashArr[i++] = hash; }
+          }
+          numHashesOut = qsk_.getNominalEntries();
+          thetaLongOut = QuickSelect.select(tmpHashArr, 0, numHashesIn - 1, numHashesOut);
+        } else {
+          numHashesOut = numHashesIn;
+          thetaLongOut = tmpThetaLong;
+        }
+        //now prepare the output arrays
+        final long[] hashArr = new long[numHashesOut];
+        final S[] summaries = Util.newSummaryArray(qsk_.getSummaryTable(), numHashesOut);
+        it = qsk_.iterator();
+        int i = 0;
+        while (it.next()) { //select the qualifying hashes from the gadget synchronized with the summaries
+          final long hash = it.getHash();
+          if (hash < thetaLongOut) {
+            hashArr[i] = hash;
+            summaries[i] = (S) it.getSummary().copy();
+            i++;
+          }
+        }
+        result = new CompactSketch<>(hashArr, summaries, thetaLongOut, empty_);
+      }
+    }
+    if (reset) { reset(); }
+    return result;
+  }
+
+  /**
+   * Resets the internal set to the initial state, which represents an empty set. This is only useful
+   * after sequences of stateful union operations.
+   */
+  public void reset() {
+    qsk_.reset();
+    unionThetaLong_ = qsk_.getThetaLong();
+    empty_ = true;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java
new file mode 100644
index 000000000..c3b761f14
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+
+import org.apache.datasketches.hash.MurmurHash3;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * An extension of QuickSelectSketch&lt;S&gt;, which can be updated with many types of keys.
+ * Summary objects are created using a user-defined SummaryFactory class,
+ * which should allow very flexible parameterization if needed.
+ * Keys are presented to a sketch along with values of a user-defined
+ * update type U. When an entry is inserted into a sketch or a duplicate key is
+ * presented to a sketch then summary.update(U value) method will be called. So
+ * any kind of user-defined accumulation is possible. Summaries also must know
+ * how to copy themselves. Also union and intersection of summaries can be
+ * implemented in a sub-class of SummarySetOperations, which will be used in
+ * case Union or Intersection of two instances of Tuple Sketch is needed
+ * @param <U> Type of the value, which is passed to update method of a Summary
+ * @param <S> Type of the UpdatableSummary&lt;U&gt;
+ */
+public class UpdatableSketch<U, S extends UpdatableSummary<U>> extends QuickSelectSketch<S> {
+
+  /**
+   * This is to create a new instance of an UpdatableSketch.
+   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+   * or equal to the given value.
+   * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
+   * <pre>
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * </pre>
+   * @param samplingProbability the sampling probability handed to the QuickSelectSketch constructor.
+   * See Sampling Probability
+   * @param summaryFactory An instance of a SummaryFactory.
+   */
+  public UpdatableSketch(final int nomEntries, final int lgResizeFactor,
+      final float samplingProbability, final SummaryFactory<S> summaryFactory) {
+    super(nomEntries, lgResizeFactor, samplingProbability, summaryFactory);
+  }
+
+  /**
+   * This is to create an instance of a sketch given a serialized form
+   * @param srcSeg MemorySegment object with data of a serialized UpdatableSketch
+   * @param deserializer instance of SummaryDeserializer
+   * @param summaryFactory instance of SummaryFactory
+   * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
+   * This capability will be removed in a future release.
+   * Heapifying a CompactSketch is not deprecated.
+   */
+  @Deprecated
+  public UpdatableSketch(
+      final MemorySegment srcSeg,
+      final SummaryDeserializer<S> deserializer,
+      final SummaryFactory<S> summaryFactory) {
+    super(srcSeg, deserializer, summaryFactory);
+  }
+
+  /**
+   * Copy Constructor
+   * @param sketch the sketch to copy
+   */
+  public UpdatableSketch(final UpdatableSketch<U, S> sketch) {
+    super(sketch);
+  }
+
+  /**
+   * @return a deep copy of this sketch
+   */
+  @Override
+  public UpdatableSketch<U,S> copy() {
+    return new UpdatableSketch<>(this);
+  }
+
+  /**
+   * Updates this sketch with a long key and U value.
+   * The value is passed to update() method of the Summary object associated with the key
+   *
+   * @param key The given long key
+   * @param value The given U value
+   */
+  public void update(final long key, final U value) {
+    update(new long[] {key}, value);
+  }
+
+  /**
+   * Updates this sketch with a double key and U value.
+   * The value is passed to update() method of the Summary object associated with the key
+   *
+   * @param key The given double key
+   * @param value The given U value
+   */
+  public void update(final double key, final U value) {
+    update(Util.doubleToLongArray(key), value);
+  }
+
+  /**
+   * Updates this sketch with a String key and U value.
+   * The value is passed to update() method of the Summary object associated with the key
+   * A null or empty key is ignored.
+   * @param key The given String key
+   * @param value The given U value
+   */
+  public void update(final String key, final U value) {
+    update(Util.stringToByteArray(key), value);
+  }
+
+  /**
+   * Updates this sketch with a byte[] key and U value.
+   * The value is passed to update() method of the Summary object associated with the key
+   * A null or empty key is ignored.
+   * @param key The given byte[] key
+   * @param value The given U value
+   */
+  public void update(final byte[] key, final U value) {
+    if ((key == null) || (key.length == 0)) { return; }
+    insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+  }
+
+  /**
+   * Updates this sketch with a ByteBuffer and U value
+   * The value is passed to the update() method of the Summary object associated with the key
+   * A null buffer, or one with nothing remaining, is ignored.
+   * @param buffer The given ByteBuffer key
+   * @param value The given U value
+   */
+  public void update(final ByteBuffer buffer, final U value) {
+    if (buffer == null || !buffer.hasRemaining()) { return; }
+    insertOrIgnore(MurmurHash3.hash(buffer, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+  }
+
+  /**
+   * Updates this sketch with a int[] key and U value.
+   * The value is passed to update() method of the Summary object associated with the key
+   * A null or empty key is ignored.
+   * @param key The given int[] key
+   * @param value The given U value
+   */
+  public void update(final int[] key, final U value) {
+    if ((key == null) || (key.length == 0)) { return; }
+    insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+  }
+
+  /**
+   * Updates this sketch with a long[] key and U value.
+   * The value is passed to update() method of the Summary object associated with the key
+   * A null or empty key is ignored.
+   * @param key The given long[] key
+   * @param value The given U value
+   */
+  public void update(final long[] key, final U value) {
+    if ((key == null) || (key.length == 0)) { return; }
+    insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+  }
+
+  void insertOrIgnore(final long hash, final U value) { //callers pass hash >>> 1, which is never negative
+    setEmpty(false); //done before the theta check, so a rejected hash still marks the sketch non-empty
+    if (hash >= getThetaLong()) { return; }
+    int index = findOrInsert(hash);
+    if (index < 0) { //a negative result is the complement of a slot that still needs a new summary
+      index = ~index;
+      insertSummary(index, getSummaryFactory().newSummary());
+    }
+    summaryTable_[index].update(value);
+    rebuildIfNeeded();
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java
new file mode 100644
index 000000000..e0343d302
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * For building a new generic tuple UpdatableSketch
+ * @param <U> Type of update value
+ * @param <S> Type of Summary
+ */
+public class UpdatableSketchBuilder<U, S extends UpdatableSummary<U>> {
+
+  private int nomEntries_;
+  private ResizeFactor resizeFactor_;
+  private float samplingProbability_;
+  private final SummaryFactory<S> summaryFactory_;
+
+  private static final float DEFAULT_SAMPLING_PROBABILITY = 1;
+  private static final ResizeFactor DEFAULT_RESIZE_FACTOR = ResizeFactor.X8;
+
+  /**
+   * Creates an instance of UpdatableSketchBuilder with default parameters
+   * @param summaryFactory An instance of SummaryFactory.
+   */
+  public UpdatableSketchBuilder(final SummaryFactory<S> summaryFactory) {
+    nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES;
+    resizeFactor_ = DEFAULT_RESIZE_FACTOR;
+    samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY;
+    summaryFactory_ = summaryFactory;
+  }
+
+  /**
+   * This is to set the nominal number of entries.
+   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+   * or equal to the given value.
+   * @return this UpdatableSketchBuilder
+   */
+  public UpdatableSketchBuilder<U, S> setNominalEntries(final int nomEntries) {
+    nomEntries_ = 1 << ThetaUtil.checkNomLongs(nomEntries);
+    return this;
+  }
+
+  /**
+   * This is to set the resize factor.
+   * Value of X1 means that the maximum capacity is allocated from the start.
+   * Default resize factor is X8.
+   * @param resizeFactor value of X1, X2, X4 or X8
+   * @return this UpdatableSketchBuilder
+   */
+  public UpdatableSketchBuilder<U, S> setResizeFactor(final ResizeFactor resizeFactor) {
+    resizeFactor_ = resizeFactor;
+    return this;
+  }
+
+  /**
+   * This is to set sampling probability.
+   * Default probability is 1.
+   * @param samplingProbability sampling probability from 0 to 1
+   * @return this UpdatableSketchBuilder
+   */
+  public UpdatableSketchBuilder<U, S> setSamplingProbability(final float samplingProbability) {
+    if ((samplingProbability < 0) || (samplingProbability > 1f)) {
+      throw new SketchesArgumentException("sampling probability must be between 0 and 1");
+    }
+    samplingProbability_ = samplingProbability;
+    return this;
+  }
+
+  /**
+   * Returns an UpdatableSketch with the current configuration of this Builder.
+   * @return an UpdatableSketch
+   */
+  public UpdatableSketch<U, S> build() {
+    return new UpdatableSketch<>(nomEntries_, resizeFactor_.lg(), samplingProbability_,
+        summaryFactory_);
+  }
+
+  /**
+   * Resets the Nominal Entries, Resize Factor and Sampling Probability to their default values.
+   * The assignment of U and S remain the same.
+   */
+  public void reset() {
+    nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES;
+    resizeFactor_ = DEFAULT_RESIZE_FACTOR;
+    samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java
new file mode 100644
index 000000000..2ec5df695
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * Interface for updating user-defined Summary
+ * @param <U> type of update value
+ */
+public interface UpdatableSummary<U> extends Summary {
+
+  /**
+   * This is to provide a method of updating summaries.
+   * This is primarily used internally.
+   * @param value update value
+   * @return this
+   */
+  UpdatableSummary<U> update(U value);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/Util.java b/src/main/java/org/apache/datasketches/tuple2/Util.java
new file mode 100644
index 000000000..f2f8227ea
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/Util.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.hash.MurmurHash3.hash;
+import static org.apache.datasketches.hash.XxHash.hashCharArr;
+import static org.apache.datasketches.hash.XxHash.hashString;
+
+import java.lang.reflect.Array;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Common utility functions for Tuples
+ */
+public final class Util {
+  private static final int PRIME = 0x7A3C_CA71;
+
+  /**
+   * Converts a double to a long[].
+   * @param value the given double value
+   * @return the long array
+   */
+  public static final long[] doubleToLongArray(final double value) {
+    final double d = (value == 0.0) ? 0.0 : value; // canonicalize -0.0, 0.0
+    final long[] array = { Double.doubleToLongBits(d) }; // canonicalize all NaN & +/- infinity forms
+    return array;
+  }
+
+  /**
+   * Converts a String to a UTF_8 byte array. If the given value is either null or empty this
+   * method returns null.
+   * @param value the given String value
+   * @return the UTF_8 byte array
+   */
+  public static final byte[] stringToByteArray(final String value) {
+    if ((value == null) || value.isEmpty()) { return null; }
+    return value.getBytes(UTF_8);
+  }
+
+  /**
+   * Computes and checks the 16-bit seed hash from the given long seed.
+   * The seed hash may not be zero in order to maintain compatibility with older serialized
+   * versions that did not have this concept.
+   * @param seed See Update Hash Seed
+   * @return the seed hash.
+   */
+  public static short computeSeedHash(final long seed) {
+    final long[] seedArr = {seed};
+    final short seedHash = (short)((hash(seedArr, 0L)[0]) & 0xFFFFL);
+    if (seedHash == 0) {
+      throw new SketchesArgumentException(
+          "The given seed: " + seed + " produced a seedHash of zero. "
+          + "You must choose a different seed.");
+    }
+    return seedHash;
+  }
+
+  /**
+   * Checks the two given seed hashes. If they are not equal, this method throws an Exception.
+   * @param seedHashA given seed hash A
+   * @param seedHashB given seed hash B
+   */
+  public static final void checkSeedHashes(final short seedHashA, final short seedHashB) {
+    if (seedHashA != seedHashB) {
+      throw new SketchesArgumentException("Incompatible Seed Hashes. " + seedHashA + ", "
+          + seedHashB);
+    }
+  }
+
+  /**
+   * Gets the starting capacity of a new sketch given the Nominal Entries and the log Resize Factor.
+   * @param nomEntries the given Nominal Entries
+   * @param lgResizeFactor the given log Resize Factor
+   * @return the starting capacity
+   */
+  public static int getStartingCapacity(final int nomEntries, final int lgResizeFactor) {
+    return 1 << ThetaUtil.startingSubMultiple(
+      // target table size is twice the number of nominal entries
+      Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2),
+      lgResizeFactor,
+      ThetaUtil.MIN_LG_ARR_LONGS
+    );
+  }
+
+  /**
+   * Concatenate array of Strings to a single String.
+   * @param strArr the given String array
+   * @return the concatenated String, with a comma between consecutive elements
+   */
+  public static String stringConcat(final String[] strArr) {
+    final int len = strArr.length;
+    final StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < len; i++) {
+      sb.append(strArr[i]);
+      if ((i + 1) < len) { sb.append(','); }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Returns the hash of the given string
+   * @param s the string to hash
+   * @return the hash of the given string
+   */
+  public static long stringHash(final String s) {
+    return hashString(s, 0, s.length(), PRIME);
+  }
+
+  /**
+   * Returns the hash of the concatenated strings
+   * @param strArray array of Strings
+   * @return the hash of concatenated strings.
+   */
+  public static long stringArrHash(final String[] strArray) {
+    final String s = stringConcat(strArray);
+    return hashCharArr(s.toCharArray(), 0, s.length(), PRIME);
+  }
+
+  /**
+   * Will copy compact summary arrays as well as hashed summary tables (with nulls).
+   * @param <S> type of summary
+   * @param summaryArr the given summary array or table
+   * @return the copy
+   */
+  @SuppressWarnings("unchecked")
+  public static <S extends Summary> S[] copySummaryArray(final S[] summaryArr) {
+    final int len = summaryArr.length;
+    final S[] tmpSummaryArr = newSummaryArray(summaryArr, len);
+    for (int i = 0; i < len; i++) {
+      final S summary = summaryArr[i];
+      if (summary == null) { continue; } //null slots stay null in the copy
+      tmpSummaryArr[i] = (S) summary.copy();
+    }
+    return tmpSummaryArr;
+  }
+
+  /**
+   * Creates a new Summary Array with the specified length
+   * @param summaryArr example array, only used to obtain the component type. It has no data.
+   * @param length the desired length of the returned array.
+   * @param <S> the summary class type
+   * @return a new Summary Array with the specified length
+   */
+  @SuppressWarnings("unchecked")
+  public static <S extends Summary> S[] newSummaryArray(final S[] summaryArr, final int length) {
+    final Class<S> summaryType = (Class<S>) summaryArr.getClass().getComponentType();
+    final S[] tmpSummaryArr = (S[]) Array.newInstance(summaryType, length);
+    return tmpSummaryArr;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java
new file mode 100644
index 000000000..ee17bdd7b
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.adouble;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.tuple2.UpdatableSketch;
+
+/**
+ * Extends UpdatableSketch&lt;Double, DoubleSummary&gt;
+ * @author Lee Rhodes
+ */
+public class DoubleSketch extends UpdatableSketch<Double, DoubleSummary> {
+
+  /**
+   * Constructs this sketch with given lgK.
+   * @param lgK Log_base2 of Nominal Entries.
+   * See Nominal Entries
+   * @param mode The DoubleSummary mode to be used
+   */
+  public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) {
+    this(lgK, ResizeFactor.X8.lg(), 1.0F, mode); //lg() is the log2 value; ordinal() only matched by enum order
+  }
+
+  /**
+   * Creates this sketch with the following parameters:
+   * @param lgK Log_base2 of Nominal Entries.
+   * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
+   * <pre>
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * </pre>
+   * @param samplingProbability the sampling probability handed to the UpdatableSketch constructor.
+   * See Sampling Probability
+   * @param mode The DoubleSummary mode to be used
+   */
+  public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplingProbability,
+      final DoubleSummary.Mode mode) {
+    super(1 << lgK, lgResizeFactor, samplingProbability, new DoubleSummaryFactory(mode));
+  }
+
+  /**
+   * Constructs this sketch from a MemorySegment image, which must be from a DoubleSketch, and
+   * usually with data.
+   * @param seg the given MemorySegment
+   * @param mode The DoubleSummary mode to be used
+   * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
+   * This capability will be removed in a future release.
+   * Heapifying a CompactSketch is not deprecated.
+   */
+  @Deprecated
+  public DoubleSketch(final MemorySegment seg, final DoubleSummary.Mode mode) {
+    super(seg, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
+  }
+
+  @Override
+  public void update(final String key, final Double value) {
+    super.update(key, value);
+  }
+
+  @Override
+  public void update(final long key, final Double value) {
+    super.update(key, value);
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java
new file mode 100644
index 000000000..c27051423
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.adouble;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+import org.apache.datasketches.tuple2.DeserializeResult;
+import org.apache.datasketches.tuple2.UpdatableSummary;
+
+/**
+ * Summary for generic tuple sketches of type Double.
+ * This summary keeps a double value. On update a predefined operation is performed depending on
+ * the mode.
+ * Supported modes: Sum, Min, Max, AlwaysOne. The default mode is Sum.
+ */
+public final class DoubleSummary implements UpdatableSummary {
+ private double value_;
+ private final Mode mode_;
+
+ /**
+ * The aggregation modes for this Summary
+ */
+ public enum Mode {
+
+ /**
+ * The aggregation mode is the summation function.
+ *
+ * New retained value = previous retained value + incoming value
+ */
+ Sum,
+
+ /**
+ * The aggregation mode is the minimum function.
+ *
+ * New retained value = min(previous retained value, incoming value)
+ */
+ Min,
+
+ /**
+ * The aggregation mode is the maximum function.
+ *
+ * New retained value = max(previous retained value, incoming value)
+ */
+ Max,
+
+ /**
+ * The aggregation mode is always one.
+ *
+ * New retained value = 1.0
+ */
+ AlwaysOne
+ }
+
+ /**
+ * Creates an instance of DoubleSummary with a given starting value and mode
+ * @param value starting value
+ * @param mode update mode
+ */
+ private DoubleSummary(final double value, final Mode mode) {
+ value_ = value;
+ mode_ = mode;
+ }
+
+ /**
+ * Creates an instance of DoubleSummary with a given mode.
+ * @param mode update mode
+ */
+ public DoubleSummary(final Mode mode) {
+ mode_ = mode;
+ switch (mode) {
+ case Sum:
+ value_ = 0;
+ break;
+ case Min:
+ value_ = Double.POSITIVE_INFINITY;
+ break;
+ case Max:
+ value_ = Double.NEGATIVE_INFINITY;
+ break;
+ case AlwaysOne:
+ value_ = 1.0;
+ break;
+ }
+ }
+
+ @Override
+ public DoubleSummary update(final Double value) {
+ switch (mode_) {
+ case Sum:
+ value_ += value;
+ break;
+ case Min:
+ if (value < value_) { value_ = value; }
+ break;
+ case Max:
+ if (value > value_) { value_ = value; }
+ break;
+ case AlwaysOne:
+ value_ = 1.0;
+ break;
+ }
+ return this;
+ }
+
+ @Override
+ public DoubleSummary copy() {
+ return new DoubleSummary(value_, mode_);
+ }
+
+ /**
+ * Returns current value of the DoubleSummary
+ * @return current value of the DoubleSummary
+ */
+ public double getValue() {
+ return value_;
+ }
+
+ private static final int SERIALIZED_SIZE_BYTES = 9;
+ private static final int VALUE_INDEX = 0;
+ private static final int MODE_BYTE_INDEX = 8;
+
+ @Override
+ public byte[] toByteArray() {
+ final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES];
+ ByteArrayUtil.putDoubleLE(bytes, VALUE_INDEX, value_);
+ bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal();
+ return bytes;
+ }
+
+ /**
+ * Creates an instance of the DoubleSummary given a serialized representation
+ * @param seg MemorySegment object with serialized DoubleSummary
+ * @return DeserializeResult object, which contains a DoubleSummary object and number of bytes
+ * read from the MemorySegment
+ */
+ public static DeserializeResult fromMemory(final MemorySegment seg) {
+ return new DeserializeResult<>(new DoubleSummary(seg.get(JAVA_DOUBLE_UNALIGNED, VALUE_INDEX),
+ Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java
new file mode 100644
index 000000000..ef9854d78
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.adouble;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.tuple2.DeserializeResult;
+import org.apache.datasketches.tuple2.SummaryDeserializer;
+
+/**
+ * Implements {@code SummaryDeserializer<DoubleSummary>}
+ * @author Lee Rhodes
+ */
+public class DoubleSummaryDeserializer implements SummaryDeserializer {
+
+ @Override
+ public DeserializeResult heapifySummary(final MemorySegment seg) {
+ return DoubleSummary.fromMemory(seg);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java
new file mode 100644
index 000000000..18fa33fe1
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.adouble;
+
+import org.apache.datasketches.tuple2.SummaryFactory;
+
+/**
+ * Factory for DoubleSummary.
+ *
+ * @author Lee Rhodes
+ */
+public final class DoubleSummaryFactory implements SummaryFactory {
+
+ private final DoubleSummary.Mode summaryMode_;
+
+ /**
+ * Creates an instance of DoubleSummaryFactory with a given mode
+ * @param summaryMode summary mode
+ */
+ public DoubleSummaryFactory(final DoubleSummary.Mode summaryMode) {
+ summaryMode_ = summaryMode;
+ }
+
+ @Override
+ public DoubleSummary newSummary() {
+ return new DoubleSummary(summaryMode_);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java
new file mode 100644
index 000000000..7bad24567
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.adouble;
+
+import org.apache.datasketches.tuple2.SummarySetOperations;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode;
+
+/**
+ * Methods for defining how unions and intersections of two objects of type DoubleSummary
+ * are performed.
+ */
+public final class DoubleSummarySetOperations implements SummarySetOperations {
+
+ private final Mode unionSummaryMode_;
+
+ /**
+ * Intersection is not well defined or even meaningful between numeric values.
+ * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes.
+ */
+ private final Mode intersectionSummaryMode_;
+
+ /**
+ * Creates an instance with default mode of sum for both union and intersection.
+ * This exists for backward compatibility.
+ */
+ public DoubleSummarySetOperations() {
+ unionSummaryMode_ = DoubleSummary.Mode.Sum;
+ intersectionSummaryMode_ = DoubleSummary.Mode.Sum;
+ }
+
+ /**
+ * Creates an instance given a DoubleSummary update mode where the mode is the same for both
+ * union and intersection. This exists for backward compatibility.
+ * @param summaryMode DoubleSummary update mode.
+ */
+ public DoubleSummarySetOperations(final Mode summaryMode) {
+ unionSummaryMode_ = summaryMode;
+ intersectionSummaryMode_ = summaryMode;
+ }
+
+ /**
+ * Creates an instance with two modes.
+ * @param unionSummaryMode for unions
+ * @param intersectionSummaryMode for intersections
+ */
+ public DoubleSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) {
+ unionSummaryMode_ = unionSummaryMode;
+ intersectionSummaryMode_ = intersectionSummaryMode;
+ }
+
+ @Override
+ public DoubleSummary union(final DoubleSummary a, final DoubleSummary b) {
+ final DoubleSummary result = new DoubleSummary(unionSummaryMode_);
+ result.update(a.getValue());
+ result.update(b.getValue());
+ return result;
+ }
+
+ @Override
+ public DoubleSummary intersection(final DoubleSummary a, final DoubleSummary b) {
+ final DoubleSummary result = new DoubleSummary(intersectionSummaryMode_);
+ result.update(a.getValue());
+ result.update(b.getValue());
+ return result;
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java b/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java
new file mode 100644
index 000000000..c72f3df00
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * This package is for a generic implementation of the Tuple sketch for a single Double value.
+ */
+package org.apache.datasketches.tuple2.adouble;
diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java
new file mode 100644
index 000000000..a344f5ef2
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.aninteger;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.tuple2.UpdatableSketch;
+
+/**
+ * Extends {@code UpdatableSketch<Integer, IntegerSummary>}
+ * @author Lee Rhodes
+ */
+public class IntegerSketch extends UpdatableSketch {
+
+ /**
+ * Constructs this sketch with given lgK.
+ * @param lgK Log_base2 of Nominal Entries.
+ * See Nominal Entries
+ * @param mode The IntegerSummary mode to be used
+ */
+ public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) {
+ this(lgK, ResizeFactor.X8.ordinal(), 1.0F, mode);
+ }
+
+ /**
+ * Creates this sketch with the following parameters:
+ * @param lgK Log_base2 of Nominal Entries.
+ * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3:
+ *
+ * 0 - no resizing (max size allocated),
+ * 1 - double internal hash table each time it reaches a threshold
+ * 2 - grow four times
+ * 3 - grow eight times (default)
+ *
+ * @param samplingProbability
+ * See Sampling Probability
+ * @param mode The IntegerSummary mode to be used
+ */
+ public IntegerSketch(final int lgK, final int lgResizeFactor, final float samplingProbability,
+ final IntegerSummary.Mode mode) {
+ super(1 << lgK, lgResizeFactor, samplingProbability, new IntegerSummaryFactory(mode));
+ }
+
+ /**
+ * Constructs this sketch from a MemorySegment image, which must be from an IntegerSketch, and
+ * usually with data.
+ * @param seg the given MemorySegment
+ * @param mode The IntegerSummary mode to be used
+ * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
+ * This capability will be removed in a future release.
+ * Heapifying a CompactSketch is not deprecated.
+ */
+ @Deprecated
+ public IntegerSketch(final MemorySegment seg, final IntegerSummary.Mode mode) {
+ super(seg, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode));
+ }
+
+ @Override
+ public void update(final String key, final Integer value) {
+ super.update(key, value);
+ }
+
+ @Override
+ public void update(final long key, final Integer value) {
+ super.update(key, value);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java
new file mode 100644
index 000000000..c7f25ecd9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.aninteger;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+import org.apache.datasketches.tuple2.DeserializeResult;
+import org.apache.datasketches.tuple2.UpdatableSummary;
+
+/**
+ * Summary for generic tuple sketches of type Integer.
+ * This summary keeps an Integer value. On update a predefined operation is performed depending on
+ * the mode.
+ * Supported modes: Sum, Min, Max, AlwaysOne. The default mode is Sum.
+ */
+public class IntegerSummary implements UpdatableSummary {
+ private int value_;
+ private final Mode mode_;
+
+ /**
+ * The aggregation modes for this Summary
+ */
+ public enum Mode {
+
+ /**
+ * The aggregation mode is the summation function.
+ *
+ * New retained value = previous retained value + incoming value
+ */
+ Sum,
+
+ /**
+ * The aggregation mode is the minimum function.
+ *
+ * New retained value = min(previous retained value, incoming value)
+ */
+ Min,
+
+ /**
+ * The aggregation mode is the maximum function.
+ *
+ * New retained value = max(previous retained value, incoming value)
+ */
+ Max,
+
+ /**
+ * The aggregation mode is always one.
+ *
+ * New retained value = 1
+ */
+ AlwaysOne
+ }
+
+ /**
+ * Creates an instance of IntegerSummary with a given starting value and mode.
+ * @param value starting value
+ * @param mode update mode
+ */
+ private IntegerSummary(final int value, final Mode mode) {
+ value_ = value;
+ mode_ = mode;
+ }
+
+ /**
+ * Creates an instance of IntegerSummary with a given mode.
+ * @param mode update mode. This should not be called by a user.
+ */
+ public IntegerSummary(final Mode mode) {
+ mode_ = mode;
+ switch (mode) {
+ case Sum:
+ value_ = 0;
+ break;
+ case Min:
+ value_ = Integer.MAX_VALUE;
+ break;
+ case Max:
+ value_ = Integer.MIN_VALUE;
+ break;
+ case AlwaysOne:
+ value_ = 1;
+ break;
+ }
+ }
+
+ @Override
+ public IntegerSummary update(final Integer value) {
+ switch (mode_) {
+ case Sum:
+ value_ += value;
+ break;
+ case Min:
+ if (value < value_) { value_ = value; }
+ break;
+ case Max:
+ if (value > value_) { value_ = value; }
+ break;
+ case AlwaysOne:
+ value_ = 1;
+ break;
+ }
+ return this;
+ }
+
+ @Override
+ public IntegerSummary copy() {
+ return new IntegerSummary(value_, mode_);
+ }
+
+ /**
+ * Returns the current value of the IntegerSummary
+ * @return current value of the IntegerSummary
+ */
+ public int getValue() {
+ return value_;
+ }
+
+ private static final int SERIALIZED_SIZE_BYTES = 5;
+ private static final int VALUE_INDEX = 0;
+ private static final int MODE_BYTE_INDEX = 4;
+
+ @Override
+ public byte[] toByteArray() {
+ final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES];
+ ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_);
+ bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal();
+ return bytes;
+ }
+
+ /**
+ * Creates an instance of the IntegerSummary given a serialized representation
+ * @param seg MemorySegment object with serialized IntegerSummary
+ * @return DeserializeResult object, which contains an IntegerSummary object and number of bytes
+ * read from the MemorySegment
+ */
+ public static DeserializeResult fromMemory(final MemorySegment seg) {
+ return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX),
+ Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java
new file mode 100644
index 000000000..b981e0db2
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.aninteger;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.tuple2.DeserializeResult;
+import org.apache.datasketches.tuple2.SummaryDeserializer;
+
+/**
+ * Implements {@code SummaryDeserializer<IntegerSummary>}
+ * @author Lee Rhodes
+ */
+public class IntegerSummaryDeserializer implements SummaryDeserializer {
+
+ @Override
+ public DeserializeResult heapifySummary(final MemorySegment seg) {
+ return IntegerSummary.fromMemory(seg);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java
new file mode 100644
index 000000000..97bf9df24
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.aninteger;
+
+import org.apache.datasketches.tuple2.SummaryFactory;
+
+/**
+ * Factory for IntegerSummary.
+ *
+ * @author Lee Rhodes
+ */
+public class IntegerSummaryFactory implements SummaryFactory {
+
+ private final IntegerSummary.Mode summaryMode_;
+
+ /**
+ * Creates an instance of IntegerSummaryFactory with a given mode
+ * @param summaryMode summary mode
+ */
+ public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) {
+ summaryMode_ = summaryMode;
+ }
+
+ @Override
+ public IntegerSummary newSummary() {
+ return new IntegerSummary(summaryMode_);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java
new file mode 100644
index 000000000..5e5555d22
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.aninteger;
+
+import org.apache.datasketches.tuple2.SummarySetOperations;
+import org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode;
+
+/**
+ * Methods for defining how unions and intersections of two objects of type IntegerSummary
+ * are performed.
+ *
+ * @author Lee Rhodes
+ */
+public class IntegerSummarySetOperations implements SummarySetOperations {
+
+ private final Mode unionSummaryMode_;
+
+ /**
+ * Intersection is not well defined or even meaningful between numeric values.
+ * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes.
+ */
+ private final Mode intersectionSummaryMode_;
+
+ /**
+ * Creates a new instance with two modes
+ * @param unionSummaryMode for unions
+ * @param intersectionSummaryMode for intersections
+ */
+ public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) {
+ unionSummaryMode_ = unionSummaryMode;
+ intersectionSummaryMode_ = intersectionSummaryMode;
+ }
+
+ @Override
+ public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) {
+ final IntegerSummary result = new IntegerSummary(unionSummaryMode_);
+ result.update(a.getValue());
+ result.update(b.getValue());
+ return result;
+ }
+
+ @Override
+ public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) {
+ final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_);
+ result.update(a.getValue());
+ result.update(b.getValue());
+ return result;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java
new file mode 100644
index 000000000..a80924a62
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * This package is for a generic implementation of the Tuple sketch for a single Integer value.
+ */
+package org.apache.datasketches.tuple2.aninteger;
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java
new file mode 100644
index 000000000..55e96be42
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Computes a set difference of two tuple sketches of type ArrayOfDoubles
+ */
+public abstract class ArrayOfDoublesAnotB {
+
+ ArrayOfDoublesAnotB() {}
+
+ /**
+ * Perform A-and-not-B set operation on the two given sketches.
+ * A null sketch is interpreted as an empty sketch.
+ * This is not an accumulating update. Calling update() more than once
+ * without calling getResult() will discard the result of previous update().
+ * Both input sketches must have the same numValues.
+ *
+ * @param a The incoming sketch for the first argument
+ * @param b The incoming sketch for the second argument
+ */
+ public abstract void update(ArrayOfDoublesSketch a, ArrayOfDoublesSketch b);
+
+ /**
+ * Gets the result of this operation in the form of an ArrayOfDoublesCompactSketch
+ * @return compact sketch representing the result of the operation
+ */
+ public abstract ArrayOfDoublesCompactSketch getResult();
+
+ /**
+ * Gets the result of this operation in the form of an ArrayOfDoublesCompactSketch
+ * @param seg MemorySegment for the result (can be null)
+ * @return compact sketch representing the result of the operation (off-heap if MemorySegment is
+ * provided)
+ */
+ public abstract ArrayOfDoublesCompactSketch getResult(MemorySegment seg);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java
new file mode 100644
index 000000000..30a18c1e4
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.exactLog2OfLong;
+import static org.apache.datasketches.thetacommon.HashOperations.continueCondition;
+import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable;
+import static org.apache.datasketches.thetacommon.HashOperations.count;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.thetacommon.SetOperationCornerCases;
+import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction;
+import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches.
+ *
+ * This class computes the result in two steps, update() and then getResult(), as follows:
+ *
+ *
+ * anotb.update(skA, skB); ArrayOfDoublesCompactSketch csk = anotb.getResult();
+ *
+ *
+ * @author Lee Rhodes
+ */
+public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB {
+ private int numValues_;
+ private short seedHash_;
+
+ private long thetaLong_ = Long.MAX_VALUE;
+ private boolean empty_ = true;
+ private long[] keys_;
+ private double[] values_;
+ private int count_;
+
+ ArrayOfDoublesAnotBImpl(final int numValues, final long seed) {
+ numValues_ = numValues;
+ seedHash_ = Util.computeSeedHash(seed);
+ }
+
+ @Override
+ @SuppressFBWarnings(value = "EI_EXPOSE_REP2", justification = "This is OK here")
+ public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) {
+ if (skA == null || skB == null) {
+ throw new SketchesArgumentException("Neither argument may be null.");
+ }
+ numValues_ = skA.getNumValues();
+ seedHash_ = skA.getSeedHash();
+ if (numValues_ != skB.getNumValues()) {
+ throw new SketchesArgumentException("Inputs cannot have different numValues");
+ }
+ if (seedHash_ != skB.getSeedHash()) {
+ throw new SketchesArgumentException("Inputs cannot have different seedHashes");
+ }
+
+ final long thetaLongA = skA.getThetaLong();
+ final int countA = skA.getRetainedEntries();
+ final boolean emptyA = skA.isEmpty();
+
+ final long thetaLongB = skB.getThetaLong();
+ final int countB = skB.getRetainedEntries();
+ final boolean emptyB = skB.isEmpty();
+
+ final int id =
+ SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
+ final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
+ final AnotbAction anotbAction = cCase.getAnotbAction();
+
+ final long minThetaLong = min(thetaLongA, thetaLongB);
+
+ switch (anotbAction) {
+ case EMPTY_1_0_T: {
+ reset();
+ break;
+ }
+ case DEGEN_MIN_0_F: {
+ keys_ = null;
+ values_ = null;
+ thetaLong_ = minThetaLong;
+ empty_ = false;
+ count_ = 0;
+ break;
+ }
+ case DEGEN_THA_0_F: {
+ keys_ = null;
+ values_ = null;
+ thetaLong_ = thetaLongA;
+ empty_ = false;
+ count_ = 0;
+ break;
+ }
+ case TRIM_A: {
+ final DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA);
+ final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_);
+ keys_ = da.hashArr;
+ values_ = da.valuesArr;
+ thetaLong_ = minThetaLong;
+ empty_ = skA.isEmpty();
+ count_ = da.count;
+ break;
+ }
+ case SKETCH_A: {
+ final ArrayOfDoublesCompactSketch csk = skA.compact();
+ keys_ = csk.getKeys();
+ values_ = csk.getValuesAsOneDimension();
+ thetaLong_ = csk.thetaLong_;
+ empty_ = csk.isEmpty();
+ count_ = csk.getRetainedEntries();
+ break;
+ }
+ case FULL_ANOTB: { //both A and B should have valid entries.
+ final long[] keysA = skA.getKeys();
+ final double[] valuesA = skA.getValuesAsOneDimension();
+ final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB);
+ count_ = daR.count;
+ keys_ = (count_ == 0) ? null : daR.hashArr;
+ values_ = (count_ == 0) ? null : daR.valuesArr;
+ thetaLong_ = minThetaLong;
+ empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0);
+ break;
+ }
+ //default: not possible
+ }
+ }
+
+ @Override
+ public ArrayOfDoublesCompactSketch getResult() {
+ return new HeapArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_);
+ }
+
+ @Override
+ public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) {
+ return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstSeg);
+ }
+
+ private static DataArrays getResultArrays(
+ final long minThetaLong,
+ final int countA,
+ final long[] hashArrA,
+ final double[] valuesArrA,
+ final ArrayOfDoublesSketch skB) {
+ final int numValues = skB.numValues_;
+
+ //create hashtable of skB
+ final long[] hashTableB = convertToHashTable(skB.getKeys(), skB.getRetainedEntries(), minThetaLong,
+ ThetaUtil.REBUILD_THRESHOLD);
+
+ //build temporary arrays of skA
+ long[] tmpHashArrA = new long[countA];
+ double[] tmpValuesArrA = new double[countA * numValues];
+
+ //search for non matches and build temp arrays
+ final int lgHTBLen = exactLog2OfLong(hashTableB.length);
+ int nonMatches = 0;
+ for (int i = 0; i < countA; i++) {
+ final long hash = hashArrA[i];
+ if (continueCondition(minThetaLong, hash)) { continue; }
+ final int index = hashSearch(hashTableB, lgHTBLen, hash);
+ if (index == -1) {
+ tmpHashArrA[nonMatches] = hash;
+ System.arraycopy(valuesArrA, i * numValues, tmpValuesArrA, nonMatches * numValues, numValues);
+ nonMatches++;
+ }
+ }
+ tmpHashArrA = Arrays.copyOf(tmpHashArrA, nonMatches);
+ tmpValuesArrA = Arrays.copyOf(tmpValuesArrA, nonMatches * numValues);
+ final DataArrays daR = new DataArrays(tmpHashArrA, tmpValuesArrA, nonMatches);
+ return daR;
+ }
+
+ private static class DataArrays {
+ long[] hashArr;
+ double[] valuesArr;
+ int count;
+
+ DataArrays(final long[] hashArr, final double[] valuesArr, final int count) {
+ this.hashArr = hashArr;
+ this.valuesArr = valuesArr;
+ this.count = count;
+ }
+ }
+
+ /**
+ * Copies the entries of da that pass continueCondition() for thetaLong into new, exactly-sized arrays.
+ * Assumes da.valuesArr holds numValues doubles for every slot of da.hashArr, in the same order.
+ */
+ private static DataArrays trimDataArrays(final DataArrays da, final long thetaLong, final int numValues) {
+ final long[] hashArrIn = da.hashArr;
+ final double[] valuesArrIn = da.valuesArr;
+ final int count = count(hashArrIn, thetaLong); //number of survivors, taken over the whole input array
+ final long[] hashArrOut = new long[count];
+ final double[] valuesArrOut = new double[count * numValues];
+ int haOutIdx = 0;
+ //Scan every input slot, not just the first 'count': survivors are not guaranteed to lead the array.
+ for (int haInIdx = 0; haInIdx < hashArrIn.length; haInIdx++) {
+ final long hash = hashArrIn[haInIdx];
+ if (continueCondition(thetaLong, hash)) { continue; }
+ hashArrOut[haOutIdx] = hash;
+ System.arraycopy(valuesArrIn, haInIdx * numValues, valuesArrOut, haOutIdx * numValues, numValues);
+ haOutIdx++;
+ }
+ return new DataArrays(hashArrOut, valuesArrOut, count);
+ }
+
+ private void reset() {
+ empty_ = true;
+ thetaLong_ = Long.MAX_VALUE;
+ keys_ = null;
+ values_ = null;
+ count_ = 0;
+ }
+}
+
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java
new file mode 100644
index 000000000..2679debea
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+/**
+ * Combines two arrays of double values for use with ArrayOfDoubles tuple sketches
+ */
+public interface ArrayOfDoublesCombiner {
+
+ /**
+ * Method of combining two arrays of double values
+ * @param a Array A.
+ * @param b Array B.
+ * @return Result of combining A and B
+ */
+ public double[] combine(double[] a, double[] b);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java
new file mode 100644
index 000000000..35e8cb15d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+/**
+ * Top level compact tuple sketch of type ArrayOfDoubles. Compact sketches are never created
+ * directly. They are created as a result of the compact() method on a QuickSelectSketch
+ * or the getResult() method of a set operation like Union, Intersection or AnotB.
+ * Compact sketch consists of a compact list (i.e. no intervening spaces) of hash values,
+ * corresponding list of double values, and a value for theta. The lists may or may
+ * not be ordered. A compact sketch is read-only.
+ */
+public abstract class ArrayOfDoublesCompactSketch extends ArrayOfDoublesSketch {
+
+ static final byte serialVersionUID = 1;
+
+ // Layout of retained entries:
+ // Long || Start Byte Adr:
+ // Adr:
+ // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ // 3 ||-----------------------------------|----------Retained Entries------------|
+
+ static final int EMPTY_SIZE = 16;
+ static final int RETAINED_ENTRIES_INT = 16;
+ // 4 bytes of padding for 8 byte alignment
+ static final int ENTRIES_START = 24;
+
+ ArrayOfDoublesCompactSketch(final int numValues) {
+ super(numValues);
+ }
+
+ @Override
+ public int getCurrentBytes() {
+ final int count = getRetainedEntries();
+ int sizeBytes = EMPTY_SIZE;
+ if (count > 0) {
+ sizeBytes = ENTRIES_START + (SIZE_OF_KEY_BYTES * count)
+ + (SIZE_OF_VALUE_BYTES * numValues_ * count);
+ }
+ return sizeBytes;
+ }
+
+ @Override
+ public int getMaxBytes() {
+ return getCurrentBytes();
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java
new file mode 100644
index 000000000..b2b26a30f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.Math.min;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles.
+ * A new instance represents the Universal Set.
+ * Every update() computes an intersection with the internal set
+ * and can only reduce the internal set.
+ */
+public abstract class ArrayOfDoublesIntersection {
+ //not changed by resetToEmpty() or hardReset()
+ private final short seedHash_;
+ private final int numValues_;
+ //nulled or reset by resetToEmpty
+ private HashTables hashTables_;
+ private boolean empty_;
+ private boolean firstCall_;
+ private long thetaLong_;
+
+ /**
+ * Internal constructor, called by HeapArrayOfDoublesIntersection and DirectArrayOfDoublesIntersection
+ * @param numValues the number of double values in the summary array
+ * @param seed the hash function update seed.
+ */
+ ArrayOfDoublesIntersection(final int numValues, final long seed) {
+ seedHash_ = Util.computeSeedHash(seed);
+ numValues_ = numValues;
+ hashTables_ = null;
+ empty_ = false;
+ thetaLong_ = Long.MAX_VALUE;
+ firstCall_ = true;
+ }
+
+ /**
+ * Performs a stateful intersection of the internal set with the given tupleSketch.
+ * The given tupleSketch must be non-null, pass Util.checkSeedHashes, and match the internal numValues.
+ * @param tupleSketch Input sketch to intersect with the internal set.
+ * @param combiner Method of combining two arrays of double values
+ */
+ public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) {
+ if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); }
+ Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash());
+ if (tupleSketch.numValues_ != numValues_) {
+ throw new SketchesArgumentException(
+ "Input tupleSketch cannot have different numValues from the internal numValues.");
+ }
+
+ final boolean isFirstCall = firstCall_;
+ firstCall_ = false;
+
+ //could be first or next call
+
+ final boolean emptyIn = tupleSketch.isEmpty();
+ if (empty_ || emptyIn) { //empty rule
+ //Whatever the current internal state, we make our local empty.
+ resetToEmpty(); //sets empty_, restores thetaLong_ to Long.MAX_VALUE and clears hashTables_ if present
+ return;
+ }
+
+ final long thetaLongIn = tupleSketch.getThetaLong();
+ thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule
+
+ if (tupleSketch.getRetainedEntries() == 0) {
+ if (hashTables_ != null) {
+ hashTables_.clear();
+ }
+ }
+ // NOTE(review): execution continues even when the input retained no entries; there is no return above
+
+ if (isFirstCall) {
+ //Copy first sketch data into local instance hashTables_
+ hashTables_ = new HashTables(tupleSketch);
+ }
+
+ //Next Call
+ else {
+ assert hashTables_ != null;
+ if (hashTables_.getNumKeys() == 0) { return; } //no keys held, so nothing to intersect
+ //process intersect with current hashTables
+ hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, combiner);
+ }
+ }
+
+ /**
+ * Gets the internal set as an on-heap compact sketch.
+ * @return Result of the intersections so far as a compact sketch.
+ */
+ public ArrayOfDoublesCompactSketch getResult() {
+ return getResult(null);
+ }
+
+ /**
+ * Gets the result of stateful intersections so far.
+ * @param dstSeg MemorySegment for the compact sketch (can be null).
+ * @return Result of the intersections so far as a compact sketch.
+ */
+ public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) {
+ if (firstCall_) {
+ throw new SketchesStateException(
+ "getResult() with no intervening intersections is not a legal result.");
+ }
+ long[] hashArrOut = new long[0];
+ double[] valuesArrOut = new double[0];
+ if (hashTables_ != null && hashTables_.getHashTable() != null) {
+ final int numKeys = hashTables_.getNumKeys();
+
+ if (numKeys > 0) {
+ final int tableSize = hashTables_.getHashTable().length;
+
+ hashArrOut = new long[numKeys];
+ valuesArrOut = new double[numKeys * numValues_];
+
+ // & flatten the hash tables
+ int cnt = 0;
+ final long[] hashTable = hashTables_.getHashTable();
+ final double[][] valueTable = hashTables_.getValueTable();
+ for (int i = 0; i < tableSize; i++) {
+ final long hash = hashTable[i];
+ if (hash == 0 || hash > thetaLong_) { continue; }
+ hashArrOut[cnt] = hash;
+ System.arraycopy(valueTable[i], 0, valuesArrOut, cnt * numValues_, numValues_);
+ cnt++;
+ }
+ assert cnt == numKeys;
+ }
+ }
+
+ return (dstSeg == null)
+ ? new HeapArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut,
+ thetaLong_, empty_, numValues_, seedHash_)
+ : new DirectArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut,
+ thetaLong_, empty_, numValues_, seedHash_, dstSeg);
+ }
+
+ /**
+ * Resets the internal set to the initial state, which represents the Universal Set
+ */
+ public void reset() {
+ hardReset();
+ }
+
+ private void hardReset() {
+ empty_ = false;
+ firstCall_ = true;
+ thetaLong_ = Long.MAX_VALUE;
+ if (hashTables_ != null) { hashTables_.clear(); }
+ }
+
+ private void resetToEmpty() {
+ empty_ = true;
+ firstCall_ = false;
+ thetaLong_ = Long.MAX_VALUE;
+ if (hashTables_ != null) { hashTables_.clear(); }
+ }
+
+ protected abstract ArrayOfDoublesQuickSelectSketch createSketch(int nomEntries, int numValues, long seed);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java
new file mode 100644
index 000000000..7c29d7141
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.QuickSelect;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Top level class for hash table based implementations of tuple sketch of type
+ * ArrayOfDoubles that uses the QuickSelect algorithm.
+ */
+abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSketch {
+
+ static final byte serialVersionUID = 1;
+
+ // Layout of next 16 bytes:
+ // Long || Start Byte Adr:
+ // Adr:
+ // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
+ // 3 ||-----------P (float)---------------|--------|--lgRF--|--lgArr-|---lgNom---|
+ // || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 |
+ // 4 ||-----------------------------------|----------Retained Entries------------|
+
+ static final int LG_NOM_ENTRIES_BYTE = 16;
+ static final int LG_CUR_CAPACITY_BYTE = 17;
+ static final int LG_RESIZE_FACTOR_BYTE = 18;
+ // 1 byte of padding for alignment
+ static final int SAMPLING_P_FLOAT = 20;
+ static final int RETAINED_ENTRIES_INT = 24;
+ // 4 bytes of padding for alignment
+ static final int ENTRIES_START = 32;
+
+ static final int DEFAULT_LG_RESIZE_FACTOR = 3;
+
+ // these can be derived from other things, but are kept here for performance
+ int rebuildThreshold_; //absolute value relative to current capacity
+ int lgCurrentCapacity_;
+
+ ArrayOfDoublesQuickSelectSketch(final int numValues, final long seed) {
+ super(numValues, seed);
+ }
+
+ abstract void updateValues(int index, double[] values);
+
+ abstract void setNotEmpty();
+
+ abstract boolean isInSamplingMode();
+
+ abstract void rebuild(int newCapacity);
+
+ abstract long getKey(int index);
+
+ abstract void setValues(int index, double[] values);
+
+ abstract void incrementCount();
+
+ abstract void setThetaLong(long thetaLong);
+
+ abstract int insertKey(long key);
+
+ abstract int findOrInsertKey(long key);
+
+ abstract double[] find(long key);
+
+ abstract int getSerializedSizeBytes();
+
+ abstract void serializeInto(MemorySegment seg);
+
+ @Override
+ public void trim() {
+ if (getRetainedEntries() > getNominalEntries()) {
+ setThetaLong(getNewThetaLong());
+ rebuild();
+ }
+ }
+
+ @Override
+ public int getMaxBytes() {
+ final int nomEntries = getNominalEntries();
+ final int numValues = getNumValues();
+ return getMaxBytes(nomEntries, numValues);
+ }
+
+ @Override
+ public int getCurrentBytes() {
+ return getSerializedSizeBytes();
+ }
+
+ /**
+ * Computes ENTRIES_START plus one key and numValues values for each of 2 * ceilingPowerOf2(nomEntries) slots.
+ * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to
+ * given value.
+ * @param numValues Number of double values to keep for each key
+ * @return maximum required storage bytes given nomEntries and numValues
+ */
+ static int getMaxBytes(final int nomEntries, final int numValues) {
+ return ENTRIES_START // NOTE(review): all int arithmetic; confirm callers keep the product in int range
+ + (SIZE_OF_KEY_BYTES + SIZE_OF_VALUE_BYTES * numValues) * ceilingPowerOf2(nomEntries) * 2;
+ }
+
+ // non-public methods below
+
+ // this is a special back door insert for merging
+ // not sufficient by itself without keeping track of theta of another sketch
+ void merge(final long key, final double[] values) {
+ setNotEmpty();
+ if (key < thetaLong_) {
+ final int index = findOrInsertKey(key);
+ if (index < 0) {
+ incrementCount();
+ setValues(~index, values);
+ } else {
+ updateValues(index, values);
+ }
+ rebuildIfNeeded();
+ }
+ }
+
+ void rebuildIfNeeded() {
+ if (getRetainedEntries() <= rebuildThreshold_) { return; }
+ if (getCurrentCapacity() > getNominalEntries()) {
+ setThetaLong(getNewThetaLong());
+ rebuild();
+ } else {
+ rebuild(getCurrentCapacity() * getResizeFactor().getValue());
+ }
+ }
+
+ void rebuild() {
+ rebuild(getCurrentCapacity());
+ }
+
+ void insert(final long key, final double[] values) {
+ final int index = insertKey(key);
+ setValues(index, values);
+ incrementCount();
+ }
+
+ final void setRebuildThreshold() {
+ if (getCurrentCapacity() > getNominalEntries()) {
+ rebuildThreshold_ = (int) (getCurrentCapacity() * ThetaUtil.REBUILD_THRESHOLD);
+ } else {
+ rebuildThreshold_ = (int) (getCurrentCapacity() * ThetaUtil.RESIZE_THRESHOLD);
+ }
+ }
+
+ @Override
+ void insertOrIgnore(final long key, final double[] values) {
+ if (values.length != getNumValues()) {
+ throw new SketchesArgumentException("input array of values must have " + getNumValues()
+ + " elements, but has " + values.length);
+ }
+ setNotEmpty();
+ if ((key == 0) || (key >= thetaLong_)) { return; }
+ final int index = findOrInsertKey(key);
+ if (index < 0) {
+ incrementCount();
+ setValues(~index, values);
+ } else {
+ updateValues(index, values);
+ }
+ rebuildIfNeeded();
+ }
+
+ long getNewThetaLong() {
+ final long[] keys = new long[getRetainedEntries()];
+ int i = 0;
+ for (int j = 0; j < getCurrentCapacity(); j++) {
+ final long key = getKey(j);
+ if (key != 0) { keys[i++] = key; }
+ }
+ return QuickSelect.select(keys, 0, getRetainedEntries() - 1, getNominalEntries());
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java
new file mode 100644
index 000000000..eaf486a15
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Builds set operations object for tuple sketches of type ArrayOfDoubles.
+ */
+public class ArrayOfDoublesSetOperationBuilder {
+
+ private int nomEntries_;
+ private int numValues_;
+ private long seed_;
+
+ /**
+ * Default Nominal Entries (a.k.a. K)
+ */
+ public static final int DEFAULT_NOMINAL_ENTRIES = 4096;
+
+ /**
+ * Default number of values
+ */
+ public static final int DEFAULT_NUMBER_OF_VALUES = 1;
+
+ /**
+ * Creates an instance of the builder with default parameters
+ */
+ public ArrayOfDoublesSetOperationBuilder() {
+ nomEntries_ = DEFAULT_NOMINAL_ENTRIES;
+ numValues_ = DEFAULT_NUMBER_OF_VALUES;
+ seed_ = ThetaUtil.DEFAULT_UPDATE_SEED;
+ }
+
+ /**
+ * This is to set the nominal number of entries.
+ * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+ * or equal to the given value.
+ * @return this builder
+ */
+ public ArrayOfDoublesSetOperationBuilder setNominalEntries(final int nomEntries) {
+ nomEntries_ = nomEntries;
+ return this;
+ }
+
+ /**
+ * This is to set the number of double values associated with each key
+ * @param numValues number of double values
+ * @return this builder
+ */
+ public ArrayOfDoublesSetOperationBuilder setNumberOfValues(final int numValues) {
+ numValues_ = numValues;
+ return this;
+ }
+
+ /**
+ * Sets the long seed value that is required by the hashing function.
+ * @param seed See seed
+ * @return this builder
+ */
+ public ArrayOfDoublesSetOperationBuilder setSeed(final long seed) {
+ seed_ = seed;
+ return this;
+ }
+
+ /**
+ * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder.
+ * The new instance is allocated on the heap if the memory is not provided.
+ * @return an instance of ArrayOfDoublesUnion
+ */
+ public ArrayOfDoublesUnion buildUnion() {
+ return new HeapArrayOfDoublesUnion(nomEntries_, numValues_, seed_);
+ }
+
+ /**
+ * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder
+ * and the given destination MemorySegment.
+ * @param dstSeg destination MemorySegment to be used by the sketch
+ * @return an instance of ArrayOfDoublesUnion
+ */
+ public ArrayOfDoublesUnion buildUnion(final MemorySegment dstSeg) {
+ return new DirectArrayOfDoublesUnion(nomEntries_, numValues_, seed_, dstSeg);
+ }
+
+ /**
+ * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the
+ * builder.
+ * The new instance is allocated on the heap if the memory is not provided.
+ * The number of nominal entries is not relevant to this, so it is ignored.
+ * @return an instance of ArrayOfDoublesIntersection
+ */
+ public ArrayOfDoublesIntersection buildIntersection() {
+ return new HeapArrayOfDoublesIntersection(numValues_, seed_);
+ }
+
+ /**
+ * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the
+ * builder.
+ * The new instance is allocated on the heap if the MemorySegment is not provided.
+ * The number of nominal entries is not relevant to this, so it is ignored.
+ * @param dstSeg destination MemorySegment to be used by the sketch
+ * @return an instance of ArrayOfDoublesIntersection
+ */
+ public ArrayOfDoublesIntersection buildIntersection(final MemorySegment dstSeg) {
+ return new DirectArrayOfDoublesIntersection(numValues_, seed_, dstSeg);
+ }
+
+ /**
+ * Creates an instance of ArrayOfDoublesAnotB based on the current configuration of the builder.
+ * The memory is not relevant to this, so it is ignored if set.
+ * The number of nominal entries is not relevant to this, so it is ignored.
+ * @return an instance of ArrayOfDoublesAnotB
+ */
+ public ArrayOfDoublesAnotB buildAnotB() {
+ return new ArrayOfDoublesAnotBImpl(numValues_, seed_);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java
new file mode 100644
index 000000000..c4163fb1e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static org.apache.datasketches.common.Util.LS;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.thetacommon.BinomialBoundsN;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+
+/**
+ * The base class for the tuple sketch of type ArrayOfDoubles, where an array of double values
+ * is associated with each key.
+ * A primitive array of doubles is used here, as opposed to a generic Summary object,
+ * for improved performance.
+ */
+public abstract class ArrayOfDoublesSketch {
+
+ // The concept of being empty is about representing an empty set.
+ // So a sketch can be non-empty, and have no entries.
+ // For example, as a result of a sampling, when some data was presented to the sketch, but no
+ // entries were retained.
+ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES }
+
+ static final int SIZE_OF_KEY_BYTES = Long.BYTES;
+ static final int SIZE_OF_VALUE_BYTES = Double.BYTES;
+
+ // Common Layout of first 16 bytes and Empty AoDCompactSketch:
+ // Long || Start Byte Adr:
+ // Adr:
+ // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ // 0 || Seed Hash | #Dbls | Flags | SkType | FamID | SerVer | Preamble_Longs |
+ // || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ // 1 ||-------------------------Theta Long------------------------------------------------|
+
+ static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1
+ static final int SERIAL_VERSION_BYTE = 1;
+ static final int FAMILY_ID_BYTE = 2;
+ static final int SKETCH_TYPE_BYTE = 3;
+ static final int FLAGS_BYTE = 4;
+ static final int NUM_VALUES_BYTE = 5;
+ static final int SEED_HASH_SHORT = 6;
+ static final int THETA_LONG = 8;
+
+ final int numValues_;
+
+ long thetaLong_;
+ boolean isEmpty_ = true;
+
+ ArrayOfDoublesSketch(final int numValues) {
+ numValues_ = numValues;
+ }
+
+ /**
+ * Heapify the given MemorySegment as an ArrayOfDoublesSketch
+ * @param seg the given MemorySegment
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch heapify(final MemorySegment seg) {
+ return heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify the given MemorySegment and seed as an ArrayOfDoublesSketch
+ * @param seg the given MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch heapify(final MemorySegment seg, final long seed) {
+ final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg);
+ if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) {
+ return new HeapArrayOfDoublesQuickSelectSketch(seg, seed);
+ }
+ return new HeapArrayOfDoublesCompactSketch(seg, seed);
+ }
+
+ /**
+ * Wrap the given MemorySegment as an ArrayOfDoublesSketch
+ * @param seg the given MemorySegment
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch wrap(final MemorySegment seg) {
+ return wrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap the given MemorySegment and seed as an ArrayOfDoublesSketch
+ * @param seg the given MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch wrap(final MemorySegment seg, final long seed) {
+ final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg);
+ if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) {
+ return new DirectArrayOfDoublesQuickSelectSketchR(seg, seed);
+ }
+ return new DirectArrayOfDoublesCompactSketch(seg, seed);
+ }
+
+ /**
+ * Estimates the cardinality of the set (number of unique values presented to the sketch)
+ * @return best estimate of the number of unique values
+ */
+ public double getEstimate() {
+ if (!isEstimationMode()) { return getRetainedEntries(); }
+ return getRetainedEntries() / getTheta();
+ }
+
+ /**
+ * Gets the approximate upper error bound given the specified number of Standard Deviations.
+ * This will return getEstimate() if isEmpty() is true.
+ *
+ * @param numStdDev
+ * See Number of Standard Deviations
+ * @return the upper bound.
+ */
+ public double getUpperBound(final int numStdDev) {
+ if (!isEstimationMode()) { return getRetainedEntries(); }
+ return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
+ }
+
+ /**
+ * Gets the approximate lower error bound given the specified number of Standard Deviations.
+ * This will return getEstimate() if isEmpty() is true.
+ *
+ * @param numStdDev
+ * See Number of Standard Deviations
+ * @return the lower bound.
+ */
+ public double getLowerBound(final int numStdDev) {
+ if (!isEstimationMode()) { return getRetainedEntries(); }
+ return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
+ }
+
+ /**
+ * Returns true if this sketch's data structure is backed by MemorySegment.
+ * @return true if this sketch's data structure is backed by MemorySegment.
+ */
+ public abstract boolean hasMemorySegment();
+
+ /**
+ * Returns the MemorySegment object if it exists, otherwise null.
+ * @return the MemorySegment object if it exists, otherwise null.
+ */
+ abstract MemorySegment getMemorySegment();
+
+ /**
+ * See Empty
+ * @return true if empty.
+ */
+ public boolean isEmpty() {
+ return isEmpty_;
+ }
+
+ /**
+ * Returns number of double values associated with each key
+ * @return number of double values associated with each key
+ */
+ public int getNumValues() {
+ return numValues_;
+ }
+
+ /**
+ * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode).
+ * This is true if {@code theta < 1.0} AND isEmpty() is false.
+ * @return true if the sketch is in estimation mode.
+ */
+ public boolean isEstimationMode() {
+ return ((thetaLong_ < Long.MAX_VALUE) && !isEmpty());
+ }
+
+ /**
+ * Gets the value of theta as a double between zero and one
+ * @return the value of theta as a double
+ */
+ public double getTheta() {
+ return getThetaLong() / (double) Long.MAX_VALUE;
+ }
+
+ /**
+ * Returns number of retained entries
+ * @return number of retained entries
+ */
+ public abstract int getRetainedEntries();
+
+ /**
+ * Returns the maximum number of bytes for this sketch when serialized.
+ * @return the maximum number of bytes for this sketch when serialized.
+ */
+ public abstract int getMaxBytes();
+
+ /**
+ * For compact sketches this is the same as getMaxBytes().
+ * @return the current number of bytes for this sketch when serialized.
+ */
+ public abstract int getCurrentBytes();
+
+ /**
+ * Returns serialized representation of the sketch
+ * @return serialized representation of the sketch
+ */
+ public abstract byte[] toByteArray();
+
+ /**
+ * Returns array of arrays of double values in the sketch
+ * @return array of arrays of double values in the sketch
+ */
+ public abstract double[][] getValues();
+
+ abstract double[] getValuesAsOneDimension();
+
+ abstract long[] getKeys();
+
+ /**
+ * Returns the value of theta as a long
+ * @return the value of theta as a long
+ */
+ long getThetaLong() {
+ return isEmpty() ? Long.MAX_VALUE : thetaLong_;
+ }
+
+ abstract short getSeedHash();
+
+ /**
+ * Returns an iterator over the sketch
+ * @return an iterator over the sketch
+ */
+ public abstract ArrayOfDoublesSketchIterator iterator();
+
+ /**
+ * Returns this sketch in compact form, which is immutable.
+ * @return this sketch in compact form, which is immutable.
+ */
+ public ArrayOfDoublesCompactSketch compact() {
+ return compact(null);
+ }
+
+ /**
+ * Returns this sketch in compact form, which is immutable.
+ * @param dstSeg the destination MemorySegment
+ * @return this sketch in compact form, which is immutable.
+ */
+ public abstract ArrayOfDoublesCompactSketch compact(MemorySegment dstSeg);
+
+ @Override
+ public String toString() {
+ final int seedHash = Short.toUnsignedInt(getSeedHash());
+ final StringBuilder sb = new StringBuilder();
+ sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS);
+ sb.append(" Estimate : ").append(getEstimate()).append(LS);
+ sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS);
+ sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS);
+ sb.append(" Theta (double) : ").append(getTheta()).append(LS);
+ sb.append(" Theta (long) : ").append(getThetaLong()).append(LS);
+ sb.append(" EstMode? : ").append(isEstimationMode()).append(LS);
+ sb.append(" Empty? : ").append(isEmpty()).append(LS);
+ sb.append(" Retained Entries : ").append(getRetainedEntries()).append(LS);
+ if (this instanceof ArrayOfDoublesUpdatableSketch) {
+ final ArrayOfDoublesUpdatableSketch updatable = (ArrayOfDoublesUpdatableSketch) this;
+ sb.append(" Nominal Entries (k) : ").append(updatable.getNominalEntries()).append(LS);
+ sb.append(" Current Capacity : ").append(updatable.getCurrentCapacity()).append(LS);
+ sb.append(" Resize Factor : ").append(updatable.getResizeFactor().getValue()).append(LS);
+ sb.append(" Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS);
+ }
+ sb.append(" Seed Hash : ")
+ .append(Integer.toHexString(seedHash)).append(" | ").append(seedHash).append(LS);
+ sb.append("### END SKETCH SUMMARY").append(LS);
+ return sb.toString();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java
new file mode 100644
index 000000000..71ed63216
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+/**
+ * Interface for iterating over tuple sketches of type ArrayOfDoubles
+ */
+public interface ArrayOfDoublesSketchIterator {
+ /**
+ * Advancing the iterator and checking existence of the next entry
+ * is combined here for efficiency. This results in an undefined
+ * state of the iterator before the first call of this method.
+ * @return true if the next element exists
+ */
+ public boolean next();
+
+ /**
+ * Gets a key from the current entry in the sketch, which is a hash
+ * of the original key passed to update(). The original keys are not
+ * retained. Don't call this before calling next() for the first time
+ * or after getting false from next().
+ * @return hash key from the current entry
+ */
+ public long getKey();
+
+ /**
+ * Gets an array of values from the current entry in the sketch.
+ * Don't call this before calling next() for the first time
+ * or after getting false from next().
+ * @return array of double values for the current entry (may or may not be a copy)
+ */
+ public double[] getValues();
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java
new file mode 100644
index 000000000..36421e14d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Convenient static methods to instantiate tuple sketches of type ArrayOfDoubles.
+ */
+public final class ArrayOfDoublesSketches {
+
+ /**
+ * Heapify the given MemorySegment as an ArrayOfDoublesSketch
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg) {
+ return heapifySketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify the given MemorySegment and seed as an ArrayOfDoublesSketch
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg, final long seed) {
+ return ArrayOfDoublesSketch.heapify(srcSeg, seed);
+ }
+
+ /**
+ * Heapify the given MemorySegment as an ArrayOfDoublesUpdatableSketch
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg) {
+ return heapifyUpdatableSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify the given MemorySegment and seed as an ArrayOfDoublesUpdatableSketch
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg, final long seed) {
+ return ArrayOfDoublesUpdatableSketch.heapify(srcSeg, seed);
+ }
+
+ /**
+ * Wrap the given MemorySegment as an ArrayOfDoublesSketch
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg) {
+ return wrapSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap the given MemorySegment and seed as an ArrayOfDoublesSketch
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesSketch
+ */
+ public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg, final long seed) {
+ return ArrayOfDoublesSketch.wrap(srcSeg, seed);
+ }
+
+ /**
+ * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg) {
+ return wrapUpdatableSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap the given MemorySegment and seed as an ArrayOfDoublesUpdatableSketch
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg, final long seed) {
+ return ArrayOfDoublesUpdatableSketch.wrap(srcSeg, seed);
+ }
+
+ /**
+ * Heapify the given MemorySegment as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg) {
+ return heapifyUnion(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg, final long seed) {
+ return ArrayOfDoublesUnion.heapify(srcSeg, seed);
+ }
+
+ /**
+ * Wrap the given MemorySegment as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg) {
+ return wrapUnion(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg, final long seed) {
+ return ArrayOfDoublesUnion.wrap(srcSeg, seed);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java
new file mode 100644
index 000000000..a097ccf47
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.Math.min;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * The base class for unions of tuple sketches of type ArrayOfDoubles.
+ */
+public abstract class ArrayOfDoublesUnion {
+
+ static final byte serialVersionUID = 1;
+ //For layout see toByteArray()
+ static final int PREAMBLE_SIZE_BYTES = 16;
+ static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1
+ static final int SERIAL_VERSION_BYTE = 1;
+ static final int FAMILY_ID_BYTE = 2;
+ static final int SKETCH_TYPE_BYTE = 3;
+ static final int FLAGS_BYTE = 4;
+ static final int NUM_VALUES_BYTE = 5;
+ static final int SEED_HASH_SHORT = 6;
+ static final int THETA_LONG = 8;
+
+ ArrayOfDoublesQuickSelectSketch gadget_;
+ long unionThetaLong_;
+
+ /**
+ * Constructs this Union initializing it with the given sketch, which can be on-heap or off-heap.
+ * @param sketch the given sketch.
+ */
+ ArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch sketch) {
+ gadget_ = sketch;
+ unionThetaLong_ = sketch.getThetaLong();
+ }
+
+ /**
+ * Heapify the given MemorySegment as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg) {
+ return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg, final long seed) {
+ return HeapArrayOfDoublesUnion.heapifyUnion(srcSeg, seed);
+ }
+
+ /**
+ * Wrap the given MemorySegment as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg) {
+ return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion
+ * @param srcSeg the given source MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUnion
+ */
+ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long seed) {
+ return DirectArrayOfDoublesUnion.wrapUnion(srcSeg, seed, !srcSeg.isReadOnly());
+ }
+
+ /**
+ * Updates the union by adding a set of entries from a given sketch, which can be on-heap or off-heap.
+ * Both the given tupleSketch and the internal state of the Union must have the same numValues.
+ *
+ * Nulls and empty sketches are ignored.
+ *
+ * @param tupleSketch sketch to add to the union
+ */
+ public void union(final ArrayOfDoublesSketch tupleSketch) {
+ if (tupleSketch == null) { return; }
+ Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash());
+ if (gadget_.getNumValues() != tupleSketch.getNumValues()) {
+ throw new SketchesArgumentException("Incompatible sketches: number of values mismatch "
+ + gadget_.getNumValues() + " and " + tupleSketch.getNumValues());
+ }
+
+ if (tupleSketch.isEmpty()) { return; }
+ else { gadget_.setNotEmpty(); }
+
+ setUnionThetaLong(min(min(unionThetaLong_, tupleSketch.getThetaLong()), gadget_.getThetaLong()));
+
+ if (tupleSketch.getRetainedEntries() == 0) { return; }
+ final ArrayOfDoublesSketchIterator it = tupleSketch.iterator();
+ while (it.next()) {
+ if (it.getKey() < unionThetaLong_) {
+ gadget_.merge(it.getKey(), it.getValues());
+ }
+ }
+ // keep the union theta as low as possible for performance
+ if (gadget_.getThetaLong() < unionThetaLong_) {
+ setUnionThetaLong(gadget_.getThetaLong());
+ }
+ }
+
+ /**
+ * Returns the resulting union in the form of a compact sketch
+ * @param dstSeg MemorySegment for the result (can be null)
+ * @return compact sketch representing the union (off-heap if MemorySegment is provided)
+ */
+ public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) {
+ long unionThetaLong = unionThetaLong_;
+ if (gadget_.getRetainedEntries() > gadget_.getNominalEntries()) {
+ unionThetaLong = Math.min(unionThetaLong, gadget_.getNewThetaLong());
+ }
+ if (dstSeg == null) {
+ return new HeapArrayOfDoublesCompactSketch(gadget_, unionThetaLong);
+ }
+ return new DirectArrayOfDoublesCompactSketch(gadget_, unionThetaLong, dstSeg);
+ }
+
+ /**
+ * Returns the resulting union in the form of a compact sketch
+ * @return on-heap compact sketch representing the union
+ */
+ public ArrayOfDoublesCompactSketch getResult() {
+ return getResult(null);
+ }
+
+ /**
+ * Resets the union to an empty state
+ */
+ public void reset() {
+ gadget_.reset();
+ setUnionThetaLong(gadget_.getThetaLong());
+ }
+
+ // Layout of first 16 bytes:
+ // Long || Start Byte Adr:
+ // Adr:
+ // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ // 0 || Seed Hash=0 | #Dbls=0|Flags=0 | SkType | FamID | SerVer | Preamble_Longs |
+ // || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
+ // 1 ||---------------------------Union Theta Long-----------------------------------------|
+ /**
+ * Returns a byte array representation of this object
+ * @return a byte array representation of this object
+ */
+ public byte[] toByteArray() {
+ final int sizeBytes = PREAMBLE_SIZE_BYTES + gadget_.getSerializedSizeBytes();
+ final byte[] byteArray = new byte[sizeBytes];
+ final MemorySegment seg = MemorySegment.ofArray(byteArray);
+ seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1
+ seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+ seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+ seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal());
+ //byte 4-7 automatically zero
+ seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, unionThetaLong_);
+ gadget_.serializeInto(seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES));
+ return byteArray;
+ }
+
+ /**
+ * Returns maximum required storage bytes given nomEntries and numValues
+ * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to
+ * the given value.
+ * @param numValues Number of double values to keep for each key
+ * @return maximum required storage bytes given nomEntries and numValues
+ */
+ public static int getMaxBytes(final int nomEntries, final int numValues) {
+ return ArrayOfDoublesQuickSelectSketch.getMaxBytes(nomEntries, numValues) + PREAMBLE_SIZE_BYTES;
+ }
+
+ void setUnionThetaLong(final long thetaLong) {
+ unionThetaLong_ = thetaLong;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
new file mode 100644
index 000000000..c61e8944d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.hash.MurmurHash3;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * The top level for updatable tuple sketches of type ArrayOfDoubles.
+ */
+public abstract class ArrayOfDoublesUpdatableSketch extends ArrayOfDoublesSketch {
+
+ final long seed_;
+
+ ArrayOfDoublesUpdatableSketch(final int numValues, final long seed) {
+ super(numValues);
+ seed_ = seed;
+ }
+
+ /**
+ * Heapify the given MemorySegment as an ArrayOfDoublesUpdatableSketch
+ * @param seg the given MemorySegment
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg) {
+ return heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Heapify the given MemorySegment and seed as an ArrayOfDoublesUpdatableSketch
+ * @param seg the given MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg, final long seed) {
+ return new HeapArrayOfDoublesQuickSelectSketch(seg, seed);
+ }
+
+ /**
+ * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch
+ * @param seg the given MemorySegment
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg) {
+ return wrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Wrap the given MemorySegment and seed as an ArrayOfDoublesUpdatableSketch
+ * @param seg the given MemorySegment
+ * @param seed the given seed
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg, final long seed) {
+ return new DirectArrayOfDoublesQuickSelectSketch(seg, seed);
+ }
+
+ /**
+ * Updates this sketch with a long key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given long key
+ * @param values The given values
+ */
+ public void update(final long key, final double[] values) {
+ update(new long[] {key}, values);
+ }
+
+ /**
+ * Updates this sketch with a double key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given double key
+ * @param values The given values
+ */
+ public void update(final double key, final double[] values) {
+ update(Util.doubleToLongArray(key), values);
+ }
+
+ /**
+ * Updates this sketch with a String key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given String key
+ * @param values The given values
+ */
+ public void update(final String key, final double[] values) {
+ update(Util.stringToByteArray(key), values);
+ }
+
+ /**
+ * Updates this sketch with a byte[] key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given byte[] key
+ * @param values The given values
+ */
+ public void update(final byte[] key, final double[] values) {
+ if (key == null || key.length == 0) { return; }
+ insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values);
+ }
+
+ /**
+ * Updates this sketch with a ByteBuffer key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given ByteBuffer key
+ * @param values The given values
+ */
+ public void update(final ByteBuffer key, final double[] values) {
+ if (key == null || key.hasRemaining() == false) { return; }
+ insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values);
+ }
+
+ /**
+ * Updates this sketch with an int[] key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given int[] key
+ * @param values The given values
+ */
+ public void update(final int[] key, final double[] values) {
+ if (key == null || key.length == 0) { return; }
+ insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values);
+ }
+
+ /**
+ * Updates this sketch with a long[] key and double values.
+ * The values will be stored or added to the ones associated with the key
+ *
+ * @param key The given long[] key
+ * @param values The given values
+ */
+ public void update(final long[] key, final double[] values) {
+ if (key == null || key.length == 0) { return; }
+ insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values);
+ }
+
+ /**
+ * Gets the configured nominal number of entries
+ * @return nominal number of entries
+ */
+ public abstract int getNominalEntries();
+
+ /**
+ * Gets the configured resize factor
+ * @return resize factor
+ */
+ public abstract ResizeFactor getResizeFactor();
+
+ /**
+ * Gets the configured sampling probability
+ * @return sampling probability
+ */
+ public abstract float getSamplingProbability();
+
+ /**
+ * Rebuilds reducing the actual number of entries to the nominal number of entries if needed
+ */
+ public abstract void trim();
+
+ /**
+ * Resets this sketch to an empty state.
+ */
+ public abstract void reset();
+
+ /**
+ * Gets an on-heap compact representation of the sketch
+ * @return compact sketch
+ */
+ @Override
+ public ArrayOfDoublesCompactSketch compact() {
+ return compact(null);
+ }
+
+ /**
+ * Gets an off-heap compact representation of the sketch using the given MemorySegment
+ * @param dstSeg MemorySegment for the compact sketch (can be null)
+ * @return compact sketch (off-heap if MemorySegment is provided)
+ */
+ @Override
+ public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) {
+ if (dstSeg == null) {
+ return new HeapArrayOfDoublesCompactSketch(this);
+ }
+ return new DirectArrayOfDoublesCompactSketch(this, dstSeg);
+ }
+
+ abstract int getCurrentCapacity();
+
+ long getSeed() {
+ return seed_;
+ }
+
+ @Override
+ short getSeedHash() {
+ return Util.computeSeedHash(seed_);
+ }
+
+ /**
+ * Insert if key is less than thetaLong and not a duplicate, otherwise ignore.
+ * @param key the hash value of the input value
+ * @param values array of values to update the summary
+ */
+ abstract void insertOrIgnore(long key, double[] values);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java
new file mode 100644
index 000000000..a6fa5e118
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * For building a new ArrayOfDoublesUpdatableSketch
+ */
+public class ArrayOfDoublesUpdatableSketchBuilder {
+
+ private int nomEntries_;
+ private ResizeFactor resizeFactor_;
+ private int numValues_;
+ private float samplingProbability_;
+ private long seed_;
+
+ private static final int DEFAULT_NUMBER_OF_VALUES = 1;
+ private static final float DEFAULT_SAMPLING_PROBABILITY = 1;
+ private static final ResizeFactor DEFAULT_RESIZE_FACTOR = ResizeFactor.X8;
+
+ /**
+ * Creates an instance of builder with default parameters
+ */
+ public ArrayOfDoublesUpdatableSketchBuilder() {
+ nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES;
+ resizeFactor_ = DEFAULT_RESIZE_FACTOR;
+ numValues_ = DEFAULT_NUMBER_OF_VALUES;
+ samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY;
+ seed_ = ThetaUtil.DEFAULT_UPDATE_SEED;
+ }
+
+ /**
+ * This is to set the nominal number of entries.
+ * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+ * or equal to given value.
+ * @return this builder
+ */
+ public ArrayOfDoublesUpdatableSketchBuilder setNominalEntries(final int nomEntries) {
+ nomEntries_ = 1 << ThetaUtil.checkNomLongs(nomEntries);
+ return this;
+ }
+
+ /**
+ * This is to set the resize factor.
+ * Value of X1 means that the maximum capacity is allocated from the start.
+ * Default resize factor is X8.
+ * @param resizeFactor value of X1, X2, X4 or X8
+ * @return this UpdatableSketchBuilder
+ */
+ public ArrayOfDoublesUpdatableSketchBuilder setResizeFactor(final ResizeFactor resizeFactor) {
+ resizeFactor_ = resizeFactor;
+ return this;
+ }
+
+ /**
+ * This is to set sampling probability.
+ * Default probability is 1.
+ * @param samplingProbability sampling probability from 0 to 1
+ * @return this builder
+ */
+ public ArrayOfDoublesUpdatableSketchBuilder
+ setSamplingProbability(final float samplingProbability) {
+ if ((samplingProbability < 0) || (samplingProbability > 1f)) {
+ throw new SketchesArgumentException("sampling probability must be between 0 and 1");
+ }
+ samplingProbability_ = samplingProbability;
+ return this;
+ }
+
+ /**
+ * This is to set the number of double values associated with each key
+ * @param numValues number of double values
+ * @return this builder
+ */
+ public ArrayOfDoublesUpdatableSketchBuilder setNumberOfValues(final int numValues) {
+ numValues_ = numValues;
+ return this;
+ }
+
+ /**
+ * Sets the long seed value that is required by the hashing function.
+ * @param seed See seed
+ * @return this builder
+ */
+ public ArrayOfDoublesUpdatableSketchBuilder setSeed(final long seed) {
+ seed_ = seed;
+ return this;
+ }
+
+ /**
+ * Returns an ArrayOfDoublesUpdatableSketch with the current configuration of this Builder.
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public ArrayOfDoublesUpdatableSketch build() {
+ return new HeapArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(),
+ samplingProbability_, numValues_, seed_);
+ }
+
+ /**
+ * Returns an ArrayOfDoublesUpdatableSketch with the current configuration of this Builder.
+ * @param dstSeg instance of MemorySegment to be used by the sketch
+ * @return an ArrayOfDoublesUpdatableSketch
+ */
+ public ArrayOfDoublesUpdatableSketch build(final MemorySegment dstSeg) {
+ return new DirectArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(),
+ samplingProbability_, numValues_, seed_, dstSeg);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
new file mode 100644
index 000000000..727c9dccf
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * Direct Compact Sketch of type ArrayOfDoubles.
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ */
+final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch {
+
+ // this value exists only on heap, never serialized
+ private MemorySegment seg_;
+
+ /**
+ * Converts the given ArrayOfDoublesUpdatableSketch to this compact form.
+ * @param sketch the given ArrayOfDoublesUpdatableSketch
+ * @param dstSeg the given destination MemorySegment.
+ */
+ DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch,
+ final MemorySegment dstSeg) {
+ this(sketch, sketch.getThetaLong(), dstSeg);
+ }
+
+ /**
+  * Converts the given ArrayOfDoublesUpdatableSketch to this compact form, trimming to the given theta.
+  * @param sketch the given ArrayOfDoublesUpdatableSketch
+  * @param thetaLong new value of thetaLong; the smaller of this and the sketch's theta is used
+  * @param dstSeg the given destination MemorySegment.
+  */
+ DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch,
+     final long thetaLong, final MemorySegment dstSeg) {
+   super(sketch.getNumValues());
+   checkIfEnoughMemory(dstSeg, sketch.getRetainedEntries(), sketch.getNumValues());
+   seg_ = dstSeg;
+   dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+   dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+   dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+   dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal());
+   final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+   isEmpty_ = sketch.isEmpty();
+   final int count = sketch.getRetainedEntries();
+   dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) (
+     (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
+     | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
+     | (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)));
+   dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_);
+   dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed()));
+   thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong);
+   dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+   if (count > 0) {
+     // Readers locate the values right after the keys of the entries actually retained, so
+     // count the survivors first; sizing the key region by the untrimmed count misplaces values.
+     int actualCount = 0;
+     final ArrayOfDoublesSketchIterator counter = sketch.iterator();
+     while (counter.next()) { if (counter.getKey() < thetaLong_) { actualCount++; } }
+     int keyOffset = ENTRIES_START;
+     int valuesOffset = keyOffset + (SIZE_OF_KEY_BYTES * actualCount);
+     final ArrayOfDoublesSketchIterator it = sketch.iterator();
+     while (it.next()) {
+       if (it.getKey() < thetaLong_) {
+         dstSeg.set(JAVA_LONG_UNALIGNED, keyOffset, it.getKey());
+         MemorySegment.copy(it.getValues(), 0, dstSeg, JAVA_DOUBLE_UNALIGNED, valuesOffset, numValues_);
+         keyOffset += SIZE_OF_KEY_BYTES;
+         valuesOffset += SIZE_OF_VALUE_BYTES * numValues_;
+       }
+     }
+     dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, actualCount);
+   }
+ }
+
+ /*
+ * Creates an instance from components
+ */
+ DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong,
+ final boolean isEmpty, final int numValues, final short seedHash, final MemorySegment dstSeg) {
+ super(numValues);
+ checkIfEnoughMemory(dstSeg, values.length, numValues);
+ seg_ = dstSeg;
+ dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+ dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+ dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+ dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte)
+ SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal());
+ final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+ isEmpty_ = isEmpty;
+ final int count = keys.length;
+ dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) (
+ (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
+ | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
+ | (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
+ ));
+ dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_);
+ dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash);
+ thetaLong_ = thetaLong;
+ dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+ if (count > 0) {
+ dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count);
+ MemorySegment.copy(keys, 0, dstSeg, JAVA_LONG_UNALIGNED, ENTRIES_START, count);
+ MemorySegment.copy(values, 0, dstSeg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values.length);
+ }
+ }
+
+ /**
+ * Wraps the given MemorySegment.
+ * @param seg the given MemorySegment
+ */
+ DirectArrayOfDoublesCompactSketch(final MemorySegment seg) {
+ super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE));
+ seg_ = seg;
+ SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+ seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+ SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+ SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch);
+ final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+ if (version != serialVersionUID) {
+ throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID
+ + ", actual: " + version);
+ }
+ final boolean isBigEndian =
+ (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0;
+ if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+ throw new SketchesArgumentException("Byte order mismatch");
+ }
+
+ isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
+ thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+ }
+
+ /**
+ * Wraps the given MemorySegment.
+ * @param seg the given MemorySegment.
+ * @param seed See seed
+ */
+ DirectArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) {
+ super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE));
+ seg_ = seg;
+ SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+ seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+ SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+ SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch);
+ final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+ if (version != serialVersionUID) {
+ throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID
+ + ", actual: " + version);
+ }
+ final boolean isBigEndian =
+ (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0;
+ if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+ throw new SketchesArgumentException("Byte order mismatch");
+ }
+ Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed));
+ isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
+ thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+ }
+
+ @Override
+ public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) {
+   // With a destination segment, duplicate this sketch's bytes into it and wrap the copy.
+   if (dstSeg != null) {
+     MemorySegment.copy(seg_, 0, dstSeg, 0, seg_.byteSize());
+     return new DirectArrayOfDoublesCompactSketch(dstSeg);
+   }
+   // Otherwise materialize the keys and values into an on-heap compact sketch.
+   return new HeapArrayOfDoublesCompactSketch(
+       getKeys(), getValuesAsOneDimension(), thetaLong_, isEmpty_, numValues_, getSeedHash());
+ }
+
+ @Override
+ public int getRetainedEntries() {
+ final boolean hasEntries =
+ (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0;
+ return (hasEntries ? seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0);
+ }
+
+ @Override
+ //converts compact MemorySegment array of double[] to compact double[][]
+ public double[][] getValues() {
+ final int count = getRetainedEntries();
+ final double[][] values = new double[count][];
+ if (count > 0) {
+ int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count);
+ for (int i = 0; i < count; i++) {
+ final double[] array = new double[numValues_];
+ MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_);
+ values[i] = array;
+ valuesOffset += SIZE_OF_VALUE_BYTES * numValues_;
+ }
+ }
+ return values;
+ }
+
+ @Override
+ //converts compact MemorySegment array of double[] to compact double[]
+ double[] getValuesAsOneDimension() {
+ final int count = getRetainedEntries();
+ final int numDoubles = count * numValues_;
+ final double[] values = new double[numDoubles];
+ if (count > 0) {
+ final int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count);
+ MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, values, 0, numDoubles);
+ }
+ return values;
+ }
+
+ @Override
+ //converts compact MemorySegment array of long[] to compact long[]
+ long[] getKeys() {
+   final int count = getRetainedEntries();
+   final long[] keys = new long[count];
+   if (count > 0) {
+     // A single bulk copy transfers every key. The former loop repeated this identical
+     // whole-array copy once per entry, which was quadratic work for the same result.
+     MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, ENTRIES_START, keys, 0, count);
+   }
+   return keys;
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ final int sizeBytes = getCurrentBytes();
+ final byte[] byteArray = new byte[sizeBytes];
+ final MemorySegment seg = MemorySegment.ofArray(byteArray);
+ MemorySegment.copy(seg_, 0, seg, 0, sizeBytes);
+ return byteArray;
+ }
+
+ @Override
+ public ArrayOfDoublesSketchIterator iterator() {
+ return new DirectArrayOfDoublesSketchIterator(
+ seg_, ENTRIES_START, getRetainedEntries(), numValues_);
+ }
+
+ @Override
+ short getSeedHash() {
+ return seg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT);
+ }
+
+ @Override
+ public boolean hasMemorySegment() { return true; }
+
+ @Override
+ MemorySegment getMemorySegment() { return seg_; }
+
+ private static void checkIfEnoughMemory(final MemorySegment seg, final int numEntries,
+     final int numValues) {
+   // long arithmetic: the int product can wrap negative and let an undersized segment pass
+   final long sizeNeeded = ENTRIES_START + (numEntries * (SIZE_OF_KEY_BYTES + ((long) SIZE_OF_VALUE_BYTES * numValues)));
+   if (sizeNeeded > seg.byteSize()) {
+     throw new SketchesArgumentException("Not enough memory: need " + sizeNeeded
+         + " bytes, got " + seg.byteSize() + " bytes");
+   }
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java
new file mode 100644
index 000000000..c5771046e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Direct Intersection operation for tuple sketches of type ArrayOfDoubles.
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ */
+final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection {
+
+ private MemorySegment seg_;
+
+ /**
+ * Creates an instance of a DirectArrayOfDoublesIntersection with a custom update seed
+ * @param numValues number of double values associated with each key
+ * @param seed See seed
+ * @param dstSeg the destination MemorySegment
+ */
+ DirectArrayOfDoublesIntersection(final int numValues, final long seed, final MemorySegment dstSeg) {
+ super(numValues, seed);
+ seg_ = dstSeg;
+ }
+
+ @Override
+ protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues,
+ final long seed) {
+ return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, seg_);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
new file mode 100644
index 000000000..249723323
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
@@ -0,0 +1,433 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+import static org.apache.datasketches.common.Util.clear;
+import static org.apache.datasketches.common.Util.clearBits;
+import static org.apache.datasketches.common.Util.setBits;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * Direct QuickSelect tuple sketch of type ArrayOfDoubles.
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ */
+class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch {
+
+ // these values exist only on heap, never serialized
+ private MemorySegment seg_;
+ // these can be derived from the seg_ contents, but are kept here for performance
+ private int keysOffset_;
+ private int valuesOffset_;
+
+ /**
+ * Construct a new sketch using the given MemorySegment as its backing store.
+ *
+ * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+ * given value.
+ * @param lgResizeFactor log2(resize factor) - value from 0 to 3:
+ * 0 - no resizing (max size allocated),
+ * 1 - double internal hash table each time it reaches a threshold
+ * 2 - grow four times
+ * 3 - grow eight times (default)
+ * @param samplingProbability
+ * See Sampling Probability
+ * @param numValues Number of double values to keep for each key.
+ * @param seed See seed
+ * @param dstSeg the destination MemorySegment.
+ */
+ DirectArrayOfDoublesQuickSelectSketch(
+ final int nomEntries,
+ final int lgResizeFactor,
+ final float samplingProbability,
+ final int numValues,
+ final long seed,
+ final MemorySegment dstSeg) {
+ this(checkMemory(nomEntries, lgResizeFactor, numValues, dstSeg),
+ //SpotBugs CT_CONSTRUCTOR_THROW is false positive.
+ //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J
+ nomEntries,
+ lgResizeFactor,
+ samplingProbability,
+ numValues,
+ seed,
+ dstSeg);
+ }
+
+ private DirectArrayOfDoublesQuickSelectSketch(
+ final boolean secure, //required part of Finalizer Attack prevention
+ final int nomEntries,
+ final int lgResizeFactor,
+ final float samplingProbability,
+ final int numValues,
+ final long seed,
+ final MemorySegment dstSeg) {
+ super(numValues, seed);
+ seg_ = dstSeg;
+ final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor);
+ seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+ seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+ seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+ seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte)
+ SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal());
+ final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+ seg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) (
+ (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
+ | (samplingProbability < 1f ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0)
+ | (1 << Flags.IS_EMPTY.ordinal())
+ ));
+ seg_.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues);
+ seg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(seed));
+ thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
+ seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+ seg_.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries));
+ seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity));
+ seg_.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor);
+ seg_.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability);
+ seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0);
+ keysOffset_ = ENTRIES_START;
+ valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity);
+ clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
+ setRebuildThreshold();
+ }
+
+ private static final boolean checkMemory(
+ final int nomEntries,
+ final int lgResizeFactor,
+ final int numValues,
+ final MemorySegment dstSeg) {
+ final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor);
+ checkIfEnoughMemory(dstSeg, startingCapacity, numValues);
+ return true;
+ }
+
+ /**
+ * Wraps the given MemorySegment.
+ * @param seg the given MemorySegment
+ * @param seed update seed
+ */
+ DirectArrayOfDoublesQuickSelectSketch(
+ final MemorySegment seg,
+ final long seed) {
+ this(checkSerVer_Endianness(seg), seg, seed);
+ //SpotBugs CT_CONSTRUCTOR_THROW is false positive.
+ //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J
+ }
+
+ private DirectArrayOfDoublesQuickSelectSketch(
+ final boolean secure, //required part of Finalizer Attack prevention
+ final MemorySegment seg,
+ final long seed) {
+ super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed);
+ seg_ = seg;
+ SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+ seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+ SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+ SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch);
+
+ Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed));
+ keysOffset_ = ENTRIES_START;
+ valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity());
+ // to do: make parent take care of its own parts
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity());
+ thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+ isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
+ setRebuildThreshold();
+ }
+
+ private static final boolean checkSerVer_Endianness(final MemorySegment seg) {
+ final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+ if (version != serialVersionUID) {
+ throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID
+ + ", actual: " + version);
+ }
+ final boolean isBigEndian =
+ (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0;
+ if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+ throw new SketchesArgumentException("Byte order mismatch");
+ }
+ return true;
+ }
+
+ @Override
+ //converts MemorySegment hashTable of double[] to compacted double[][]
+ public double[][] getValues() {
+ final int count = getRetainedEntries();
+ final double[][] values = new double[count][];
+ if (count > 0) {
+ long keyOffset = keysOffset_;
+ long valuesOffset = valuesOffset_;
+ int cnt = 0;
+ for (int j = 0; j < getCurrentCapacity(); j++) {
+ if (seg_.get(JAVA_LONG_UNALIGNED, keyOffset) != 0) {
+ final double[] array = new double[numValues_];
+ MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_);
+ values[cnt++] = array;
+ }
+ keyOffset += SIZE_OF_KEY_BYTES;
+ valuesOffset += (long)SIZE_OF_VALUE_BYTES * numValues_;
+ }
+ }
+ return values;
+ }
+
+ @Override
+ //converts MemorySegment hashTable of double[] to compacted double[]
+ double[] getValuesAsOneDimension() {
+ final int count = getRetainedEntries();
+ final double[] values = new double[count * numValues_];
+ final int cap = getCurrentCapacity();
+ if (count > 0) {
+ long keyOffsetBytes = keysOffset_;
+ long valuesOffsetBytes = valuesOffset_;
+ int cnt = 0;
+ for (int j = 0; j < cap; j++) {
+ if (seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes) != 0) {
+ MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffsetBytes, values, cnt++ * numValues_, numValues_);
+ }
+ keyOffsetBytes += SIZE_OF_KEY_BYTES;
+ valuesOffsetBytes += (long)SIZE_OF_VALUE_BYTES * numValues_;
+ }
+ assert cnt == count;
+ }
+ return values;
+ }
+
+ @Override
+ //gathers the occupied (non-zero) key slots of the MemorySegment hash table into a compact long[]
+ long[] getKeys() {
+   final int numRetained = getRetainedEntries();
+   final long[] result = new long[numRetained];
+   final int capacity = getCurrentCapacity();
+   if (numRetained <= 0) {
+     return result;
+   }
+   int filled = 0;
+   for (int slot = 0; slot < capacity; slot++) {
+     final long slotOffset = keysOffset_ + ((long) SIZE_OF_KEY_BYTES * slot);
+     final long candidate = seg_.get(JAVA_LONG_UNALIGNED, slotOffset);
+     if (candidate != 0) {
+       result[filled++] = candidate;
+     }
+   }
+   assert filled == numRetained;
+   return result;
+ }
+
+ @Override
+ public int getRetainedEntries() {
+ return seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+ }
+
+ @Override
+ public int getNominalEntries() {
+ return 1 << seg_.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE);
+ }
+
+ @Override
+ public ResizeFactor getResizeFactor() {
+ return ResizeFactor.getRF(seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE));
+ }
+
+ @Override
+ public float getSamplingProbability() {
+ return seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT);
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ final int sizeBytes = getSerializedSizeBytes();
+ final byte[] byteArray = new byte[sizeBytes];
+ final MemorySegment seg = MemorySegment.ofArray(byteArray);
+ serializeInto(seg);
+ return byteArray;
+ }
+
+ @Override
+ public ArrayOfDoublesSketchIterator iterator() {
+ return new DirectArrayOfDoublesSketchIterator(seg_, keysOffset_, getCurrentCapacity(), numValues_);
+ }
+
+ @Override
+ public boolean hasMemorySegment() { return true; }
+
+ @Override
+ MemorySegment getMemorySegment() { return seg_; }
+
+ @Override
+ int getSerializedSizeBytes() {
+ return valuesOffset_ + (SIZE_OF_VALUE_BYTES * numValues_ * getCurrentCapacity());
+ }
+
+ @Override
+ void serializeInto(final MemorySegment seg) {
+ MemorySegment.copy(seg_, 0, seg, 0, seg.byteSize());
+ }
+
+ @Override
+ public void reset() {
+ if (!isEmpty_) {
+ isEmpty_ = true;
+ setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal()));
+ }
+ final int lgResizeFactor = seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE);
+ final float samplingProbability = seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT);
+ final int startingCapacity = Util.getStartingCapacity(getNominalEntries(), lgResizeFactor);
+ thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
+ seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+ seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity));
+ seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0);
+ keysOffset_ = ENTRIES_START;
+ valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity);
+ clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
+ setRebuildThreshold();
+ }
+
+ @Override
+ protected long getKey(final int index) {
+ return seg_.get(JAVA_LONG_UNALIGNED, keysOffset_ + ((long) SIZE_OF_KEY_BYTES * index));
+ }
+
+ @Override
+ protected void incrementCount() {
+ final int count = seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+ if (count == 0) {
+ setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.HAS_ENTRIES.ordinal()));
+ }
+ seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count + 1);
+ }
+
+ @Override
+ protected final int getCurrentCapacity() {
+ return 1 << seg_.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE);
+ }
+
+ @Override
+ protected void setThetaLong(final long thetaLong) {
+ thetaLong_ = thetaLong;
+ seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+ }
+
+ @Override
+ protected void setValues(final int index, final double[] values) {
+ long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index);
+ for (int i = 0; i < numValues_; i++) {
+ seg_.set(JAVA_DOUBLE_UNALIGNED, offset, values[i]);
+ offset += SIZE_OF_VALUE_BYTES;
+ }
+ }
+
+ @Override
+ protected void updateValues(final int index, final double[] values) {
+ long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index);
+ for (int i = 0; i < numValues_; i++) {
+ seg_.set(JAVA_DOUBLE_UNALIGNED, offset, seg_.get(JAVA_DOUBLE_UNALIGNED, offset) + values[i]);
+ offset += SIZE_OF_VALUE_BYTES;
+ }
+ }
+
+ @Override
+ protected void setNotEmpty() {
+ if (isEmpty_) {
+ isEmpty_ = false;
+ clearBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal()));
+
+ }
+ }
+
+ @Override
+ protected boolean isInSamplingMode() {
+ return (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) != 0;
+ }
+
+ // rebuild in the same memory
+ @Override
+ protected void rebuild(final int newCapacity) {
+ final int numValues = getNumValues();
+ checkIfEnoughMemory(seg_, newCapacity, numValues);
+ final int currCapacity = getCurrentCapacity();
+ final long[] keys = new long[currCapacity];
+ final double[] values = new double[currCapacity * numValues];
+ MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, keysOffset_, keys, 0, currCapacity);
+ MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_, values, 0, currCapacity * numValues);
+
+ clear(seg_, keysOffset_, ((long) SIZE_OF_KEY_BYTES * newCapacity) + ((long) SIZE_OF_VALUE_BYTES * newCapacity * numValues));
+ seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0);
+ seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte)Integer.numberOfTrailingZeros(newCapacity));
+ valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * newCapacity);
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity);
+ for (int i = 0; i < keys.length; i++) {
+ if ((keys[i] != 0) && (keys[i] < thetaLong_)) {
+ insert(keys[i], Arrays.copyOfRange(values, i * numValues, (i + 1) * numValues));
+ }
+ }
+ setRebuildThreshold();
+ }
+
+ @Override
+ protected int insertKey(final long key) {
+ return HashOperations.hashInsertOnlyMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START);
+ }
+
+ @Override
+ protected int findOrInsertKey(final long key) {
+ return HashOperations.hashSearchOrInsertMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START);
+ }
+
+ @Override
+ protected double[] find(final long key) {
+ final int index = HashOperations.hashSearchMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START);
+ if (index == -1) { return null; }
+ final double[] array = new double[numValues_];
+ MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_
+ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index), array, 0, numValues_);
+ return array;
+ }
+
+ private static void checkIfEnoughMemory(final MemorySegment seg, final int numEntries, final int numValues) {
+   // long arithmetic: the int product can wrap negative and let an undersized segment pass
+   final long sizeNeeded = ENTRIES_START + (numEntries * (SIZE_OF_KEY_BYTES + ((long) SIZE_OF_VALUE_BYTES * numValues)));
+   if (sizeNeeded > seg.byteSize()) {
+     throw new SketchesArgumentException("Not enough memory: need "
+         + sizeNeeded + " bytes, got " + seg.byteSize() + " bytes");
+   }
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java
new file mode 100644
index 000000000..7d2af2ba9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesReadOnlyException;
+
+final class DirectArrayOfDoublesQuickSelectSketchR extends DirectArrayOfDoublesQuickSelectSketch {
+ // Read-only variant: the two mutators overridden below throw SketchesReadOnlyException instead of writing.
+ DirectArrayOfDoublesQuickSelectSketchR(final MemorySegment seg, final long seed) {
+ super(seg, seed);
+ }
+ // Always rejects the update; key and values are not examined.
+ @Override
+ void insertOrIgnore(final long key, final double[] values) {
+ throw new SketchesReadOnlyException();
+ }
+ // Always rejects trimming.
+ @Override
+ public void trim() {
+ throw new SketchesReadOnlyException();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java
new file mode 100644
index 000000000..63b421f4d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table).
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ */
+final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator {
+  private static final int SIZE_OF_KEY_BYTES = 8;
+  private static final int SIZE_OF_VALUE_BYTES = 8;
+  private final MemorySegment seg_; // backing storage; this class only reads from it
+  private final int offset_;        // byte offset in seg_ of the first key slot
+  private final int numEntries_;    // number of key slots scanned by next()
+  private final int numValues_;     // number of doubles stored per key
+  private int i_;                   // current slot index; -1 until next() is first called
+
+  DirectArrayOfDoublesSketchIterator(final MemorySegment seg, final int offset, final int numEntries,
+      final int numValues) {
+    seg_ = seg;
+    offset_ = offset;
+    numEntries_ = numEntries;
+    numValues_ = numValues;
+    i_ = -1;
+  }
+
+  /** Advances to the next slot whose key is non-zero; returns false once every slot has been scanned. */
+  @Override
+  public boolean next() {
+    i_++;
+    while (i_ < numEntries_) {
+      final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_);
+      if (seg_.get(JAVA_LONG_UNALIGNED, off) != 0) { return true; }
+      i_++;
+    }
+    return false;
+  }
+
+  /** Returns the key stored in the current slot; only meaningful after next() has returned true. */
+  @Override
+  public long getKey() {
+    final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_);
+    return seg_.get(JAVA_LONG_UNALIGNED, off);
+  }
+
+  /** Returns a freshly allocated array holding the numValues doubles of the current slot. */
+  @Override
+  public double[] getValues() {
+    // Values are packed after all numEntries_ key slots, numValues_ doubles per slot.
+    final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_ * numValues_);
+    if (numValues_ == 1) { // single value: one direct read instead of a bulk copy
+      return new double[] { seg_.get(JAVA_DOUBLE_UNALIGNED, off) };
+    }
+    final double[] array = new double[numValues_];
+    MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, off, array, 0, numValues_);
+    return array;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java
new file mode 100644
index 000000000..e546d4756
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+
+/**
+ * Direct Union operation for tuple sketches of type ArrayOfDoubles.
+ *
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
+ * the JVM to perform garbage collection.
+ */
+class DirectArrayOfDoublesUnion extends ArrayOfDoublesUnion {
+
+  final MemorySegment seg_;
+
+  /**
+   * Builds a new union whose preamble and gadget sketch are written into the given segment.
+   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than given value.
+   * @param numValues Number of double values to keep for each key.
+   * @param seed See seed
+   * @param dstSeg the destination MemorySegment
+   */
+  DirectArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed,
+      final MemorySegment dstSeg) {
+    super(new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed,
+        dstSeg.asSlice(PREAMBLE_SIZE_BYTES, dstSeg.byteSize() - PREAMBLE_SIZE_BYTES)));
+    seg_ = dstSeg;
+    dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1
+    dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+    dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+    dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal());
+    dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, gadget_.getThetaLong());
+  }
+
+  // Wrapping constructor: used by wrapUnion below and by the DirectArrayOfDoublesUnionR subclass.
+  DirectArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) {
+    super(gadget);
+    seg_ = seg;
+    unionThetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); // restore theta from the existing image
+  }
+
+  @Override
+  void setUnionThetaLong(final long thetaLong) {
+    super.setUnionThetaLong(thetaLong);
+    seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); // mirror the new theta into the segment
+  }
+
+  /** Wraps seg as a union after validating its header bytes; the result is read-only unless isWritable. */
+  static ArrayOfDoublesUnion wrapUnion(final MemorySegment seg, final long seed, final boolean isWritable) {
+    final byte serVer = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+    if (serVer != serialVersionUID) {
+      throw new SketchesArgumentException("Serial version mismatch. Expected: "
+          + serialVersionUID + ", actual: " + serVer);
+    }
+    final byte familyId = seg.get(JAVA_BYTE, FAMILY_ID_BYTE);
+    SerializerDeserializer.validateFamily(familyId, seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+    final byte sketchType = seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE);
+    SerializerDeserializer.validateType(sketchType, SerializerDeserializer.SketchType.ArrayOfDoublesUnion);
+    // The gadget sketch occupies everything after the union preamble.
+    final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES);
+    return isWritable
+        ? new DirectArrayOfDoublesUnion(new DirectArrayOfDoublesQuickSelectSketch(sketchSeg, seed), seg)
+        : new DirectArrayOfDoublesUnionR(new DirectArrayOfDoublesQuickSelectSketchR(sketchSeg, seed), seg);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java
new file mode 100644
index 000000000..51568fd87
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesReadOnlyException;
+
+final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion {
+
+ /**
+ * Wraps the given MemorySegment as a read-only union; both arguments go unchanged to the superclass.
+ * @param gadget the ArrayOfDoublesQuickSelectSketch
+ * @param seg the MemorySegment being wrapped
+ */
+ DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) {
+ super(gadget, seg);
+ }
+ // Read-only: merging another sketch is always rejected; tupleSketch is not examined.
+ @Override
+ public void union(final ArrayOfDoublesSketch tupleSketch) {
+ throw new SketchesReadOnlyException();
+ }
+ // Read-only: resetting is always rejected.
+ @Override
+ public void reset() {
+ throw new SketchesReadOnlyException();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java
new file mode 100644
index 000000000..4baa685d6
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.Math.ceil;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly;
+import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+class HashTables {
+ private long[] hashTable = null;
+ private double[][] valueTable = null;
+ private int numValues = 0;
+ private int lgTableSize = 0;
+ private int numKeys = 0;
+
+ //Construct from sketch
+ HashTables(final ArrayOfDoublesSketch sketchIn) {
+ numKeys = sketchIn.getRetainedEntries();
+ numValues = sketchIn.getNumValues();
+
+ lgTableSize = getLgTableSize(numKeys);
+ final int tableSize = 1 << lgTableSize;
+ hashTable = new long[tableSize];
+ valueTable = new double[tableSize][];
+ final ArrayOfDoublesSketchIterator it = sketchIn.iterator();
+
+ while (it.next()) {
+ final long hash = it.getKey();
+ final int index = hashInsertOnly(hashTable, lgTableSize, hash);
+ valueTable[index] = new double[numValues];
+ System.arraycopy(it.getValues(), 0, valueTable[index], 0, numValues);
+ }
+ }
+
+ //Construct: Load the hash and value tables from packed hash and value arrays
+ private HashTables(final long[] hashArr, final double[][] valuesArr, final int numKeys, final int numValues) {
+ this.numValues = numValues;
+ this.numKeys = numKeys;
+ lgTableSize = getLgTableSize(numKeys);
+
+ final int tableSize = 1 << lgTableSize;
+ hashTable = new long[tableSize];
+ valueTable = new double[tableSize][];
+
+ for (int i = 0; i < numKeys; i++) {
+ final long hash = hashArr[i];
+ final int index = hashInsertOnly(hashTable, lgTableSize, hash);
+ valueTable[index] = new double[numValues];
+ System.arraycopy(valuesArr[i], 0, valueTable[index], 0, numValues);
+ }
+ }
+
+ HashTables getIntersectHashTables(
+ final ArrayOfDoublesSketch nextTupleSketch,
+ final long thetaLong,
+ final ArrayOfDoublesCombiner combiner) {
+ //Match nextSketch data with local instance data, filtering by theta
+ final int maxMatchSize = min(numKeys, nextTupleSketch.getRetainedEntries());
+ assert numValues == nextTupleSketch.numValues_;
+ final long[] matchHashArr = new long[maxMatchSize];
+ final double[][] matchValuesArr = new double[maxMatchSize][];
+
+ //Copy the intersecting items from local hashTables_
+ // sequentially into local packed matchHashArr_ and matchValuesArr
+ int matchCount = 0;
+ final ArrayOfDoublesSketchIterator it = nextTupleSketch.iterator();
+ while (it.next()) {
+ final long hash = it.getKey();
+ if (hash >= thetaLong) { continue; }
+ final int index = hashSearch(hashTable, lgTableSize, hash);
+ if (index < 0) { continue; }
+ matchHashArr[matchCount] = hash;
+ matchValuesArr[matchCount] = combiner.combine(valueTable[index], it.getValues());
+ matchCount++;
+ }
+ return new HashTables(matchHashArr, matchValuesArr, matchCount, numValues);
+ }
+
+ int getNumKeys() {
+ return numKeys;
+ }
+
+ int getNumValues() {
+ return numValues;
+ }
+
+ long[] getHashTable() {
+ return hashTable;
+ }
+
+ double[][] getValueTable() {
+ return valueTable;
+ }
+
+ void clear() {
+ hashTable = null;
+ valueTable = null;
+ numValues = 0;
+ lgTableSize = 0;
+ numKeys = 0;
+ }
+
+ static int getLgTableSize(final int numKeys) {
+ final int tableSize = max(ceilingPowerOf2((int) ceil(numKeys / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS);
+ return Integer.numberOfTrailingZeros(tableSize);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java
new file mode 100644
index 000000000..dc84da82a
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * The on-heap implementation of tuple Compact Sketch of type ArrayOfDoubles.
+ */
+final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch {
+
+  private final short seedHash_;
+  private long[] keys_;     // null when no entries are retained
+  private double[] values_; // null when no entries are retained; otherwise numValues_ doubles per key
+
+  /**
+   * Converts the given UpdatableArrayOfDoublesSketch to this compact form.
+   * @param sketch the given UpdatableArrayOfDoublesSketch
+   */
+  HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch) {
+    this(sketch, sketch.getThetaLong());
+  }
+
+  /**
+   * Converts the given UpdatableArrayOfDoublesSketch to this compact form
+   * trimming if necessary according to given thetaLong
+   * @param sketch the given UpdatableArrayOfDoublesSketch
+   * @param thetaLong new value of thetaLong
+   */
+  HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final long thetaLong) {
+    super(sketch.getNumValues());
+    isEmpty_ = sketch.isEmpty();
+    thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong);
+    seedHash_ = Util.computeSeedHash(sketch.getSeed());
+    final int count = sketch.getRetainedEntries();
+    if (count > 0) {
+      keys_ = new long[count];
+      values_ = new double[count * numValues_];
+      final ArrayOfDoublesSketchIterator it = sketch.iterator();
+      int i = 0;
+      while (it.next()) {
+        final long key = it.getKey();
+        if (key < thetaLong_) {
+          keys_[i] = key;
+          System.arraycopy(it.getValues(), 0, values_, i * numValues_, numValues_);
+          i++;
+        }
+      }
+      // trim if necessary
+      if (i < count) {
+        if (i == 0) {
+          keys_ = null;
+          values_ = null;
+        } else {
+          keys_ = Arrays.copyOf(keys_, i);
+          values_ = Arrays.copyOf(values_, i * numValues_);
+        }
+      }
+    }
+  }
+
+  /*
+   * Creates an instance from components. The given arrays are stored by reference, not copied.
+   */
+  HeapArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong,
+      final boolean isEmpty, final int numValues, final short seedHash) {
+    super(numValues);
+    keys_ = keys;
+    values_ = values;
+    thetaLong_ = thetaLong;
+    isEmpty_ = isEmpty;
+    seedHash_ = seedHash;
+  }
+
+  /**
+   * This is to create an instance given a serialized form
+   * @param seg the source MemorySegment
+   */
+  HeapArrayOfDoublesCompactSketch(final MemorySegment seg) {
+    this(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * This is to create an instance given a serialized form
+   * @param seg the source MemorySegment
+   * @param seed See seed
+   */
+  HeapArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) {
+    super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE));
+    seedHash_ = seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT);
+    SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+        seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+    SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+        SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch);
+    final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+    if (version != serialVersionUID) {
+      throw new SketchesArgumentException(
+          "Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version);
+    }
+    final boolean isBigEndian =
+        (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0;
+    if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+      throw new SketchesArgumentException("Byte order mismatch");
+    }
+    Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed));
+    isEmpty_ = (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
+    thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+    final boolean hasEntries =
+        (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0;
+    if (hasEntries) {
+      final int count = seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+      keys_ = new long[count];
+      values_ = new double[count * numValues_];
+      MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, count);
+      MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_, 0, values_.length);
+    }
+  }
+
+  /** Returns an independent heap copy when dstSeg is null, else serializes into dstSeg and wraps it. */
+  @Override
+  public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) {
+    if (dstSeg == null) {
+      // keys_ and values_ are null when nothing is retained; cloning them unguarded would throw an NPE.
+      final long[] keys = (keys_ == null) ? null : keys_.clone();
+      final double[] values = (values_ == null) ? null : values_.clone();
+      return new HeapArrayOfDoublesCompactSketch(keys, values, thetaLong_, isEmpty_, numValues_, seedHash_);
+    }
+    final byte[] byteArr = toByteArray();
+    MemorySegment.copy(byteArr, 0, dstSeg, JAVA_BYTE, 0, byteArr.length);
+    return new DirectArrayOfDoublesCompactSketch(dstSeg);
+  }
+
+  @Override
+  public int getRetainedEntries() {
+    return keys_ == null ? 0 : keys_.length;
+  }
+
+  @Override
+  public byte[] toByteArray() {
+    final int count = getRetainedEntries();
+    final int sizeBytes = getCurrentBytes();
+    final byte[] bytes = new byte[sizeBytes];
+    final MemorySegment seg = MemorySegment.ofArray(bytes);
+    seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+    seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+    seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+    seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal());
+    final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+    seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) (
+      ((isBigEndian ? 1 : 0) << Flags.IS_BIG_ENDIAN.ordinal())
+      | ((isEmpty() ? 1 : 0) << Flags.IS_EMPTY.ordinal())
+      | ((count > 0 ? 1 : 0) << Flags.HAS_ENTRIES.ordinal())
+    ));
+    seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_);
+    seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_);
+    seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+    if (count > 0) {
+      seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count);
+      MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, count);
+      MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_.length);
+    }
+    return bytes;
+  }
+
+  /** Returns the retained values as one freshly copied row of numValues doubles per entry (empty if none). */
+  @Override
+  public double[][] getValues() {
+    final int count = getRetainedEntries();
+    final double[][] values = new double[count][];
+    for (int j = 0; j < count; j++) { // never entered when count == 0, so a null values_ is not touched
+      values[j] = Arrays.copyOfRange(values_, j * numValues_, (j + 1) * numValues_);
+    }
+    return values;
+  }
+
+  /** Returns a copy of all retained values as one flat array; empty (never null) when nothing is retained. */
+  @Override
+  double[] getValuesAsOneDimension() {
+    return (values_ == null) ? new double[0] : values_.clone();
+  }
+
+  /** Returns a copy of the retained keys; empty (never null) when nothing is retained. */
+  @Override
+  long[] getKeys() {
+    return (keys_ == null) ? new long[0] : keys_.clone();
+  }
+
+  @Override
+  public ArrayOfDoublesSketchIterator iterator() {
+    return new HeapArrayOfDoublesSketchIterator(keys_, values_, numValues_);
+  }
+
+  @Override
+  short getSeedHash() {
+    return seedHash_;
+  }
+
+  @Override
+  public boolean hasMemorySegment() { return false; }
+
+  @Override
+  MemorySegment getMemorySegment() { return null; }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java
new file mode 100644
index 000000000..dc0383567
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+/**
+ * On-heap implementation of intersection set operation for tuple sketches of type
+ * ArrayOfDoubles.
+ */
+final class HeapArrayOfDoublesIntersection extends ArrayOfDoublesIntersection {
+
+ /**
+ * Creates an instance of a HeapArrayOfDoublesIntersection with a custom update seed
+ * @param numValues number of double values associated with each key
+ * @param seed See seed
+ */
+ HeapArrayOfDoublesIntersection(final int numValues, final long seed) {
+ super(numValues, seed);
+ }
+ // Supplies an on-heap sketch. NOTE(review): 0 and 1f are presumably lgResizeFactor and sampling probability -- confirm.
+ @Override
+ protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) {
+ return new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java
new file mode 100644
index 000000000..9e33f4e87
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.common.Util.exactLog2OfLong;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * The on-heap implementation of the tuple QuickSelect sketch of type ArrayOfDoubles.
+ */
+
+final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch {
+
+ private final int lgNomEntries_; //log2 of the nominal number of entries
+ private final int lgResizeFactor_; //log2 of the resize factor
+ private final float samplingProbability_;
+
+ private int count_; //number of retained entries, see getRetainedEntries()
+ private long[] keys_; //hash table of keys; a zero key marks an empty slot
+ private double[] values_; //numValues_ doubles per slot, slot j starts at j * numValues_
+
+ /**
+ * This is to create an instance of a QuickSelectSketch with custom resize factor and sampling
+ * probability.
+ * @param nomEntries Nominal number of entries. Forced to the smallest power of 2 greater than
+ * or equal to the given value.
+ * @param lgResizeFactor log2(resize factor) - value from 0 to 3:
+ * 0 - no resizing (max size allocated),
+ * 1 - double internal hash table each time it reaches a threshold
+ * 2 - grow four times
+ * 3 - grow eight times (default)
+ * @param samplingProbability sampling probability; the initial theta is Long.MAX_VALUE
+ * scaled by this value
+ * @param numValues number of double values to keep for each key
+ * @param seed the update seed, passed through to the superclass
+ */
+ HeapArrayOfDoublesQuickSelectSketch(final int nomEntries, final int lgResizeFactor,
+ final float samplingProbability, final int numValues, final long seed) {
+ super(numValues, seed);
+ lgNomEntries_ = exactLog2OfLong(ceilingPowerOf2(nomEntries));
+ lgResizeFactor_ = lgResizeFactor;
+ samplingProbability_ = samplingProbability;
+ thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
+ final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor);
+ keys_ = new long[startingCapacity];
+ values_ = new double[startingCapacity * numValues];
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
+ setRebuildThreshold();
+ }
+
+ /**
+ * Creates an instance from a serialized form, rejecting a serial version or byte order mismatch.
+ * @param seg the source MemorySegment
+ * @param seed the update seed; its seed hash is checked against the one stored in seg
+ */
+ HeapArrayOfDoublesQuickSelectSketch(final MemorySegment seg, final long seed) {
+ super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed);
+ SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+ seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+ SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+ SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch);
+ final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+ if (version != serialVersionUID) {
+ throw new SketchesArgumentException("Serial version mismatch. Expected: "
+ + serialVersionUID + ", actual: " + version);
+ }
+ final byte flags = seg.get(JAVA_BYTE, FLAGS_BYTE);
+ final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) > 0;
+ if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { //image order must equal native order
+ throw new SketchesArgumentException("Byte order mismatch");
+ }
+ Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed));
+ isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0;
+ lgNomEntries_ = seg.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE);
+ thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+ final int currentCapacity = 1 << seg.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE); //capacity is stored as its log2
+ lgResizeFactor_ = seg.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE);
+ samplingProbability_ = seg.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT);
+ keys_ = new long[currentCapacity];
+ values_ = new double[currentCapacity * numValues_];
+ final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
+ count_ = hasEntries ? seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0;
+ if (count_ > 0) { //the full key table is read first, then the full value table
+ MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, currentCapacity);
+ final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * currentCapacity);
+ MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, off, values_, 0, currentCapacity * numValues_);
+
+ }
+ setRebuildThreshold();
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(currentCapacity);
+ }
+
+ @Override
+ //converts the heap hash table of values to a compacted double[][], one row per retained key
+ public double[][] getValues() {
+ final int numVal = numValues_;
+ final int count = getRetainedEntries();
+ final double[][] values = new double[count][];
+ if (count > 0) {
+ int cnt = 0;
+ for (int j = 0; j < keys_.length; j++) {
+ if (keys_[j] == 0) { continue; } //empty slot
+ values[cnt++] = Arrays.copyOfRange(values_, j * numVal, (j + 1) * numVal);
+ }
+ assert cnt == count;
+ }
+ return values;
+ }
+
+ @Override
+ //converts the heap hash table of values to a compacted double[], numValues_ doubles per retained key
+ double[] getValuesAsOneDimension() {
+ final int numVal = numValues_;
+ final int count = getRetainedEntries();
+ final double[] values = new double[count * numVal];
+ if (count > 0) {
+ int cnt = 0;
+ for (int j = 0; j < keys_.length; j++) {
+ if (keys_[j] == 0) { continue; } //empty slot
+ System.arraycopy(values_, j * numVal, values, cnt++ * numVal, numVal);
+ }
+ assert cnt == count;
+ }
+ return values;
+ }
+
+ @Override
+ //converts the heap hash table of keys to a compacted long[] holding only the non-zero keys
+ long[] getKeys() {
+ final int count = getRetainedEntries();
+ final long[] keysArr = new long[count];
+ if (count > 0) {
+ int cnt = 0;
+ for (int j = 0; j < keys_.length; j++) {
+ if (keys_[j] == 0) { continue; } //empty slot
+ keysArr[cnt++] = keys_[j];
+ }
+ assert cnt == count;
+ }
+ return keysArr;
+ }
+
+ @Override
+ public int getRetainedEntries() {
+ return count_;
+ }
+
+ @Override
+ public int getNominalEntries() {
+ return 1 << lgNomEntries_;
+ }
+
+ @Override
+ public float getSamplingProbability() {
+ return samplingProbability_;
+ }
+
+ @Override
+ public ResizeFactor getResizeFactor() {
+ return ResizeFactor.getRF(lgResizeFactor_);
+ }
+
+ @Override
+ public byte[] toByteArray() {
+ final byte[] byteArray = new byte[getSerializedSizeBytes()];
+ final MemorySegment seg = MemorySegment.ofArray(byteArray);
+ serializeInto(seg);
+ return byteArray;
+ }
+
+ @Override
+ public ArrayOfDoublesSketchIterator iterator() {
+ return new HeapArrayOfDoublesSketchIterator(keys_, values_, numValues_); //shares, does not copy, the tables
+ }
+
+ @Override
+ int getSerializedSizeBytes() {
+ return ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues_)) * getCurrentCapacity());
+ }
+
+ // X/Y: X = Byte index for just AoDQuickSelectSketch
+ // Y = Byte index when combined with Union Preamble
+ // Long || Start Byte Adr:
+ // Adr:
+ // First 16 bytes are preamble from AoDUnion
+ // || 7/23 | 6/22 | 5/21 | 4/20 | 3/19 | 2/18 | 1/17 | 0/16 |
+ // 0/2 || Seed Hash | #Dbls | Flags | SkType2 | FamID | SerVer | Preamble_Longs |
+ // || 15/31 | 14/30 | 13/29 | 12/28 | 11/27 | 10/26 | 9/25 | 8/24 |
+ // 1/3 ||------------------------------Theta Long----------------------------------------------|
+ // || 23/39 | 22/38 | 21/37 | 20/36 | 19/35 | 18/34 | 17/33 | 16/32 |
+ // 2/4 || Sampling P Float | | LgRF |lgCapLongs| LgNomEntries |
+ // || 31/47 | 30/46 | 29/45 | 28/44 | 27/43 | 26/42 | 25/41 | 24/40 |
+ // 3/5 || | Retained Entries Int |
+ // || | 32/48 |
+ // 4/6 || Keys Array longs * keys[] Length |
+ // || Values Array doubles * values[] Length |
+
+ @Override
+ void serializeInto(final MemorySegment seg) {
+ seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+ seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+ seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+ seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE,
+ (byte) SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal());
+ final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+ seg.set(JAVA_BYTE, FLAGS_BYTE, (byte)(
+ (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
+ | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0)
+ | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
+ | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
+ ));
+ seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_);
+ seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(seed_));
+ seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+ seg.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_);
+ seg.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length));
+ seg.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor_);
+ seg.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability_);
+ seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count_);
+ if (count_ > 0) { //both tables are written in full, empty slots included
+ MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_.length);
+ final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * keys_.length);
+ MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, off, values_.length);
+ }
+ }
+
+ @Override
+ public boolean hasMemorySegment() { return false; }
+
+ @Override
+ MemorySegment getMemorySegment() { return null; }
+
+ @Override
+ public void reset() {
+ isEmpty_ = true;
+ count_ = 0;
+ thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); //same initial theta as the constructor
+ final int startingCapacity = Util.getStartingCapacity(1 << lgNomEntries_, lgResizeFactor_);
+ keys_ = new long[startingCapacity];
+ values_ = new double[startingCapacity * numValues_];
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
+ setRebuildThreshold();
+ }
+
+ @Override
+ protected long getKey(final int index) {
+ return keys_[index];
+ }
+
+ @Override
+ protected void incrementCount() {
+ count_++;
+ }
+
+ @Override
+ protected void setValues(final int index, final double[] values) {
+ if (numValues_ == 1) { //with one value per key the slot index is also the value index
+ values_[index] = values[0];
+ } else {
+ System.arraycopy(values, 0, values_, index * numValues_, numValues_);
+ }
+ }
+
+ @Override
+ protected void updateValues(final int index, final double[] values) {
+ if (numValues_ == 1) { //with one value per key the slot index is also the value index
+ values_[index] += values[0];
+ } else {
+ final int offset = index * numValues_;
+ for (int i = 0; i < numValues_; i++) {
+ values_[offset + i] += values[i]; //element-wise sum into the stored values
+ }
+ }
+ }
+
+ @Override
+ protected void setNotEmpty() {
+ isEmpty_ = false;
+ }
+
+ @Override
+ protected boolean isInSamplingMode() {
+ return samplingProbability_ < 1f;
+ }
+
+ @Override
+ protected void setThetaLong(final long thetaLong) {
+ thetaLong_ = thetaLong;
+ }
+
+ @Override
+ protected int getCurrentCapacity() {
+ return keys_.length;
+ }
+
+ @Override
+ protected void rebuild(final int newCapacity) {
+ final long[] oldKeys = keys_;
+ final double[] oldValues = values_;
+ keys_ = new long[newCapacity];
+ values_ = new double[newCapacity * numValues_];
+ count_ = 0;
+ lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity);
+ for (int i = 0; i < oldKeys.length; i++) {
+ if ((oldKeys[i] != 0) && (oldKeys[i] < thetaLong_)) { //keep only occupied slots with a key below theta
+ insert(oldKeys[i], Arrays.copyOfRange(oldValues, i * numValues_, (i + 1) * numValues_));
+ }
+ }
+ setRebuildThreshold();
+ }
+
+ @Override
+ protected int insertKey(final long key) {
+ return HashOperations.hashInsertOnly(keys_, lgCurrentCapacity_, key);
+ }
+
+ @Override
+ protected int findOrInsertKey(final long key) {
+ return HashOperations.hashSearchOrInsert(keys_, lgCurrentCapacity_, key);
+ }
+
+ @Override
+ protected double[] find(final long key) {
+ final int index = HashOperations.hashSearch(keys_, lgCurrentCapacity_, key);
+ if (index == -1) { return null; } //an index of -1 is treated as key-not-found
+ return Arrays.copyOfRange(values_, index * numValues_, (index + 1) * numValues_);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java
new file mode 100644
index 000000000..7d77978e8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.util.Arrays;
+
+/**
+ * Iterator over the on-heap ArrayOfDoublesSketch (compact or hash table); zero keys are skipped.
+ */
+final class HeapArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator {
+
+  private final long[] keys_;     //may be null, in which case next() always returns false
+  private final double[] values_; //numValues_ doubles per key slot, slot i starts at i * numValues_
+  private final int numValues_;
+  private int i_;                 //current slot index; -1 until the first call to next()
+
+  HeapArrayOfDoublesSketchIterator(final long[] keys, final double[] values, final int numValues) {
+    keys_ = keys;
+    values_ = values;
+    numValues_ = numValues;
+    i_ = -1;
+  }
+
+  @Override
+  public boolean next() {
+    if (keys_ == null) { return false; }
+    i_++;
+    while (i_ < keys_.length) {
+      if (keys_[i_] != 0) { return true; } //stop on the next occupied slot
+      i_++;
+    }
+    return false;
+  }
+
+  @Override
+  public long getKey() {
+    return keys_[i_];
+  }
+
+  @Override
+  public double[] getValues() {
+    if (numValues_ == 1) { //with one value per key the slot index is also the value index
+      return new double[] { values_[i_] };
+    }
+    return Arrays.copyOfRange(values_, i_ * numValues_, (i_ + 1) * numValues_);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java
new file mode 100644
index 000000000..6603aad95
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+
+/**
+ * On-heap Union set operation for tuple sketches of type ArrayOfDoubles, built around a
+ * HeapArrayOfDoublesQuickSelectSketch that is handed to the superclass.
+ */
+final class HeapArrayOfDoublesUnion extends ArrayOfDoublesUnion {
+
+  /**
+   * Creates an instance of HeapArrayOfDoublesUnion with a custom seed.
+   * @param nomEntries Nominal number of entries, handed to a new on-heap QuickSelect sketch
+   * created with lgResizeFactor = 3 and sampling probability = 1.
+   * @param numValues Number of double values to keep for each key.
+   * @param seed the update seed, handed to the same QuickSelect sketch
+   */
+  HeapArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed) {
+    super(new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed));
+  }
+
+  HeapArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final long unionThetaLong) {
+    super(gadget);
+    unionThetaLong_ = unionThetaLong; //theta of the union, kept separately from the given sketch
+  }
+
+  /**
+   * Heapifies a union from its serialized form using a custom seed.
+   * @param seg the source MemorySegment
+   * @param seed the update seed used to heapify the sketch image that follows the union preamble
+   * @return an ArrayOfDoublesUnion on the Java heap
+   */
+  static ArrayOfDoublesUnion heapifyUnion(final MemorySegment seg, final long seed) {
+    final byte serVer = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+    if (serVer != serialVersionUID) {
+      throw new SketchesArgumentException("Serial version mismatch. Expected: "
+          + serialVersionUID + ", actual: " + serVer);
+    }
+    SerializerDeserializer.validateFamily(
+        seg.get(JAVA_BYTE, FAMILY_ID_BYTE), seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+    final byte sketchType = seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE);
+    SerializerDeserializer.validateType(sketchType, SerializerDeserializer.SketchType.ArrayOfDoublesUnion);
+    final ArrayOfDoublesQuickSelectSketch gadget = //the slice runs from the end of the preamble to the end of seg
+        new HeapArrayOfDoublesQuickSelectSketch(seg.asSlice(PREAMBLE_SIZE_BYTES), seed);
+    return new HeapArrayOfDoublesUnion(gadget, seg.get(JAVA_LONG_UNALIGNED, THETA_LONG));
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java
new file mode 100644
index 000000000..5044b0e3e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * This package is for a concrete implementation of the Tuple sketch for an array of double values.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
diff --git a/src/main/java/org/apache/datasketches/tuple2/package-info.java b/src/main/java/org/apache/datasketches/tuple2/package-info.java
new file mode 100644
index 000000000..1cb15c83e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * The tuple package contains a number of sketches based on the same
+ * fundamental algorithms of the Theta Sketch Framework and extends these
+ * concepts for whole new families of sketches.
+ */
+package org.apache.datasketches.tuple2;
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java
new file mode 100644
index 000000000..636c90d12
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.strings;
+
+import static org.apache.datasketches.tuple2.Util.stringArrHash;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.tuple2.UpdatableSketch;
+
+/**
+ * Extends {@code UpdatableSketch<String[], ArrayOfStringsSummary>}
+ * @author Lee Rhodes
+ */
+public class ArrayOfStringsSketch extends UpdatableSketch<String[], ArrayOfStringsSummary> {
+
+  /**
+   * Constructs new sketch with default K = 4096 (lgK = 12), default ResizeFactor=X8,
+   * and default p = 1.0.
+   */
+  public ArrayOfStringsSketch() {
+    this(12);
+  }
+
+  /**
+   * Constructs new sketch with default ResizeFactor=X8, default p = 1.0 and given lgK.
+   * @param lgK Log_base2 of Nominal Entries.
+   * See Nominal Entries
+   */
+  public ArrayOfStringsSketch(final int lgK) {
+    this(lgK, ResizeFactor.X8, 1.0F);
+  }
+
+  /**
+   * Constructs new sketch with given ResizeFactor, p and lgK.
+   * @param lgK Log_base2 of Nominal Entries.
+   * See Nominal Entries
+   * @param rf ResizeFactor
+   * See Resize Factor
+   * @param p sampling probability
+   * See Sampling Probability
+   */
+  public ArrayOfStringsSketch(final int lgK, final ResizeFactor rf, final float p) {
+    super(1 << lgK, rf.lg(), p, new ArrayOfStringsSummaryFactory());
+  }
+
+  /**
+   * Constructs this sketch from a MemorySegment image, which must be from an ArrayOfStringsSketch,
+   * and usually with data.
+   * @param seg the given MemorySegment
+   * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
+   * This capability will be removed in a future release.
+   * Heapifying a CompactSketch is not deprecated.
+   */
+  @Deprecated
+  public ArrayOfStringsSketch(final MemorySegment seg) {
+    super(seg, new ArrayOfStringsSummaryDeserializer(), new ArrayOfStringsSummaryFactory());
+  }
+
+  /**
+   * Copy Constructor
+   * @param sketch the sketch to copy
+   */
+  public ArrayOfStringsSketch(final ArrayOfStringsSketch sketch) {
+    super(sketch);
+  }
+
+  /**
+   * @return a deep copy of this sketch
+   */
+  @Override
+  public ArrayOfStringsSketch copy() {
+    return new ArrayOfStringsSketch(this);
+  }
+
+  /**
+   * Updates the sketch with String arrays for both key and value.
+   * @param strArrKey the given String array key, reduced to a single hash by stringArrHash
+   * @param strArr the given String array value
+   */
+  public void update(final String[] strArrKey, final String[] strArr) {
+    super.update(stringArrHash(strArrKey), strArr);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java
new file mode 100644
index 000000000..66eed2a8f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.strings;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.datasketches.tuple2.Util.stringArrHash;
+import static org.apache.datasketches.tuple2.Util.stringConcat;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.UpdatableSummary;
+
+/**
+ * Implements {@code UpdatableSummary<String[]>}
+ * @author Lee Rhodes
+ */
+public final class ArrayOfStringsSummary implements UpdatableSummary<String[]> {
+
+  private String[] stringArr = null;
+
+  ArrayOfStringsSummary() { //required for ArrayOfStringsSummaryFactory
+    stringArr = null;
+  }
+
+  //Used by copy() and in test
+  ArrayOfStringsSummary(final String[] stringArr) {
+    this.stringArr = stringArr.clone();
+    checkNumNodes(stringArr.length);
+  }
+
+  //used by fromMemory and in test
+  /**
+   * This reads a MemorySegment that has a layout similar to the C struct:
+   * {@snippet :
+   * typedef struct {
+   *   int totBytes;
+   *   byte nodes; //number of Nodes.
+   *   Node[nodes] = { Node[0], Node[1], ... }
+   * }
+   * }
+   * Where a Node has a layout similar to the C struct:
+   * {@snippet :
+   * typedef struct {
+   *   int numBytes;
+   *   byte[] byteArray; //UTF-8 byte array. Not null terminated.
+   * }
+   * }
+   * @param seg the MemorySegment containing the Summary data
+   */
+  ArrayOfStringsSummary(final MemorySegment seg) {
+    int pos = 0;
+    final int totBytes = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES;
+    checkInBytes(seg, totBytes);
+    final int nodes = seg.get(JAVA_BYTE, pos); pos += Byte.BYTES;
+    checkNumNodes(nodes); //a negative byte value is rejected here
+    final String[] strArr = new String[nodes];
+    for (int i = 0; i < nodes; i++) {
+      final int len = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES;
+      final byte[] byteArr = new byte[len];
+      MemorySegment.copy(seg, JAVA_BYTE, pos, byteArr, 0, len); pos += len;
+      strArr[i] = new String(byteArr, UTF_8);
+    }
+    assert pos == totBytes;
+    this.stringArr = strArr;
+  }
+
+  @Override
+  public ArrayOfStringsSummary copy() {
+    //the String[] constructor clones the array, so the copy does not share it with this summary
+    return new ArrayOfStringsSummary(stringArr);
+  }
+
+  @Override
+  public byte[] toByteArray() {
+    final ComputeBytes cb = new ComputeBytes(stringArr);
+    final int totBytes = cb.totBytes_;
+    final byte[] out = new byte[totBytes];
+    final MemorySegment wseg = MemorySegment.ofArray(out);
+    int pos = 0;
+    wseg.set(JAVA_INT_UNALIGNED, pos, totBytes); pos += Integer.BYTES;
+    final int numNodes = cb.numNodes_;
+    wseg.set(JAVA_BYTE, pos, (byte)numNodes); pos += Byte.BYTES;
+    for (int i = 0; i < numNodes; i++) {
+      final int nodeLen = cb.nodeLengthsArr_[i];
+      wseg.set(JAVA_INT_UNALIGNED, pos, nodeLen); pos += Integer.BYTES;
+      MemorySegment.copy(cb.nodeBytesArr_[i], 0, wseg, JAVA_BYTE, pos, nodeLen); pos += nodeLen;
+    }
+    assert pos == totBytes;
+    return out;
+  }
+
+  //From UpdatableSummary
+
+  @Override
+  public ArrayOfStringsSummary update(final String[] value) {
+    if (stringArr == null) { //only the first value is retained; later updates leave it unchanged
+      stringArr = value.clone();
+    }
+    return this;
+  }
+
+  //From Object
+
+  @Override
+  public int hashCode() {
+    return (int) stringArrHash(stringArr);
+  }
+
+  @Override
+  public boolean equals(final Object summary) {
+    if (!(summary instanceof final ArrayOfStringsSummary that)) { //instanceof is false for null
+      return false;
+    }
+    final String thatStr = stringConcat(that.stringArr);
+    final String thisStr = stringConcat(stringArr);
+    return thisStr.equals(thatStr);
+  }
+
+  /**
+   * Returns the nodes array for this summary.
+   * @return a copy of the nodes array for this summary.
+   */
+  public String[] getValue() {
+    return stringArr.clone();
+  }
+
+  //also used in test
+  static void checkNumNodes(final int numNodes) {
+    if (numNodes > 127 || numNodes < 0) {
+      throw new SketchesArgumentException("Number of nodes cannot exceed 127 or be negative.");
+    }
+  }
+
+  //also used in test
+  static void checkInBytes(final MemorySegment seg, final int totBytes) {
+    if (seg.byteSize() < totBytes) {
+      throw new SketchesArgumentException("Incoming Memory has insufficient capacity.");
+    }
+  }
+
+  /**
+   * Computes total bytes, number of nodes and the UTF-8 bytes of each node of the given string array.
+   */
+  private static final class ComputeBytes {
+    final byte numNodes_;
+    final int[] nodeLengthsArr_;
+    final byte[][] nodeBytesArr_;
+    final int totBytes_;
+
+    ComputeBytes(final String[] stringArr) {
+      checkNumNodes(stringArr.length); //validate the int length; after the byte cast 256 would wrap to 0
+      numNodes_ = (byte) stringArr.length;
+      nodeLengthsArr_ = new int[numNodes_];
+      nodeBytesArr_ = new byte[numNodes_][];
+      int sumNodeBytes = 0;
+      for (int i = 0; i < numNodes_; i++) {
+        nodeBytesArr_[i] = stringArr[i].getBytes(UTF_8);
+        nodeLengthsArr_[i] = nodeBytesArr_[i].length;
+        sumNodeBytes += nodeLengthsArr_[i];
+      }
+      totBytes_ = sumNodeBytes + (numNodes_ + 1) * Integer.BYTES + 1; //node bytes + length ints + totBytes int + nodes byte
+    }
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java
new file mode 100644
index 000000000..f48f6c95e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.strings;
+
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.tuple2.DeserializeResult;
+import org.apache.datasketches.tuple2.SummaryDeserializer;
+
+/**
+ * Implements {@code SummaryDeserializer<ArrayOfStringsSummary>}.
+ * @author Lee Rhodes
+ */
+public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer<ArrayOfStringsSummary> {
+
+  @Override
+  public DeserializeResult<ArrayOfStringsSummary> heapifySummary(final MemorySegment seg) {
+    return ArrayOfStringsSummaryDeserializer.fromMemory(seg);
+  }
+
+  /**
+   * Heapifies an ArrayOfStringsSummary from the given segment. Also used in test.
+   * @param seg the given MemorySegment
+   * @return the DeserializeResult holding the new summary and the int read at offset 0 of seg
+   */
+  static DeserializeResult<ArrayOfStringsSummary> fromMemory(final MemorySegment seg) {
+    final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(seg);
+    final int totBytes = seg.get(JAVA_INT_UNALIGNED, 0);
+    return new DeserializeResult<>(nsum, totBytes);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java
new file mode 100644
index 000000000..b0fb5a539
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.strings;
+
+import org.apache.datasketches.tuple2.SummaryFactory;
+
+/**
+ * Implements {@code SummaryFactory<ArrayOfStringsSummary>}.
+ * @author Lee Rhodes
+ */
+public class ArrayOfStringsSummaryFactory implements SummaryFactory<ArrayOfStringsSummary> {
+
+  @Override
+  public ArrayOfStringsSummary newSummary() {
+    return new ArrayOfStringsSummary(); //a fresh summary from the no-argument constructor
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java
new file mode 100644
index 000000000..07225f45c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.strings;
+
+import org.apache.datasketches.tuple2.SummarySetOperations;
+
+/**
+ * Implements {@code SummarySetOperations<ArrayOfStringsSummary>}.
+ * @author Lee Rhodes
+ */
+public class ArrayOfStringsSummarySetOperations implements SummarySetOperations<ArrayOfStringsSummary> {
+
+  @Override
+  public ArrayOfStringsSummary union(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) {
+    return a.copy(); //the result is a copy of the first argument; b is not consulted
+  }
+
+  @Override
+  public ArrayOfStringsSummary intersection(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) {
+    return a.copy(); //the result is a copy of the first argument; b is not consulted
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java b/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java
new file mode 100644
index 000000000..25a2be3e6
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * This package provides a Tuple sketch implementation whose summary is an array of String values.
+ */
+
+package org.apache.datasketches.tuple2.strings;
diff --git a/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java
new file mode 100644
index 000000000..fdaf1de26
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode;
+import org.apache.datasketches.tuple2.adouble.DoubleSummaryDeserializer;
+import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class CompactSketchWithDoubleSummaryTest {
+  private final DoubleSummary.Mode mode = Mode.Sum;
+
+  @Test
+  public void emptyFromNonPublicConstructorNullArray() {
+    //null key and summary arrays must yield a sketch that reports itself as empty
+    CompactSketch<DoubleSummary> sketch = new CompactSketch<>(null, null, Long.MAX_VALUE, true);
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    TupleSketchIterator<DoubleSummary> it = sketch.iterator();
+    Assert.assertNotNull(it);
+    Assert.assertFalse(it.next());
+    sketch.toString(); //only exercises toString(); the output is not checked
+  }
+
+  @Test
+  public void emptyFromNonPublicConstructor() {
+    //zero-length key and summary arrays must also yield an empty sketch
+    long[] keys = new long[0];
+    DoubleSummary[] summaries = new DoubleSummary[0];
+    CompactSketch<DoubleSummary> sketch =
+        new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true);
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    TupleSketchIterator<DoubleSummary> it = sketch.iterator();
+    Assert.assertNotNull(it);
+    Assert.assertFalse(it.next());
+  }
+
+  @Test
+  public void emptyFromQuickSelectSketch() { //compacting an updatable sketch that received no updates
+    UpdatableSketch<Double, DoubleSummary> us =
+        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
+    CompactSketch<DoubleSummary> sketch = us.compact();
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    TupleSketchIterator<DoubleSummary> it = sketch.iterator();
+    Assert.assertNotNull(it);
+    Assert.assertFalse(it.next());
+  }
+
+  @Test
+  public void exactModeFromQuickSelectSketch() { //three distinct keys, each updated twice
+    UpdatableSketch<Double, DoubleSummary> us =
+        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
+    us.update(1, 1.0);
+    us.update(2, 1.0);
+    us.update(3, 1.0);
+    us.update(1, 1.0);
+    us.update(2, 1.0);
+    us.update(3, 1.0);
+    CompactSketch<DoubleSummary> sketch = us.compact();
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 3.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 3.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 3.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 3);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    TupleSketchIterator<DoubleSummary> it = sketch.iterator();
+    int count = 0;
+    while (it.next()) {
+      Assert.assertEquals(it.getSummary().getValue(), 2.0); //two updates of 1.0 per key in Sum mode
+      count++;
+    }
+    Assert.assertEquals(count, 3);
+  }
+
+  @Test
+  public void serializeDeserializeSmallExact() { //round trip through toByteArray() and heapifySketch()
+    UpdatableSketch<Double, DoubleSummary> us =
+        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
+    us.update("a", 1.0);
+    us.update("b", 1.0);
+    us.update("c", 1.0);
+    CompactSketch<DoubleSummary> sketch1 = us.compact();
+    Sketch<DoubleSummary> sketch2 =
+        Sketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray()),
+            new DoubleSummaryDeserializer());
+    Assert.assertFalse(sketch2.isEmpty());
+    Assert.assertFalse(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), 3.0);
+    Assert.assertEquals(sketch2.getLowerBound(1), 3.0);
+    Assert.assertEquals(sketch2.getUpperBound(1), 3.0);
+    Assert.assertEquals(sketch2.getRetainedEntries(), 3);
+    Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch2.getTheta(), 1.0);
+    TupleSketchIterator<DoubleSummary> it = sketch2.iterator();
+    int count = 0;
+    while (it.next()) {
+      Assert.assertEquals(it.getSummary().getValue(), 1.0);
+      count++;
+    }
+    Assert.assertEquals(count, 3);
+  }
+
+  @Test
+  public void serializeDeserializeEstimation() throws Exception {
+    UpdatableSketch<Double, DoubleSummary> us =
+        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
+    for (int i = 0; i < 8192; i++) {
+      us.update(i, 1.0);
+    }
+    us.trim();
+    CompactSketch<DoubleSummary> sketch1 = us.compact();
+    byte[] bytes = sketch1.toByteArray();
+
+    // for binary testing
+    //TestUtil.writeBytesToFile(bytes, "CompactSketchWithDoubleSummary4K.sk");
+
+    Sketch<DoubleSummary> sketch2 =
+        Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer());
+    Assert.assertFalse(sketch2.isEmpty());
+    Assert.assertTrue(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate());
+    Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong());
+    TupleSketchIterator<DoubleSummary> it = sketch2.iterator();
+    int count = 0;
+    while (it.next()) {
+      Assert.assertEquals(it.getSummary().getValue(), 1.0);
+      count++;
+    }
+    Assert.assertEquals(count, 4096); //presumably the builder's default nominal entries after trim() - confirm
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void deserializeWrongType() { //a compact image handed to heapifyUpdatableSketch must be rejected
+    UpdatableSketch<Double, DoubleSummary> us =
+        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
+    for (int i = 0; i < 8192; i++) {
+      us.update(i, 1.0);
+    }
+    CompactSketch<DoubleSummary> sketch1 = us.compact();
+    Sketches.heapifyUpdatableSketch(MemorySegment.ofArray(sketch1.toByteArray()),
+        new DoubleSummaryDeserializer(),
+        new DoubleSummaryFactory(mode));
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java
new file mode 100644
index 000000000..6c4fa49ec
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ByteArrayUtil;
+
+/**
+ * Summary for generic tuple sketches of type Integer.
+ * This summary keeps an Integer value.
+ */
+public class IntegerSummary implements UpdatableSummary<Integer> {
+  private int value_;
+
+  /**
+   * Creates an instance of IntegerSummary with a given starting value.
+   * @param value starting value
+   */
+  public IntegerSummary(final int value) {
+    value_ = value;
+  }
+
+  @Override
+  public IntegerSummary update(final Integer value) {
+    value_ += value; //accumulates: the given value is added to the current value
+    return this;
+  }
+
+  @Override
+  public IntegerSummary copy() {
+    return new IntegerSummary(value_);
+  }
+
+  /**
+   * @return current value of the IntegerSummary
+   */
+  public int getValue() {
+    return value_;
+  }
+
+  private static final int SERIALIZED_SIZE_BYTES = 4;
+  private static final int VALUE_INDEX = 0;
+
+  @Override
+  public byte[] toByteArray() {
+    final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES];
+    ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_);
+    return bytes;
+  }
+
+  /**
+   * Creates an instance of the IntegerSummary given a serialized representation.
+   * @param seg MemorySegment object with serialized IntegerSummary
+   * @return DeserializeResult object, which contains an IntegerSummary object and number of bytes
+   * read from the MemorySegment
+   */
+  public static DeserializeResult<IntegerSummary> fromMemory(final MemorySegment seg) {
+    return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX)), SERIALIZED_SIZE_BYTES);
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java
new file mode 100644
index 000000000..877fd1d65
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.foreign.MemorySegment;
+
+public class IntegerSummaryDeserializer implements SummaryDeserializer<IntegerSummary> {
+
+  @Override
+  public DeserializeResult<IntegerSummary> heapifySummary(final MemorySegment seg) {
+    return IntegerSummary.fromMemory(seg); //delegates to the static factory on IntegerSummary
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java
new file mode 100644
index 000000000..6edf88608
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+/**
+ * Factory for IntegerSummary.
+ */
+public class IntegerSummaryFactory implements SummaryFactory<IntegerSummary> {
+
+  @Override
+  public IntegerSummary newSummary() {
+    return new IntegerSummary(0); //every new summary starts at zero
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java
new file mode 100644
index 000000000..a4dab704e
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java
@@ -0,0 +1,457 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import org.apache.datasketches.tuple2.adouble.DoubleSummary;
+import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory;
+import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations;
+import org.testng.annotations.Test;
+
+import org.apache.datasketches.theta2.UpdateSketch;
+import org.apache.datasketches.theta2.UpdateSketchBuilder;
+import static org.apache.datasketches.tuple2.JaccardSimilarity.dissimilarityTest;
+import static org.apache.datasketches.tuple2.JaccardSimilarity.exactlyEqual;
+import static org.apache.datasketches.tuple2.JaccardSimilarity.jaccard;
+import static org.apache.datasketches.tuple2.JaccardSimilarity.similarityTest;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+public class JaccardSimilarityTest {
+  private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum;
+  private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations();
+  private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode);
+  private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder();
+  private final UpdatableSketchBuilder<Double, DoubleSummary> tupleBldr = new UpdatableSketchBuilder<>(factory);
+  private final Double constSummary = 1.0;
+
+  @Test
+  public void checkNullsEmpties1() { // tuple, tuple
+    int minK = 1 << 12;
+    double threshold = 0.95;
+    println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold);
+    //check both null
+    double[] jResults = jaccard(null, null, dsso);
+    boolean state = jResults[1] > threshold;
+    println("null \t null:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(null, null, dsso);
+    assertFalse(state);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(minK).build();
+
+    //check both empty
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] > threshold;
+    println("empty\tempty:\t" + state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertTrue(state);
+
+    state = exactlyEqual(measured, measured, dsso);
+    assertTrue(state);
+
+    //adjust one
+    expected.update(1, constSummary);
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] > threshold;
+    println("empty\t 1:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkNullsEmpties2() { // tuple, theta
+    int minK = 1 << 12;
+    double threshold = 0.95;
+    println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold);
+    //check both null
+    double[] jResults = jaccard(null, null, factory.newSummary(), dsso);
+    boolean state = jResults[1] > threshold;
+    println("null \t null:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(null, null, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();
+
+    //check both empty
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] > threshold;
+    println("empty\tempty:\t" + state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertTrue(state);
+
+    state = exactlyEqual(measured, measured, dsso);
+    assertTrue(state);
+
+    //adjust one
+    expected.update(1);
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] > threshold;
+    println("empty\t 1:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkExactMode1() { // tuple, tuple
+    int k = 1 << 12;
+    int u = k;
+    double threshold = 0.9999;
+    println("Exact Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < (u-1); i++) { //one short
+      measured.update(i, constSummary);
+      expected.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertTrue(state);
+
+    measured.update(u-1, constSummary); //now exactly k entries
+    expected.update(u, constSummary); //now exactly k entries but differs by one
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkExactMode2() { // tuple, theta
+    int k = 1 << 12;
+    int u = k;
+    double threshold = 0.9999;
+    println("Exact Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < (u-1); i++) { //one short
+      measured.update(i, constSummary);
+      expected.update(i);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertTrue(state);
+
+    measured.update(u-1, constSummary); //now exactly k entries
+    expected.update(u); //now exactly k entries but differs by one
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkEstMode1() { // tuple, tuple
+    int k = 1 << 12;
+    int u = 1 << 20;
+    double threshold = 0.9999;
+    println("Estimation Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < u; i++) {
+      measured.update(i, constSummary);
+      expected.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertTrue(state);
+
+    for (int i = u; i < (u + 50); i++) { //empirically determined
+      measured.update(i, constSummary);
+    }
+
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] >= threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkEstMode2() { // tuple, theta
+    int k = 1 << 12;
+    int u = 1 << 20;
+    double threshold = 0.9999;
+    println("Estimation Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < u; i++) {
+      measured.update(i, constSummary);
+      expected.update(i);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertTrue(state);
+
+    for (int i = u; i < (u + 50); i++) { //empirically determined
+      measured.update(i, constSummary);
+    }
+
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] >= threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is pretty tight,
+   * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about
+   * +/- 1.56%.
+   */
+  @Test
+  public void checkSimilarity1() { // tuple, tuple
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.95);
+    double threshold = 0.943;
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i, constSummary);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = similarityTest(measured, expected, dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+    //check identity case
+    state = similarityTest(measured, measured, dsso, threshold);
+    assertTrue(state);
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is pretty tight,
+   * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about
+   * +/- 1.56%.
+   */
+  @Test
+  public void checkSimilarity2() { // tuple, theta
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.95);
+    double threshold = 0.943;
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = similarityTest(measured, expected, factory.newSummary(), dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+    //check identity case
+    state = similarityTest(measured, measured, dsso, threshold);
+    assertTrue(state);
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is much looser,
+   * about +/- 14%. This is due to the fact that intersections lose accuracy as the ratio of
+   * intersection to the union becomes a small number.
+   */
+  @Test
+  public void checkDissimilarity1() { // tuple, tuple
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.05);
+    double threshold = 0.061;
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i, constSummary);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = dissimilarityTest(measured, expected, dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is much looser,
+   * about +/- 14%. This is due to the fact that intersections lose accuracy as the ratio of
+   * intersection to the union becomes a small number.
+   */
+  @Test
+  public void checkDissimilarity2() { // tuple, theta
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.05);
+    double threshold = 0.061;
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = dissimilarityTest(measured, expected, factory.newSummary(), dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+  }
+
+  private static String jaccardString(double[] jResults) {
+    double lb = jResults[0];
+    double est = jResults[1];
+    double ub = jResults[2];
+    return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" + ((ub/est) - 1.0);
+  }
+
+  @Test
+  public void checkMinK1() { // tuple, tuple
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4096
+    final UpdatableSketch<Double, DoubleSummary> skB = tupleBldr.build(); //4096
+    skA.update(1, constSummary);
+    skB.update(1, constSummary);
+    double[] result = jaccard(skA, skB, dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+    for (int i = 1; i < 4096; i++) {
+      skA.update(i, constSummary);
+      skB.update(i, constSummary);
+    }
+    result = jaccard(skA, skB, dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+  }
+
+  @Test
+  public void checkMinK2() { // tuple, theta
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4096
+    final UpdateSketch skB = UpdateSketch.builder().build(); //4096
+    skA.update(1, constSummary);
+    skB.update(1);
+    double[] result = jaccard(skA, skB, factory.newSummary(), dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+    for (int i = 1; i < 4096; i++) {
+      skA.update(i, constSummary);
+      skB.update(i);
+    }
+    result = jaccard(skA, skB, factory.newSummary(), dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+  }
+
+  @Test
+  public void printlnTest() {
+    println("PRINTING: "+this.getClass().getName());
+  }
+
+  /**
+   * @param o value to print
+   */
+  static void println(Object o) {
+    //System.out.println(o.toString()); //disable here
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/MiscTest.java b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java
new file mode 100644
index 000000000..fb353d6a9
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode;
+import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory;
+import org.testng.annotations.Test;
+
+/** Miscellaneous small tests for classes used from the tuple2 package.
+ * @author Lee Rhodes
+ */
+public class MiscTest {
+
+ @Test
+ public void checkUpdatableSketchBuilderReset() { // a builder that was reset() must still build an empty sketch
+ final DoubleSummary.Mode mode = Mode.Sum;
+ final UpdatableSketchBuilder bldr =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode));
+ bldr.reset();
+ final UpdatableSketch sk = bldr.build();
+ assertTrue(sk.isEmpty());
+ }
+
+ @Test
+ public void checkStringToByteArray() { // only checks that an empty string is accepted; the result is ignored
+ Util.stringToByteArray("");
+ }
+
+ @Test
+ public void checkDoubleToLongArray() { // negative zero must yield 0 in element 0
+ final long[] v = Util.doubleToLongArray(-0.0);
+ assertEquals(v[0], 0);
+ }
+
+ //@Test (annotation is commented out, so this method is not run as a test)
+ public void checkById() { // passes every listed id pair to println; asserts nothing
+ final int[] ids = {0,1,2, 5, 6 };
+ final int len = ids.length;
+ for (int i = 0; i < len; i++) {
+ for (int j = 0; j < len; j++) {
+ final int id = ids[i] << 3 | ids[j]; // parsed as (ids[i] << 3) | ids[j]: two octal digits
+ final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
+ final String interResStr = cCase.getIntersectAction().getActionDescription();
+ final String anotbResStr = cCase.getAnotbAction().getActionDescription();
+ println(Integer.toOctalString(id) + "\t" + cCase + "\t" + cCase.getCaseDescription()
+ + "\t" + interResStr + "\t" + anotbResStr);
+ }
+ }
+ }
+
+ @Test
+ public void checkCopyCtor() { // copy() of a one-entry sketch must also retain one entry
+ final DoubleSummary.Mode mode = Mode.Sum;
+ final UpdatableSketchBuilder bldr =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode));
+ bldr.reset();
+ final UpdatableSketch sk = bldr.build();
+ sk.update(1.0, 1.0);
+ assertEquals(sk.getRetainedEntries(), 1);
+ final UpdatableSketch sk2 = sk.copy();
+ assertEquals(sk2.getRetainedEntries(), 1);
+ }
+
+
+ /**
+ * Print helper for these tests; the actual print statement is commented out.
+ * @param o object to print
+ */
+ private static void println(final Object o) {
+ //System.out.println(o.toString()); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java
new file mode 100644
index 000000000..eb45779c6
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import org.apache.datasketches.common.SketchesReadOnlyException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class ReadOnlyMemoryTest {
+
+ @Test
+ public void wrapAndTryUpdatingSketch() { // update() and trim() must EACH throw on a wrapped sketch
+   final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build();
+   sketch1.update(1, new double[] {1});
+   final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch)
+       ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketch1.toByteArray()));
+   Assert.assertEquals(sketch2.getEstimate(), 1.0);
+   sketch2.toByteArray(); // serializing the wrapped sketch must not throw
+   try {
+     sketch2.update(2, new double[] {1});
+     Assert.fail("update() on a wrapped read-only sketch must throw SketchesReadOnlyException");
+   } catch (final SketchesReadOnlyException expected) {
+     // expected; previously one shared flag let the test pass if only one of the two calls threw
+   }
+   try {
+     sketch2.trim();
+     Assert.fail("trim() on a wrapped read-only sketch must throw SketchesReadOnlyException");
+   } catch (final SketchesReadOnlyException expected) {
+     // expected
+   }
+ }
+
+ @Test
+ public void heapifyAndUpdateSketch() { // a heapified copy of the serialized sketch accepts updates
+ final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build();
+ sketch1.update(1, new double[] {1});
+ // downcasting is not recommended, for testing only
+ final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch)
+ ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray()));
+ sketch2.update(2, new double[] {1});
+ Assert.assertEquals(sketch2.getEstimate(), 2.0); // keys 1 and 2
+ }
+
+ @Test
+ public void wrapAndTryUpdatingUnionEstimationMode() { // a wrapped union can be read but union() must throw
+ final int numUniques = 10000;
+ int key = 0;
+ final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build();
+ for (int i = 0; i < numUniques; i++) {
+ sketch1.update(key++, new double[] {1});
+ }
+ final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion();
+ union1.union(sketch1);
+ final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(Memory.wrap(union1.toByteArray()));
+ final ArrayOfDoublesSketch resultSketch = union2.getResult();
+ Assert.assertTrue(resultSketch.isEstimationMode());
+ Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); // within 4%
+
+ // make sure union update actually needs to modify the union
+ final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build();
+ for (int i = 0; i < numUniques; i++) {
+ sketch2.update(key++, new double[] {1}); // key keeps counting up, so these keys differ from sketch1's
+ }
+
+ boolean thrown = false;
+ try {
+ union2.union(sketch2);
+ } catch (final SketchesReadOnlyException e) {
+ thrown = true;
+ }
+ Assert.assertTrue(thrown);
+ }
+
+ @Test
+ public void heapifyAndUpdateUnion() { // a heapified union accepts a further union() call
+ final int numUniques = 10000;
+ int key = 0;
+ final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build();
+ for (int i = 0; i < numUniques; i++) {
+ sketch1.update(key++, new double[] {1});
+ }
+ final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion();
+ union1.union(sketch1);
+ final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(Memory.wrap(union1.toByteArray()));
+ final ArrayOfDoublesSketch resultSketch = union2.getResult();
+ Assert.assertTrue(resultSketch.isEstimationMode());
+ Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); // within 4%
+
+ // make sure union update actually needs to modify the union
+ final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build();
+ for (int i = 0; i < numUniques; i++) {
+ sketch2.update(key++, new double[] {1}); // key keeps counting up, so these keys differ from sketch1's
+ }
+ union2.union(sketch2); // no assertion follows: the test only requires that this does not throw
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java
new file mode 100644
index 000000000..65311b17c
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class SerializerDeserializerTest { // checks of SerializerDeserializer's sketch-type and family validation
+
+ @Test
+ public void validSketchType() { // the type byte holding CompactSketch's ordinal must decode to CompactSketch
+ byte[] bytes = new byte[4];
+ bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal();
+ Assert.assertEquals(SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)), SerializerDeserializer.SketchType.CompactSketch);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void invalidSketchType() { // a type byte of 33 is expected to be rejected
+ byte[] bytes = new byte[4];
+ bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = 33;
+ SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes));
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void validateFamilyNotTuple() { // family id 1 with preamble value 0 is expected to be rejected
+ SerializerDeserializer.validateFamily((byte) 1, (byte) 0);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void validateFamilyWrongPreambleLength() { // the TUPLE family id with preamble value 0 is expected to be rejected
+ SerializerDeserializer.validateFamily((byte) Family.TUPLE.getID(), (byte) 0);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadSeedHash() { // NOTE(review): calls the old tuple package's Util from a tuple2 test - confirm intended
+ org.apache.datasketches.tuple.Util.computeSeedHash(50541);
+ }
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java b/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java
new file mode 100644
index 000000000..196c82510
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES;
+import static org.apache.datasketches.common.TestUtil.CHECK_CPP_HISTORICAL_FILES;
+import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
+import static org.apache.datasketches.common.TestUtil.cppPath;
+import static org.apache.datasketches.common.TestUtil.javaPath;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+import java.io.IOException;
+import java.nio.file.Files;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.TestUtil;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary;
+import org.apache.datasketches.tuple2.adouble.DoubleSummaryDeserializer;
+import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUnion;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class TupleCrossLanguageTest {
+
+ @Test(groups = {CHECK_CPP_HISTORICAL_FILES})
+ public void serialVersion1Compatibility() { // heapifies a stored serial-version-1 sketch resource and checks its contents
+ final byte[] byteArr = TestUtil.getResourceBytes("CompactSketchWithDoubleSummary4K_serialVersion1.sk");
+ Sketch sketch = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new DoubleSummaryDeserializer());
+ Assert.assertTrue(sketch.isEstimationMode());
+ Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99); // NOTE(review): tolerance is 99% of 8192 - confirm intended
+ Assert.assertEquals(sketch.getRetainedEntries(), 4096);
+ int count = 0;
+ TupleSketchIterator it = sketch.iterator();
+ while (it.next()) {
+ Assert.assertEquals(it.getSummary().getValue(), 1.0); // every retained summary must be 1.0
+ count++;
+ }
+ Assert.assertEquals(count, 4096); // the iterator must visit every retained entry
+ }
+
+ @Test(groups = {CHECK_CPP_HISTORICAL_FILES})
+ public void version2Compatibility() { // a stored sketch resource must match a freshly built equivalent
+ final byte[] byteArr = TestUtil.getResourceBytes("TupleWithTestIntegerSummary4kTrimmedSerVer2.sk");
+ Sketch sketch1 = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new IntegerSummaryDeserializer());
+
+ // construct the same way
+ final int lgK = 12;
+ final int K = 1 << lgK; // 4096
+ final UpdatableSketchBuilder builder =
+ new UpdatableSketchBuilder<>(new IntegerSummaryFactory());
+ final UpdatableSketch updatableSketch = builder.build();
+ for (int i = 0; i < 2 * K; i++) { // offer 2 * K distinct keys
+ updatableSketch.update(i, 1);
+ }
+ updatableSketch.trim();
+ Sketch sketch2 = updatableSketch.compact();
+
+ Assert.assertEquals(sketch1.getRetainedEntries(), sketch2.getRetainedEntries());
+ Assert.assertEquals(sketch1.getThetaLong(), sketch2.getThetaLong());
+ Assert.assertEquals(sketch1.isEmpty(), sketch2.isEmpty());
+ Assert.assertEquals(sketch1.isEstimationMode(), sketch2.isEstimationMode());
+ }
+
+ @Test(groups = {CHECK_CPP_FILES})
+ public void deserializeFromCppIntegerSummary() throws IOException { // reads tuple_int_n<n>_cpp.sk under cppPath for each n
+ final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
+ for (int n: nArr) {
+ final byte[] bytes = Files.readAllBytes(cppPath.resolve("tuple_int_n" + n + "_cpp.sk"));
+ final Sketch sketch =
+ Sketches.heapifySketch(MemorySegment.ofArray(bytes), new IntegerSummaryDeserializer());
+ assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); // empty exactly when n == 0
+ assertTrue(n > 1000 ? sketch.isEstimationMode() : !sketch.isEstimationMode()); // estimation mode exactly when n > 1000
+ assertEquals(sketch.getEstimate(), n, n * 0.03); // within 3% of n
+ final TupleSketchIterator it = sketch.iterator();
+ while (it.next()) {
+ assertTrue(it.getHash() < sketch.getThetaLong()); // every retained hash is below theta
+ assertTrue(it.getSummary().getValue() < n); // every summary value is below n
+ }
+ }
+ }
+
+ @Test(groups = {GENERATE_JAVA_FILES})
+ public void generateForCppIntegerSummary() throws IOException { // writes tuple_int_n<n>_java.sk under javaPath for each n
+   final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
+   for (int n: nArr) {
+     final UpdatableSketch sk =
+         new UpdatableSketchBuilder<>(new IntegerSummaryFactory()).build();
+     for (int i = 0; i < n; i++) {
+       sk.update(i, i); // key i carries summary value i
+     }
+     Files.write(javaPath.resolve("tuple_int_n" + n + "_java.sk"), sk.compact().toByteArray()); // Files.write closes the file; the old stream was never closed
+   }
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES})
+ public void noSupportHeapifyV0_9_1() throws Exception { // heapifying the stored v0.9.1 union image must be rejected
+ final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk");
+ ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(byteArr));
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES})
+ public void noSupportWrapV0_9_1() throws Exception { // wrapping the stored v0.9.1 union image must be rejected
+ final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk");
+ ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(byteArr));
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java b/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java
new file mode 100644
index 000000000..8cc8b7975
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+ import static org.testng.Assert.assertEquals;
+
+import org.apache.datasketches.theta2.UpdateSketch;
+import org.apache.datasketches.theta2.UpdateSketchBuilder;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode;
+import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory;
+import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations;
+import org.testng.annotations.Test;
+
+ /**
+ * Tests for Version 2.0.0
+ * @author Lee Rhodes
+ */
+ public class TupleExamples2Test {
+ private final DoubleSummary.Mode umode = Mode.Sum;
+ private final DoubleSummary.Mode imode = Mode.AlwaysOne;
+ private final DoubleSummarySetOperations dsso0 = new DoubleSummarySetOperations();
+ private final DoubleSummarySetOperations dsso1 = new DoubleSummarySetOperations(umode);
+ private final DoubleSummarySetOperations dsso2 = new DoubleSummarySetOperations(umode, imode);
+ private final DoubleSummaryFactory ufactory = new DoubleSummaryFactory(umode);
+ private final DoubleSummaryFactory ifactory = new DoubleSummaryFactory(imode);
+ private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder();
+ private final UpdatableSketchBuilder tupleBldr =
+ new UpdatableSketchBuilder<>(ufactory);
+
+
+ @Test
+ public void example1() { // stateful: tuple, theta, use dsso2
+ //Load source sketches
+ final UpdatableSketch tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) { //tuple keys 1..12, theta keys 4..15: 9 keys in common
+ tupleSk.update(i, 1.0);
+ thetaSk.update(i + 3);
+ }
+
+ //Union
+ final Union union = new Union<>(dsso2);
+ union.union(tupleSk);
+ union.union(thetaSk, ufactory.newSummary().update(1.0)); //this summary is supplied for the theta entries
+ final CompactSketch ucsk = union.getResult();
+ int entries = ucsk.getRetainedEntries();
+ println("Union Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = (int)uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection
+ final Intersection inter = new Intersection<>(dsso2);
+ inter.intersect(tupleSk);
+ inter.intersect(thetaSk, ifactory.newSummary().update(1.0));
+ final CompactSketch icsk = inter.getResult();
+ entries = icsk.getRetainedEntries();
+ println("Intersection Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = (int)iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 1
+ assertEquals(i, 1);
+ }
+ }
+
+ @Test
+ public void example2() { //stateless: tuple1, tuple2, use dsso2
+ //Load source sketches
+ final UpdatableSketch tupleSk1 = tupleBldr.build();
+ final UpdatableSketch tupleSk2 = tupleBldr.build();
+
+ for (int i = 1; i <= 12; i++) { //tupleSk1 keys 1..12, tupleSk2 keys 4..15: 9 keys in common
+ tupleSk1.update(i, 1.0);
+ tupleSk2.update(i + 3, 1.0);
+ }
+
+ //Union
+ final Union union = new Union<>(dsso2);
+ final CompactSketch ucsk = union.union(tupleSk1, tupleSk2);
+ int entries = ucsk.getRetainedEntries();
+ println("Union: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = (int)uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection
+ final Intersection inter = new Intersection<>(dsso2);
+ final CompactSketch icsk = inter.intersect(tupleSk1, tupleSk2);
+ entries = icsk.getRetainedEntries();
+ println("Intersection: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = (int)iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 1
+ assertEquals(i, 1);
+ }
+ }
+
+ @Test
+ public void example3() { //stateless: tuple, theta, use dsso2
+ //Load source sketches
+ final UpdatableSketch tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) { //tuple keys 1..12, theta keys 4..15: 9 keys in common
+ tupleSk.update(i, 1.0);
+ thetaSk.update(i + 3);
+ }
+
+ //Union
+ final Union union = new Union<>(dsso2);
+ final CompactSketch ucsk =
+ union.union(tupleSk, thetaSk, ufactory.newSummary().update(1.0));
+ int entries = ucsk.getRetainedEntries();
+ println("Union: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = (int)uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection
+ final Intersection inter = new Intersection<>(dsso2);
+ final CompactSketch icsk =
+ inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1.0));
+ entries = icsk.getRetainedEntries();
+ println("Intersection: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = (int)iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 1
+ assertEquals(i, 1);
+ }
+ }
+
+ @Test
+ public void example4() { //stateful: tuple, theta, Mode=sum for both, use dsso0
+ //Load source sketches
+ final UpdatableSketch tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) { //tuple keys 1..12, theta keys 4..15: 9 keys in common
+ tupleSk.update(i, 1.0);
+ thetaSk.update(i + 3);
+ }
+
+ //Union
+ final Union union = new Union<>(dsso0);
+ union.union(tupleSk);
+ union.union(thetaSk, ufactory.newSummary().update(1.0));
+ final CompactSketch ucsk = union.getResult();
+ int entries = ucsk.getRetainedEntries();
+ println("Union Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = (int)uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection
+ final Intersection inter = new Intersection<>(dsso0);
+ inter.intersect(tupleSk);
+ inter.intersect(thetaSk, ifactory.newSummary().update(1.0));
+ final CompactSketch icsk = inter.getResult();
+ entries = icsk.getRetainedEntries();
+ println("Intersection Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = (int)iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2
+ assertEquals(i, 2);
+ }
+ }
+
+ @Test
+ public void example5() { //stateful, tuple, theta, Mode=sum for both, use dsso1
+ //Load source sketches
+ final UpdatableSketch tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) { //tuple keys 1..12, theta keys 4..15: 9 keys in common
+ tupleSk.update(i, 1.0);
+ thetaSk.update(i + 3);
+ }
+
+ //Union
+ final Union union = new Union<>(dsso1);
+ union.union(tupleSk);
+ union.union(thetaSk, ufactory.newSummary().update(1.0));
+ final CompactSketch ucsk = union.getResult();
+ int entries = ucsk.getRetainedEntries();
+ println("Union Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = (int)uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection
+ final Intersection inter = new Intersection<>(dsso1);
+ inter.intersect(tupleSk);
+ inter.intersect(thetaSk, ifactory.newSummary().update(1.0));
+ final CompactSketch icsk = inter.getResult();
+ entries = icsk.getRetainedEntries();
+ println("Intersection Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = (int)iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2
+ assertEquals(i, 2);
+ }
+ }
+
+
+ @Test
+ public void printlnTest() { // exercises the println helper of this test class
+   println("PRINTING: ".concat(getClass().getName()));
+ }
+
+ /**
+ * @param s value to print (the print statement below is commented out)
+ */
+ static void println(final String s) {
+ //System.out.println(s); //enable/disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java b/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java
new file mode 100644
index 000000000..a1409ac37
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2;
+
+import static org.testng.Assert.assertEquals;
+
+import org.apache.datasketches.theta2.UpdateSketch;
+import org.apache.datasketches.theta2.UpdateSketchBuilder;
+import org.apache.datasketches.tuple2.aninteger.IntegerSummary;
+import org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode;
+import org.apache.datasketches.tuple2.aninteger.IntegerSummaryFactory;
+import org.apache.datasketches.tuple2.aninteger.IntegerSummarySetOperations;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for Version 2.0.0
+ * @author Lee Rhodes
+ */
+public class TupleExamplesTest {
+ private final IntegerSummary.Mode umode = Mode.Sum;
+ private final IntegerSummary.Mode imode = Mode.AlwaysOne;
+ private final IntegerSummarySetOperations isso = new IntegerSummarySetOperations(umode, imode);
+ private final IntegerSummaryFactory ufactory = new IntegerSummaryFactory(umode);
+ private final IntegerSummaryFactory ifactory = new IntegerSummaryFactory(imode);
+ private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder();
+ private final UpdatableSketchBuilder tupleBldr =
+ new UpdatableSketchBuilder<>(ufactory);
+
+
+ @Test
+ public void example1() { //stateful union and intersection of a tuple and a theta sketch
+ //Load source sketches
+ final UpdatableSketch tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) { //tuple keys 1..12, theta keys 4..15: 9 keys in common
+ tupleSk.update(i, 1);
+ thetaSk.update(i + 3);
+ }
+
+ //Union stateful: tuple, theta
+ final Union union = new Union<>(isso);
+ union.union(tupleSk);
+ union.union(thetaSk, ufactory.newSummary().update(1));
+ final CompactSketch ucsk = union.getResult();
+ int entries = ucsk.getRetainedEntries();
+ println("Union Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection stateful: tuple, theta
+ final Intersection inter = new Intersection<>(isso);
+ inter.intersect(tupleSk);
+ inter.intersect(thetaSk, ifactory.newSummary().update(1));
+ final CompactSketch icsk = inter.getResult();
+ entries = icsk.getRetainedEntries();
+ println("Intersection Stateful: tuple, theta: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 1
+ assertEquals(i, 1);
+ }
+ }
+
+ @Test
+ public void example2() { //stateless union and intersection of two tuple sketches
+ //Load source sketches
+ final UpdatableSketch tupleSk1 = tupleBldr.build();
+ final UpdatableSketch tupleSk2 = tupleBldr.build();
+
+ for (int i = 1; i <= 12; i++) { //tupleSk1 keys 1..12, tupleSk2 keys 4..15: 9 keys in common
+ tupleSk1.update(i, 1);
+ tupleSk2.update(i + 3, 1);
+ }
+
+ //Union, stateless: tuple1, tuple2
+ final Union union = new Union<>(isso);
+ final CompactSketch ucsk = union.union(tupleSk1, tupleSk2);
+ int entries = ucsk.getRetainedEntries();
+ println("Union: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection stateless: tuple1, tuple2
+ final Intersection inter = new Intersection<>(isso);
+ final CompactSketch icsk = inter.intersect(tupleSk1, tupleSk2);
+ entries = icsk.getRetainedEntries();
+ println("Intersection: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 1
+ assertEquals(i, 1);
+ }
+ }
+
+ @Test
+ public void example3() { //stateless union and intersection of a tuple and a theta sketch
+ //Load source sketches
+ final UpdatableSketch tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) { //tuple keys 1..12, theta keys 4..15: 9 keys in common
+ tupleSk.update(i, 1);
+ thetaSk.update(i + 3);
+ }
+
+ //Union, stateless: tuple, theta
+ final Union union = new Union<>(isso);
+ final CompactSketch ucsk =
+ union.union(tupleSk, thetaSk, ufactory.newSummary().update(1));
+ int entries = ucsk.getRetainedEntries();
+ println("Union: " + entries);
+ final TupleSketchIterator uiter = ucsk.iterator();
+ int counter = 1;
+ int twos = 0;
+ int ones = 0;
+ while (uiter.next()) {
+ final int i = uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1
+ if (i == 1) { ones++; }
+ if (i == 2) { twos++; }
+ }
+ assertEquals(ones, 6);
+ assertEquals(twos, 9);
+
+ //Intersection stateless: tuple, theta
+ final Intersection inter = new Intersection<>(isso);
+ final CompactSketch icsk =
+ inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1));
+ entries = icsk.getRetainedEntries();
+ println("Intersection: " + entries);
+ final TupleSketchIterator iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //9 entries = 1
+ assertEquals(i, 1);
+ }
+ }
+
+ @Test
+ public void printlnTest() { // exercises the println helper of this test class
+   println("PRINTING: ".concat(getClass().getName()));
+ }
+
+ /**
+ * @param s value to print (the print statement below is commented out)
+ */
+ static void println(final String s) {
+ //System.out.println(s); //enable/disable here
+ }
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java
new file mode 100644
index 000000000..8951c528c
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.adouble;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.theta2.UpdateSketch;
+import org.apache.datasketches.theta2.UpdateSketchBuilder;
+import org.apache.datasketches.tuple2.AnotB;
+import org.apache.datasketches.tuple2.CompactSketch;
+import org.apache.datasketches.tuple2.Sketch;
+import org.apache.datasketches.tuple2.TupleSketchIterator;
+import org.apache.datasketches.tuple2.UpdatableSketch;
+import org.apache.datasketches.tuple2.UpdatableSketchBuilder;
+import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class AdoubleAnotBTest {
+ private static final DoubleSummary.Mode mode = Mode.Sum;
+ private final Results results = new Results();
+
+ private static void threeMethodsWithTheta(
+ final AnotB aNotB,
+ final Sketch skA,
+ final Sketch